Line data Source code
1 : /* SPDX-License-Identifier: GPL-2.0 */
2 : #ifndef _ASM_X86_BITOPS_H
3 : #define _ASM_X86_BITOPS_H
4 :
5 : /*
6 : * Copyright 1992, Linus Torvalds.
7 : *
8 : * Note: inlines with more than a single statement should be marked
9 : * __always_inline to avoid problems with older gcc's inlining heuristics.
10 : */
11 :
12 : #ifndef _LINUX_BITOPS_H
13 : #error only <linux/bitops.h> can be included directly
14 : #endif
15 :
16 : #include <linux/compiler.h>
17 : #include <asm/alternative.h>
18 : #include <asm/rmwcc.h>
19 : #include <asm/barrier.h>
20 :
21 : #if BITS_PER_LONG == 32
22 : # define _BITOPS_LONG_SHIFT 5
23 : #elif BITS_PER_LONG == 64
24 : # define _BITOPS_LONG_SHIFT 6
25 : #else
26 : # error "Unexpected BITS_PER_LONG"
27 : #endif
28 :
/*
 * BIT_64(n) - U64_C(1) shifted left by @n, i.e. a mask with only bit @n set.
 * NOTE(review): assumes U64_C() yields a 64-bit unsigned constant, as its
 * name suggests - it is defined outside this header.
 */
#define BIT_64(n)			(U64_C(1) << (n))
30 :
31 : /*
32 : * These have to be done with inline assembly: that way the bit-setting
33 : * is guaranteed to be atomic. All bit operations return 0 if the bit
34 : * was cleared before the operation and != 0 if it was not.
35 : *
36 : * bit 0 is the LSB of addr; bit BITS_PER_LONG (32 or 64) is the LSB of (addr+1).
37 : */
38 :
/*
 * Inline-asm memory operands:
 *   RLONG_ADDR(x) - input ("m") operand viewing @x as a volatile long.
 *   WBYTE_ADDR(x) - read-write ("+m") operand viewing @x as a volatile char.
 */
#define RLONG_ADDR(x)			 "m" (*(volatile long *) (x))
#define WBYTE_ADDR(x)			"+m" (*(volatile char *) (x))

/* Shorthand usable wherever the pointer variable is literally named 'addr'. */
#define ADDR				RLONG_ADDR(addr)

/*
 * We do the locked ops that don't return the old value as
 * a mask operation on a byte:
 *   CONST_MASK_ADDR(nr, addr) - the byte, (nr >> 3) bytes past @addr, that
 *                               holds bit @nr.
 *   CONST_MASK(nr)            - the mask of bit @nr inside that byte.
 */
#define CONST_MASK_ADDR(nr, addr)	WBYTE_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr)			(1 << ((nr) & 7))
50 :
51 : static __always_inline void
52 : arch_set_bit(long nr, volatile unsigned long *addr)
53 : {
54 622 : if (__builtin_constant_p(nr)) {
55 540 : asm volatile(LOCK_PREFIX "orb %b1,%0"
56 0 : : CONST_MASK_ADDR(nr, addr)
57 1 : : "iq" (CONST_MASK(nr))
58 : : "memory");
59 : } else {
60 621 : asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
61 : : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
62 : }
63 : }
64 :
65 : static __always_inline void
66 : arch___set_bit(unsigned long nr, volatile unsigned long *addr)
67 : {
68 12061 : asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
69 : }
70 :
71 : static __always_inline void
72 : arch_clear_bit(long nr, volatile unsigned long *addr)
73 : {
74 0 : if (__builtin_constant_p(nr)) {
75 2073 : asm volatile(LOCK_PREFIX "andb %b1,%0"
76 175 : : CONST_MASK_ADDR(nr, addr)
77 0 : : "iq" (~CONST_MASK(nr)));
78 : } else {
79 0 : asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
80 : : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
81 : }
82 : }
83 :
84 : static __always_inline void
85 : arch_clear_bit_unlock(long nr, volatile unsigned long *addr)
86 : {
87 0 : barrier();
88 0 : arch_clear_bit(nr, addr);
89 : }
90 :
91 : static __always_inline void
92 : arch___clear_bit(unsigned long nr, volatile unsigned long *addr)
93 : {
94 264032 : asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
95 : }
96 :
97 : static __always_inline bool
98 : arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
99 : {
100 : bool negative;
101 0 : asm volatile(LOCK_PREFIX "andb %2,%1"
102 : CC_SET(s)
103 : : CC_OUT(s) (negative), WBYTE_ADDR(addr)
104 : : "ir" ((char) ~(1 << nr)) : "memory");
105 : return negative;
106 : }
107 : #define arch_clear_bit_unlock_is_negative_byte \
108 : arch_clear_bit_unlock_is_negative_byte
109 :
110 : static __always_inline void
111 : arch___clear_bit_unlock(long nr, volatile unsigned long *addr)
112 : {
113 : arch___clear_bit(nr, addr);
114 : }
115 :
116 : static __always_inline void
117 : arch___change_bit(unsigned long nr, volatile unsigned long *addr)
118 : {
119 0 : asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
120 : }
121 :
122 : static __always_inline void
123 : arch_change_bit(long nr, volatile unsigned long *addr)
124 : {
125 : if (__builtin_constant_p(nr)) {
126 : asm volatile(LOCK_PREFIX "xorb %b1,%0"
127 : : CONST_MASK_ADDR(nr, addr)
128 : : "iq" (CONST_MASK(nr)));
129 : } else {
130 : asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
131 : : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
132 : }
133 : }
134 :
135 : static __always_inline bool
136 : arch_test_and_set_bit(long nr, volatile unsigned long *addr)
137 : {
138 39 : return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr);
139 : }
140 :
141 : static __always_inline bool
142 : arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr)
143 : {
144 0 : return arch_test_and_set_bit(nr, addr);
145 : }
146 :
147 : static __always_inline bool
148 : arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
149 : {
150 : bool oldbit;
151 :
152 0 : asm(__ASM_SIZE(bts) " %2,%1"
153 : CC_SET(c)
154 : : CC_OUT(c) (oldbit)
155 : : ADDR, "Ir" (nr) : "memory");
156 : return oldbit;
157 : }
158 :
159 : static __always_inline bool
160 : arch_test_and_clear_bit(long nr, volatile unsigned long *addr)
161 : {
162 0 : return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr);
163 : }
164 :
165 : /*
166 : * Note: the operation is performed atomically with respect to
167 : * the local CPU, but not other CPUs. Portable code should not
168 : * rely on this behaviour.
169 : * KVM relies on this behaviour on x86 for modifying memory that is also
170 : * accessed from a hypervisor on the same CPU if running in a VM: don't change
171 : * this without also updating arch/x86/kernel/kvm.c
172 : */
173 : static __always_inline bool
174 : arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
175 : {
176 : bool oldbit;
177 :
178 1 : asm volatile(__ASM_SIZE(btr) " %2,%1"
179 : CC_SET(c)
180 : : CC_OUT(c) (oldbit)
181 : : ADDR, "Ir" (nr) : "memory");
182 : return oldbit;
183 : }
184 :
185 : static __always_inline bool
186 : arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
187 : {
188 : bool oldbit;
189 :
190 : asm volatile(__ASM_SIZE(btc) " %2,%1"
191 : CC_SET(c)
192 : : CC_OUT(c) (oldbit)
193 : : ADDR, "Ir" (nr) : "memory");
194 :
195 : return oldbit;
196 : }
197 :
198 : static __always_inline bool
199 : arch_test_and_change_bit(long nr, volatile unsigned long *addr)
200 : {
201 : return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
202 : }
203 :
204 : static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
205 : {
206 4141 : return ((1UL << (nr & (BITS_PER_LONG-1))) &
207 20540 : (addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
208 : }
209 :
210 : static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr)
211 : {
212 : bool oldbit;
213 :
214 0 : asm volatile("testb %2,%1"
215 : CC_SET(nz)
216 : : CC_OUT(nz) (oldbit)
217 0 : : "m" (((unsigned char *)addr)[nr >> 3]),
218 0 : "i" (1 << (nr & 7))
219 : :"memory");
220 :
221 : return oldbit;
222 : }
223 :
224 : static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr)
225 : {
226 : bool oldbit;
227 :
228 40600 : asm volatile(__ASM_SIZE(bt) " %2,%1"
229 : CC_SET(c)
230 : : CC_OUT(c) (oldbit)
231 : : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
232 :
233 : return oldbit;
234 : }
235 :
236 : static __always_inline bool
237 : arch_test_bit(unsigned long nr, const volatile unsigned long *addr)
238 : {
239 73398 : return __builtin_constant_p(nr) ? constant_test_bit(nr, addr) :
240 81200 : variable_test_bit(nr, addr);
241 : }
242 :
243 : static __always_inline bool
244 : arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
245 : {
246 0 : return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) :
247 0 : variable_test_bit(nr, addr);
248 : }
249 :
250 : static __always_inline unsigned long variable__ffs(unsigned long word)
251 : {
252 1554 : asm("rep; bsf %1,%0"
253 : : "=r" (word)
254 : : "rm" (word));
255 : return word;
256 : }
257 :
/**
 * __ffs - find first set bit in word
 * @word: The word to search
 *
 * Compile-time constants are folded through __builtin_ctzl(); everything
 * else is handed to variable__ffs().
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
#define __ffs(word)					\
	(__builtin_constant_p(word) ?			\
	 (unsigned long)__builtin_ctzl(word) :		\
	 variable__ffs(word))
268 :
269 : static __always_inline unsigned long variable_ffz(unsigned long word)
270 : {
271 92 : asm("rep; bsf %1,%0"
272 : : "=r" (word)
273 92 : : "r" (~word));
274 : return word;
275 : }
276 :
/**
 * ffz - find first zero bit in word
 * @word: The word to search
 *
 * Compile-time constants are folded through __builtin_ctzl() on the
 * complement of @word; everything else is handed to variable_ffz().
 * The argument is parenthesized before being complemented so that an
 * expression such as ffz(a | b) inverts the whole value rather than only
 * its first operand.
 *
 * Undefined if no zero exists, so code should check against ~0UL first.
 */
#define ffz(word)					\
	(__builtin_constant_p(word) ?			\
	 (unsigned long)__builtin_ctzl(~(word)) :	\
	 variable_ffz(word))
287 :
288 : /*
289 : * __fls: find last set bit in word
290 : * @word: The word to search
291 : *
292 : * Undefined if no set bit exists, so code should check against 0 first.
293 : */
294 : static __always_inline unsigned long __fls(unsigned long word)
295 : {
296 17 : asm("bsr %1,%0"
297 : : "=r" (word)
298 : : "rm" (word));
299 : return word;
300 : }
301 :
302 : #undef ADDR
303 :
304 : #ifdef __KERNEL__
305 : static __always_inline int variable_ffs(int x)
306 : {
307 : int r;
308 :
309 : #ifdef CONFIG_X86_64
310 : /*
311 : * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
312 : * dest reg is undefined if x==0, but their CPU architect says its
313 : * value is written to set it to the same as before, except that the
314 : * top 32 bits will be cleared.
315 : *
316 : * We cannot do this on 32 bits because at the very least some
317 : * 486 CPUs did not behave this way.
318 : */
319 641 : asm("bsfl %1,%0"
320 : : "=r" (r)
321 : : "rm" (x), "0" (-1));
322 : #elif defined(CONFIG_X86_CMOV)
323 : asm("bsfl %1,%0\n\t"
324 : "cmovzl %2,%0"
325 : : "=&r" (r) : "rm" (x), "r" (-1));
326 : #else
327 : asm("bsfl %1,%0\n\t"
328 : "jnz 1f\n\t"
329 : "movl $-1,%0\n"
330 : "1:" : "=r" (r) : "rm" (x));
331 : #endif
332 641 : return r + 1;
333 : }
334 :
/**
 * ffs - find first set bit in word
 * @x: the word to search
 *
 * This is defined the same way as the libc and compiler builtin ffs
 * routines, therefore differs in spirit from the other bitops.
 *
 * ffs(value) returns 0 if value is 0 or the position of the first
 * set bit if value is nonzero. The first (least significant) bit
 * is at position 1.
 *
 * Compile-time constants are folded through __builtin_ffs(); everything
 * else is handed to variable_ffs().
 */
#define ffs(x)						\
	(__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x))
347 :
348 : /**
349 : * fls - find last set bit in word
350 : * @x: the word to search
351 : *
352 : * This is defined in a similar way as the libc and compiler builtin
353 : * ffs, but returns the position of the most significant set bit.
354 : *
355 : * fls(value) returns 0 if value is 0 or the position of the last
356 : * set bit if value is nonzero. The last (most significant) bit is
357 : * at position 32.
358 : */
359 : static __always_inline int fls(unsigned int x)
360 : {
361 : int r;
362 :
363 : #ifdef CONFIG_X86_64
364 : /*
365 : * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
366 : * dest reg is undefined if x==0, but their CPU architect says its
367 : * value is written to set it to the same as before, except that the
368 : * top 32 bits will be cleared.
369 : *
370 : * We cannot do this on 32 bits because at the very least some
371 : * 486 CPUs did not behave this way.
372 : */
373 1118 : asm("bsrl %1,%0"
374 : : "=r" (r)
375 : : "rm" (x), "0" (-1));
376 : #elif defined(CONFIG_X86_CMOV)
377 : asm("bsrl %1,%0\n\t"
378 : "cmovzl %2,%0"
379 : : "=&r" (r) : "rm" (x), "rm" (-1));
380 : #else
381 : asm("bsrl %1,%0\n\t"
382 : "jnz 1f\n\t"
383 : "movl $-1,%0\n"
384 : "1:" : "=r" (r) : "rm" (x));
385 : #endif
386 256 : return r + 1;
387 : }
388 :
389 : /**
390 : * fls64 - find last set bit in a 64-bit word
391 : * @x: the word to search
392 : *
393 : * This is defined in a similar way as the libc and compiler builtin
394 : * ffsll, but returns the position of the most significant set bit.
395 : *
396 : * fls64(value) returns 0 if value is 0 or the position of the last
397 : * set bit if value is nonzero. The last (most significant) bit is
398 : * at position 64.
399 : */
400 : #ifdef CONFIG_X86_64
401 : static __always_inline int fls64(__u64 x)
402 : {
403 148 : int bitpos = -1;
404 : /*
405 : * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
406 : * dest reg is undefined if x==0, but their CPU architect says its
407 : * value is written to set it to the same as before.
408 : */
409 148 : asm("bsrq %1,%q0"
410 : : "+r" (bitpos)
411 : : "rm" (x));
412 126 : return bitpos + 1;
413 : }
414 : #else
415 : #include <asm-generic/bitops/fls64.h>
416 : #endif
417 :
418 : #include <asm-generic/bitops/sched.h>
419 :
420 : #include <asm/arch_hweight.h>
421 :
422 : #include <asm-generic/bitops/const_hweight.h>
423 :
424 : #include <asm-generic/bitops/instrumented-atomic.h>
425 : #include <asm-generic/bitops/instrumented-non-atomic.h>
426 : #include <asm-generic/bitops/instrumented-lock.h>
427 :
428 : #include <asm-generic/bitops/le.h>
429 :
430 : #include <asm-generic/bitops/ext2-atomic-setbit.h>
431 :
432 : #endif /* __KERNEL__ */
433 : #endif /* _ASM_X86_BITOPS_H */
|