/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <stdint.h>
#include <tls.h>		/* For tcbhead_t.  */
#include <libc-internal.h>

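/* Fixed-size and "fast" integer type aliases, presumably consumed by the
   generic atomic framework (include/atomic.h) when dispatching on the
   width of the operand.  */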
typedef int8_t atomic8_t;
typedef uint8_t uatomic8_t;
typedef int_fast8_t atomic_fast8_t;
typedef uint_fast8_t uatomic_fast8_t;

typedef int16_t atomic16_t;
typedef uint16_t uatomic16_t;
typedef int_fast16_t atomic_fast16_t;
typedef uint_fast16_t uatomic_fast16_t;

typedef int32_t atomic32_t;
typedef uint32_t uatomic32_t;
typedef int_fast32_t atomic_fast32_t;
typedef uint_fast32_t uatomic_fast32_t;

typedef int64_t atomic64_t;
typedef uint64_t uatomic64_t;
typedef int_fast64_t atomic_fast64_t;
typedef uint_fast64_t uatomic_fast64_t;

typedef intptr_t atomicptr_t;
typedef uintptr_t uatomicptr_t;
typedef intmax_t atomic_max_t;
typedef uintmax_t uatomic_max_t;

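/* On uniprocessor (UP) configurations no bus lock is needed, so
   LOCK_PREFIX expands to nothing; otherwise it emits the "lock"
   prefix in front of the locked instructions below.  */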
#ifndef LOCK_PREFIX
# ifdef UP
#  define LOCK_PREFIX	/* nothing */
# else
#  define LOCK_PREFIX "lock;"
# endif
#endif

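/* 64-bit atomic operations are available on x86_64, and the generic
   atomic.h layer may implement the remaining operations with the
   compiler's atomic builtins.  */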
#define __HAVE_64B_ATOMICS 1
#define USE_ATOMIC_COMPILER_BUILTINS 1

#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
  __sync_val_compare_and_swap (mem, oldval, newval)
#define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
  (! __sync_bool_compare_and_swap (mem, oldval, newval))
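/* Illustrative sketch only, not part of this file's interface: a typical
   caller retries the CAS until the value it read is still current.  The
   names "counter" and "add_relaxed_example" are hypothetical.

     static int counter;

     static void
     add_relaxed_example (int delta)
     {
       int old_val, new_val;
       do
         {
           old_val = counter;
           new_val = old_val + delta;
         }
       while (atomic_compare_and_exchange_bool_acq (&counter, new_val,
                                                    old_val));
     }

   atomic_compare_and_exchange_bool_acq evaluates to zero iff the exchange
   succeeded, so the loop exits once no other thread changed COUNTER
   between the read and the CAS.  */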


#define __arch_c_compare_and_exchange_val_8_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret; \
     __asm __volatile ("cmpl $0, %%fs:%P5\n\t" \
                       "je 0f\n\t" \
                       "lock\n" \
                       "0:\tcmpxchgb %b2, %1" \
                       : "=a" (ret), "=m" (*mem) \
                       : "q" (newval), "m" (*mem), "0" (oldval), \
                         "i" (offsetof (tcbhead_t, multiple_threads))); \
     ret; })

#define __arch_c_compare_and_exchange_val_16_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret; \
     __asm __volatile ("cmpl $0, %%fs:%P5\n\t" \
                       "je 0f\n\t" \
                       "lock\n" \
                       "0:\tcmpxchgw %w2, %1" \
                       : "=a" (ret), "=m" (*mem) \
                       : "q" (newval), "m" (*mem), "0" (oldval), \
                         "i" (offsetof (tcbhead_t, multiple_threads))); \
     ret; })

#define __arch_c_compare_and_exchange_val_32_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret; \
     __asm __volatile ("cmpl $0, %%fs:%P5\n\t" \
                       "je 0f\n\t" \
                       "lock\n" \
                       "0:\tcmpxchgl %2, %1" \
                       : "=a" (ret), "=m" (*mem) \
                       : "q" (newval), "m" (*mem), "0" (oldval), \
                         "i" (offsetof (tcbhead_t, multiple_threads))); \
     ret; })

#define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \
  ({ __typeof (*mem) ret; \
     __asm __volatile ("cmpl $0, %%fs:%P5\n\t" \
                       "je 0f\n\t" \
                       "lock\n" \
                       "0:\tcmpxchgq %q2, %1" \
                       : "=a" (ret), "=m" (*mem) \
                       : "q" ((atomic64_t) cast_to_integer (newval)), \
                         "m" (*mem), \
                         "0" ((atomic64_t) cast_to_integer (oldval)), \
                         "i" (offsetof (tcbhead_t, multiple_threads))); \
     ret; })
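/* The __arch_c_* variants above, like the catomic_* operations defined
   further down, first test the multiple_threads field of the TCB (reached
   through the %fs segment register on x86_64).  While that flag is still
   zero, i.e. the process is single-threaded, the "je" skips the "lock"
   prefix and the cost of a locked instruction is avoided; otherwise they
   behave like the plain atomic operations.  */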


/* Note that we need no lock prefix here: XCHG with a memory operand
   is implicitly locked.  */
#define atomic_exchange_acq(mem, newvalue) \
  ({ __typeof (*mem) result; \
     if (sizeof (*mem) == 1) \
       __asm __volatile ("xchgb %b0, %1" \
                         : "=q" (result), "=m" (*mem) \
                         : "0" (newvalue), "m" (*mem)); \
     else if (sizeof (*mem) == 2) \
       __asm __volatile ("xchgw %w0, %1" \
                         : "=r" (result), "=m" (*mem) \
                         : "0" (newvalue), "m" (*mem)); \
     else if (sizeof (*mem) == 4) \
       __asm __volatile ("xchgl %0, %1" \
                         : "=r" (result), "=m" (*mem) \
                         : "0" (newvalue), "m" (*mem)); \
     else \
       __asm __volatile ("xchgq %q0, %1" \
                         : "=r" (result), "=m" (*mem) \
                         : "0" ((atomic64_t) cast_to_integer (newvalue)), \
                           "m" (*mem)); \
     result; })


#define __arch_exchange_and_add_body(lock, mem, value) \
  ({ __typeof (*mem) result; \
     if (sizeof (*mem) == 1) \
       __asm __volatile (lock "xaddb %b0, %1" \
                         : "=q" (result), "=m" (*mem) \
                         : "0" (value), "m" (*mem), \
                           "i" (offsetof (tcbhead_t, multiple_threads))); \
     else if (sizeof (*mem) == 2) \
       __asm __volatile (lock "xaddw %w0, %1" \
                         : "=r" (result), "=m" (*mem) \
                         : "0" (value), "m" (*mem), \
                           "i" (offsetof (tcbhead_t, multiple_threads))); \
     else if (sizeof (*mem) == 4) \
       __asm __volatile (lock "xaddl %0, %1" \
                         : "=r" (result), "=m" (*mem) \
                         : "0" (value), "m" (*mem), \
                           "i" (offsetof (tcbhead_t, multiple_threads))); \
     else \
       __asm __volatile (lock "xaddq %q0, %1" \
                         : "=r" (result), "=m" (*mem) \
                         : "0" ((atomic64_t) cast_to_integer (value)), \
                           "m" (*mem), \
                           "i" (offsetof (tcbhead_t, multiple_threads))); \
     result; })

#define atomic_exchange_and_add(mem, value) \
  __sync_fetch_and_add (mem, value)

#define __arch_exchange_and_add_cprefix \
  "cmpl $0, %%fs:%P4\n\tje 0f\n\tlock\n0:\t"
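/* In the *_cprefix strings the "%P4", "%P3" and "%P2" operand references
   name the "i" (offsetof (tcbhead_t, multiple_threads)) input of the
   corresponding asm body; the number differs because each body has a
   different operand count.  */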

#define catomic_exchange_and_add(mem, value) \
  __arch_exchange_and_add_body (__arch_exchange_and_add_cprefix, mem, value)


#define __arch_add_body(lock, pfx, mem, value) \
  do { \
    if (__builtin_constant_p (value) && (value) == 1) \
      pfx##_increment (mem); \
    else if (__builtin_constant_p (value) && (value) == -1) \
      pfx##_decrement (mem); \
    else if (sizeof (*mem) == 1) \
      __asm __volatile (lock "addb %b1, %0" \
                        : "=m" (*mem) \
                        : "iq" (value), "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else if (sizeof (*mem) == 2) \
      __asm __volatile (lock "addw %w1, %0" \
                        : "=m" (*mem) \
                        : "ir" (value), "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else if (sizeof (*mem) == 4) \
      __asm __volatile (lock "addl %1, %0" \
                        : "=m" (*mem) \
                        : "ir" (value), "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else \
      __asm __volatile (lock "addq %q1, %0" \
                        : "=m" (*mem) \
                        : "ir" ((atomic64_t) cast_to_integer (value)), \
                          "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
  } while (0)

#define atomic_add(mem, value) \
  __arch_add_body (LOCK_PREFIX, atomic, mem, value)

#define __arch_add_cprefix \
  "cmpl $0, %%fs:%P3\n\tje 0f\n\tlock\n0:\t"

#define catomic_add(mem, value) \
  __arch_add_body (__arch_add_cprefix, catomic, mem, value)


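/* Add VALUE to *MEM and evaluate to nonzero iff the result is negative
   (SETS captures the sign flag).  */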
#define atomic_add_negative(mem, value) \
  ({ unsigned char __result; \
     if (sizeof (*mem) == 1) \
       __asm __volatile (LOCK_PREFIX "addb %b2, %0; sets %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "iq" (value), "m" (*mem)); \
     else if (sizeof (*mem) == 2) \
       __asm __volatile (LOCK_PREFIX "addw %w2, %0; sets %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "ir" (value), "m" (*mem)); \
     else if (sizeof (*mem) == 4) \
       __asm __volatile (LOCK_PREFIX "addl %2, %0; sets %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "ir" (value), "m" (*mem)); \
     else \
       __asm __volatile (LOCK_PREFIX "addq %q2, %0; sets %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "ir" ((atomic64_t) cast_to_integer (value)), \
                           "m" (*mem)); \
     __result; })


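/* Add VALUE to *MEM and evaluate to nonzero iff the result is zero
   (SETZ captures the zero flag).  */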
#define atomic_add_zero(mem, value) \
  ({ unsigned char __result; \
     if (sizeof (*mem) == 1) \
       __asm __volatile (LOCK_PREFIX "addb %b2, %0; setz %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "iq" (value), "m" (*mem)); \
     else if (sizeof (*mem) == 2) \
       __asm __volatile (LOCK_PREFIX "addw %w2, %0; setz %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "ir" (value), "m" (*mem)); \
     else if (sizeof (*mem) == 4) \
       __asm __volatile (LOCK_PREFIX "addl %2, %0; setz %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "ir" (value), "m" (*mem)); \
     else \
       __asm __volatile (LOCK_PREFIX "addq %q2, %0; setz %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "ir" ((atomic64_t) cast_to_integer (value)), \
                           "m" (*mem)); \
     __result; })


#define __arch_increment_body(lock, mem) \
  do { \
    if (sizeof (*mem) == 1) \
      __asm __volatile (lock "incb %b0" \
                        : "=m" (*mem) \
                        : "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else if (sizeof (*mem) == 2) \
      __asm __volatile (lock "incw %w0" \
                        : "=m" (*mem) \
                        : "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else if (sizeof (*mem) == 4) \
      __asm __volatile (lock "incl %0" \
                        : "=m" (*mem) \
                        : "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else \
      __asm __volatile (lock "incq %q0" \
                        : "=m" (*mem) \
                        : "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
  } while (0)

#define atomic_increment(mem) __arch_increment_body (LOCK_PREFIX, mem)

#define __arch_increment_cprefix \
  "cmpl $0, %%fs:%P2\n\tje 0f\n\tlock\n0:\t"

#define catomic_increment(mem) \
  __arch_increment_body (__arch_increment_cprefix, mem)


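/* Increment *MEM and evaluate to nonzero iff the new value is zero.  */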
#define atomic_increment_and_test(mem) \
  ({ unsigned char __result; \
     if (sizeof (*mem) == 1) \
       __asm __volatile (LOCK_PREFIX "incb %b0; sete %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "m" (*mem)); \
     else if (sizeof (*mem) == 2) \
       __asm __volatile (LOCK_PREFIX "incw %w0; sete %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "m" (*mem)); \
     else if (sizeof (*mem) == 4) \
       __asm __volatile (LOCK_PREFIX "incl %0; sete %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "m" (*mem)); \
     else \
       __asm __volatile (LOCK_PREFIX "incq %q0; sete %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "m" (*mem)); \
     __result; })


#define __arch_decrement_body(lock, mem) \
  do { \
    if (sizeof (*mem) == 1) \
      __asm __volatile (lock "decb %b0" \
                        : "=m" (*mem) \
                        : "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else if (sizeof (*mem) == 2) \
      __asm __volatile (lock "decw %w0" \
                        : "=m" (*mem) \
                        : "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else if (sizeof (*mem) == 4) \
      __asm __volatile (lock "decl %0" \
                        : "=m" (*mem) \
                        : "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else \
      __asm __volatile (lock "decq %q0" \
                        : "=m" (*mem) \
                        : "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
  } while (0)

#define atomic_decrement(mem) __arch_decrement_body (LOCK_PREFIX, mem)

#define __arch_decrement_cprefix \
  "cmpl $0, %%fs:%P2\n\tje 0f\n\tlock\n0:\t"

#define catomic_decrement(mem) \
  __arch_decrement_body (__arch_decrement_cprefix, mem)


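/* Decrement *MEM and evaluate to nonzero iff the new value is zero.  */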
#define atomic_decrement_and_test(mem) \
  ({ unsigned char __result; \
     if (sizeof (*mem) == 1) \
       __asm __volatile (LOCK_PREFIX "decb %b0; sete %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "m" (*mem)); \
     else if (sizeof (*mem) == 2) \
       __asm __volatile (LOCK_PREFIX "decw %w0; sete %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "m" (*mem)); \
     else if (sizeof (*mem) == 4) \
       __asm __volatile (LOCK_PREFIX "decl %0; sete %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "m" (*mem)); \
     else \
       __asm __volatile (LOCK_PREFIX "decq %q0; sete %1" \
                         : "=m" (*mem), "=qm" (__result) \
                         : "m" (*mem)); \
     __result; })
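/* Illustrative sketch only; the structure and the free_refobj helper
   below are hypothetical, not part of glibc.  atomic_decrement_and_test
   is the natural primitive for dropping a reference count: it evaluates
   to nonzero exactly when the decrement released the last reference.

     struct refobj { int refcnt; void *payload; };

     static void
     refobj_put_example (struct refobj *obj)
     {
       if (atomic_decrement_and_test (&obj->refcnt))
         free_refobj (obj);
     }
*/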


#define atomic_bit_set(mem, bit) \
  do { \
    if (sizeof (*mem) == 1) \
      __asm __volatile (LOCK_PREFIX "orb %b2, %0" \
                        : "=m" (*mem) \
                        : "m" (*mem), "iq" (1L << (bit))); \
    else if (sizeof (*mem) == 2) \
      __asm __volatile (LOCK_PREFIX "orw %w2, %0" \
                        : "=m" (*mem) \
                        : "m" (*mem), "ir" (1L << (bit))); \
    else if (sizeof (*mem) == 4) \
      __asm __volatile (LOCK_PREFIX "orl %2, %0" \
                        : "=m" (*mem) \
                        : "m" (*mem), "ir" (1L << (bit))); \
    else if (__builtin_constant_p (bit) && (bit) < 32) \
      __asm __volatile (LOCK_PREFIX "orq %2, %0" \
                        : "=m" (*mem) \
                        : "m" (*mem), "i" (1L << (bit))); \
    else \
      __asm __volatile (LOCK_PREFIX "orq %q2, %0" \
                        : "=m" (*mem) \
                        : "m" (*mem), "r" (1UL << (bit))); \
  } while (0)


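/* Atomically set bit BIT of *MEM and evaluate to the previous value of
   that bit (BTS leaves it in the carry flag, captured by SETC).  */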
#define atomic_bit_test_set(mem, bit) \
  ({ unsigned char __result; \
     if (sizeof (*mem) == 1) \
       __asm __volatile (LOCK_PREFIX "btsb %3, %1; setc %0" \
                         : "=q" (__result), "=m" (*mem) \
                         : "m" (*mem), "iq" (bit)); \
     else if (sizeof (*mem) == 2) \
       __asm __volatile (LOCK_PREFIX "btsw %3, %1; setc %0" \
                         : "=q" (__result), "=m" (*mem) \
                         : "m" (*mem), "ir" (bit)); \
     else if (sizeof (*mem) == 4) \
       __asm __volatile (LOCK_PREFIX "btsl %3, %1; setc %0" \
                         : "=q" (__result), "=m" (*mem) \
                         : "m" (*mem), "ir" (bit)); \
     else \
       __asm __volatile (LOCK_PREFIX "btsq %3, %1; setc %0" \
                         : "=q" (__result), "=m" (*mem) \
                         : "m" (*mem), "ir" (bit)); \
     __result; })


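/* "rep; nop" encodes the PAUSE instruction, a spin-wait loop hint.  */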
#define atomic_spin_nop() asm ("rep; nop")


#define __arch_and_body(lock, mem, mask) \
  do { \
    if (sizeof (*mem) == 1) \
      __asm __volatile (lock "andb %b1, %0" \
                        : "=m" (*mem) \
                        : "iq" (mask), "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else if (sizeof (*mem) == 2) \
      __asm __volatile (lock "andw %w1, %0" \
                        : "=m" (*mem) \
                        : "ir" (mask), "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else if (sizeof (*mem) == 4) \
      __asm __volatile (lock "andl %1, %0" \
                        : "=m" (*mem) \
                        : "ir" (mask), "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else \
      __asm __volatile (lock "andq %q1, %0" \
                        : "=m" (*mem) \
                        : "ir" (mask), "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
  } while (0)

#define __arch_cprefix \
  "cmpl $0, %%fs:%P3\n\tje 0f\n\tlock\n0:\t"

#define atomic_and(mem, mask) __arch_and_body (LOCK_PREFIX, mem, mask)

#define catomic_and(mem, mask) __arch_and_body (__arch_cprefix, mem, mask)


#define __arch_or_body(lock, mem, mask) \
  do { \
    if (sizeof (*mem) == 1) \
      __asm __volatile (lock "orb %b1, %0" \
                        : "=m" (*mem) \
                        : "iq" (mask), "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else if (sizeof (*mem) == 2) \
      __asm __volatile (lock "orw %w1, %0" \
                        : "=m" (*mem) \
                        : "ir" (mask), "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else if (sizeof (*mem) == 4) \
      __asm __volatile (lock "orl %1, %0" \
                        : "=m" (*mem) \
                        : "ir" (mask), "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
    else \
      __asm __volatile (lock "orq %q1, %0" \
                        : "=m" (*mem) \
                        : "ir" (mask), "m" (*mem), \
                          "i" (offsetof (tcbhead_t, multiple_threads))); \
  } while (0)

#define atomic_or(mem, mask) __arch_or_body (LOCK_PREFIX, mem, mask)

#define catomic_or(mem, mask) __arch_or_body (__arch_cprefix, mem, mask)

/* We don't use mfence because it is supposedly slower due to having to
   provide stronger guarantees (e.g., regarding self-modifying code).  */
#define atomic_full_barrier() \
  __asm __volatile (LOCK_PREFIX "orl $0, (%%rsp)" ::: "memory")
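/* In the x86-64 memory model loads are not reordered with other loads and
   stores are not reordered with other stores, so the read and write
   barriers only need to constrain the compiler.  */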
#define atomic_read_barrier() __asm ("" ::: "memory")
#define atomic_write_barrier() __asm ("" ::: "memory")