1/* futex operations for glibc-internal use. Stub version; do not include
2 this file directly.
3 Copyright (C) 2014-2020 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#ifndef STUB_FUTEX_INTERNAL_H
21#define STUB_FUTEX_INTERNAL_H
22
23#include <sys/time.h>
24#include <stdio.h>
25#include <stdbool.h>
26#include <libc-diag.h>
27
28/* This file defines futex operations used internally in glibc. A futex
29 consists of the so-called futex word in userspace, which is of type
30 unsigned int and represents an application-specific condition, and kernel
31 state associated with this particular futex word (e.g., wait queues). The
32 futex operations we provide are wrappers for the futex syscalls and add
33 glibc-specific error checking of the syscall return value. We abort on
34 error codes that are caused by bugs in glibc or in the calling application,
35 or when an error code is not known. We return error codes that can arise
36 in correct executions to the caller. Each operation calls out exactly the
37 return values that callers need to handle.
38
39 The private flag must be either FUTEX_PRIVATE or FUTEX_SHARED.
40 FUTEX_PRIVATE is always supported, and the implementation can internally
41 use FUTEX_SHARED when FUTEX_PRIVATE is requested. FUTEX_SHARED is not
42 necessarily supported (use futex_supports_pshared to detect this).
43
44 We expect callers to only use these operations if futexes and the
45 specific futex operations being used are supported (e.g., FUTEX_SHARED).
46
47 Given that waking other threads waiting on a futex involves concurrent
48 accesses to the futex word, you must use atomic operations to access the
49 futex word.
50
51 Both absolute and relative timeouts can be used. An absolute timeout
52 expires when the given specific point in time on the specified clock
53 passes, or when it already has passed. A relative timeout expires when
54 the given duration of time on the CLOCK_MONOTONIC clock passes.
55
56 Due to POSIX requirements on when synchronization data structures such
57 as mutexes or semaphores can be destroyed and due to the futex design
58 having separate fast/slow paths for wake-ups, we need to consider that
59 futex_wake calls might effectively target a data structure that has been
60 destroyed and reused for another object, or unmapped; thus, some
61 errors or spurious wake-ups can happen in correct executions that would
62 not be possible in a program using just a single futex whose lifetime
63 does not end before the program terminates. For background, see:
64 https://sourceware.org/ml/libc-alpha/2014-04/msg00075.html
65 https://lkml.org/lkml/2014/11/27/472 */
66
/* Flag values for the PRIVATE argument of the futex operations below.
   Defined this way for interoperability with lowlevellock.
   FUTEX_PRIVATE must be zero because the initializers for pthread_mutex_t,
   pthread_rwlock_t, and pthread_cond_t initialize the respective field of
   those structures to zero, and we want FUTEX_PRIVATE to be the default.  */
#define FUTEX_PRIVATE LLL_PRIVATE
#define FUTEX_SHARED LLL_SHARED
/* Reject the build if LLL_PRIVATE does not satisfy the zero requirement
   described above.  */
#if FUTEX_PRIVATE != 0
# error FUTEX_PRIVATE must be equal to 0
#endif
76
77/* Calls __libc_fatal with an error message. Convenience function for
78 concrete implementations of the futex interface. */
79static __always_inline __attribute__ ((__noreturn__)) void
80futex_fatal_error (void)
81{
82 __libc_fatal ("The futex facility returned an unexpected error code.\n");
83}
84
85
/* The Linux kernel provides absolute timeouts based on the
   CLOCK_REALTIME clock and relative timeouts measured against the
   CLOCK_MONOTONIC clock.
89
90 We expect a Linux kernel version of 2.6.22 or more recent (since this
91 version, EINTR is not returned on spurious wake-ups anymore). */
92
93/* Returns EINVAL if PSHARED is neither PTHREAD_PROCESS_PRIVATE nor
94 PTHREAD_PROCESS_SHARED; otherwise, returns 0 if PSHARED is supported, and
95 ENOTSUP if not. */
static __always_inline int
futex_supports_pshared (int pshared)
{
  /* Both valid PSHARED values are accepted here; the process-private case
     is tested first and marked as the expected one.  */
  if (__glibc_likely (pshared == PTHREAD_PROCESS_PRIVATE)
      || pshared == PTHREAD_PROCESS_SHARED)
    return 0;

  /* Anything else is not a valid PSHARED value.  */
  return EINVAL;
}
106
107/* Atomically wrt other futex operations on the same futex, this blocks iff
108 the value *FUTEX_WORD matches the expected value. This is
109 semantically equivalent to:
110 l = <get lock associated with futex> (FUTEX_WORD);
111 wait_flag = <get wait_flag associated with futex> (FUTEX_WORD);
112 lock (l);
113 val = atomic_load_relaxed (FUTEX_WORD);
114 if (val != expected) { unlock (l); return EAGAIN; }
115 atomic_store_relaxed (wait_flag, true);
116 unlock (l);
     // Now block; can time out in the timed variants (futex_reltimed_wait
     // and futex_abstimed_wait, see below)
118 while (atomic_load_relaxed(wait_flag) && !<spurious wake-up>);
119
120 Note that no guarantee of a happens-before relation between a woken
121 futex_wait and a futex_wake is documented; however, this does not matter
122 in practice because we have to consider spurious wake-ups (see below),
123 and thus would not be able to reliably reason about which futex_wake woke
124 us.
125
126 Returns 0 if woken by a futex operation or spuriously. (Note that due to
127 the POSIX requirements mentioned above, we need to conservatively assume
128 that unrelated futex_wake operations could wake this futex; it is easiest
129 to just be prepared for spurious wake-ups.)
130 Returns EAGAIN if the futex word did not match the expected value.
131 Returns EINTR if waiting was interrupted by a signal.
132
133 Note that some previous code in glibc assumed the underlying futex
134 operation (e.g., syscall) to start with or include the equivalent of a
135 seq_cst fence; this allows one to avoid an explicit seq_cst fence before
136 a futex_wait call when synchronizing similar to Dekker synchronization.
137 However, we make no such guarantee here. */
static __always_inline int
futex_wait (unsigned int *futex_word, unsigned int expected, int private)
{
  /* No timeout is supplied (NULL).  */
  const int status = lll_futex_timed_wait (futex_word, expected, NULL,
                                           private);

  /* Results that can arise in correct executions are handed back to the
     caller as 0, EAGAIN or EINTR (i.e., the negated low-level result).  */
  if (status == 0 || status == -EAGAIN || status == -EINTR)
    return -status;

  /* Every other result is fatal:
     - ETIMEDOUT cannot have happened as we provided no timeout.
     - EFAULT must have been caused by a glibc or application bug.
     - EINVAL is either due to wrong alignment or due to the timeout not
       being normalized; must have been caused by a glibc or application
       bug.
     - ENOSYS must have been caused by a glibc bug.
     No other errors are documented at this time.  */
  futex_fatal_error ();
}
160
161/* Like futex_wait but does not provide any indication why we stopped waiting.
162 Thus, when this function returns, you have to always check FUTEX_WORD to
163 determine whether you need to continue waiting, and you cannot detect
164 whether the waiting was interrupted by a signal. Example use:
165 while (atomic_load_relaxed (&futex_word) == 23)
166 futex_wait_simple (&futex_word, 23, FUTEX_PRIVATE);
167 This is common enough to make providing this wrapper worthwhile. */
168static __always_inline void
169futex_wait_simple (unsigned int *futex_word, unsigned int expected,
170 int private)
171{
172 ignore_value (futex_wait (futex_word, expected, private));
173}
174
175
176/* Like futex_wait but is a POSIX cancellation point. */
static __always_inline int
futex_wait_cancelable (unsigned int *futex_word, unsigned int expected,
                       int private)
{
  /* Bracket only the blocking call with the asynccancel enable/disable
     pair, restoring the previous cancellation type afterwards.  */
  const int saved_type = __pthread_enable_asynccancel ();
  const int status = lll_futex_timed_wait (futex_word, expected, NULL,
                                           private);
  __pthread_disable_asynccancel (saved_type);

  /* Results that can arise in correct executions are returned as 0,
     EAGAIN or EINTR.  */
  if (status == 0 || status == -EAGAIN || status == -EINTR)
    return -status;

  /* Every other result is fatal:
     - ETIMEDOUT cannot have happened as we provided no timeout.
     - EFAULT must have been caused by a glibc or application bug.
     - EINVAL is either due to wrong alignment or due to the timeout not
       being normalized; must have been caused by a glibc or application
       bug.
     - ENOSYS must have been caused by a glibc bug.
     No other errors are documented at this time.  */
  futex_fatal_error ();
}
203
204/* Like futex_wait, but will eventually time out (i.e., stop being
205 blocked) after the duration of time provided (i.e., RELTIME) has
206 passed. The caller must provide a normalized RELTIME. RELTIME can also
207 equal NULL, in which case this function behaves equivalent to futex_wait.
208
209 Returns the same values as futex_wait under those same conditions;
210 additionally, returns ETIMEDOUT if the timeout expired.
211 */
static __always_inline int
futex_reltimed_wait (unsigned int *futex_word, unsigned int expected,
                     const struct timespec *reltime, int private)
{
  const int status = lll_futex_timed_wait (futex_word, expected, reltime,
                                           private);

  /* Results that can arise in correct executions are returned as 0,
     EAGAIN, EINTR or ETIMEDOUT.  */
  if (status == 0 || status == -EAGAIN || status == -EINTR
      || status == -ETIMEDOUT)
    return -status;

  /* Every other result is fatal:
     - EFAULT must have been caused by a glibc or application bug.
     - EINVAL is either due to wrong alignment or due to the timeout not
       being normalized; must have been caused by a glibc or application
       bug.
     - ENOSYS must have been caused by a glibc bug.
     No other errors are documented at this time.  */
  futex_fatal_error ();
}
235
236/* Like futex_reltimed_wait but is a POSIX cancellation point. */
static __always_inline int
futex_reltimed_wait_cancelable (unsigned int *futex_word,
                                unsigned int expected,
                                const struct timespec *reltime, int private)
{
  /* Bracket only the blocking call with the cancellation macros.
     NOTE(review): the other cancelable wrappers in this file call
     __pthread_enable_asynccancel/__pthread_disable_asynccancel directly;
     confirm that using the LIBC_CANCEL_* macros here is intended.  */
  const int saved_type = LIBC_CANCEL_ASYNC ();
  const int status = lll_futex_timed_wait (futex_word, expected, reltime,
                                           private);
  LIBC_CANCEL_RESET (saved_type);

  /* Results that can arise in correct executions are returned as 0,
     EAGAIN, EINTR or ETIMEDOUT.  */
  if (status == 0 || status == -EAGAIN || status == -EINTR
      || status == -ETIMEDOUT)
    return -status;

  /* Every other result is fatal:
     - EFAULT must have been caused by a glibc or application bug.
     - EINVAL is either due to wrong alignment or due to the timeout not
       being normalized; must have been caused by a glibc or application
       bug.
     - ENOSYS must have been caused by a glibc bug.
     No other errors are documented at this time.  */
  futex_fatal_error ();
}
264
265/* Check whether the specified clockid is supported by
266 futex_abstimed_wait and futex_abstimed_wait_cancelable. */
267static __always_inline int
268futex_abstimed_supported_clockid (clockid_t clockid)
269{
270 return lll_futex_supported_clockid (clockid);
271}
272
273/* Like futex_reltimed_wait, but the provided timeout (ABSTIME) is an
274 absolute point in time; a call will time out after this point in time. */
275static __always_inline int
276futex_abstimed_wait (unsigned int* futex_word, unsigned int expected,
277 clockid_t clockid,
278 const struct timespec* abstime, int private)
279{
280 /* Work around the fact that the kernel rejects negative timeout values
281 despite them being valid. */
282 if (__glibc_unlikely ((abstime != NULL) && (abstime->tv_sec < 0)))
283 return ETIMEDOUT;
284 int err = lll_futex_clock_wait_bitset (futex_word, expected,
285 clockid, abstime,
286 private);
287 switch (err)
288 {
289 case 0:
290 case -EAGAIN:
291 case -EINTR:
292 case -ETIMEDOUT:
293 return -err;
294
295 case -EFAULT: /* Must have been caused by a glibc or application bug. */
296 case -EINVAL: /* Either due to wrong alignment, unsupported
297 clockid or due to the timeout not being
298 normalized. Must have been caused by a glibc or
299 application bug. */
300 case -ENOSYS: /* Must have been caused by a glibc bug. */
301 /* No other errors are documented at this time. */
302 default:
303 futex_fatal_error ();
304 }
305}
306
/* Like futex_abstimed_wait but is a POSIX cancellation point.  */
308static __always_inline int
309futex_abstimed_wait_cancelable (unsigned int* futex_word,
310 unsigned int expected,
311 clockid_t clockid,
312 const struct timespec* abstime, int private)
313{
314 /* Work around the fact that the kernel rejects negative timeout values
315 despite them being valid. */
316 if (__glibc_unlikely ((abstime != NULL) && (abstime->tv_sec < 0)))
317 return ETIMEDOUT;
318 int oldtype;
319 oldtype = __pthread_enable_asynccancel ();
320 int err = lll_futex_clock_wait_bitset (futex_word, expected,
321 clockid, abstime,
322 private);
323 __pthread_disable_asynccancel (oldtype);
324 switch (err)
325 {
326 case 0:
327 case -EAGAIN:
328 case -EINTR:
329 case -ETIMEDOUT:
330 return -err;
331
332 case -EFAULT: /* Must have been caused by a glibc or application bug. */
333 case -EINVAL: /* Either due to wrong alignment or due to the timeout not
334 being normalized. Must have been caused by a glibc or
335 application bug. */
336 case -ENOSYS: /* Must have been caused by a glibc bug. */
337 /* No other errors are documented at this time. */
338 default:
339 futex_fatal_error ();
340 }
341}
342
343/* Atomically wrt other futex operations on the same futex, this unblocks the
344 specified number of processes, or all processes blocked on this futex if
345 there are fewer than the specified number. Semantically, this is
346 equivalent to:
347 l = <get lock associated with futex> (FUTEX_WORD);
348 lock (l);
349 for (res = 0; PROCESSES_TO_WAKE > 0; PROCESSES_TO_WAKE--, res++) {
350 if (<no process blocked on futex>) break;
351 wf = <get wait_flag of a process blocked on futex> (FUTEX_WORD);
352 // No happens-before guarantee with woken futex_wait (see above)
353 atomic_store_relaxed (wf, 0);
354 }
355 return res;
356
357 Note that we need to support futex_wake calls to past futexes whose memory
358 has potentially been reused due to POSIX' requirements on synchronization
359 object destruction (see above); therefore, we must not report or abort
360 on most errors. */
static __always_inline void
futex_wake (unsigned int *futex_word, int processes_to_wake, int private)
{
  const int woken = lll_futex_wake (futex_word, processes_to_wake, private);

  /* No error.  Ignore the number of woken processes.  */
  if (woken >= 0)
    return;

  /* Tolerated errors:
     - EFAULT could have happened due to memory reuse.
     - EINVAL could be either due to incorrect alignment (a bug in glibc
       or in the application) or due to memory being reused for a PI
       futex.  We cannot distinguish between the two causes, and one of
       them is correct use, so we do not act in this case.  */
  if (woken == -EFAULT || woken == -EINVAL)
    return;

  /* ENOSYS must have been caused by a glibc bug.  No other errors are
     documented at this time, so anything else is fatal as well.  */
  futex_fatal_error ();
}
383
/* The operation checks the value of the futex: if the value is 0, it is
   atomically set to the caller's thread ID.  If the futex value is
   nonzero, the operation atomically sets the FUTEX_WAITERS bit, which
   signals to the futex owner that it cannot unlock the futex in user
   space by atomically setting its value to 0.
389
   If more than one wait operation is issued, the waiters are enqueued in
   descending priority order.
392
   The ABSTIME argument provides an absolute timeout (measured against the
   CLOCK_REALTIME clock).  If ABSTIME is NULL, the operation will block
   indefinitely.
396
397 Returns:
398
399 - 0 if woken by a PI unlock operation or spuriously.
400 - EAGAIN if the futex owner thread ID is about to exit, but has not yet
401 handled the state cleanup.
402 - EDEADLK if the futex is already locked by the caller.
   - ESRCH if the thread ID in the futex does not exist.
   - EINVAL if the state is corrupted or if there is a waiter on the
     futex.
406 - ETIMEDOUT if the ABSTIME expires.
407*/
static __always_inline int
futex_lock_pi (unsigned int *futex_word, const struct timespec *abstime,
               int private)
{
  const int status = lll_futex_timed_lock_pi (futex_word, abstime, private);

  switch (status)
    {
    /* Results reported to the caller as non-negative errno values.  */
    case 0:
    case -EAGAIN:
    case -EINTR:
    case -ETIMEDOUT:
    case -ESRCH:
    case -EDEADLK:
    /* EINVAL indicates either state corruption or that the kernel found a
       waiter on the futex address which is waiting via FUTEX_WAIT or
       FUTEX_WAIT_BITSET.  This is reported on some futex_lock_pi usage
       (pthread_mutex_timedlock for instance).  */
    case -EINVAL:
      return -status;

    default:
      break;
    }

  /* Anything else is fatal:
     - EFAULT must have been caused by a glibc or application bug.
     - ENOSYS must have been caused by a glibc bug.
     No other errors are documented at this time.  */
  futex_fatal_error ();
}
435
436/* Wakes the top priority waiter that called a futex_lock_pi operation on
437 the futex.
438
439 Returns the same values as futex_lock_pi under those same conditions;
440 additionally, returns EPERM when the caller is not allowed to attach
441 itself to the futex. */
static __always_inline int
futex_unlock_pi (unsigned int *futex_word, int private)
{
  const int status = lll_futex_timed_unlock_pi (futex_word, private);

  switch (status)
    {
    /* Results reported to the caller as non-negative errno values.  Note
       that ENOSYS is returned here rather than treated as fatal.  */
    case 0:
    case -EAGAIN:
    case -EINTR:
    case -ETIMEDOUT:
    case -ESRCH:
    case -EDEADLK:
    case -ENOSYS:
    /* EPERM: the caller is not allowed to attach itself to the futex.
       Used to check if PI futexes are supported by the kernel.  */
    case -EPERM:
      return -status;

    default:
      break;
    }

  /* Anything else is fatal:
     - EINVAL must have been caused by a glibc or application bug (e.g.,
       wrong alignment); no timeout is passed by this operation.
     - EFAULT must have been caused by a glibc or application bug.
     No other errors are documented at this time.  */
  futex_fatal_error ();
}
469
470#endif /* futex-internal.h */
471