/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <dl-sysdep.h>
#include <dl-tls.h>
#include <tls.h>
#include <list.h>
#include <lowlevellock.h>
#include <futex-internal.h>
#include <kernel-features.h>
#include <stack-aliasing.h>


#ifndef NEED_SEPARATE_REGISTER_STACK

/* Most architectures have exactly one stack pointer.  Some have more.  */
# define STACK_VARIABLES void *stackaddr = NULL

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr

/* How to declare the function which receives these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr

/* How to declare allocate_stack.  */
# define ALLOCATE_STACK_PARMS void **stack

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)

#else

/* We need two stacks.  The kernel will place them but we have to tell
   the kernel about the size of the reserved address space.  */
# define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr, stacksize

/* How to declare the function which receives these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize

/* How to declare allocate_stack.  */
# define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) \
  allocate_stack (attr, pd, &stackaddr, &stacksize)

#endif
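
/* Taken together, these macros keep the callers in pthread_create
   identical for both kinds of architectures.  As a rough, illustrative
   sketch only (the real create_thread prototype has more parameters),
   the intended composition is:

     STACK_VARIABLES;
     struct pthread *pd;
     int err = ALLOCATE_STACK (attr, &pd);
     if (err == 0)
       err = create_thread (pd, attr, STACK_VARIABLES_ARGS, ...);

   where create_thread is declared with STACK_VARIABLES_PARMS among its
   parameters.  */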


/* Default alignment of stack.  */
#ifndef STACK_ALIGN
# define STACK_ALIGN __alignof__ (long double)
#endif

/* Default value for minimal stack size after allocating thread
   descriptor and guard.  */
#ifndef MINIMAL_REST_STACK
# define MINIMAL_REST_STACK 4096
#endif


/* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
   a stack.  Use it when possible.  */
#ifndef MAP_STACK
# define MAP_STACK 0
#endif

/* This yields the pointer that TLS support code calls the thread pointer.  */
#if TLS_TCB_AT_TP
# define TLS_TPADJ(pd) (pd)
#elif TLS_DTV_AT_TP
# define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
#endif
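
/* The adjustment above reflects the two TLS layouts: with TLS_TCB_AT_TP
   the thread pointer points directly at the thread descriptor, so no
   adjustment is needed; with TLS_DTV_AT_TP the TCB/DTV header sits at
   the thread pointer and struct pthread is placed TLS_PRE_TCB_SIZE
   bytes before it, so the descriptor address must be moved up by that
   amount to obtain the value the TLS support code works with.  */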

/* Cache handling for not-yet-freed stacks.  */

/* Maximum size of the cache, in bytes.  */
static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default.  */
static size_t stack_cache_actsize;

/* Lock protecting the stack cache and the lists of stacks.  */
static int stack_cache_lock = LLL_LOCK_INITIALIZER;

/* List of cached (not yet freed) stacks.  */
static LIST_HEAD (stack_cache);

/* List of the stacks in use.  */
static LIST_HEAD (stack_used);

/* We need to record what list operations we are going to do so that,
   in case of an asynchronous interruption due to a fork() call, we
   can undo or complete the pending work.  */
static uintptr_t in_flight_stack;
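
/* The value is a tagged pointer: bit 0 set means an add operation is in
   flight, bit 0 clear means a delete; the remaining bits hold the list
   element involved (see stack_list_add and stack_list_del below).
   __reclaim_stacks inspects it after fork to replay or undo the
   interrupted operation.  */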

/* List of the threads with user provided stacks in use.  No need to
   initialize this, since it's done in __pthread_initialize_minimal.  */
list_t __stack_user __attribute__ ((nocommon));
hidden_data_def (__stack_user)

#if COLORING_INCREMENT != 0
/* Number of threads created.  */
static unsigned int nptl_ncreated;
#endif


/* Check whether the stack is still used or not.  */
#define FREE_P(descr) ((descr)->tid <= 0)


static void
stack_list_del (list_t *elem)
{
  in_flight_stack = (uintptr_t) elem;

  atomic_write_barrier ();

  list_del (elem);

  atomic_write_barrier ();

  in_flight_stack = 0;
}


static void
stack_list_add (list_t *elem, list_t *list)
{
  in_flight_stack = (uintptr_t) elem | 1;

  atomic_write_barrier ();

  list_add (elem, list);

  atomic_write_barrier ();

  in_flight_stack = 0;
}
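
/* In both helpers the write barriers order the stores so that a fork
   happening at any point observes either a recorded in-flight operation
   or a fully consistent list, never a half-linked element without a
   record of it; __reclaim_stacks relies on this to repair the lists in
   the child.  */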


/* We create a doubly linked list of all cache entries.  Doubly linked
   because this allows removing entries from the end.  */


/* Get a stack from the cache.  We have to match by size since some
   blocks might be too small or far too large.  */
static struct pthread *
get_cached_stack (size_t *sizep, void **memp)
{
  size_t size = *sizep;
  struct pthread *result = NULL;
  list_t *entry;

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Search the cache for a matching entry.  We search for the
     smallest stack which has at least the required size.  Note that
     in normal situations the size of all allocated stacks is the
     same.  At the very least there are only a few different sizes.
     Therefore this loop will exit early most of the time with an
     exact match.  */
  list_for_each (entry, &stack_cache)
    {
      struct pthread *curr;

      curr = list_entry (entry, struct pthread, list);
      if (FREE_P (curr) && curr->stackblock_size >= size)
        {
          if (curr->stackblock_size == size)
            {
              result = curr;
              break;
            }

          if (result == NULL
              || result->stackblock_size > curr->stackblock_size)
            result = curr;
        }
    }

  if (__builtin_expect (result == NULL, 0)
      /* Make sure the size difference is not excessive.  In that
         case we do not use the block.  */
      || __builtin_expect (result->stackblock_size > 4 * size, 0))
    {
      /* Release the lock.  */
      lll_unlock (stack_cache_lock, LLL_PRIVATE);

      return NULL;
    }

  /* Don't allow setxid until cloned.  */
  result->setxid_futex = -1;

  /* Dequeue the entry.  */
  stack_list_del (&result->list);

  /* And add to the list of stacks in use.  */
  stack_list_add (&result->list, &stack_used);

  /* And decrease the cache size.  */
  stack_cache_actsize -= result->stackblock_size;

  /* Release the lock early.  */
  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  /* Report size and location of the stack to the caller.  */
  *sizep = result->stackblock_size;
  *memp = result->stackblock;

  /* Cancellation handling is back to the default.  */
  result->cancelhandling = 0;
  result->cleanup = NULL;

  /* No pending event.  */
  result->nextevent = NULL;

  /* Clear the DTV.  */
  dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
  for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
    if (! dtv[1 + cnt].pointer.is_static
        && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
      free (dtv[1 + cnt].pointer.val);
  memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));

  /* Re-initialize the TLS.  */
  _dl_allocate_tls_init (TLS_TPADJ (result));

  return result;
}


/* Free stacks until cache size is lower than LIMIT.  */
void
__free_stacks (size_t limit)
{
  /* We reduce the size of the cache.  Remove the last entries until
     the size is below the limit.  */
  list_t *entry;
  list_t *prev;

  /* Search from the end of the list.  */
  list_for_each_prev_safe (entry, prev, &stack_cache)
    {
      struct pthread *curr;

      curr = list_entry (entry, struct pthread, list);
      if (FREE_P (curr))
        {
          /* Unlink the block.  */
          stack_list_del (entry);

          /* Account for the freed memory.  */
          stack_cache_actsize -= curr->stackblock_size;

          /* Free the memory associated with the ELF TLS.  */
          _dl_deallocate_tls (TLS_TPADJ (curr), false);

          /* Remove this block.  This should never fail.  If it does
             something is really wrong.  */
          if (munmap (curr->stackblock, curr->stackblock_size) != 0)
            abort ();

          /* Maybe we have freed enough.  */
          if (stack_cache_actsize <= limit)
            break;
        }
    }
}


/* Add a stack which is not used anymore to the cache.  Must be
   called with the cache lock held.  */
static inline void
__attribute ((always_inline))
queue_stack (struct pthread *stack)
{
  /* We unconditionally add the stack to the list.  The memory may
     still be in use but it will not be reused until the kernel marks
     the stack as not used anymore.  */
  stack_list_add (&stack->list, &stack_cache);

  stack_cache_actsize += stack->stackblock_size;
  if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize))
    __free_stacks (stack_cache_maxsize);
}


static int
internal_function
change_stack_perm (struct pthread *pd
#ifdef NEED_SEPARATE_REGISTER_STACK
                   , size_t pagemask
#endif
                   )
{
#ifdef NEED_SEPARATE_REGISTER_STACK
  void *stack = (pd->stackblock
                 + (((((pd->stackblock_size - pd->guardsize) / 2)
                      & pagemask) + pd->guardsize) & pagemask));
  size_t len = pd->stackblock + pd->stackblock_size - stack;
#elif _STACK_GROWS_DOWN
  void *stack = pd->stackblock + pd->guardsize;
  size_t len = pd->stackblock_size - pd->guardsize;
#elif _STACK_GROWS_UP
  void *stack = pd->stackblock;
  size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
#else
# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
#endif
  if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
    return errno;

  return 0;
}
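
/* With NEED_SEPARATE_REGISTER_STACK (in practice, ia64) the mapping is
   shared between the ordinary stack, which grows down from the top of
   the block, and the register backing store, which grows up from the
   bottom half; that is why the start address above is computed from
   roughly the half-way point of the block rather than from its base.  */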


/* Returns a usable stack for a new thread either by allocating a
   new stack or reusing a cached stack of sufficient size.
   ATTR must be non-NULL and point to a valid pthread_attr.
   PDP must be non-NULL.  */
static int
allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
                ALLOCATE_STACK_PARMS)
{
  struct pthread *pd;
  size_t size;
  size_t pagesize_m1 = __getpagesize () - 1;

  assert (powerof2 (pagesize_m1 + 1));
  assert (TCB_ALIGNMENT >= STACK_ALIGN);

  /* Get the stack size from the attribute if it is set.  Otherwise we
     use the default we determined at start time.  */
  if (attr->stacksize != 0)
    size = attr->stacksize;
  else
    {
      lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
      size = __default_pthread_attr.stacksize;
      lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
    }

  /* Get memory for the stack.  */
  if (__glibc_unlikely (attr->flags & ATTR_FLAG_STACKADDR))
    {
      uintptr_t adj;
      char *stackaddr = (char *) attr->stackaddr;

      /* Assume the same layout as the _STACK_GROWS_DOWN case, with struct
         pthread at the top of the stack block.  Later we adjust the guard
         location and stack address to match the _STACK_GROWS_UP case.  */
      if (_STACK_GROWS_UP)
        stackaddr += attr->stacksize;

      /* If the user also specified the size of the stack make sure it
         is large enough.  */
      if (attr->stacksize != 0
          && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
        return EINVAL;

      /* Adjust stack size for alignment of the TLS block.  */
#if TLS_TCB_AT_TP
      adj = ((uintptr_t) stackaddr - TLS_TCB_SIZE)
            & __static_tls_align_m1;
      assert (size > adj + TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
      adj = ((uintptr_t) stackaddr - __static_tls_size)
            & __static_tls_align_m1;
      assert (size > adj);
#endif

      /* The user provided some memory.  Let's hope it matches the
         size...  We do not allocate guard pages if the user provided
         the stack.  It is the user's responsibility to do this if it
         is wanted.  */
#if TLS_TCB_AT_TP
      pd = (struct pthread *) ((uintptr_t) stackaddr
                               - TLS_TCB_SIZE - adj);
#elif TLS_DTV_AT_TP
      pd = (struct pthread *) (((uintptr_t) stackaddr
                                - __static_tls_size - adj)
                               - TLS_PRE_TCB_SIZE);
#endif

      /* Only the thread descriptor embedded in the user-provided stack
         needs to be cleared; the rest of the memory is left alone.  */
      memset (pd, '\0', sizeof (struct pthread));

      /* The first TSD block is included in the TCB.  */
      pd->specific[0] = pd->specific_1stblock;

      /* Remember the stack-related values.  */
      pd->stackblock = (char *) stackaddr - size;
      pd->stackblock_size = size;

      /* This is a user-provided stack.  It will not be queued in the
         stack cache nor will the memory (except the TLS memory) be freed.  */
      pd->user_stack = true;

      /* This is at least the second thread.  */
      pd->header.multiple_threads = 1;
#ifndef TLS_MULTIPLE_THREADS_IN_TCB
      __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
#endif

#ifndef __ASSUME_PRIVATE_FUTEX
      /* The thread must know when private futexes are supported.  */
      pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
                                                header.private_futex);
#endif

#ifdef NEED_DL_SYSINFO
      SETUP_THREAD_SYSINFO (pd);
#endif

      /* Don't allow setxid until cloned.  */
      pd->setxid_futex = -1;

      /* Allocate the DTV for this thread.  */
      if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
        {
          /* Something went wrong.  */
          assert (errno == ENOMEM);
          return errno;
        }


      /* Prepare to modify global data.  */
      lll_lock (stack_cache_lock, LLL_PRIVATE);

      /* And add to the list of stacks in use.  */
      list_add (&pd->list, &__stack_user);

      lll_unlock (stack_cache_lock, LLL_PRIVATE);
    }
  else
    {
      /* Allocate some anonymous memory.  If possible use the cache.  */
      size_t guardsize;
      size_t reqsize;
      void *mem;
      const int prot = (PROT_READ | PROT_WRITE
                        | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));

#if COLORING_INCREMENT != 0
      /* Add one more page for stack coloring.  Don't do it for stacks
         with 16 times pagesize or larger.  This might just cause
         unnecessary misalignment.  */
      if (size <= 16 * pagesize_m1)
        size += pagesize_m1 + 1;
#endif

      /* Adjust the stack size for alignment.  */
      size &= ~__static_tls_align_m1;
      assert (size != 0);

      /* Make sure the size of the stack is enough for the guard and,
         if necessary, the thread descriptor.  */
      guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
      if (guardsize < attr->guardsize || size + guardsize < guardsize)
        /* Arithmetic overflow.  */
        return EINVAL;
      size += guardsize;
      if (__builtin_expect (size < ((guardsize + __static_tls_size
                                     + MINIMAL_REST_STACK + pagesize_m1)
                                    & ~pagesize_m1),
                            0))
        /* The stack is too small (or the guard too large).  */
        return EINVAL;

      /* Try to get a stack from the cache.  */
      reqsize = size;
      pd = get_cached_stack (&size, &mem);
      if (pd == NULL)
        {
          /* To avoid aliasing effects on a larger scale than pages we
             adjust the allocated stack size if necessary.  This way
             allocations directly following each other will not have
             aliasing problems.  */
#if MULTI_PAGE_ALIASING != 0
          if ((size % MULTI_PAGE_ALIASING) == 0)
            size += pagesize_m1 + 1;
#endif

          mem = mmap (NULL, size, prot,
                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);

          if (__glibc_unlikely (mem == MAP_FAILED))
            return errno;

          /* SIZE is guaranteed to be greater than zero.
             So we can never get a null pointer back from mmap.  */
          assert (mem != NULL);

#if COLORING_INCREMENT != 0
          /* Atomically increment NCREATED.  */
          unsigned int ncreated = atomic_increment_val (&nptl_ncreated);

          /* We choose the offset for coloring by incrementing it for
             every new thread by a fixed amount.  The offset is used
             modulo the page size.  Even though coloring relative to
             higher alignment values might work better, it makes no
             sense to attempt it since the mmap() interface does not
             allow us to specify any alignment for the returned memory
             block.  */
          size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;

          /* Make sure the coloring offset does not disturb the alignment
             of the TCB and static TLS block.  */
          if (__glibc_unlikely ((coloring & __static_tls_align_m1) != 0))
            coloring = (((coloring + __static_tls_align_m1)
                         & ~(__static_tls_align_m1))
                        & ~pagesize_m1);
#else
          /* Unless specified we do not make any adjustments.  */
# define coloring 0
#endif

          /* Place the thread descriptor at the end of the stack.  */
#if TLS_TCB_AT_TP
          pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
#elif TLS_DTV_AT_TP
          pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
                                     - __static_tls_size)
                                    & ~__static_tls_align_m1)
                                   - TLS_PRE_TCB_SIZE);
#endif

          /* Remember the stack-related values.  */
          pd->stackblock = mem;
          pd->stackblock_size = size;

          /* We allocated the first block of the thread-specific data
             array within the descriptor.  This address will not change
             for the lifetime of this descriptor.  */
          pd->specific[0] = pd->specific_1stblock;

          /* This is at least the second thread.  */
          pd->header.multiple_threads = 1;
#ifndef TLS_MULTIPLE_THREADS_IN_TCB
          __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
#endif

#ifndef __ASSUME_PRIVATE_FUTEX
          /* The thread must know when private futexes are supported.  */
          pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
                                                    header.private_futex);
#endif

#ifdef NEED_DL_SYSINFO
          SETUP_THREAD_SYSINFO (pd);
#endif

          /* Don't allow setxid until cloned.  */
          pd->setxid_futex = -1;

          /* Allocate the DTV for this thread.  */
          if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
            {
              /* Something went wrong.  */
              assert (errno == ENOMEM);

              /* Free the stack memory we just allocated.  */
              (void) munmap (mem, size);

              return errno;
            }


          /* Prepare to modify global data.  */
          lll_lock (stack_cache_lock, LLL_PRIVATE);

          /* And add to the list of stacks in use.  */
          stack_list_add (&pd->list, &stack_used);

          lll_unlock (stack_cache_lock, LLL_PRIVATE);


          /* There might have been a race.  Another thread might have
             caused the stacks to get exec permission while this new
             stack was prepared.  Detect if this was possible and
             change the permission if necessary.  */
          if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
                                && (prot & PROT_EXEC) == 0, 0))
            {
              int err = change_stack_perm (pd
#ifdef NEED_SEPARATE_REGISTER_STACK
                                           , ~pagesize_m1
#endif
                                           );
              if (err != 0)
                {
                  /* Free the stack memory we just allocated.  */
                  (void) munmap (mem, size);

                  return err;
                }
            }


          /* Note that all of the stack and the thread descriptor are
             zeroed.  This means we do not have to initialize fields
             with initial value zero.  This is specifically true for
             the 'tid' field which is always set back to zero once the
             stack is not used anymore and for the 'guardsize' field
             which will be read next.  */
        }

      /* Create or resize the guard area if necessary.  */
      if (__glibc_unlikely (guardsize > pd->guardsize))
        {
#ifdef NEED_SEPARATE_REGISTER_STACK
          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
#elif _STACK_GROWS_DOWN
          char *guard = mem;
#elif _STACK_GROWS_UP
          char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
#endif
          if (mprotect (guard, guardsize, PROT_NONE) != 0)
            {
            mprot_error:
              lll_lock (stack_cache_lock, LLL_PRIVATE);

              /* Remove the thread from the list.  */
              stack_list_del (&pd->list);

              lll_unlock (stack_cache_lock, LLL_PRIVATE);

              /* Get rid of the TLS block we allocated.  */
              _dl_deallocate_tls (TLS_TPADJ (pd), false);

              /* Free the stack memory regardless of whether the size
                 of the cache is over the limit or not.  If this piece
                 of memory caused problems we had better not use it
                 anymore.  We also ignore possible errors here; there
                 is nothing we could do about them.  */
              (void) munmap (mem, size);

              return errno;
            }

          pd->guardsize = guardsize;
        }
      else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
                                 0))
        {
          /* The old guard area is too large.  */

#ifdef NEED_SEPARATE_REGISTER_STACK
          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
          char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);

          if (oldguard < guard
              && mprotect (oldguard, guard - oldguard, prot) != 0)
            goto mprot_error;

          if (mprotect (guard + guardsize,
                        oldguard + pd->guardsize - guard - guardsize,
                        prot) != 0)
            goto mprot_error;
#elif _STACK_GROWS_DOWN
          if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
                        prot) != 0)
            goto mprot_error;
#elif _STACK_GROWS_UP
          if (mprotect ((char *) pd - pd->guardsize,
                        pd->guardsize - guardsize, prot) != 0)
            goto mprot_error;
#endif

          pd->guardsize = guardsize;
        }
      /* The pthread_getattr_np() calls need to be given the size
         requested in the attribute, regardless of how large the
         actually used guardsize is.  */
      pd->reported_guardsize = guardsize;
    }

  /* Initialize the lock.  We have to do this unconditionally since the
     stillborn thread could be canceled while the lock is taken.  */
  pd->lock = LLL_LOCK_INITIALIZER;

  /* The robust mutex lists also need to be initialized
     unconditionally because the cleanup for the previous stack owner
     might have happened in the kernel.  */
  pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
                                  - offsetof (pthread_mutex_t,
                                              __data.__list.__next));
  pd->robust_head.list_op_pending = NULL;
#ifdef __PTHREAD_MUTEX_HAVE_PREV
  pd->robust_prev = &pd->robust_head;
#endif
  pd->robust_head.list = &pd->robust_head;
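
  /* The futex_offset value tells the kernel where, relative to each
     element on the robust list (the __list member embedded in a
     pthread_mutex_t), the futex word itself lives; on thread exit the
     kernel walks robust_head.list and uses this offset to mark each
     still-held robust mutex with FUTEX_OWNER_DIED.  The list is
     circular, which is why it initially points back at the head.  */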

  /* We place the thread descriptor at the end of the stack.  */
  *pdp = pd;

#if _STACK_GROWS_DOWN
  void *stacktop;

# if TLS_TCB_AT_TP
  /* The stack begins before the TCB and the static TLS block.  */
  stacktop = ((char *) (pd + 1) - __static_tls_size);
# elif TLS_DTV_AT_TP
  stacktop = (char *) (pd - 1);
# endif

# ifdef NEED_SEPARATE_REGISTER_STACK
  *stack = pd->stackblock;
  *stacksize = stacktop - *stack;
# else
  *stack = stacktop;
# endif
#else
  *stack = pd->stackblock;
#endif

  return 0;
}


void
internal_function
__deallocate_stack (struct pthread *pd)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Remove the thread from whichever list of stacks (in-use or
     user-provided) it is currently on.  */
  stack_list_del (&pd->list);

  /* Not much to do.  Just free the mmap()ed memory.  Note that we do
     not reset the 'used' flag in the 'tid' field.  This is done by
     the kernel.  If no thread has been created yet this field is
     still zero.  */
  if (__glibc_likely (! pd->user_stack))
    (void) queue_stack (pd);
  else
    /* Free the memory associated with the ELF TLS.  */
    _dl_deallocate_tls (TLS_TPADJ (pd), false);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}


int
internal_function
__make_stacks_executable (void **stack_endp)
{
  /* First the main thread's stack.  */
  int err = _dl_make_stack_executable (stack_endp);
  if (err != 0)
    return err;

#ifdef NEED_SEPARATE_REGISTER_STACK
  const size_t pagemask = ~(__getpagesize () - 1);
#endif

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      err = change_stack_perm (list_entry (runp, struct pthread, list)
#ifdef NEED_SEPARATE_REGISTER_STACK
                               , pagemask
#endif
                               );
      if (err != 0)
        break;
    }

  /* Also change the permission for the currently unused stacks.  This
     might be wasted time but it is better spent here than on adding a
     check in the fast path.  */
  if (err == 0)
    list_for_each (runp, &stack_cache)
      {
        err = change_stack_perm (list_entry (runp, struct pthread, list)
#ifdef NEED_SEPARATE_REGISTER_STACK
                                 , pagemask
#endif
                                 );
        if (err != 0)
          break;
      }

  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  return err;
}


/* In case of a fork() call the memory allocation in the child will be
   the same but only one thread is running.  All stacks except that of
   the one running thread are not used anymore.  We have to recycle
   them.  */
void
__reclaim_stacks (void)
{
  struct pthread *self = (struct pthread *) THREAD_SELF;

  /* No locking necessary.  The caller's thread is the only one
     running.  But we have to be aware that we might have interrupted
     a list operation.  */

  if (in_flight_stack != 0)
    {
      bool add_p = in_flight_stack & 1;
      list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1);

      if (add_p)
        {
          /* We always add at the beginning of the list.  So in this case we
             only need to check the beginning of these lists to see if the
             pointers at the head of the list are inconsistent.  */
          list_t *l = NULL;

          if (stack_used.next->prev != &stack_used)
            l = &stack_used;
          else if (stack_cache.next->prev != &stack_cache)
            l = &stack_cache;

          if (l != NULL)
            {
              assert (l->next->prev == elem);
              elem->next = l->next;
              elem->prev = l;
              l->next = elem;
            }
        }
      else
        {
          /* We can simply always replay the delete operation.  */
          elem->next->prev = elem->prev;
          elem->prev->next = elem->next;
        }
    }

  /* Mark all stacks except the still running one as free.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp = list_entry (runp, struct pthread, list);
      if (curp != self)
        {
          /* This marks the stack as free.  */
          curp->tid = 0;

          /* Account for the size of the stack.  */
          stack_cache_actsize += curp->stackblock_size;

          if (curp->specific_used)
            {
              /* Clear the thread-specific data.  */
              memset (curp->specific_1stblock, '\0',
                      sizeof (curp->specific_1stblock));

              curp->specific_used = false;

              for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
                if (curp->specific[cnt] != NULL)
                  {
                    memset (curp->specific[cnt], '\0',
                            sizeof (curp->specific_1stblock));

                    /* We have allocated the block which we do not
                       free here so re-set the bit.  */
                    curp->specific_used = true;
                  }
            }
        }
    }

  /* Move all stacks from the in-use list to the cache.  */
  list_splice (&stack_used, &stack_cache);

  /* Remove the entry for the current thread from the cache list and
     add it to the appropriate list of threads in use.  Which of the
     two lists that is depends on the user_stack flag.  */
  stack_list_del (&self->list);

  /* Re-initialize the lists for all the threads.  */
  INIT_LIST_HEAD (&stack_used);
  INIT_LIST_HEAD (&__stack_user);

  if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
    list_add (&self->list, &__stack_user);
  else
    list_add (&self->list, &stack_used);

  /* There is one thread running.  */
  __nptl_nthreads = 1;

  in_flight_stack = 0;

  /* Initialize locks.  */
  stack_cache_lock = LLL_LOCK_INITIALIZER;
  __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
}


#if HP_TIMING_AVAIL
# undef __find_thread_by_id
/* Find a thread given the thread ID.  */
attribute_hidden
struct pthread *
__find_thread_by_id (pid_t tid)
{
  struct pthread *result = NULL;

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

 out:
  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  return result;
}
#endif


#ifdef SIGSETXID
static void
internal_function
setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
{
  int ch;

  /* Wait until this thread is cloned.  */
  if (t->setxid_futex == -1
      && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1))
    do
      futex_wait_simple (&t->setxid_futex, -2, FUTEX_PRIVATE);
    while (t->setxid_futex == -2);

  /* Don't let the thread exit before the setxid handler runs.  */
  t->setxid_futex = 0;

  do
    {
      ch = t->cancelhandling;

      /* If the thread is exiting right now, ignore it.  */
      if ((ch & EXITING_BITMASK) != 0)
        {
          /* Release the futex if there is no other setxid in
             progress.  */
          if ((ch & SETXID_BITMASK) == 0)
            {
              t->setxid_futex = 1;
              futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE);
            }
          return;
        }
    }
  while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
                                               ch | SETXID_BITMASK, ch));
}


static void
internal_function
setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
{
  int ch;

  do
    {
      ch = t->cancelhandling;
      if ((ch & SETXID_BITMASK) == 0)
        return;
    }
  while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
                                               ch & ~SETXID_BITMASK, ch));

  /* Release the futex just in case.  */
  t->setxid_futex = 1;
  futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE);
}


static int
internal_function
setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
{
  if ((t->cancelhandling & SETXID_BITMASK) == 0)
    return 0;

  int val;
  pid_t pid = __getpid ();
  INTERNAL_SYSCALL_DECL (err);
  val = INTERNAL_SYSCALL_CALL (tgkill, err, pid, t->tid, SIGSETXID);

  /* If this failed, the thread must not have started yet or it has
     already exited.  */
  if (!INTERNAL_SYSCALL_ERROR_P (val, err))
    {
      atomic_increment (&cmdp->cntr);
      return 1;
    }
  else
    return 0;
}

/* Check for consistency across set*id system call results.  The abort
   should not happen as long as all privilege changes happen through
   the glibc wrappers.  ERROR must be 0 (no error) or an errno
   code.  */
void
attribute_hidden
__nptl_setxid_error (struct xid_command *cmdp, int error)
{
  do
    {
      int olderror = cmdp->error;
      if (olderror == error)
        break;
      if (olderror != -1)
        /* Mismatch between current and previous results.  */
        abort ();
    }
  while (atomic_compare_and_exchange_bool_acq (&cmdp->error, error, -1));
}

int
attribute_hidden
__nptl_setxid (struct xid_command *cmdp)
{
  int signalled;
  int result;
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  __xidcmd = cmdp;
  cmdp->cntr = 0;
  cmdp->error = -1;

  struct pthread *self = THREAD_SELF;

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_mark_thread (cmdp, t);
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_mark_thread (cmdp, t);
    }

  /* Iterate until we don't succeed in signalling anyone.  That means
     we have gotten all running threads, and their children will be
     automatically correct once started.  */
  do
    {
      signalled = 0;

      list_for_each (runp, &stack_used)
        {
          struct pthread *t = list_entry (runp, struct pthread, list);
          if (t == self)
            continue;

          signalled += setxid_signal_thread (cmdp, t);
        }

      list_for_each (runp, &__stack_user)
        {
          struct pthread *t = list_entry (runp, struct pthread, list);
          if (t == self)
            continue;

          signalled += setxid_signal_thread (cmdp, t);
        }

      int cur = cmdp->cntr;
      while (cur != 0)
        {
          futex_wait_simple ((unsigned int *) &cmdp->cntr, cur,
                             FUTEX_PRIVATE);
          cur = cmdp->cntr;
        }
    }
  while (signalled != 0);

  /* Clean up flags, so that no thread blocks during exit waiting
     for a signal which will never come.  */
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_unmark_thread (cmdp, t);
    }

  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self)
        continue;

      setxid_unmark_thread (cmdp, t);
    }

  /* This must be last, otherwise the current thread might not have
     permission to send the SIGSETXID signal to the other threads.  */
  INTERNAL_SYSCALL_DECL (err);
  result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
                                 cmdp->id[0], cmdp->id[1], cmdp->id[2]);
  int error = 0;
  if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (result, err)))
    {
      error = INTERNAL_SYSCALL_ERRNO (result, err);
      __set_errno (error);
      result = -1;
    }
  __nptl_setxid_error (cmdp, error);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
  return result;
}
#endif  /* SIGSETXID.  */


static inline void __attribute__((always_inline))
init_one_static_tls (struct pthread *curp, struct link_map *map)
{
# if TLS_TCB_AT_TP
  void *dest = (char *) curp - map->l_tls_offset;
# elif TLS_DTV_AT_TP
  void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
# else
#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif

  /* We cannot delay the initialization of the Static TLS area, since
     it can be accessed with LE or IE, but since the DTV is only used
     by GD and LD, we can delay its update to avoid a race.  */
  memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
}

void
attribute_hidden
__pthread_init_static_tls (struct link_map *map)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}


void
attribute_hidden
__wait_lookup_done (void)
{
  lll_lock (stack_cache_lock, LLL_PRIVATE);

  struct pthread *self = THREAD_SELF;

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
        continue;

      int *const gscope_flagp = &t->header.gscope_flag;

      /* We have to wait until this thread is done with the global
         scope.  First tell the thread that we are waiting and
         possibly have to be woken.  */
      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
                                                THREAD_GSCOPE_FLAG_WAIT,
                                                THREAD_GSCOPE_FLAG_USED))
        continue;

      do
        futex_wait_simple ((unsigned int *) gscope_flagp,
                           THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *t = list_entry (runp, struct pthread, list);
      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
        continue;

      int *const gscope_flagp = &t->header.gscope_flag;

      /* We have to wait until this thread is done with the global
         scope.  First tell the thread that we are waiting and
         possibly have to be woken.  */
      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
                                                THREAD_GSCOPE_FLAG_WAIT,
                                                THREAD_GSCOPE_FLAG_USED))
        continue;

      do
        futex_wait_simple ((unsigned int *) gscope_flagp,
                           THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
    }

  lll_unlock (stack_cache_lock, LLL_PRIVATE);
}