allocatestack.c source code [glibc_src_2.25/nptl/allocatestack.c]

/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
18
19	#include <assert.h>
20	#include <errno.h>
21	#include <signal.h>
22	#include <stdint.h>
23	#include <string.h>
24	#include <unistd.h>
25	#include <sys/mman.h>
26	#include <sys/param.h>
27	#include <dl-sysdep.h>
28	#include <dl-tls.h>
29	#include <tls.h>
30	#include <list.h>
31	#include <lowlevellock.h>
32	#include <futex-internal.h>
33	#include <kernel-features.h>
34	#include <stack-aliasing.h>
35
36
#ifndef NEED_SEPARATE_REGISTER_STACK

/* Most architectures have exactly one stack pointer.  Some have more.  */
# define STACK_VARIABLES void *stackaddr = NULL

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr

/* How to declare a function which gets these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr

/* How to declare allocate_stack: the trailing output parameter(s)
   through which the stack address is reported to the caller.  */
# define ALLOCATE_STACK_PARMS void **stack

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)

#else

/* We need two stacks.  The kernel will place them but we have to tell
   the kernel about the size of the reserved address space.  */
# define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0

/* How to pass the values to the 'create_thread' function.  */
# define STACK_VARIABLES_ARGS stackaddr, stacksize

/* How to declare a function which gets these parameters.  */
# define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize

/* How to declare allocate_stack: here both the stack address and the
   stack size are reported to the caller through pointers.  */
# define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize

/* This is how the function is called.  We do it this way to allow
   other variants of the function to have more parameters.  */
# define ALLOCATE_STACK(attr, pd) \
  allocate_stack (attr, pd, &stackaddr, &stacksize)

#endif
76
77
/* Default alignment of stack.  */
#ifndef STACK_ALIGN
# define STACK_ALIGN __alignof__ (long double)
#endif

/* Default value for minimal stack size after allocating thread
   descriptor and guard.  */
#ifndef MINIMAL_REST_STACK
# define MINIMAL_REST_STACK 4096
#endif


/* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
   a stack.  Use it when possible.  When the headers do not provide it,
   define it to 0 so that OR-ing it into the mmap flags is a no-op.  */
#ifndef MAP_STACK
# define MAP_STACK 0
#endif

/* This yields the pointer that TLS support code calls the thread pointer.
   With TLS_TCB_AT_TP it is the descriptor itself; with TLS_DTV_AT_TP it
   lies TLS_PRE_TCB_SIZE bytes past the start of the descriptor.  */
#if TLS_TCB_AT_TP
# define TLS_TPADJ(pd) (pd)
#elif TLS_DTV_AT_TP
# define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
#endif
102
103	/ Cache handling for not-yet free stacks. /
104
105	/ Maximum size in kB of cache. /
106	static size_t stack_cache_maxsize = `40` * `1024` * `1024`; / 40MiBi by default. /
107	static size_t stack_cache_actsize;
108
109	/ Mutex protecting this variable. /
110	static int stack_cache_lock = LLL_LOCK_INITIALIZER;
111
112	/ List of queued stack frames. /
113	static LIST_HEAD (stack_cache);
114
115	/ List of the stacks in use. /
116	static LIST_HEAD (stack_used);
117
118	/ We need to record what list operations we are going to do so that,*
119	in case of an asynchronous interruption due to a fork() call, we
120	can correct for the work. /*
121	static uintptr_t in_flight_stack;
122
123	/ List of the threads with user provided stacks in use. No need to*
124	initialize this, since it's done in __pthread_initialize_minimal. /*
125	list_t __stack_user __attribute__ ((nocommon));
126	hidden_data_def (__stack_user)
127
128	#if COLORING_INCREMENT != 0
129	/ Number of threads created. /
130	static unsigned int nptl_ncreated;
131	#endif
132
133
134	/ Check whether the stack is still used or not. /
135	#define FREE_P(descr) ((descr)->tid <= 0)
136
137
138	static void
139	stack_list_del (list_t *elem)
140	{
141	in_flight_stack = (uintptr_t) elem;
142
143	atomic_write_barrier ();
144
145	list_del (elem);
146
147	atomic_write_barrier ();
148
149	in_flight_stack = `0`;
150	}
151
152
153	static void
154	stack_list_add (list_t elem, list_t list)
155	{
156	in_flight_stack = (uintptr_t) elem \| `1`;
157
158	atomic_write_barrier ();
159
160	list_add (elem, list);
161
162	atomic_write_barrier ();
163
164	in_flight_stack = `0`;
165	}
166
167
/* We create a doubly linked list of all cache entries.  Doubly linked
   because this allows removing entries from the end.  */
170
171
172	/ Get a stack frame from the cache. We have to match by size since*
173	some blocks might be too small or far too large. /*
174	static struct pthread *
175	get_cached_stack (size_t sizep, void* **memp)
176	{
177	size_t size = *sizep;
178	struct pthread *result = NULL;
179	list_t *entry;
180
181	lll_lock (stack_cache_lock, LLL_PRIVATE);
182
183	/ Search the cache for a matching entry. We search for the*
184	smallest stack which has at least the required size. Note that
185	in normal situations the size of all allocated stacks is the
186	same. As the very least there are only a few different sizes.
187	Therefore this loop will exit early most of the time with an
188	exact match. /*
189	list_for_each (entry, &stack_cache)
190	{
191	struct pthread *curr;
192
193	curr = list_entry (entry, struct pthread, list);
194	if (FREE_P (curr) && curr->stackblock_size >= size)
195	{
196	if (curr->stackblock_size == size)
197	{
198	result = curr;
199	break;
200	}
201
202	if (result == NULL
203	\|\| result->stackblock_size > curr->stackblock_size)
204	result = curr;
205	}
206	}
207
208	if (__builtin_expect (result == NULL, `0`)
209	/ Make sure the size difference is not too excessive. In that*
210	case we do not use the block. /*
211	\|\| __builtin_expect (result->stackblock_size > `4` * size, `0`))
212	{
213	/ Release the lock. /
214	lll_unlock (stack_cache_lock, LLL_PRIVATE);
215
216	return NULL;
217	}
218
219	/ Don't allow setxid until cloned. /
220	result->setxid_futex = -`1`;
221
222	/ Dequeue the entry. /
223	stack_list_del (&result->list);
224
225	/ And add to the list of stacks in use. /
226	stack_list_add (&result->list, &stack_used);
227
228	/ And decrease the cache size. /
229	stack_cache_actsize -= result->stackblock_size;
230
231	/ Release the lock early. /
232	lll_unlock (stack_cache_lock, LLL_PRIVATE);
233
234	/ Report size and location of the stack to the caller. /
235	*sizep = result->stackblock_size;
236	*memp = result->stackblock;
237
238	/ Cancellation handling is back to the default. /
239	result->cancelhandling = `0`;
240	result->cleanup = NULL;
241
242	/ No pending event. /
243	result->nextevent = NULL;
244
245	/ Clear the DTV. /
246	dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
247	for (size_t cnt = `0`; cnt < dtv[-`1`].counter; ++cnt)
248	free (dtv[`1` + cnt].pointer.to_free);
249	memset (dtv, `'\0'`, (dtv[-`1`].counter + `1`) * sizeof (dtv_t));
250
251	/ Re-initialize the TLS. /
252	_dl_allocate_tls_init (TLS_TPADJ (result));
253
254	return result;
255	}
256
257
258	/ Free stacks until cache size is lower than LIMIT. /
259	void
260	__free_stacks (size_t limit)
261	{
262	/ We reduce the size of the cache. Remove the last entries until*
263	the size is below the limit. /*
264	list_t *entry;
265	list_t *prev;
266
267	/ Search from the end of the list. /
268	list_for_each_prev_safe (entry, prev, &stack_cache)
269	{
270	struct pthread *curr;
271
272	curr = list_entry (entry, struct pthread, list);
273	if (FREE_P (curr))
274	{
275	/ Unlink the block. /
276	stack_list_del (entry);
277
278	/ Account for the freed memory. /
279	stack_cache_actsize -= curr->stackblock_size;
280
281	/ Free the memory associated with the ELF TLS. /
282	_dl_deallocate_tls (TLS_TPADJ (curr), false);
283
284	/ Remove this block. This should never fail. If it does*
285	something is really wrong. /*
286	if (munmap (curr->stackblock, curr->stackblock_size) != `0`)
287	abort ();
288
289	/ Maybe we have freed enough. /
290	if (stack_cache_actsize <= limit)
291	break;
292	}
293	}
294	}
295
296
297	/ Add a stack frame which is not used anymore to the stack. Must be*
298	called with the cache lock held. /*
299	static inline void
300	__attribute ((always_inline))
301	queue_stack (struct pthread *stack)
302	{
303	/ We unconditionally add the stack to the list. The memory may*
304	still be in use but it will not be reused until the kernel marks
305	the stack as not used anymore. /*
306	stack_list_add (&stack->list, &stack_cache);
307
308	stack_cache_actsize += stack->stackblock_size;
309	if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize))
310	__free_stacks (stack_cache_maxsize);
311	}
312
313
314	static int
315	internal_function
316	change_stack_perm (struct pthread *pd
317	#ifdef NEED_SEPARATE_REGISTER_STACK
318	, size_t pagemask
319	#endif
320	)
321	{
322	#ifdef NEED_SEPARATE_REGISTER_STACK
323	void *stack = (pd->stackblock
324	+ (((((pd->stackblock_size - pd->guardsize) / `2`)
325	& pagemask) + pd->guardsize) & pagemask));
326	size_t len = pd->stackblock + pd->stackblock_size - stack;
327	#elif _STACK_GROWS_DOWN
328	void *stack = pd->stackblock + pd->guardsize;
329	size_t len = pd->stackblock_size - pd->guardsize;
330	#elif _STACK_GROWS_UP
331	void *stack = pd->stackblock;
332	size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
333	#else
334	# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
335	#endif
336	if (mprotect (stack, len, PROT_READ \| PROT_WRITE \| PROT_EXEC) != `0`)
337	return errno;
338
339	return `0`;
340	}
341
342
343	/ Returns a usable stack for a new thread either by allocating a*
344	new stack or reusing a cached stack of sufficient size.
345	ATTR must be non-NULL and point to a valid pthread_attr.
346	PDP must be non-NULL. /*
347	static int
348	allocate_stack (const struct pthread_attr attr, struct* pthread **pdp,
349	ALLOCATE_STACK_PARMS)
350	{
351	struct pthread *pd;
352	size_t size;
353	size_t pagesize_m1 = __getpagesize () - `1`;
354
355	assert (powerof2 (pagesize_m1 + `1`));
356	assert (TCB_ALIGNMENT >= STACK_ALIGN);
357
358	/ Get the stack size from the attribute if it is set. Otherwise we*
359	use the default we determined at start time. /*
360	if (attr->stacksize != `0`)
361	size = attr->stacksize;
362	else
363	{
364	lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
365	size = __default_pthread_attr.stacksize;
366	lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
367	}
368
369	/ Get memory for the stack. /
370	if (__glibc_unlikely (attr->flags & ATTR_FLAG_STACKADDR))
371	{
372	uintptr_t adj;
373	char stackaddr = (char* *) attr->stackaddr;
374
375	/ Assume the same layout as the _STACK_GROWS_DOWN case, with struct*
376	pthread at the top of the stack block. Later we adjust the guard
377	location and stack address to match the _STACK_GROWS_UP case. /*
378	if (_STACK_GROWS_UP)
379	stackaddr += attr->stacksize;
380
381	/ If the user also specified the size of the stack make sure it*
382	is large enough. /*
383	if (attr->stacksize != `0`
384	&& attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
385	return EINVAL;
386
387	/ Adjust stack size for alignment of the TLS block. /
388	#if TLS_TCB_AT_TP
389	adj = ((uintptr_t) stackaddr - TLS_TCB_SIZE)
390	& __static_tls_align_m1;
391	assert (size > adj + TLS_TCB_SIZE);
392	#elif TLS_DTV_AT_TP
393	adj = ((uintptr_t) stackaddr - __static_tls_size)
394	& __static_tls_align_m1;
395	assert (size > adj);
396	#endif
397
398	/ The user provided some memory. Let's hope it matches the*
399	size... We do not allocate guard pages if the user provided
400	the stack. It is the user's responsibility to do this if it
401	is wanted. /*
402	#if TLS_TCB_AT_TP
403	pd = (struct pthread *) ((uintptr_t) stackaddr
404	- TLS_TCB_SIZE - adj);
405	#elif TLS_DTV_AT_TP
406	pd = (struct pthread *) (((uintptr_t) stackaddr
407	- __static_tls_size - adj)
408	- TLS_PRE_TCB_SIZE);
409	#endif
410
411	/ The user provided stack memory needs to be cleared. /
412	memset (pd, `'\0'`, sizeof (struct pthread));
413
414	/ The first TSD block is included in the TCB. /
415	pd->specific[`0`] = pd->specific_1stblock;
416
417	/ Remember the stack-related values. /
418	pd->stackblock = (char *) stackaddr - size;
419	pd->stackblock_size = size;
420
421	/ This is a user-provided stack. It will not be queued in the*
422	stack cache nor will the memory (except the TLS memory) be freed. /*
423	pd->user_stack = true;
424
425	/ This is at least the second thread. /
426	pd->header.multiple_threads = `1`;
427	#ifndef TLS_MULTIPLE_THREADS_IN_TCB
428	__pthread_multiple_threads = *__libc_multiple_threads_ptr = `1`;
429	#endif
430
431	#ifndef __ASSUME_PRIVATE_FUTEX
432	/ The thread must know when private futexes are supported. /
433	pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
434	header.private_futex);
435	#endif
436
437	#ifdef NEED_DL_SYSINFO
438	SETUP_THREAD_SYSINFO (pd);
439	#endif
440
441	/ Don't allow setxid until cloned. /
442	pd->setxid_futex = -`1`;
443
444	/ Allocate the DTV for this thread. /
445	if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
446	{
447	/ Something went wrong. /
448	assert (errno == ENOMEM);
449	return errno;
450	}
451
452
453	/ Prepare to modify global data. /
454	lll_lock (stack_cache_lock, LLL_PRIVATE);
455
456	/ And add to the list of stacks in use. /
457	list_add (&pd->list, &__stack_user);
458
459	lll_unlock (stack_cache_lock, LLL_PRIVATE);
460	}
461	else
462	{
463	/ Allocate some anonymous memory. If possible use the cache. /
464	size_t guardsize;
465	size_t reqsize;
466	void *mem;
467	const int prot = (PROT_READ \| PROT_WRITE
468	\| ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : `0`));
469
470	#if COLORING_INCREMENT != 0
471	/ Add one more page for stack coloring. Don't do it for stacks*
472	with 16 times pagesize or larger. This might just cause
473	unnecessary misalignment. /*
474	if (size <= `16` * pagesize_m1)
475	size += pagesize_m1 + `1`;
476	#endif
477
478	/ Adjust the stack size for alignment. /
479	size &= ~__static_tls_align_m1;
480	assert (size != `0`);
481
482	/ Make sure the size of the stack is enough for the guard and*
483	eventually the thread descriptor. /*
484	guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
485	if (guardsize < attr->guardsize \|\| size + guardsize < guardsize)
486	/ Arithmetic overflow. /
487	return EINVAL;
488	size += guardsize;
489	if (__builtin_expect (size < ((guardsize + __static_tls_size
490	+ MINIMAL_REST_STACK + pagesize_m1)
491	& ~pagesize_m1),
492	`0`))
493	/ The stack is too small (or the guard too large). /
494	return EINVAL;
495
496	/ Try to get a stack from the cache. /
497	reqsize = size;
498	pd = get_cached_stack (&size, &mem);
499	if (pd == NULL)
500	{
501	/ To avoid aliasing effects on a larger scale than pages we*
502	adjust the allocated stack size if necessary. This way
503	allocations directly following each other will not have
504	aliasing problems. /*
505	#if MULTI_PAGE_ALIASING != 0
506	if ((size % MULTI_PAGE_ALIASING) == `0`)
507	size += pagesize_m1 + `1`;
508	#endif
509
510	mem = mmap (NULL, size, prot,
511	MAP_PRIVATE \| MAP_ANONYMOUS \| MAP_STACK, -`1`, `0`);
512
513	if (__glibc_unlikely (mem == MAP_FAILED))
514	return errno;
515
516	/ SIZE is guaranteed to be greater than zero.*
517	So we can never get a null pointer back from mmap. /*
518	assert (mem != NULL);
519
520	#if COLORING_INCREMENT != 0
521	/ Atomically increment NCREATED. /
522	unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
523
524	/ We chose the offset for coloring by incrementing it for*
525	every new thread by a fixed amount. The offset used
526	module the page size. Even if coloring would be better
527	relative to higher alignment values it makes no sense to
528	do it since the mmap() interface does not allow us to
529	specify any alignment for the returned memory block. /*
530	size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
531
532	/ Make sure the coloring offsets does not disturb the alignment*
533	of the TCB and static TLS block. /*
534	if (__glibc_unlikely ((coloring & __static_tls_align_m1) != `0`))
535	coloring = (((coloring + __static_tls_align_m1)
536	& ~(__static_tls_align_m1))
537	& ~pagesize_m1);
538	#else
539	/ Unless specified we do not make any adjustments. /
540	# define coloring 0
541	#endif
542
543	/ Place the thread descriptor at the end of the stack. /
544	#if TLS_TCB_AT_TP
545	pd = (struct pthread ) ((char* *) mem + size - coloring) - `1`;
546	#elif TLS_DTV_AT_TP
547	pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
548	- __static_tls_size)
549	& ~__static_tls_align_m1)
550	- TLS_PRE_TCB_SIZE);
551	#endif
552
553	/ Remember the stack-related values. /
554	pd->stackblock = mem;
555	pd->stackblock_size = size;
556
557	/ We allocated the first block thread-specific data array.*
558	This address will not change for the lifetime of this
559	descriptor. /*
560	pd->specific[`0`] = pd->specific_1stblock;
561
562	/ This is at least the second thread. /
563	pd->header.multiple_threads = `1`;
564	#ifndef TLS_MULTIPLE_THREADS_IN_TCB
565	__pthread_multiple_threads = *__libc_multiple_threads_ptr = `1`;
566	#endif
567
568	#ifndef __ASSUME_PRIVATE_FUTEX
569	/ The thread must know when private futexes are supported. /
570	pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
571	header.private_futex);
572	#endif
573
574	#ifdef NEED_DL_SYSINFO
575	SETUP_THREAD_SYSINFO (pd);
576	#endif
577
578	/ Don't allow setxid until cloned. /
579	pd->setxid_futex = -`1`;
580
581	/ Allocate the DTV for this thread. /
582	if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
583	{
584	/ Something went wrong. /
585	assert (errno == ENOMEM);
586
587	/ Free the stack memory we just allocated. /
588	(void) munmap (mem, size);
589
590	return errno;
591	}
592
593
594	/ Prepare to modify global data. /
595	lll_lock (stack_cache_lock, LLL_PRIVATE);
596
597	/ And add to the list of stacks in use. /
598	stack_list_add (&pd->list, &stack_used);
599
600	lll_unlock (stack_cache_lock, LLL_PRIVATE);
601
602
603	/ There might have been a race. Another thread might have*
604	caused the stacks to get exec permission while this new
605	stack was prepared. Detect if this was possible and
606	change the permission if necessary. /*
607	if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != `0`
608	&& (prot & PROT_EXEC) == `0`, `0`))
609	{
610	int err = change_stack_perm (pd
611	#ifdef NEED_SEPARATE_REGISTER_STACK
612	, ~pagesize_m1
613	#endif
614	);
615	if (err != `0`)
616	{
617	/ Free the stack memory we just allocated. /
618	(void) munmap (mem, size);
619
620	return err;
621	}
622	}
623
624
625	/ Note that all of the stack and the thread descriptor is*
626	zeroed. This means we do not have to initialize fields
627	with initial value zero. This is specifically true for
628	the 'tid' field which is always set back to zero once the
629	stack is not used anymore and for the 'guardsize' field
630	which will be read next. /*
631	}
632
633	/ Create or resize the guard area if necessary. /
634	if (__glibc_unlikely (guardsize > pd->guardsize))
635	{
636	#ifdef NEED_SEPARATE_REGISTER_STACK
637	char *guard = mem + (((size - guardsize) / `2`) & ~pagesize_m1);
638	#elif _STACK_GROWS_DOWN
639	char *guard = mem;
640	#elif _STACK_GROWS_UP
641	char guard = (char* *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
642	#endif
643	if (mprotect (guard, guardsize, PROT_NONE) != `0`)
644	{
645	mprot_error:
646	lll_lock (stack_cache_lock, LLL_PRIVATE);
647
648	/ Remove the thread from the list. /
649	stack_list_del (&pd->list);
650
651	lll_unlock (stack_cache_lock, LLL_PRIVATE);
652
653	/ Get rid of the TLS block we allocated. /
654	_dl_deallocate_tls (TLS_TPADJ (pd), false);
655
656	/ Free the stack memory regardless of whether the size*
657	of the cache is over the limit or not. If this piece
658	of memory caused problems we better do not use it
659	anymore. Uh, and we ignore possible errors. There
660	is nothing we could do. /*
661	(void) munmap (mem, size);
662
663	return errno;
664	}
665
666	pd->guardsize = guardsize;
667	}
668	else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
669	`0`))
670	{
671	/ The old guard area is too large. /
672
673	#ifdef NEED_SEPARATE_REGISTER_STACK
674	char *guard = mem + (((size - guardsize) / `2`) & ~pagesize_m1);
675	char *oldguard = mem + (((size - pd->guardsize) / `2`) & ~pagesize_m1);
676
677	if (oldguard < guard
678	&& mprotect (oldguard, guard - oldguard, prot) != `0`)
679	goto mprot_error;
680
681	if (mprotect (guard + guardsize,
682	oldguard + pd->guardsize - guard - guardsize,
683	prot) != `0`)
684	goto mprot_error;
685	#elif _STACK_GROWS_DOWN
686	if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
687	prot) != `0`)
688	goto mprot_error;
689	#elif _STACK_GROWS_UP
690	char new_guard = (char* *)(((uintptr_t) pd - guardsize)
691	& ~pagesize_m1);
692	char old_guard = (char* *)(((uintptr_t) pd - pd->guardsize)
693	& ~pagesize_m1);
694	/ The guard size difference might be > 0, but once rounded*
695	to the nearest page the size difference might be zero. /*
696	if (new_guard > old_guard
697	&& mprotect (old_guard, new_guard - old_guard, prot) != `0`)
698	goto mprot_error;
699	#endif
700
701	pd->guardsize = guardsize;
702	}
703	/ The pthread_getattr_np() calls need to get passed the size*
704	requested in the attribute, regardless of how large the
705	actually used guardsize is. /*
706	pd->reported_guardsize = guardsize;
707	}
708
709	/ Initialize the lock. We have to do this unconditionally since the*
710	stillborn thread could be canceled while the lock is taken. /*
711	pd->lock = LLL_LOCK_INITIALIZER;
712
713	/ The robust mutex lists also need to be initialized*
714	unconditionally because the cleanup for the previous stack owner
715	might have happened in the kernel. /*
716	pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
717	- offsetof (pthread_mutex_t,
718	__data.__list.__next));
719	pd->robust_head.list_op_pending = NULL;
720	#ifdef __PTHREAD_MUTEX_HAVE_PREV
721	pd->robust_prev = &pd->robust_head;
722	#endif
723	pd->robust_head.list = &pd->robust_head;
724
725	/ We place the thread descriptor at the end of the stack. /
726	*pdp = pd;
727
728	#if _STACK_GROWS_DOWN
729	void *stacktop;
730
731	# if TLS_TCB_AT_TP
732	/ The stack begins before the TCB and the static TLS block. /
733	stacktop = ((char *) (pd + `1`) - __static_tls_size);
734	# elif TLS_DTV_AT_TP
735	stacktop = (char *) (pd - `1`);
736	# endif
737
738	# ifdef NEED_SEPARATE_REGISTER_STACK
739	*stack = pd->stackblock;
740	stacksize = stacktop - stack;
741	# else
742	*stack = stacktop;
743	# endif
744	#else
745	*stack = pd->stackblock;
746	#endif
747
748	return `0`;
749	}
750
751
752	void
753	internal_function
754	__deallocate_stack (struct pthread *pd)
755	{
756	lll_lock (stack_cache_lock, LLL_PRIVATE);
757
758	/ Remove the thread from the list of threads with user defined*
759	stacks. /*
760	stack_list_del (&pd->list);
761
762	/ Not much to do. Just free the mmap()ed memory. Note that we do*
763	not reset the 'used' flag in the 'tid' field. This is done by
764	the kernel. If no thread has been created yet this field is
765	still zero. /*
766	if (__glibc_likely (! pd->user_stack))
767	(void) queue_stack (pd);
768	else
769	/ Free the memory associated with the ELF TLS. /
770	_dl_deallocate_tls (TLS_TPADJ (pd), false);
771
772	lll_unlock (stack_cache_lock, LLL_PRIVATE);
773	}
774
775
776	int
777	internal_function
778	__make_stacks_executable (void **stack_endp)
779	{
780	/ First the main thread's stack. /
781	int err = _dl_make_stack_executable (stack_endp);
782	if (err != `0`)
783	return err;
784
785	#ifdef NEED_SEPARATE_REGISTER_STACK
786	const size_t pagemask = ~(__getpagesize () - `1`);
787	#endif
788
789	lll_lock (stack_cache_lock, LLL_PRIVATE);
790
791	list_t *runp;
792	list_for_each (runp, &stack_used)
793	{
794	err = change_stack_perm (list_entry (runp, struct pthread, list)
795	#ifdef NEED_SEPARATE_REGISTER_STACK
796	, pagemask
797	#endif
798	);
799	if (err != `0`)
800	break;
801	}
802
803	/ Also change the permission for the currently unused stacks. This*
804	might be wasted time but better spend it here than adding a check
805	in the fast path. /*
806	if (err == `0`)
807	list_for_each (runp, &stack_cache)
808	{
809	err = change_stack_perm (list_entry (runp, struct pthread, list)
810	#ifdef NEED_SEPARATE_REGISTER_STACK
811	, pagemask
812	#endif
813	);
814	if (err != `0`)
815	break;
816	}
817
818	lll_unlock (stack_cache_lock, LLL_PRIVATE);
819
820	return err;
821	}
822
823
824	/ In case of a fork() call the memory allocation in the child will be*
825	the same but only one thread is running. All stacks except that of
826	the one running thread are not used anymore. We have to recycle
827	them. /*
828	void
829	__reclaim_stacks (void)
830	{
831	struct pthread self = (struct* pthread *) THREAD_SELF;
832
833	/ No locking necessary. The caller is the only stack in use. But*
834	we have to be aware that we might have interrupted a list
835	operation. /*
836
837	if (in_flight_stack != `0`)
838	{
839	bool add_p = in_flight_stack & `1`;
840	list_t elem = (list_t ) (in_flight_stack & ~(uintptr_t) `1`);
841
842	if (add_p)
843	{
844	/ We always add at the beginning of the list. So in this case we*
845	only need to check the beginning of these lists to see if the
846	pointers at the head of the list are inconsistent. /*
847	list_t *l = NULL;
848
849	if (stack_used.next->prev != &stack_used)
850	l = &stack_used;
851	else if (stack_cache.next->prev != &stack_cache)
852	l = &stack_cache;
853
854	if (l != NULL)
855	{
856	assert (l->next->prev == elem);
857	elem->next = l->next;
858	elem->prev = l;
859	l->next = elem;
860	}
861	}
862	else
863	{
864	/ We can simply always replay the delete operation. /
865	elem->next->prev = elem->prev;
866	elem->prev->next = elem->next;
867	}
868	}
869
870	/ Mark all stacks except the still running one as free. /
871	list_t *runp;
872	list_for_each (runp, &stack_used)
873	{
874	struct pthread curp = list_entry (runp, struct* pthread, list);
875	if (curp != self)
876	{
877	/ This marks the stack as free. /
878	curp->tid = `0`;
879
880	/ Account for the size of the stack. /
881	stack_cache_actsize += curp->stackblock_size;
882
883	if (curp->specific_used)
884	{
885	/ Clear the thread-specific data. /
886	memset (curp->specific_1stblock, `'\0'`,
887	sizeof (curp->specific_1stblock));
888
889	curp->specific_used = false;
890
891	for (size_t cnt = `1`; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
892	if (curp->specific[cnt] != NULL)
893	{
894	memset (curp->specific[cnt], `'\0'`,
895	sizeof (curp->specific_1stblock));
896
897	/ We have allocated the block which we do not*
898	free here so re-set the bit. /*
899	curp->specific_used = true;
900	}
901	}
902	}
903	}
904
905	/ Add the stack of all running threads to the cache. /
906	list_splice (&stack_used, &stack_cache);
907
908	/ Remove the entry for the current thread to from the cache list*
909	and add it to the list of running threads. Which of the two
910	lists is decided by the user_stack flag. /*
911	stack_list_del (&self->list);
912
913	/ Re-initialize the lists for all the threads. /
914	INIT_LIST_HEAD (&stack_used);
915	INIT_LIST_HEAD (&__stack_user);
916
917	if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
918	list_add (&self->list, &__stack_user);
919	else
920	list_add (&self->list, &stack_used);
921
922	/ There is one thread running. /
923	__nptl_nthreads = `1`;
924
925	in_flight_stack = `0`;
926
927	/ Initialize locks. /
928	stack_cache_lock = LLL_LOCK_INITIALIZER;
929	__default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
930	}
931
932
#if HP_TIMING_AVAIL
# undef __find_thread_by_id
/* Find a thread given the thread ID.  Searches, under
   stack_cache_lock, first the list of threads with library-allocated
   stacks and then the list of threads with user-provided stacks.
   Returns the descriptor whose 'tid' equals TID, or NULL if there is
   none.  */
attribute_hidden
struct pthread *
__find_thread_by_id (pid_t tid)
{
  struct pthread *result = NULL;

  lll_lock (stack_cache_lock, LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &stack_used)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &__stack_user)
    {
      struct pthread *curp;

      curp = list_entry (runp, struct pthread, list);

      if (curp->tid == tid)
        {
          result = curp;
          goto out;
        }
    }

 out:
  lll_unlock (stack_cache_lock, LLL_PRIVATE);

  return result;
}
#endif
979
980
981	#ifdef SIGSETXID
982	static void
983	internal_function
984	setxid_mark_thread (struct xid_command cmdp, struct* pthread *t)
985	{
986	int ch;
987
988	/ Wait until this thread is cloned. /
989	if (t->setxid_futex == -`1`
990	&& ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -`2`, -`1`))
991	do
992	futex_wait_simple (&t->setxid_futex, -`2`, FUTEX_PRIVATE);
993	while (t->setxid_futex == -`2`);
994
995	/ Don't let the thread exit before the setxid handler runs. /
996	t->setxid_futex = `0`;
997
998	do
999	{
1000	ch = t->cancelhandling;
1001
1002	/ If the thread is exiting right now, ignore it. /
1003	if ((ch & EXITING_BITMASK) != `0`)
1004	{
1005	/ Release the futex if there is no other setxid in*
1006	progress. /*
1007	if ((ch & SETXID_BITMASK) == `0`)
1008	{
1009	t->setxid_futex = `1`;
1010	futex_wake (&t->setxid_futex, `1`, FUTEX_PRIVATE);
1011	}
1012	return;
1013	}
1014	}
1015	while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
1016	ch \| SETXID_BITMASK, ch));
1017	}
1018
1019
1020	static void
1021	internal_function
1022	setxid_unmark_thread (struct xid_command cmdp, struct* pthread *t)
1023	{
1024	int ch;
1025
1026	do
1027	{
1028	ch = t->cancelhandling;
1029	if ((ch & SETXID_BITMASK) == `0`)
1030	return;
1031	}
1032	while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
1033	ch & ~SETXID_BITMASK, ch));
1034
1035	/ Release the futex just in case. /
1036	t->setxid_futex = `1`;
1037	futex_wake (&t->setxid_futex, `1`, FUTEX_PRIVATE);
1038	}
1039
1040
1041	static int
1042	internal_function
1043	setxid_signal_thread (struct xid_command cmdp, struct* pthread *t)
1044	{
1045	if ((t->cancelhandling & SETXID_BITMASK) == `0`)
1046	return `0`;
1047
1048	int val;
1049	pid_t pid = __getpid ();
1050	INTERNAL_SYSCALL_DECL (err);
1051	val = INTERNAL_SYSCALL_CALL (tgkill, err, pid, t->tid, SIGSETXID);
1052
1053	/ If this failed, it must have had not started yet or else exited. /
1054	if (!INTERNAL_SYSCALL_ERROR_P (val, err))
1055	{
1056	atomic_increment (&cmdp->cntr);
1057	return `1`;
1058	}
1059	else
1060	return `0`;
1061	}
1062
1063	/ Check for consistency across setid system call results. The abort
1064	should not happen as long as all privileges changes happen through
1065	the glibc wrappers. ERROR must be 0 (no error) or an errno
1066	code. /*
1067	void
1068	attribute_hidden
1069	__nptl_setxid_error (struct xid_command cmdp, int* error)
1070	{
1071	do
1072	{
1073	int olderror = cmdp->error;
1074	if (olderror == error)
1075	break;
1076	if (olderror != -`1`)
1077	/ Mismatch between current and previous results. /
1078	abort ();
1079	}
1080	while (atomic_compare_and_exchange_bool_acq (&cmdp->error, error, -`1`));
1081	}
1082
1083	int
1084	attribute_hidden
1085	__nptl_setxid (struct xid_command *cmdp)
1086	{
1087	int signalled;
1088	int result;
1089	lll_lock (stack_cache_lock, LLL_PRIVATE);
1090
1091	__xidcmd = cmdp;
1092	cmdp->cntr = `0`;
1093	cmdp->error = -`1`;
1094
1095	struct pthread *self = THREAD_SELF;
1096
1097	/ Iterate over the list with system-allocated threads first. /
1098	list_t *runp;
1099	list_for_each (runp, &stack_used)
1100	{
1101	struct pthread t = list_entry (runp, struct* pthread, list);
1102	if (t == self)
1103	continue;
1104
1105	setxid_mark_thread (cmdp, t);
1106	}
1107
1108	/ Now the list with threads using user-allocated stacks. /
1109	list_for_each (runp, &__stack_user)
1110	{
1111	struct pthread t = list_entry (runp, struct* pthread, list);
1112	if (t == self)
1113	continue;
1114
1115	setxid_mark_thread (cmdp, t);
1116	}
1117
1118	/ Iterate until we don't succeed in signalling anyone. That means*
1119	we have gotten all running threads, and their children will be
1120	automatically correct once started. /*
1121	do
1122	{
1123	signalled = `0`;
1124
1125	list_for_each (runp, &stack_used)
1126	{
1127	struct pthread t = list_entry (runp, struct* pthread, list);
1128	if (t == self)
1129	continue;
1130
1131	signalled += setxid_signal_thread (cmdp, t);
1132	}
1133
1134	list_for_each (runp, &__stack_user)
1135	{
1136	struct pthread t = list_entry (runp, struct* pthread, list);
1137	if (t == self)
1138	continue;
1139
1140	signalled += setxid_signal_thread (cmdp, t);
1141	}
1142
1143	int cur = cmdp->cntr;
1144	while (cur != `0`)
1145	{
1146	futex_wait_simple ((unsigned int *) &cmdp->cntr, cur,
1147	FUTEX_PRIVATE);
1148	cur = cmdp->cntr;
1149	}
1150	}
1151	while (signalled != `0`);
1152
1153	/ Clean up flags, so that no thread blocks during exit waiting*
1154	for a signal which will never come. /*
1155	list_for_each (runp, &stack_used)
1156	{
1157	struct pthread t = list_entry (runp, struct* pthread, list);
1158	if (t == self)
1159	continue;
1160
1161	setxid_unmark_thread (cmdp, t);
1162	}
1163
1164	list_for_each (runp, &__stack_user)
1165	{
1166	struct pthread t = list_entry (runp, struct* pthread, list);
1167	if (t == self)
1168	continue;
1169
1170	setxid_unmark_thread (cmdp, t);
1171	}
1172
1173	/ This must be last, otherwise the current thread might not have*
1174	permissions to send SIGSETXID syscall to the other threads. /*
1175	INTERNAL_SYSCALL_DECL (err);
1176	result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, `3`,
1177	cmdp->id[`0`], cmdp->id[`1`], cmdp->id[`2`]);
1178	int error = `0`;
1179	if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (result, err)))
1180	{
1181	error = INTERNAL_SYSCALL_ERRNO (result, err);
1182	__set_errno (error);
1183	result = -`1`;
1184	}
1185	__nptl_setxid_error (cmdp, error);
1186
1187	lll_unlock (stack_cache_lock, LLL_PRIVATE);
1188	return result;
1189	}
1190	#endif /* SIGSETXID. */
1191
1192
1193	static inline void __attribute__((always_inline))
1194	init_one_static_tls (struct pthread curp, struct* link_map *map)
1195	{
1196	# if TLS_TCB_AT_TP
1197	void dest = (char* *) curp - map->l_tls_offset;
1198	# elif TLS_DTV_AT_TP
1199	void dest = (char* *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
1200	# else
1201	# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
1202	# endif
1203
1204	/ Initialize the memory. /
1205	memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
1206	`'\0'`, map->l_tls_blocksize - map->l_tls_initimage_size);
1207	}
1208
1209	void
1210	attribute_hidden
1211	__pthread_init_static_tls (struct link_map *map)
1212	{
1213	lll_lock (stack_cache_lock, LLL_PRIVATE);
1214
1215	/ Iterate over the list with system-allocated threads first. /
1216	list_t *runp;
1217	list_for_each (runp, &stack_used)
1218	init_one_static_tls (list_entry (runp, struct pthread, list), map);
1219
1220	/ Now the list with threads using user-allocated stacks. /
1221	list_for_each (runp, &__stack_user)
1222	init_one_static_tls (list_entry (runp, struct pthread, list), map);
1223
1224	lll_unlock (stack_cache_lock, LLL_PRIVATE);
1225	}
1226
1227
1228	void
1229	attribute_hidden
1230	__wait_lookup_done (void)
1231	{
1232	lll_lock (stack_cache_lock, LLL_PRIVATE);
1233
1234	struct pthread *self = THREAD_SELF;
1235
1236	/ Iterate over the list with system-allocated threads first. /
1237	list_t *runp;
1238	list_for_each (runp, &stack_used)
1239	{
1240	struct pthread t = list_entry (runp, struct* pthread, list);
1241	if (t == self \|\| t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1242	continue;
1243
1244	int *const gscope_flagp = &t->header.gscope_flag;
1245
1246	/ We have to wait until this thread is done with the global*
1247	scope. First tell the thread that we are waiting and
1248	possibly have to be woken. /*
1249	if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1250	THREAD_GSCOPE_FLAG_WAIT,
1251	THREAD_GSCOPE_FLAG_USED))
1252	continue;
1253
1254	do
1255	futex_wait_simple ((unsigned int *) gscope_flagp,
1256	THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
1257	while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1258	}
1259
1260	/ Now the list with threads using user-allocated stacks. /
1261	list_for_each (runp, &__stack_user)
1262	{
1263	struct pthread t = list_entry (runp, struct* pthread, list);
1264	if (t == self \|\| t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1265	continue;
1266
1267	int *const gscope_flagp = &t->header.gscope_flag;
1268
1269	/ We have to wait until this thread is done with the global*
1270	scope. First tell the thread that we are waiting and
1271	possibly have to be woken. /*
1272	if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1273	THREAD_GSCOPE_FLAG_WAIT,
1274	THREAD_GSCOPE_FLAG_USED))
1275	continue;
1276
1277	do
1278	futex_wait_simple ((unsigned int *) gscope_flagp,
1279	THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
1280	while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1281	}
1282
1283	lll_unlock (stack_cache_lock, LLL_PRIVATE);
1284	}
1285

Browse the source code of glibc_src_2.25/nptl/allocatestack.c