/* Handle general operations.
   Copyright (C) 1997-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <aio.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <aio_misc.h>

#ifndef aio_create_helper_thread
# define aio_create_helper_thread __aio_create_helper_thread

extern inline int
__aio_create_helper_thread (pthread_t *threadp, void *(*tf) (void *), void *arg)
{
  pthread_attr_t attr;

  /* Make sure the thread is created detached.  */
  pthread_attr_init (&attr);
  pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);

  int ret = pthread_create (threadp, &attr, tf, arg);

  (void) pthread_attr_destroy (&attr);
  return ret;
}
#endif
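
/* Note that the function above is only the generic fallback: a port can
   supply its own helper-thread factory (for instance to use a smaller
   stack, or to mask signals around pthread_create) by defining the
   aio_create_helper_thread macro in its aio_misc.h before this point;
   the Linux sysdeps are an example of this.  */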

static void add_request_to_runlist (struct requestlist *newrequest);

/* Pool of request list entries.  */
static struct requestlist **pool;

/* Number of rows the pool table can hold (pool_max_size) and number of
   rows actually allocated so far (pool_size).  */
static size_t pool_max_size;
static size_t pool_size;

/* We implement a two-dimensional array but allocate each row separately.
   The macro below determines how many entries should be used per row.
   It should be a power of two.  */
#define ENTRIES_PER_ROW 32

/* How many rows we allocate at once.  */
#define ROWS_STEP 8
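
/* For illustration, with the default aio_num of 64 the pool grows like
   this (row 0 is sized from optim.aio_num, later rows use
   ENTRIES_PER_ROW; see get_elem below):

     pool -> row 0: 64 entries   (first allocation)
             row 1: 32 entries
             row 2: 32 entries
             ...

   The table of row pointers itself grows in steps of ROWS_STEP.  */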

/* List of available entries.  */
static struct requestlist *freelist;

/* List of requests waiting to be processed.  */
static struct requestlist *runlist;

/* List of all requests currently being processed, ordered by file
   descriptor.  */
static struct requestlist *requests;
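
/* A sketch of the `requests' structure as the code below maintains it:
   entries for distinct descriptors are chained through next_fd/last_fd in
   ascending order of aio_fildes, and all further requests for the same
   descriptor hang off the head entry through next_prio in descending
   priority order:

     requests -> [fd 3] <-> [fd 7] <-> [fd 9]     (next_fd/last_fd)
                    |          |
                 [fd 3]     [fd 7]                (next_prio, lower prio)

   Independently of this, next_run chains the runnable requests into
   `runlist'.  */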

/* Number of threads currently running.  */
static int nthreads;

/* Number of threads waiting for work to arrive.  */
static int idle_thread_count;


/* These are the values used to optimize the use of AIO.  The user can
   override them by means of the `aio_init' function.  */
static struct aioinit optim =
{
  20,  /* int aio_threads;    Maximal number of threads.  */
  64,  /* int aio_num;        Number of expected simultaneous requests.  */
  0,   /* int aio_locks;      Not used.  */
  0,   /* int aio_usedba;     Not used.  */
  0,   /* int aio_debug;      Not used.  */
  0,   /* int aio_numusers;   Not used.  */
  1,   /* int aio_idle_time;  Seconds before an idle thread terminates.  */
  0    /* int aio_reserved;   Not used.  */
};


/* Since the list is global we need a mutex protecting it.  */
pthread_mutex_t __aio_requests_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;

/* When you add a request to the list and there are idle threads present,
   you signal this condition variable.  When a thread finishes work, it
   waits on this condition variable for a time before it actually exits.  */
pthread_cond_t __aio_new_request_notification = PTHREAD_COND_INITIALIZER;
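
/* In short: every access to the lists and counters above happens with
   __aio_requests_mutex held, and an idle handler thread sits in
   pthread_cond_timedwait on this condition variable for up to
   optim.aio_idle_time seconds (see handle_fildes_io below) before it
   gives up and exits.  */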


/* Functions to handle request list pool.  */
static struct requestlist *
get_elem (void)
{
  struct requestlist *result;

  if (freelist == NULL)
    {
      struct requestlist *new_row;
      int cnt;

      assert (sizeof (struct aiocb) == sizeof (struct aiocb64));

      if (pool_size + 1 >= pool_max_size)
        {
          size_t new_max_size = pool_max_size + ROWS_STEP;
          struct requestlist **new_tab;

          new_tab = (struct requestlist **)
            realloc (pool, new_max_size * sizeof (struct requestlist *));

          if (new_tab == NULL)
            return NULL;

          pool_max_size = new_max_size;
          pool = new_tab;
        }

      /* Allocate the new row.  */
      cnt = pool_size == 0 ? optim.aio_num : ENTRIES_PER_ROW;
      new_row = (struct requestlist *) calloc (cnt,
                                               sizeof (struct requestlist));
      if (new_row == NULL)
        return NULL;

      pool[pool_size++] = new_row;

      /* Put all the new entries in the freelist.  */
      do
        {
          new_row->next_prio = freelist;
          freelist = new_row++;
        }
      while (--cnt > 0);
    }

  result = freelist;
  freelist = freelist->next_prio;

  return result;
}


/* Return an entry to the pool's free list.  The caller must hold
   __aio_requests_mutex.  */
void
internal_function
__aio_free_request (struct requestlist *elem)
{
  elem->running = no;
  elem->next_prio = freelist;
  freelist = elem;
}


/* Find the request list entry for the request ELEM, or return NULL if it
   is not queued.  */
struct requestlist *
internal_function
__aio_find_req (aiocb_union *elem)
{
  struct requestlist *runp = requests;
  int fildes = elem->aiocb.aio_fildes;

  while (runp != NULL && runp->aiocbp->aiocb.aio_fildes < fildes)
    runp = runp->next_fd;

  if (runp != NULL)
    {
      if (runp->aiocbp->aiocb.aio_fildes != fildes)
        runp = NULL;
      else
        while (runp != NULL && runp->aiocbp != elem)
          runp = runp->next_prio;
    }

  return runp;
}


/* Find the head of the request list for descriptor FILDES, or return
   NULL if no request is queued for it.  */
struct requestlist *
internal_function
__aio_find_req_fd (int fildes)
{
  struct requestlist *runp = requests;

  while (runp != NULL && runp->aiocbp->aiocb.aio_fildes < fildes)
    runp = runp->next_fd;

  return (runp != NULL && runp->aiocbp->aiocb.aio_fildes == fildes
          ? runp : NULL);
}


/* Unlink REQ from the queues.  LAST is the request immediately preceding
   REQ in its per-descriptor priority list, or NULL if REQ heads that
   list.  If ALL is nonzero, REQ and every request queued behind it for
   the same descriptor are cut off together.  If REQ was runnable (on the
   run list), it is removed from there as well.  */
void
internal_function
__aio_remove_request (struct requestlist *last, struct requestlist *req,
                      int all)
{
  assert (req->running == yes || req->running == queued
          || req->running == done);

  if (last != NULL)
    last->next_prio = all ? NULL : req->next_prio;
  else
    {
      if (all || req->next_prio == NULL)
        {
          if (req->last_fd != NULL)
            req->last_fd->next_fd = req->next_fd;
          else
            requests = req->next_fd;
          if (req->next_fd != NULL)
            req->next_fd->last_fd = req->last_fd;
        }
      else
        {
          if (req->last_fd != NULL)
            req->last_fd->next_fd = req->next_prio;
          else
            requests = req->next_prio;

          if (req->next_fd != NULL)
            req->next_fd->last_fd = req->next_prio;

          req->next_prio->last_fd = req->last_fd;
          req->next_prio->next_fd = req->next_fd;

          /* Mark this entry as runnable.  */
          req->next_prio->running = yes;
        }

      if (req->running == yes)
        {
          struct requestlist *runp = runlist;

          last = NULL;
          while (runp != NULL)
            {
              if (runp == req)
                {
                  if (last == NULL)
                    runlist = runp->next_run;
                  else
                    last->next_run = runp->next_run;
                  break;
                }
              last = runp;
              runp = runp->next_run;
            }
        }
    }
}
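
/* To illustrate the promotion case above: when the head request for a
   descriptor is removed and more requests are queued behind it, the next
   one is spliced into the descriptor chain in its place and marked
   runnable:

     before:  [A fd5] in the fd chain, A->next_prio = B, B->next_prio = C
     after:   [B fd5] in the fd chain, B->next_prio = C, B->running = yes  */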


/* The thread handler.  */
static void *handle_fildes_io (void *arg);


/* User optimization.  */
void
__aio_init (const struct aioinit *init)
{
  /* Get the mutex.  */
  pthread_mutex_lock (&__aio_requests_mutex);

  /* Only allow writing new values if the table is not yet allocated.  */
  if (pool == NULL)
    {
      optim.aio_threads = init->aio_threads < 1 ? 1 : init->aio_threads;
      assert (powerof2 (ENTRIES_PER_ROW));
      optim.aio_num = (init->aio_num < ENTRIES_PER_ROW
                       ? ENTRIES_PER_ROW
                       : init->aio_num & ~(ENTRIES_PER_ROW - 1));
    }

  if (init->aio_idle_time != 0)
    optim.aio_idle_time = init->aio_idle_time;

  /* Release the mutex.  */
  pthread_mutex_unlock (&__aio_requests_mutex);
}
weak_alias (__aio_init, aio_init)
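
/* For illustration only (caller-side code, not part of this file): an
   application could tune these knobs before issuing its first request:

     struct aioinit init = { 0 };
     init.aio_threads = 4;     -- cap the helper-thread pool
     init.aio_num = 128;       -- expected number of simultaneous requests
     aio_init (&init);

   As implemented above, aio_num is rounded down to a multiple of
   ENTRIES_PER_ROW, with ENTRIES_PER_ROW as the floor, and the values only
   take effect if no request has been queued yet.  */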


/* The main function of the async I/O handling.  It enqueues requests
   and if necessary starts and handles threads.  */
struct requestlist *
internal_function
__aio_enqueue_request (aiocb_union *aiocbp, int operation)
{
  int result = 0;
  int policy, prio;
  struct sched_param param;
  struct requestlist *last, *runp, *newp;
  int running = no;

  if (operation == LIO_SYNC || operation == LIO_DSYNC)
    aiocbp->aiocb.aio_reqprio = 0;
  else if (aiocbp->aiocb.aio_reqprio < 0
#ifdef AIO_PRIO_DELTA_MAX
           || aiocbp->aiocb.aio_reqprio > AIO_PRIO_DELTA_MAX
#endif
           )
    {
      /* Invalid priority value.  */
      __set_errno (EINVAL);
      aiocbp->aiocb.__error_code = EINVAL;
      aiocbp->aiocb.__return_value = -1;
      return NULL;
    }

  /* Compute priority for this request.  */
  pthread_getschedparam (pthread_self (), &policy, &param);
  prio = param.sched_priority - aiocbp->aiocb.aio_reqprio;

  /* Get the mutex.  */
  pthread_mutex_lock (&__aio_requests_mutex);

  last = NULL;
  runp = requests;
  /* First check whether this file descriptor is already being
     worked on.  */
  while (runp != NULL
         && runp->aiocbp->aiocb.aio_fildes < aiocbp->aiocb.aio_fildes)
    {
      last = runp;
      runp = runp->next_fd;
    }

  /* Get a new element for the waiting list.  */
  newp = get_elem ();
  if (newp == NULL)
    {
      pthread_mutex_unlock (&__aio_requests_mutex);
      __set_errno (EAGAIN);
      return NULL;
    }
  newp->aiocbp = aiocbp;
#ifdef BROKEN_THREAD_SIGNALS
  newp->caller_pid = (aiocbp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL
                      ? getpid () : 0);
#endif
  newp->waiting = NULL;

  aiocbp->aiocb.__abs_prio = prio;
  aiocbp->aiocb.__policy = policy;
  aiocbp->aiocb.aio_lio_opcode = operation;
  aiocbp->aiocb.__error_code = EINPROGRESS;
  aiocbp->aiocb.__return_value = 0;

  if (runp != NULL
      && runp->aiocbp->aiocb.aio_fildes == aiocbp->aiocb.aio_fildes)
    {
      /* The current file descriptor is already being worked on.  It makes
         no sense to start another thread, since this new thread would
         fight with the running thread for the resources.  But we also
         cannot say that the thread processing this descriptor shall
         process this request immediately after finishing its current job,
         because there may be other requests in the run queue which have a
         higher priority.  */

      /* Simply enqueue it after the running one according to the
         priority.  */
      last = NULL;
      while (runp->next_prio != NULL
             && runp->next_prio->aiocbp->aiocb.__abs_prio >= prio)
        {
          last = runp;
          runp = runp->next_prio;
        }

      newp->next_prio = runp->next_prio;
      runp->next_prio = newp;

      running = queued;
    }
  else
    {
      running = yes;
      /* Enqueue this request for a new descriptor.  */
      if (last == NULL)
        {
          newp->last_fd = NULL;
          newp->next_fd = requests;
          if (requests != NULL)
            requests->last_fd = newp;
          requests = newp;
        }
      else
        {
          newp->next_fd = last->next_fd;
          newp->last_fd = last;
          last->next_fd = newp;
          if (newp->next_fd != NULL)
            newp->next_fd->last_fd = newp;
        }

      newp->next_prio = NULL;
      last = NULL;
    }

  if (running == yes)
    {
      /* We try to create a new thread for this file descriptor.  The
         function which gets called will handle all available requests
         for this descriptor and when all are processed it will
         terminate.

         If no new thread can be created or if the specified limit of
         threads for AIO is reached, we queue the request.  */

      /* See if we need to and are able to create a thread.  */
      if (nthreads < optim.aio_threads && idle_thread_count == 0)
        {
          pthread_t thid;

          running = newp->running = allocated;

          /* Now try to start a thread.  */
          result = aio_create_helper_thread (&thid, handle_fildes_io, newp);
          if (result == 0)
            /* We managed to enqueue the request.  All errors which can
               happen now can be recognized by calls to `aio_return' and
               `aio_error'.  */
            ++nthreads;
          else
            {
              /* Reset the running flag.  The new request is not running.  */
              running = newp->running = yes;

              if (nthreads == 0)
                {
                  /* We cannot create a thread at the moment and no thread
                     is running either.  This is a problem.  `errno' is
                     set to EAGAIN if this is only a temporary problem.  */
                  __aio_remove_request (last, newp, 0);
                }
              else
                result = 0;
            }
        }
    }

  /* Enqueue the request in the run queue if it is not yet running.  */
  if (running == yes && result == 0)
    {
      add_request_to_runlist (newp);

      /* If there is a thread waiting for work, then let it know that we
         have just given it something to do.  */
      if (idle_thread_count > 0)
        pthread_cond_signal (&__aio_new_request_notification);
    }

  if (result == 0)
    newp->running = running;
  else
    {
      /* Something went wrong.  */
      __aio_free_request (newp);
      aiocbp->aiocb.__error_code = result;
      __set_errno (result);
      newp = NULL;
    }

  /* Release the mutex.  */
  pthread_mutex_unlock (&__aio_requests_mutex);

  return newp;
}
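
/* A sketch of how this function is reached (for orientation; the actual
   entry points live in other files of this directory): the public
   functions funnel their control blocks into __aio_enqueue_request with
   the matching opcode, e.g.

     aio_read (aiocbp)    ->  __aio_enqueue_request (..., LIO_READ)
     aio_write (aiocbp)   ->  __aio_enqueue_request (..., LIO_WRITE)
     aio_fsync (op, ...)  ->  __aio_enqueue_request (..., LIO_SYNC or
                                                     LIO_DSYNC)

   The 64-bit variants set bit 7 of the opcode (LIO_READ64 etc.) so that
   handle_fildes_io below can pick the pread64/pwrite64 path.  */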


static void *
handle_fildes_io (void *arg)
{
  pthread_t self = pthread_self ();
  struct sched_param param;
  struct requestlist *runp = (struct requestlist *) arg;
  aiocb_union *aiocbp;
  int policy;
  int fildes;

  pthread_getschedparam (self, &policy, &param);

  do
    {
      /* If runp is NULL, then we were created to service the work queue
         in general, not to handle any particular request.  In that case
         we skip the "do work" stuff on the first pass, and go directly to
         the "get work off the work queue" part of this loop, which is
         near the end.  */
      if (runp == NULL)
        pthread_mutex_lock (&__aio_requests_mutex);
      else
        {
          /* Hopefully this request is marked as running.  */
          assert (runp->running == allocated);

          /* Update our variables.  */
          aiocbp = runp->aiocbp;
          fildes = aiocbp->aiocb.aio_fildes;

          /* Change the priority to the requested value (if necessary).  */
          if (aiocbp->aiocb.__abs_prio != param.sched_priority
              || aiocbp->aiocb.__policy != policy)
            {
              param.sched_priority = aiocbp->aiocb.__abs_prio;
              policy = aiocbp->aiocb.__policy;
              pthread_setschedparam (self, policy, &param);
            }
          /* Process request pointed to by RUNP.  We must not be disturbed
             by signals.  The low seven bits of the opcode select the
             operation; bit 7 marks a 64-bit request, which must use
             pread64/pwrite64 when off_t is narrower than off64_t.  */
          if ((aiocbp->aiocb.aio_lio_opcode & 127) == LIO_READ)
            {
              if (sizeof (off_t) != sizeof (off64_t)
                  && aiocbp->aiocb.aio_lio_opcode & 128)
                aiocbp->aiocb.__return_value =
                  TEMP_FAILURE_RETRY (__pread64 (fildes, (void *)
                                                 aiocbp->aiocb64.aio_buf,
                                                 aiocbp->aiocb64.aio_nbytes,
                                                 aiocbp->aiocb64.aio_offset));
              else
                aiocbp->aiocb.__return_value =
                  TEMP_FAILURE_RETRY (__libc_pread (fildes,
                                                    (void *)
                                                    aiocbp->aiocb.aio_buf,
                                                    aiocbp->aiocb.aio_nbytes,
                                                    aiocbp->aiocb.aio_offset));

              if (aiocbp->aiocb.__return_value == -1 && errno == ESPIPE)
                /* The Linux kernel is different from others.  It returns
                   ESPIPE if using pread on a socket.  Other platforms
                   simply ignore the offset parameter and behave like
                   read.  */
                aiocbp->aiocb.__return_value =
                  TEMP_FAILURE_RETRY (read (fildes,
                                            (void *) aiocbp->aiocb64.aio_buf,
                                            aiocbp->aiocb64.aio_nbytes));
            }
          else if ((aiocbp->aiocb.aio_lio_opcode & 127) == LIO_WRITE)
            {
              if (sizeof (off_t) != sizeof (off64_t)
                  && aiocbp->aiocb.aio_lio_opcode & 128)
                aiocbp->aiocb.__return_value =
                  TEMP_FAILURE_RETRY (__pwrite64 (fildes, (const void *)
                                                  aiocbp->aiocb64.aio_buf,
                                                  aiocbp->aiocb64.aio_nbytes,
                                                  aiocbp->aiocb64.aio_offset));
              else
                aiocbp->aiocb.__return_value =
                  TEMP_FAILURE_RETRY (__libc_pwrite (fildes, (const void *)
                                                     aiocbp->aiocb.aio_buf,
                                                     aiocbp->aiocb.aio_nbytes,
                                                     aiocbp->aiocb.aio_offset));

              if (aiocbp->aiocb.__return_value == -1 && errno == ESPIPE)
                /* The Linux kernel is different from others.  It returns
                   ESPIPE if using pwrite on a socket.  Other platforms
                   simply ignore the offset parameter and behave like
                   write.  */
                aiocbp->aiocb.__return_value =
                  TEMP_FAILURE_RETRY (write (fildes,
                                             (void *) aiocbp->aiocb64.aio_buf,
                                             aiocbp->aiocb64.aio_nbytes));
            }
          else if (aiocbp->aiocb.aio_lio_opcode == LIO_DSYNC)
            aiocbp->aiocb.__return_value =
              TEMP_FAILURE_RETRY (fdatasync (fildes));
          else if (aiocbp->aiocb.aio_lio_opcode == LIO_SYNC)
            aiocbp->aiocb.__return_value =
              TEMP_FAILURE_RETRY (fsync (fildes));
          else
            {
              /* This is an invalid opcode.  */
              aiocbp->aiocb.__return_value = -1;
              __set_errno (EINVAL);
            }

          /* Get the mutex.  */
          pthread_mutex_lock (&__aio_requests_mutex);

          if (aiocbp->aiocb.__return_value == -1)
            aiocbp->aiocb.__error_code = errno;
          else
            aiocbp->aiocb.__error_code = 0;

          /* Send the signal to notify about finished processing of the
             request.  */
          __aio_notify (runp);

          /* For debugging purposes we reset the running flag of the
             finished request.  */
          assert (runp->running == allocated);
          runp->running = done;

          /* Now dequeue the current request.  */
          __aio_remove_request (NULL, runp, 0);
          if (runp->next_prio != NULL)
            add_request_to_runlist (runp->next_prio);

          /* Free the old element.  */
          __aio_free_request (runp);
        }

      runp = runlist;

      /* If the runlist is empty, then we sleep for a while, waiting for
         something to arrive in it.  */
      if (runp == NULL && optim.aio_idle_time >= 0)
        {
          struct timeval now;
          struct timespec wakeup_time;

          ++idle_thread_count;
          __gettimeofday (&now, NULL);
          wakeup_time.tv_sec = now.tv_sec + optim.aio_idle_time;
          wakeup_time.tv_nsec = now.tv_usec * 1000;
          if (wakeup_time.tv_nsec >= 1000000000)
            {
              wakeup_time.tv_nsec -= 1000000000;
              ++wakeup_time.tv_sec;
            }
          pthread_cond_timedwait (&__aio_new_request_notification,
                                  &__aio_requests_mutex,
                                  &wakeup_time);
          --idle_thread_count;
          runp = runlist;
        }

      if (runp == NULL)
        --nthreads;
      else
        {
          assert (runp->running == yes);
          runp->running = allocated;
          runlist = runp->next_run;

          /* If we have a request to process, and there's still another in
             the run list, then we need to either wake up or create a new
             thread to service the request that is still in the run list.  */
          if (runlist != NULL)
            {
              /* There are at least two items in the work queue to work on.
                 If there are other idle threads, then we should wake them
                 up for these other work elements; otherwise, we should try
                 to create a new thread.  */
              if (idle_thread_count > 0)
                pthread_cond_signal (&__aio_new_request_notification);
              else if (nthreads < optim.aio_threads)
                {
                  pthread_t thid;
                  pthread_attr_t attr;

                  /* Make sure the thread is created detached.  */
                  pthread_attr_init (&attr);
                  pthread_attr_setdetachstate (&attr,
                                               PTHREAD_CREATE_DETACHED);

                  /* Now try to start a thread.  If we fail, no big deal,
                     because we know that there is at least one thread (us)
                     that is working on AIO operations.  */
                  if (pthread_create (&thid, &attr, handle_fildes_io, NULL)
                      == 0)
                    ++nthreads;
                }
            }
        }

      /* Release the mutex.  */
      pthread_mutex_unlock (&__aio_requests_mutex);
    }
  while (runp != NULL);

  return NULL;
}


/* Free allocated resources.  */
libc_freeres_fn (free_res)
{
  size_t row;

  /* Only the first pool_size rows have actually been allocated; the
     slots of the pointer table beyond that are uninitialized, so they
     must not be passed to free.  */
  for (row = 0; row < pool_size; ++row)
    free (pool[row]);

  free (pool);
}


/* Add newrequest to the runlist.  The __abs_prio field of newrequest must
   be set correctly before this is called.  Also, newrequest's "running"
   flag must be set to "yes" before the lock is released, or an assertion
   will fire.  */
static void
add_request_to_runlist (struct requestlist *newrequest)
{
  int prio = newrequest->aiocbp->aiocb.__abs_prio;
  struct requestlist *runp;

  if (runlist == NULL || runlist->aiocbp->aiocb.__abs_prio < prio)
    {
      newrequest->next_run = runlist;
      runlist = newrequest;
    }
  else
    {
      runp = runlist;

      while (runp->next_run != NULL
             && runp->next_run->aiocbp->aiocb.__abs_prio >= prio)
        runp = runp->next_run;

      newrequest->next_run = runp->next_run;
      runp->next_run = newrequest;
    }
}