1/* Inner loops of cache daemon.
2 Copyright (C) 1998-2016 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
18
19#include <alloca.h>
20#include <assert.h>
21#include <atomic.h>
22#include <error.h>
23#include <errno.h>
24#include <fcntl.h>
25#include <grp.h>
26#include <ifaddrs.h>
27#include <libintl.h>
28#include <pthread.h>
29#include <pwd.h>
30#include <resolv.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <unistd.h>
34#include <stdint.h>
35#include <arpa/inet.h>
36#ifdef HAVE_NETLINK
37# include <linux/netlink.h>
38# include <linux/rtnetlink.h>
39#endif
40#ifdef HAVE_EPOLL
41# include <sys/epoll.h>
42#endif
43#ifdef HAVE_INOTIFY
44# include <sys/inotify.h>
45#endif
46#include <sys/mman.h>
47#include <sys/param.h>
48#include <sys/poll.h>
49#ifdef HAVE_SENDFILE
50# include <sys/sendfile.h>
51#endif
52#include <sys/socket.h>
53#include <sys/stat.h>
54#include <sys/un.h>
55
56#include "nscd.h"
57#include "dbg_log.h"
58#include "selinux.h"
59#include <resolv/resolv.h>
60
61#include <kernel-features.h>
62#include <libc-internal.h>
63
64
65/* Support to run nscd as an unprivileged user */
66const char *server_user;
67static uid_t server_uid;
68static gid_t server_gid;
69const char *stat_user;
70uid_t stat_uid;
71static gid_t *server_groups;
72#ifndef NGROUPS
73# define NGROUPS 32
74#endif
75static int server_ngroups;
76
77static pthread_attr_t attr;
78
79static void begin_drop_privileges (void);
80static void finish_drop_privileges (void);
81
82/* Map request type to a string. */
83const char *const serv2str[LASTREQ] =
84{
85 [GETPWBYNAME] = "GETPWBYNAME",
86 [GETPWBYUID] = "GETPWBYUID",
87 [GETGRBYNAME] = "GETGRBYNAME",
88 [GETGRBYGID] = "GETGRBYGID",
89 [GETHOSTBYNAME] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
93 [SHUTDOWN] = "SHUTDOWN",
94 [GETSTAT] = "GETSTAT",
95 [INVALIDATE] = "INVALIDATE",
96 [GETFDPW] = "GETFDPW",
97 [GETFDGR] = "GETFDGR",
98 [GETFDHST] = "GETFDHST",
99 [GETAI] = "GETAI",
100 [INITGROUPS] = "INITGROUPS",
101 [GETSERVBYNAME] = "GETSERVBYNAME",
102 [GETSERVBYPORT] = "GETSERVBYPORT",
103 [GETFDSERV] = "GETFDSERV",
104 [GETNETGRENT] = "GETNETGRENT",
105 [INNETGR] = "INNETGR",
106 [GETFDNETGR] = "GETFDNETGR"
107};
108
109/* The control data structures for the services. */
110struct database_dyn dbs[lastdb] =
111{
112 [pwddb] = {
113 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
114 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
115 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
116 .enabled = 0,
117 .check_file = 1,
118 .persistent = 0,
119 .propagate = 1,
120 .shared = 0,
121 .max_db_size = DEFAULT_MAX_DB_SIZE,
122 .suggested_module = DEFAULT_SUGGESTED_MODULE,
123 .db_filename = _PATH_NSCD_PASSWD_DB,
124 .disabled_iov = &pwd_iov_disabled,
125 .postimeout = 3600,
126 .negtimeout = 20,
127 .wr_fd = -1,
128 .ro_fd = -1,
129 .mmap_used = false
130 },
131 [grpdb] = {
132 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
133 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
134 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
135 .enabled = 0,
136 .check_file = 1,
137 .persistent = 0,
138 .propagate = 1,
139 .shared = 0,
140 .max_db_size = DEFAULT_MAX_DB_SIZE,
141 .suggested_module = DEFAULT_SUGGESTED_MODULE,
142 .db_filename = _PATH_NSCD_GROUP_DB,
143 .disabled_iov = &grp_iov_disabled,
144 .postimeout = 3600,
145 .negtimeout = 60,
146 .wr_fd = -1,
147 .ro_fd = -1,
148 .mmap_used = false
149 },
150 [hstdb] = {
151 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
152 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
153 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
154 .enabled = 0,
155 .check_file = 1,
156 .persistent = 0,
157 .propagate = 0, /* Not used. */
158 .shared = 0,
159 .max_db_size = DEFAULT_MAX_DB_SIZE,
160 .suggested_module = DEFAULT_SUGGESTED_MODULE,
161 .db_filename = _PATH_NSCD_HOSTS_DB,
162 .disabled_iov = &hst_iov_disabled,
163 .postimeout = 3600,
164 .negtimeout = 20,
165 .wr_fd = -1,
166 .ro_fd = -1,
167 .mmap_used = false
168 },
169 [servdb] = {
170 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
171 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
172 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
173 .enabled = 0,
174 .check_file = 1,
175 .persistent = 0,
176 .propagate = 0, /* Not used. */
177 .shared = 0,
178 .max_db_size = DEFAULT_MAX_DB_SIZE,
179 .suggested_module = DEFAULT_SUGGESTED_MODULE,
180 .db_filename = _PATH_NSCD_SERVICES_DB,
181 .disabled_iov = &serv_iov_disabled,
182 .postimeout = 28800,
183 .negtimeout = 20,
184 .wr_fd = -1,
185 .ro_fd = -1,
186 .mmap_used = false
187 },
188 [netgrdb] = {
189 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
190 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
191 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
192 .enabled = 0,
193 .check_file = 1,
194 .persistent = 0,
195 .propagate = 0, /* Not used. */
196 .shared = 0,
197 .max_db_size = DEFAULT_MAX_DB_SIZE,
198 .suggested_module = DEFAULT_SUGGESTED_MODULE,
199 .db_filename = _PATH_NSCD_NETGROUP_DB,
200 .disabled_iov = &netgroup_iov_disabled,
201 .postimeout = 28800,
202 .negtimeout = 20,
203 .wr_fd = -1,
204 .ro_fd = -1,
205 .mmap_used = false
206 }
207};
208
209
210/* Mapping of request type to database. */
211static struct
212{
213 bool data_request;
214 struct database_dyn *db;
215} const reqinfo[LASTREQ] =
216{
217 [GETPWBYNAME] = { true, &dbs[pwddb] },
218 [GETPWBYUID] = { true, &dbs[pwddb] },
219 [GETGRBYNAME] = { true, &dbs[grpdb] },
220 [GETGRBYGID] = { true, &dbs[grpdb] },
221 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
222 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
223 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
224 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
225 [SHUTDOWN] = { false, NULL },
226 [GETSTAT] = { false, NULL },
  [INVALIDATE] = { false, NULL },
228 [GETFDPW] = { false, &dbs[pwddb] },
229 [GETFDGR] = { false, &dbs[grpdb] },
230 [GETFDHST] = { false, &dbs[hstdb] },
231 [GETAI] = { true, &dbs[hstdb] },
232 [INITGROUPS] = { true, &dbs[grpdb] },
233 [GETSERVBYNAME] = { true, &dbs[servdb] },
234 [GETSERVBYPORT] = { true, &dbs[servdb] },
235 [GETFDSERV] = { false, &dbs[servdb] },
236 [GETNETGRENT] = { true, &dbs[netgrdb] },
237 [INNETGR] = { true, &dbs[netgrdb] },
238 [GETFDNETGR] = { false, &dbs[netgrdb] }
239};
240
241
242/* Initial number of threads to use. */
243int nthreads = -1;
244/* Maximum number of threads to use. */
245int max_nthreads = 32;
246
247/* Socket for incoming connections. */
248static int sock;
249
250#ifdef HAVE_INOTIFY
251/* Inotify descriptor. */
252int inotify_fd = -1;
253#endif
254
255#ifdef HAVE_NETLINK
256/* Descriptor for netlink status updates. */
257static int nl_status_fd = -1;
258#endif
259
260#ifndef __ASSUME_ACCEPT4
261static int have_accept4;
262#endif
263
264/* Number of times clients had to wait. */
265unsigned long int client_queued;
266
267
268ssize_t
269writeall (int fd, const void *buf, size_t len)
270{
271 size_t n = len;
272 ssize_t ret;
273 do
274 {
275 ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
276 if (ret <= 0)
277 break;
278 buf = (const char *) buf + ret;
279 n -= ret;
280 }
281 while (n > 0);
282 return ret < 0 ? ret : len - n;
283}
284
285
286#ifdef HAVE_SENDFILE
287ssize_t
288sendfileall (int tofd, int fromfd, off_t off, size_t len)
289{
290 ssize_t n = len;
291 ssize_t ret;
292
293 do
294 {
295 ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
296 if (ret <= 0)
297 break;
298 n -= ret;
299 }
300 while (n > 0);
301 return ret < 0 ? ret : len - n;
302}
303#endif
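
/* Illustrative sketch (not part of the original source): how callers are
   expected to use the helpers above.  Both writeall and sendfileall return
   the number of bytes actually transferred, or a negative value on error,
   so a caller only needs to compare the result with the requested length.
   The response structure below is hypothetical and only serves the
   example.  */
#if 0
static void
reply_example (int fd)
{
  struct { int32_t found; int32_t len; } resp = { 1, 0 };

  ssize_t nwritten = writeall (fd, &resp, sizeof (resp));
  if (nwritten != (ssize_t) sizeof (resp))
    /* Short write or error; the connection is simply given up on.  */
    dbg_log ("cannot write reply: %s", strerror (errno));
}
#endif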
304
305
306enum usekey
307 {
308 use_not = 0,
309 /* The following three are not really used, they are symbolic constants. */
310 use_first = 16,
311 use_begin = 32,
312 use_end = 64,
313
314 use_he = 1,
315 use_he_begin = use_he | use_begin,
316 use_he_end = use_he | use_end,
317 use_data = 3,
318 use_data_begin = use_data | use_begin,
319 use_data_end = use_data | use_end,
320 use_data_first = use_data_begin | use_first
321 };
322
323
324static int
325check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
326 enum usekey use, ref_t start, size_t len)
327{
328 assert (len >= 2);
329
330 if (start > first_free || start + len > first_free
331 || (start & BLOCK_ALIGN_M1))
332 return 0;
333
334 if (usemap[start] == use_not)
335 {
336 /* Add the start marker. */
337 usemap[start] = use | use_begin;
338 use &= ~use_first;
339
340 while (--len > 0)
341 if (usemap[++start] != use_not)
342 return 0;
343 else
344 usemap[start] = use;
345
346 /* Add the end marker. */
347 usemap[start] = use | use_end;
348 }
349 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
350 {
351 /* Hash entries can't be shared. */
352 if (use == use_he)
353 return 0;
354
355 usemap[start] |= (use & use_first);
356 use &= ~use_first;
357
358 while (--len > 1)
359 if (usemap[++start] != use)
360 return 0;
361
362 if (usemap[++start] != (use | use_end))
363 return 0;
364 }
365 else
366 /* Points to a wrong object or somewhere in the middle. */
367 return 0;
368
369 return 1;
370}
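
/* Illustrative sketch (not part of the original source): what check_use
   records in USEMAP for a freshly seen hash entry of length 4 starting at
   offset 0.  The first byte carries the begin marker, the last byte the
   end marker, and the bytes in between carry the plain type value, so a
   later walk over the same bytes with a conflicting type is detected.  */
#if 0
static void
usemap_example (void)
{
  uint8_t usemap[4] = { use_not, use_not, use_not, use_not };

  /* After check_use (..., use_he, 0, 4) succeeds the map looks like:  */
  usemap[0] = use_he | use_begin;	/* use_he_begin  */
  usemap[1] = use_he;
  usemap[2] = use_he;
  usemap[3] = use_he | use_end;		/* use_he_end    */
}
#endif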
371
372
373/* Verify data in persistent database. */
374static int
375verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
376{
377 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
378 || dbnr == netgrdb);
379
380 time_t now = time (NULL);
381
382 struct database_pers_head *head = mem;
383 struct database_pers_head head_copy = *head;
384
385 /* Check that the header that was read matches the head in the database. */
386 if (memcmp (head, readhead, sizeof (*head)) != 0)
387 return 0;
388
389 /* First some easy tests: make sure the database header is sane. */
390 if (head->version != DB_VERSION
391 || head->header_size != sizeof (*head)
392 /* We allow a timestamp to be one hour ahead of the current time.
393 This should cover daylight saving time changes. */
394 || head->timestamp > now + 60 * 60 + 60
395 || (head->gc_cycle & 1)
396 || head->module == 0
397 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
398 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
399 || head->first_free < 0
400 || head->first_free > head->data_size
401 || (head->first_free & BLOCK_ALIGN_M1) != 0
402 || head->maxnentries < 0
403 || head->maxnsearched < 0)
404 return 0;
405
406 uint8_t *usemap = calloc (head->first_free, 1);
407 if (usemap == NULL)
408 return 0;
409
410 const char *data = (char *) &head->array[roundup (head->module,
411 ALIGN / sizeof (ref_t))];
412
413 nscd_ssize_t he_cnt = 0;
414 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
415 {
416 ref_t trail = head->array[cnt];
417 ref_t work = trail;
418 int tick = 0;
419
420 while (work != ENDREF)
421 {
422 if (! check_use (data, head->first_free, usemap, use_he, work,
423 sizeof (struct hashentry)))
424 goto fail;
425
426 /* Now we know we can dereference the record. */
427 struct hashentry *here = (struct hashentry *) (data + work);
428
429 ++he_cnt;
430
431 /* Make sure the record is for this type of service. */
432 if (here->type >= LASTREQ
433 || reqinfo[here->type].db != &dbs[dbnr])
434 goto fail;
435
436 /* Validate boolean field value. */
437 if (here->first != false && here->first != true)
438 goto fail;
439
440 if (here->len < 0)
441 goto fail;
442
443 /* Now the data. */
444 if (here->packet < 0
445 || here->packet > head->first_free
446 || here->packet + sizeof (struct datahead) > head->first_free)
447 goto fail;
448
449 struct datahead *dh = (struct datahead *) (data + here->packet);
450
451 if (! check_use (data, head->first_free, usemap,
452 use_data | (here->first ? use_first : 0),
453 here->packet, dh->allocsize))
454 goto fail;
455
456 if (dh->allocsize < sizeof (struct datahead)
457 || dh->recsize > dh->allocsize
458 || (dh->notfound != false && dh->notfound != true)
459 || (dh->usable != false && dh->usable != true))
460 goto fail;
461
462 if (here->key < here->packet + sizeof (struct datahead)
463 || here->key > here->packet + dh->allocsize
464 || here->key + here->len > here->packet + dh->allocsize)
465 goto fail;
466
467 work = here->next;
468
469 if (work == trail)
470 /* A circular list, this must not happen. */
471 goto fail;
472 if (tick)
473 trail = ((struct hashentry *) (data + trail))->next;
474 tick = 1 - tick;
475 }
476 }
477
478 if (he_cnt != head->nentries)
479 goto fail;
480
481 /* See if all data and keys had at least one reference from
482 he->first == true hashentry. */
483 for (ref_t idx = 0; idx < head->first_free; ++idx)
484 {
485 if (usemap[idx] == use_data_begin)
486 goto fail;
487 }
488
489 /* Finally, make sure the database hasn't changed since the first test. */
490 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
491 goto fail;
492
493 free (usemap);
494 return 1;
495
496fail:
497 free (usemap);
498 return 0;
499}
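
/* Illustrative sketch (not part of the original source): the WORK/TRAIL
   loop above is the classic two-speed ("tortoise and hare") cycle check.
   WORK advances on every iteration, TRAIL on every second one, so if the
   chain of hash entries ever loops back on itself the two references must
   meet and the database is rejected.  Minimal standalone version on a
   plain linked list:  */
#if 0
struct node { struct node *next; };

static int
has_cycle (struct node *head)
{
  struct node *trail = head;
  struct node *work = head;
  int tick = 0;

  while (work != NULL)
    {
      work = work->next;	/* Advance on every step.  */
      if (work == trail)
	return 1;		/* The fast walker caught the slow one.  */
      if (tick)
	trail = trail->next;	/* Advance on every other step.  */
      tick = 1 - tick;
    }
  return 0;			/* Reached the end: no cycle.  */
}
#endif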
500
501
502#ifdef O_CLOEXEC
503# define EXTRA_O_FLAGS O_CLOEXEC
504#else
505# define EXTRA_O_FLAGS 0
506#endif
507
508
509/* Initialize database information structures. */
510void
511nscd_init (void)
512{
513 /* Look up unprivileged uid/gid/groups before we start listening on the
514 socket */
515 if (server_user != NULL)
516 begin_drop_privileges ();
517
518 if (nthreads == -1)
519 /* No configuration for this value, assume a default. */
520 nthreads = 4;
521
522 for (size_t cnt = 0; cnt < lastdb; ++cnt)
523 if (dbs[cnt].enabled)
524 {
525 pthread_rwlock_init (&dbs[cnt].lock, NULL);
526 pthread_mutex_init (&dbs[cnt].memlock, NULL);
527
528 if (dbs[cnt].persistent)
529 {
530 /* Try to open the appropriate file on disk. */
531 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
532 if (fd != -1)
533 {
534 char *msg = NULL;
535 struct stat64 st;
536 void *mem;
537 size_t total;
538 struct database_pers_head head;
539 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
540 sizeof (head)));
541 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
542 {
543 fail_db_errno:
544 /* The code is single-threaded at this point so
545 using strerror is just fine. */
546 msg = strerror (errno);
547 fail_db:
548 dbg_log (_("invalid persistent database file \"%s\": %s"),
549 dbs[cnt].db_filename, msg);
550 unlink (dbs[cnt].db_filename);
551 }
552 else if (head.module == 0 && head.data_size == 0)
553 {
554 /* The file has been created, but the head has not
555 been initialized yet. */
556 msg = _("uninitialized header");
557 goto fail_db;
558 }
559 else if (head.header_size != (int) sizeof (head))
560 {
561 msg = _("header size does not match");
562 goto fail_db;
563 }
564 else if ((total = (sizeof (head)
565 + roundup (head.module * sizeof (ref_t),
566 ALIGN)
567 + head.data_size))
568 > st.st_size
569 || total < sizeof (head))
570 {
571 msg = _("file size does not match");
572 goto fail_db;
573 }
	      /* Note we map with the maximum size allowed for the
		 database.  This is likely much larger than the actual
		 file size.  This is OK on most OSes since extensions of
		 the underlying file automatically make more pages
		 available for memory access.  */
580 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
581 PROT_READ | PROT_WRITE,
582 MAP_SHARED, fd, 0))
583 == MAP_FAILED)
584 goto fail_db_errno;
585 else if (!verify_persistent_db (mem, &head, cnt))
586 {
587 munmap (mem, total);
588 msg = _("verification failed");
589 goto fail_db;
590 }
591 else
592 {
593 /* Success. We have the database. */
594 dbs[cnt].head = mem;
595 dbs[cnt].memsize = total;
596 dbs[cnt].data = (char *)
597 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
598 ALIGN / sizeof (ref_t))];
599 dbs[cnt].mmap_used = true;
600
601 if (dbs[cnt].suggested_module > head.module)
602 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
603 dbnames[cnt]);
604
605 dbs[cnt].wr_fd = fd;
606 fd = -1;
607 /* We also need a read-only descriptor. */
608 if (dbs[cnt].shared)
609 {
610 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
611 O_RDONLY | EXTRA_O_FLAGS);
612 if (dbs[cnt].ro_fd == -1)
613 dbg_log (_("\
614cannot create read-only descriptor for \"%s\"; no mmap"),
615 dbs[cnt].db_filename);
616 }
617
618 // XXX Shall we test whether the descriptors actually
619 // XXX point to the same file?
620 }
621
	  /* Close the file descriptor in case something went wrong, in
	     which case the variable has not been assigned -1.  */
625 if (fd != -1)
626 close (fd);
627 }
628 else if (errno == EACCES)
629 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
630 dbs[cnt].db_filename);
631 }
632
633 if (dbs[cnt].head == NULL)
634 {
635 /* No database loaded. Allocate the data structure,
636 possibly on disk. */
637 struct database_pers_head head;
638 size_t total = (sizeof (head)
639 + roundup (dbs[cnt].suggested_module
640 * sizeof (ref_t), ALIGN)
641 + (dbs[cnt].suggested_module
642 * DEFAULT_DATASIZE_PER_BUCKET));
643
644 /* Try to create the database. If we do not need a
645 persistent database create a temporary file. */
646 int fd;
647 int ro_fd = -1;
648 if (dbs[cnt].persistent)
649 {
650 fd = open (dbs[cnt].db_filename,
651 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
652 S_IRUSR | S_IWUSR);
653 if (fd != -1 && dbs[cnt].shared)
654 ro_fd = open (dbs[cnt].db_filename,
655 O_RDONLY | EXTRA_O_FLAGS);
656 }
657 else
658 {
659 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
660 fd = mkostemp (fname, EXTRA_O_FLAGS);
661
662 /* We do not need the file name anymore after we
663 opened another file descriptor in read-only mode. */
664 if (fd != -1)
665 {
666 if (dbs[cnt].shared)
667 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
668
669 unlink (fname);
670 }
671 }
672
673 if (fd == -1)
674 {
675 if (errno == EEXIST)
676 {
677 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
678 dbnames[cnt], dbs[cnt].db_filename);
679 do_exit (1, 0, NULL);
680 }
681
682 if (dbs[cnt].persistent)
683 dbg_log (_("cannot create %s; no persistent database used"),
684 dbs[cnt].db_filename);
685 else
686 dbg_log (_("cannot create %s; no sharing possible"),
687 dbs[cnt].db_filename);
688
689 dbs[cnt].persistent = 0;
690 // XXX remember: no mmap
691 }
692 else
693 {
694 /* Tell the user if we could not create the read-only
695 descriptor. */
696 if (ro_fd == -1 && dbs[cnt].shared)
697 dbg_log (_("\
698cannot create read-only descriptor for \"%s\"; no mmap"),
699 dbs[cnt].db_filename);
700
701 /* Before we create the header, initialize the hash
702 table. That way if we get interrupted while writing
703 the header we can recognize a partially initialized
704 database. */
705 size_t ps = sysconf (_SC_PAGESIZE);
706 char tmpbuf[ps];
707 assert (~ENDREF == 0);
708 memset (tmpbuf, '\xff', ps);
709
710 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
711 off_t offset = sizeof (head);
712
713 size_t towrite;
714 if (offset % ps != 0)
715 {
716 towrite = MIN (remaining, ps - (offset % ps));
717 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
718 goto write_fail;
719 offset += towrite;
720 remaining -= towrite;
721 }
722
723 while (remaining > ps)
724 {
725 if (pwrite (fd, tmpbuf, ps, offset) == -1)
726 goto write_fail;
727 offset += ps;
728 remaining -= ps;
729 }
730
731 if (remaining > 0
732 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
733 goto write_fail;
734
735 /* Create the header of the file. */
736 struct database_pers_head head =
737 {
738 .version = DB_VERSION,
739 .header_size = sizeof (head),
740 .module = dbs[cnt].suggested_module,
741 .data_size = (dbs[cnt].suggested_module
742 * DEFAULT_DATASIZE_PER_BUCKET),
743 .first_free = 0
744 };
745 void *mem;
746
747 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
748 != sizeof (head))
749 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
750 != 0)
751 || (mem = mmap (NULL, dbs[cnt].max_db_size,
752 PROT_READ | PROT_WRITE,
753 MAP_SHARED, fd, 0)) == MAP_FAILED)
754 {
755 write_fail:
756 unlink (dbs[cnt].db_filename);
757 dbg_log (_("cannot write to database file %s: %s"),
758 dbs[cnt].db_filename, strerror (errno));
759 dbs[cnt].persistent = 0;
760 }
761 else
762 {
763 /* Success. */
764 dbs[cnt].head = mem;
765 dbs[cnt].data = (char *)
766 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
767 ALIGN / sizeof (ref_t))];
768 dbs[cnt].memsize = total;
769 dbs[cnt].mmap_used = true;
770
771 /* Remember the descriptors. */
772 dbs[cnt].wr_fd = fd;
773 dbs[cnt].ro_fd = ro_fd;
774 fd = -1;
775 ro_fd = -1;
776 }
777
778 if (fd != -1)
779 close (fd);
780 if (ro_fd != -1)
781 close (ro_fd);
782 }
783 }
784
785#if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
      /* We do not check here whether the O_CLOEXEC passed to the open
	 call took effect.  The two fcntl calls are performed only once
	 each per process start-up and are therefore not noticeable at
	 all.  */
790 if (paranoia
791 && ((dbs[cnt].wr_fd != -1
792 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
793 || (dbs[cnt].ro_fd != -1
794 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
795 {
796 dbg_log (_("\
797cannot set socket to close on exec: %s; disabling paranoia mode"),
798 strerror (errno));
799 paranoia = 0;
800 }
801#endif
802
803 if (dbs[cnt].head == NULL)
804 {
805 /* We do not use the persistent database. Just
806 create an in-memory data structure. */
807 assert (! dbs[cnt].persistent);
808
809 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
810 + (dbs[cnt].suggested_module
811 * sizeof (ref_t)));
812 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
813 assert (~ENDREF == 0);
814 memset (dbs[cnt].head->array, '\xff',
815 dbs[cnt].suggested_module * sizeof (ref_t));
816 dbs[cnt].head->module = dbs[cnt].suggested_module;
817 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
818 * dbs[cnt].head->module);
819 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
820 dbs[cnt].head->first_free = 0;
821
822 dbs[cnt].shared = 0;
823 assert (dbs[cnt].ro_fd == -1);
824 }
825 }
826
827 /* Create the socket. */
828 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
829 if (sock < 0)
830 {
831 dbg_log (_("cannot open socket: %s"), strerror (errno));
832 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
833 }
834 /* Bind a name to the socket. */
835 struct sockaddr_un sock_addr;
836 sock_addr.sun_family = AF_UNIX;
837 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
838 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
839 {
840 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
841 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
842 }
843
844 /* Set permissions for the socket. */
845 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
846
847 /* Set the socket up to accept connections. */
848 if (listen (sock, SOMAXCONN) < 0)
849 {
850 dbg_log (_("cannot enable socket to accept connections: %s"),
851 strerror (errno));
852 do_exit (1, 0, NULL);
853 }
854
855#ifdef HAVE_NETLINK
856 if (dbs[hstdb].enabled)
857 {
858 /* Try to open netlink socket to monitor network setting changes. */
859 nl_status_fd = socket (AF_NETLINK,
860 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
861 NETLINK_ROUTE);
862 if (nl_status_fd != -1)
863 {
864 struct sockaddr_nl snl;
865 memset (&snl, '\0', sizeof (snl));
866 snl.nl_family = AF_NETLINK;
867 /* XXX Is this the best set to use? */
868 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
869 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
870 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
871 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
872 | RTMGRP_IPV6_PREFIX);
873
874 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
875 {
876 close (nl_status_fd);
877 nl_status_fd = -1;
878 }
879 else
880 {
881 /* Start the timestamp process. */
882 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
883 = __bump_nl_timestamp ();
884 }
885 }
886 }
887#endif
888
889 /* Change to unprivileged uid/gid/groups if specified in config file */
890 if (server_user != NULL)
891 finish_drop_privileges ();
892}
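
/* Illustrative sketch (not part of the original source): the layout
   assumed by nscd_init and verify_persistent_db.  A persistent database
   file is the header, followed by the hash table (MODULE slots of ref_t,
   padded to ALIGN), followed by DATA_SIZE bytes of record storage.  The
   helper names are made up; the bodies only restate the size and offset
   arithmetic used above.  */
#if 0
static size_t
persistent_db_total_size (const struct database_pers_head *head)
{
  return (sizeof (*head)
	  + roundup (head->module * sizeof (ref_t), ALIGN)
	  + head->data_size);
}

static const char *
persistent_db_data (const struct database_pers_head *head)
{
  /* The data area starts right after the hash table, rounded up so that
     it is ALIGN-aligned (the array index is measured in ref_t units).  */
  return (const char *) &head->array[roundup (head->module,
					      ALIGN / sizeof (ref_t))];
}
#endif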
893
894#ifdef HAVE_INOTIFY
895#define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
896#define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
897void
898install_watches (struct traced_file *finfo)
899{
900 /* Use inotify support if we have it. */
901 if (finfo->inotify_descr[TRACED_FILE] < 0)
902 finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
903 finfo->fname,
904 TRACED_FILE_MASK);
905 if (finfo->inotify_descr[TRACED_FILE] < 0)
906 {
907 dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
908 finfo->fname, strerror (errno));
909 return;
910 }
911 dbg_log (_("monitoring file `%s` (%d)"),
912 finfo->fname, finfo->inotify_descr[TRACED_FILE]);
913 /* Additionally listen for events in the file's parent directory.
914 We do this because the file to be watched might be
915 deleted and then added back again. When it is added back again
916 we must re-add the watch. We must also cover IN_MOVED_TO to
917 detect a file being moved into the directory. */
918 if (finfo->inotify_descr[TRACED_DIR] < 0)
919 finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
920 finfo->dname,
921 TRACED_DIR_MASK);
922 if (finfo->inotify_descr[TRACED_DIR] < 0)
923 {
924 dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
925 finfo->fname, strerror (errno));
926 return;
927 }
928 dbg_log (_("monitoring directory `%s` (%d)"),
929 finfo->dname, finfo->inotify_descr[TRACED_DIR]);
930}
931#endif
932
933/* Register the file in FINFO as a traced file for the database DBS[DBIX].
934
935 We support registering multiple files per database. Each call to
936 register_traced_file adds to the list of registered files.
937
938 When we prune the database, either through timeout or a request to
939 invalidate, we will check to see if any of the registered files has changed.
940 When we accept new connections to handle a cache request we will also
941 check to see if any of the registered files has changed.
942
943 If we have inotify support then we install an inotify fd to notify us of
944 file deletion or modification, both of which will require we invalidate
945 the cache for the database. Without inotify support we stat the file and
946 store st_mtime to determine if the file has been modified. */
947void
948register_traced_file (size_t dbidx, struct traced_file *finfo)
949{
950 /* If the database is disabled or file checking is disabled
951 then ignore the registration. */
952 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
953 return;
954
955 if (__glibc_unlikely (debug_level > 0))
956 dbg_log (_("monitoring file %s for database %s"),
957 finfo->fname, dbnames[dbidx]);
958
959#ifdef HAVE_INOTIFY
960 install_watches (finfo);
961#endif
962 struct stat64 st;
963 if (stat64 (finfo->fname, &st) < 0)
964 {
965 /* We cannot stat() the file. Set mtime to zero and try again later. */
966 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
967 finfo->fname, strerror (errno));
968 finfo->mtime = 0;
969 }
970 else
971 finfo->mtime = st.st_mtime;
972
973 /* Queue up the file name. */
974 finfo->next = dbs[dbidx].traced_files;
975 dbs[dbidx].traced_files = finfo;
976}
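
/* Illustrative sketch (not part of the original source): the mtime-based
   fallback described in the comment above.  Without inotify, a modified
   file can be detected by repeating the stat64 call and comparing
   st_mtime with the value stored at registration time.  The helper name
   is made up for the example.  */
#if 0
static bool
traced_file_changed (struct traced_file *finfo)
{
  struct stat64 st;

  if (stat64 (finfo->fname, &st) < 0)
    /* Treat an unreadable file as changed so the cache is refreshed
       once the file becomes available again.  */
    return true;

  if (st.st_mtime != finfo->mtime)
    {
      finfo->mtime = st.st_mtime;
      return true;
    }
  return false;
}
#endif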
977
978
979/* Close the connections. */
980void
981close_sockets (void)
982{
983 close (sock);
984}
985
986
987static void
988invalidate_cache (char *key, int fd)
989{
990 dbtype number;
991 int32_t resp;
992
993 for (number = pwddb; number < lastdb; ++number)
994 if (strcmp (key, dbnames[number]) == 0)
995 {
996 struct traced_file *runp = dbs[number].traced_files;
997 while (runp != NULL)
998 {
999 /* Make sure we reload from file when checking mtime. */
1000 runp->mtime = 0;
1001#ifdef HAVE_INOTIFY
1002 /* During an invalidation we try to reload the traced
1003 file watches. This allows the user to re-sync if
1004 inotify events were lost. Similar to what we do during
1005 pruning. */
1006 install_watches (runp);
1007#endif
1008 if (runp->call_res_init)
1009 {
1010 res_init ();
1011 break;
1012 }
1013 runp = runp->next;
1014 }
1015 break;
1016 }
1017
1018 if (number == lastdb)
1019 {
1020 resp = EINVAL;
1021 writeall (fd, &resp, sizeof (resp));
1022 return;
1023 }
1024
1025 if (dbs[number].enabled)
1026 {
1027 pthread_mutex_lock (&dbs[number].prune_run_lock);
1028 prune_cache (&dbs[number], LONG_MAX, fd);
1029 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1030 }
1031 else
1032 {
1033 resp = 0;
1034 writeall (fd, &resp, sizeof (resp));
1035 }
1036}
1037
1038
1039#ifdef SCM_RIGHTS
1040static void
1041send_ro_fd (struct database_dyn *db, char *key, int fd)
1042{
  /* If we do not have a read-only file descriptor, do nothing.  */
1044 if (db->ro_fd == -1)
1045 return;
1046
1047 /* We need to send some data along with the descriptor. */
1048 uint64_t mapsize = (db->head->data_size
1049 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1050 + sizeof (struct database_pers_head));
1051 struct iovec iov[2];
1052 iov[0].iov_base = key;
1053 iov[0].iov_len = strlen (key) + 1;
1054 iov[1].iov_base = &mapsize;
1055 iov[1].iov_len = sizeof (mapsize);
1056
1057 /* Prepare the control message to transfer the descriptor. */
1058 union
1059 {
1060 struct cmsghdr hdr;
1061 char bytes[CMSG_SPACE (sizeof (int))];
1062 } buf;
1063 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1064 .msg_control = buf.bytes,
1065 .msg_controllen = sizeof (buf) };
1066 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1067
1068 cmsg->cmsg_level = SOL_SOCKET;
1069 cmsg->cmsg_type = SCM_RIGHTS;
1070 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1071
1072 int *ip = (int *) CMSG_DATA (cmsg);
1073 *ip = db->ro_fd;
1074
1075 msg.msg_controllen = cmsg->cmsg_len;
1076
1077 /* Send the control message. We repeat when we are interrupted but
1078 everything else is ignored. */
1079#ifndef MSG_NOSIGNAL
1080# define MSG_NOSIGNAL 0
1081#endif
1082 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1083
1084 if (__glibc_unlikely (debug_level > 0))
1085 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1086}
1087#endif /* SCM_RIGHTS */
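
/* Illustrative sketch (not part of the original source): how a client of
   the GETFD* requests can pick up the descriptor that send_ro_fd passes
   along.  The data part carries the database name and the mapping size,
   the control message carries the descriptor itself.  The function name
   and buffer sizes are made up for the example.  */
#if 0
static int
receive_ro_fd (int sockfd, uint64_t *mapsizep)
{
  char keybuf[32];
  struct iovec iov[2] =
    {
      { .iov_base = keybuf, .iov_len = sizeof (keybuf) },
      { .iov_base = mapsizep, .iov_len = sizeof (*mapsizep) }
    };
  union
  {
    struct cmsghdr hdr;
    char bytes[CMSG_SPACE (sizeof (int))];
  } buf;
  struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
			.msg_control = buf.bytes,
			.msg_controllen = sizeof (buf) };

  if (TEMP_FAILURE_RETRY (recvmsg (sockfd, &msg, 0)) <= 0)
    return -1;

  struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
  if (cmsg == NULL || cmsg->cmsg_level != SOL_SOCKET
      || cmsg->cmsg_type != SCM_RIGHTS
      || cmsg->cmsg_len < CMSG_LEN (sizeof (int)))
    return -1;

  int fd;
  memcpy (&fd, CMSG_DATA (cmsg), sizeof (fd));
  return fd;
}
#endif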
1088
1089
1090/* Handle new request. */
1091static void
1092handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1093{
1094 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1095 {
1096 if (debug_level > 0)
1097 dbg_log (_("\
1098cannot handle old request version %d; current version is %d"),
1099 req->version, NSCD_VERSION);
1100 return;
1101 }
1102
1103 /* Perform the SELinux check before we go on to the standard checks. */
1104 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1105 {
1106 if (debug_level > 0)
1107 {
1108#ifdef SO_PEERCRED
1109# ifdef PATH_MAX
1110 char buf[PATH_MAX];
1111# else
1112 char buf[4096];
1113# endif
1114
1115 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1116 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1117
1118 if (n <= 0)
1119 dbg_log (_("\
1120request from %ld not handled due to missing permission"), (long int) pid);
1121 else
1122 {
1123 buf[n] = '\0';
1124 dbg_log (_("\
1125request from '%s' [%ld] not handled due to missing permission"),
1126 buf, (long int) pid);
1127 }
1128#else
1129 dbg_log (_("request not handled due to missing permission"));
1130#endif
1131 }
1132 return;
1133 }
1134
1135 struct database_dyn *db = reqinfo[req->type].db;
1136
1137 /* See whether we can service the request from the cache. */
1138 if (__builtin_expect (reqinfo[req->type].data_request, true))
1139 {
1140 if (__builtin_expect (debug_level, 0) > 0)
1141 {
1142 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1143 {
1144 char buf[INET6_ADDRSTRLEN];
1145
1146 dbg_log ("\t%s (%s)", serv2str[req->type],
1147 inet_ntop (req->type == GETHOSTBYADDR
1148 ? AF_INET : AF_INET6,
1149 key, buf, sizeof (buf)));
1150 }
1151 else
1152 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1153 }
1154
1155 /* Is this service enabled? */
1156 if (__glibc_unlikely (!db->enabled))
1157 {
	  /* No, send the prepared record.  */
1159 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1160 db->disabled_iov->iov_len,
1161 MSG_NOSIGNAL))
1162 != (ssize_t) db->disabled_iov->iov_len
1163 && __builtin_expect (debug_level, 0) > 0)
1164 {
1165 /* We have problems sending the result. */
1166 char buf[256];
1167 dbg_log (_("cannot write result: %s"),
1168 strerror_r (errno, buf, sizeof (buf)));
1169 }
1170
1171 return;
1172 }
1173
1174 /* Be sure we can read the data. */
1175 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1176 {
1177 ++db->head->rdlockdelayed;
1178 pthread_rwlock_rdlock (&db->lock);
1179 }
1180
1181 /* See whether we can handle it from the cache. */
1182 struct datahead *cached;
1183 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1184 db, uid);
1185 if (cached != NULL)
1186 {
1187 /* Hurray it's in the cache. */
1188 ssize_t nwritten;
1189
1190#ifdef HAVE_SENDFILE
1191 if (__glibc_likely (db->mmap_used))
1192 {
1193 assert (db->wr_fd != -1);
1194 assert ((char *) cached->data > (char *) db->data);
1195 assert ((char *) cached->data - (char *) db->head
1196 + cached->recsize
1197 <= (sizeof (struct database_pers_head)
1198 + db->head->module * sizeof (ref_t)
1199 + db->head->data_size));
1200 nwritten = sendfileall (fd, db->wr_fd,
1201 (char *) cached->data
1202 - (char *) db->head, cached->recsize);
1203# ifndef __ASSUME_SENDFILE
1204 if (nwritten == -1 && errno == ENOSYS)
1205 goto use_write;
1206# endif
1207 }
1208 else
1209# ifndef __ASSUME_SENDFILE
1210 use_write:
1211# endif
1212#endif
1213 nwritten = writeall (fd, cached->data, cached->recsize);
1214
1215 if (nwritten != cached->recsize
1216 && __builtin_expect (debug_level, 0) > 0)
1217 {
1218 /* We have problems sending the result. */
1219 char buf[256];
1220 dbg_log (_("cannot write result: %s"),
1221 strerror_r (errno, buf, sizeof (buf)));
1222 }
1223
1224 pthread_rwlock_unlock (&db->lock);
1225
1226 return;
1227 }
1228
1229 pthread_rwlock_unlock (&db->lock);
1230 }
1231 else if (__builtin_expect (debug_level, 0) > 0)
1232 {
1233 if (req->type == INVALIDATE)
1234 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1235 else
1236 dbg_log ("\t%s", serv2str[req->type]);
1237 }
1238
1239 /* Handle the request. */
1240 switch (req->type)
1241 {
1242 case GETPWBYNAME:
1243 addpwbyname (db, fd, req, key, uid);
1244 break;
1245
1246 case GETPWBYUID:
1247 addpwbyuid (db, fd, req, key, uid);
1248 break;
1249
1250 case GETGRBYNAME:
1251 addgrbyname (db, fd, req, key, uid);
1252 break;
1253
1254 case GETGRBYGID:
1255 addgrbygid (db, fd, req, key, uid);
1256 break;
1257
1258 case GETHOSTBYNAME:
1259 addhstbyname (db, fd, req, key, uid);
1260 break;
1261
1262 case GETHOSTBYNAMEv6:
1263 addhstbynamev6 (db, fd, req, key, uid);
1264 break;
1265
1266 case GETHOSTBYADDR:
1267 addhstbyaddr (db, fd, req, key, uid);
1268 break;
1269
1270 case GETHOSTBYADDRv6:
1271 addhstbyaddrv6 (db, fd, req, key, uid);
1272 break;
1273
1274 case GETAI:
1275 addhstai (db, fd, req, key, uid);
1276 break;
1277
1278 case INITGROUPS:
1279 addinitgroups (db, fd, req, key, uid);
1280 break;
1281
1282 case GETSERVBYNAME:
1283 addservbyname (db, fd, req, key, uid);
1284 break;
1285
1286 case GETSERVBYPORT:
1287 addservbyport (db, fd, req, key, uid);
1288 break;
1289
1290 case GETNETGRENT:
1291 addgetnetgrent (db, fd, req, key, uid);
1292 break;
1293
1294 case INNETGR:
1295 addinnetgr (db, fd, req, key, uid);
1296 break;
1297
1298 case GETSTAT:
1299 case SHUTDOWN:
1300 case INVALIDATE:
1301 {
	/* Get the caller's credentials.  */
1303#ifdef SO_PEERCRED
1304 struct ucred caller;
1305 socklen_t optlen = sizeof (caller);
1306
1307 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1308 {
1309 char buf[256];
1310
1311 dbg_log (_("error getting caller's id: %s"),
1312 strerror_r (errno, buf, sizeof (buf)));
1313 break;
1314 }
1315
1316 uid = caller.uid;
1317#else
	/* Some systems have no SO_PEERCRED implementation.  They don't
	   care about security, so neither do we.  */
1320 uid = 0;
1321#endif
1322 }
1323
1324 /* Accept shutdown, getstat and invalidate only from root. For
1325 the stat call also allow the user specified in the config file. */
1326 if (req->type == GETSTAT)
1327 {
1328 if (uid == 0 || uid == stat_uid)
1329 send_stats (fd, dbs);
1330 }
1331 else if (uid == 0)
1332 {
1333 if (req->type == INVALIDATE)
1334 invalidate_cache (key, fd);
1335 else
1336 termination_handler (0);
1337 }
1338 break;
1339
1340 case GETFDPW:
1341 case GETFDGR:
1342 case GETFDHST:
1343 case GETFDSERV:
1344 case GETFDNETGR:
1345#ifdef SCM_RIGHTS
1346 send_ro_fd (reqinfo[req->type].db, key, fd);
1347#endif
1348 break;
1349
1350 default:
1351 /* Ignore the command, it's nothing we know. */
1352 break;
1353 }
1354}
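
/* Illustrative sketch (not part of the original source): the shape of a
   request as handle_request expects it on the wire.  A client connects to
   _PATH_NSCDSOCKET, writes a request_header (version, type, key_len)
   followed by key_len bytes of key, and then reads the reply.  The reply
   layout depends on the request type and is not shown here; the function
   name is made up for the example.  */
#if 0
static int
query_passwd_by_name (const char *name)
{
  int fd = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
  if (fd < 0)
    return -1;

  struct sockaddr_un addr = { .sun_family = AF_UNIX };
  strcpy (addr.sun_path, _PATH_NSCDSOCKET);
  if (connect (fd, (struct sockaddr *) &addr, sizeof (addr)) < 0)
    {
      close (fd);
      return -1;
    }

  request_header req =
    {
      .version = NSCD_VERSION,
      .type = GETPWBYNAME,
      .key_len = strlen (name) + 1
    };
  /* Header first, then the NUL-terminated key.  */
  if (writeall (fd, &req, sizeof (req)) != (ssize_t) sizeof (req)
      || writeall (fd, name, req.key_len) != (ssize_t) req.key_len)
    {
      close (fd);
      return -1;
    }

  /* The caller would now read the type-specific response header and the
     payload from FD.  */
  return fd;
}
#endif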
1355
1356
1357/* Restart the process. */
1358static void
1359restart (void)
1360{
  /* First determine the parameters.  We do not use the parameters
     passed to main() since they are not usable if nscd was started by
     invoking the dynamic linker directly.  That is not the usual case,
     but nscd is part of glibc and we occasionally do this.  */
1365 size_t buflen = 1024;
1366 char *buf = alloca (buflen);
1367 size_t readlen = 0;
1368 int fd = open ("/proc/self/cmdline", O_RDONLY);
1369 if (fd == -1)
1370 {
1371 dbg_log (_("\
1372cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1373 strerror (errno));
1374
1375 paranoia = 0;
1376 return;
1377 }
1378
1379 while (1)
1380 {
1381 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1382 buflen - readlen));
1383 if (n == -1)
1384 {
1385 dbg_log (_("\
1386cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1387 strerror (errno));
1388
1389 close (fd);
1390 paranoia = 0;
1391 return;
1392 }
1393
1394 readlen += n;
1395
1396 if (readlen < buflen)
1397 break;
1398
1399 /* We might have to extend the buffer. */
1400 size_t old_buflen = buflen;
1401 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1402 buf = memmove (newp, buf, old_buflen);
1403 }
1404
1405 close (fd);
1406
1407 /* Parse the command line. Worst case scenario: every two
1408 characters form one parameter (one character plus NUL). */
1409 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1410 int argc = 0;
1411
1412 char *cp = buf;
1413 while (cp < buf + readlen)
1414 {
1415 argv[argc++] = cp;
1416 cp = (char *) rawmemchr (cp, '\0') + 1;
1417 }
1418 argv[argc] = NULL;
1419
1420 /* Second, change back to the old user if we changed it. */
1421 if (server_user != NULL)
1422 {
1423 if (setresuid (old_uid, old_uid, old_uid) != 0)
1424 {
1425 dbg_log (_("\
1426cannot change to old UID: %s; disabling paranoia mode"),
1427 strerror (errno));
1428
1429 paranoia = 0;
1430 return;
1431 }
1432
1433 if (setresgid (old_gid, old_gid, old_gid) != 0)
1434 {
1435 dbg_log (_("\
1436cannot change to old GID: %s; disabling paranoia mode"),
1437 strerror (errno));
1438
1439 ignore_value (setuid (server_uid));
1440 paranoia = 0;
1441 return;
1442 }
1443 }
1444
1445 /* Next change back to the old working directory. */
1446 if (chdir (oldcwd) == -1)
1447 {
1448 dbg_log (_("\
1449cannot change to old working directory: %s; disabling paranoia mode"),
1450 strerror (errno));
1451
1452 if (server_user != NULL)
1453 {
1454 ignore_value (setuid (server_uid));
1455 ignore_value (setgid (server_gid));
1456 }
1457 paranoia = 0;
1458 return;
1459 }
1460
1461 /* Synchronize memory. */
1462 int32_t certainly[lastdb];
1463 for (int cnt = 0; cnt < lastdb; ++cnt)
1464 if (dbs[cnt].enabled)
1465 {
1466 /* Make sure nobody keeps using the database. */
1467 dbs[cnt].head->timestamp = 0;
1468 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1469 dbs[cnt].head->nscd_certainly_running = 0;
1470
1471 if (dbs[cnt].persistent)
1472 // XXX async OK?
1473 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1474 }
1475
1476 /* The preparations are done. */
1477#ifdef PATH_MAX
1478 char pathbuf[PATH_MAX];
1479#else
1480 char pathbuf[256];
1481#endif
1482 /* Try to exec the real nscd program so the process name (as reported
1483 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1484 if readlink or the exec with the result of the readlink call fails. */
1485 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1486 if (n != -1)
1487 {
1488 pathbuf[n] = '\0';
1489 execv (pathbuf, argv);
1490 }
1491 execv ("/proc/self/exe", argv);
1492
1493 /* If we come here, we will never be able to re-exec. */
1494 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1495 strerror (errno));
1496
1497 if (server_user != NULL)
1498 {
1499 ignore_value (setuid (server_uid));
1500 ignore_value (setgid (server_gid));
1501 }
1502 if (chdir ("/") != 0)
1503 dbg_log (_("cannot change current working directory to \"/\": %s"),
1504 strerror (errno));
1505 paranoia = 0;
1506
1507 /* Reenable the databases. */
1508 time_t now = time (NULL);
1509 for (int cnt = 0; cnt < lastdb; ++cnt)
1510 if (dbs[cnt].enabled)
1511 {
1512 dbs[cnt].head->timestamp = now;
1513 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1514 }
1515}
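
/* Illustrative sketch (not part of the original source): what the
   /proc/self/cmdline parsing above produces.  The file contains the
   original argument vector as NUL-separated strings, e.g. for "nscd -d"
   the bytes are "nscd\0-d\0", and splitting at each NUL recreates argv.
   The helper name is made up for the example.  */
#if 0
static int
split_cmdline (char *buf, size_t len, char **argv, size_t max)
{
  int argc = 0;
  char *cp = buf;

  while (cp < buf + len && (size_t) argc < max - 1)
    {
      argv[argc++] = cp;
      cp = (char *) rawmemchr (cp, '\0') + 1;
    }
  argv[argc] = NULL;
  return argc;
}
#endif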
1516
1517
1518/* List of file descriptors. */
1519struct fdlist
1520{
1521 int fd;
1522 struct fdlist *next;
1523};
1524/* Memory allocated for the list. */
1525static struct fdlist *fdlist;
1526/* List of currently ready-to-read file descriptors. */
1527static struct fdlist *readylist;
1528
/* Condition variable and mutex to signal availability of entries in
   READYLIST.  */
1532static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1533static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1534
1535/* The clock to use with the condvar. */
1536static clockid_t timeout_clock = CLOCK_REALTIME;
1537
1538/* Number of threads ready to handle the READYLIST. */
1539static unsigned long int nready;
1540
1541
1542/* Function for the clean-up threads. */
1543static void *
1544__attribute__ ((__noreturn__))
1545nscd_run_prune (void *p)
1546{
1547 const long int my_number = (long int) p;
1548 assert (dbs[my_number].enabled);
1549
1550 int dont_need_update = setup_thread (&dbs[my_number]);
1551
1552 time_t now = time (NULL);
1553
1554 /* We are running. */
1555 dbs[my_number].head->timestamp = now;
1556
1557 struct timespec prune_ts;
1558 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1559 /* Should never happen. */
1560 abort ();
1561
  /* Compute the initial timeout time.  Prevent all the timers from
     going off at the same time by adding a db-based offset.  */
1564 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1565 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1566
1567 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1568 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1569 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1570
1571 pthread_mutex_lock (prune_lock);
1572 while (1)
1573 {
1574 /* Wait, but not forever. */
1575 int e = 0;
1576 if (! dbs[my_number].clear_cache)
1577 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1578 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1579
1580 time_t next_wait;
1581 now = time (NULL);
1582 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1583 || dbs[my_number].clear_cache)
1584 {
	  /* We will determine the new timeout values based on the
	     cache content.  Should there be concurrent additions to
	     the cache which are not accounted for in the cache
	     pruning, we want to know about it.  Therefore set the
	     timeout to the maximum.  It will be decreased when adding
	     new entries to the cache, if necessary.  */
1591 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1592
1593 /* Unconditionally reset the flag. */
1594 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1595 dbs[my_number].clear_cache = 0;
1596
1597 pthread_mutex_unlock (prune_lock);
1598
1599 /* We use a separate lock for running the prune function (instead
1600 of keeping prune_lock locked) because this enables concurrent
1601 invocations of cache_add which might modify the timeout value. */
1602 pthread_mutex_lock (prune_run_lock);
1603 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1604 pthread_mutex_unlock (prune_run_lock);
1605
1606 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1607 /* If clients cannot determine for sure whether nscd is running
1608 we need to wake up occasionally to update the timestamp.
1609 Wait 90% of the update period. */
1610#define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1611 if (__glibc_unlikely (! dont_need_update))
1612 {
1613 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1614 dbs[my_number].head->timestamp = now;
1615 }
1616
1617 pthread_mutex_lock (prune_lock);
1618
1619 /* Make it known when we will wake up again. */
1620 if (now + next_wait < dbs[my_number].wakeup_time)
1621 dbs[my_number].wakeup_time = now + next_wait;
1622 else
1623 next_wait = dbs[my_number].wakeup_time - now;
1624 }
1625 else
1626 /* The cache was just pruned. Do not do it again now. Just
1627 use the new timeout value. */
1628 next_wait = dbs[my_number].wakeup_time - now;
1629
1630 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1631 /* Should never happen. */
1632 abort ();
1633
1634 /* Compute next timeout time. */
1635 prune_ts.tv_sec += next_wait;
1636 }
1637}
1638
1639
1640/* This is the main loop. It is replicated in different threads but
1641 the use of the ready list makes sure only one thread handles an
1642 incoming connection. */
1643static void *
1644__attribute__ ((__noreturn__))
1645nscd_run_worker (void *p)
1646{
1647 char buf[256];
1648
1649 /* Initial locking. */
1650 pthread_mutex_lock (&readylist_lock);
1651
1652 /* One more thread available. */
1653 ++nready;
1654
1655 while (1)
1656 {
1657 while (readylist == NULL)
1658 pthread_cond_wait (&readylist_cond, &readylist_lock);
1659
1660 struct fdlist *it = readylist->next;
1661 if (readylist->next == readylist)
1662 /* Just one entry on the list. */
1663 readylist = NULL;
1664 else
1665 readylist->next = it->next;
1666
1667 /* Extract the information and mark the record ready to be used
1668 again. */
1669 int fd = it->fd;
1670 it->next = NULL;
1671
      /* One thread is now busy handling a request.  */
      --nready;
1674
1675 /* We are done with the list. */
1676 pthread_mutex_unlock (&readylist_lock);
1677
1678#ifndef __ASSUME_ACCEPT4
1679 if (have_accept4 < 0)
1680 {
1681 /* We do not want to block on a short read or so. */
1682 int fl = fcntl (fd, F_GETFL);
1683 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1684 goto close_and_out;
1685 }
1686#endif
1687
1688 /* Now read the request. */
1689 request_header req;
1690 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1691 != sizeof (req), 0))
1692 {
1693 /* We failed to read data. Note that this also might mean we
1694 failed because we would have blocked. */
1695 if (debug_level > 0)
1696 dbg_log (_("short read while reading request: %s"),
1697 strerror_r (errno, buf, sizeof (buf)));
1698 goto close_and_out;
1699 }
1700
1701 /* Check whether this is a valid request type. */
1702 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1703 goto close_and_out;
1704
      /* Some systems have no SO_PEERCRED implementation.  They don't
	 care about security, so neither do we.  */
1707 uid_t uid = -1;
1708#ifdef SO_PEERCRED
1709 pid_t pid = 0;
1710
1711 if (__glibc_unlikely (debug_level > 0))
1712 {
1713 struct ucred caller;
1714 socklen_t optlen = sizeof (caller);
1715
1716 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1717 pid = caller.pid;
1718 }
1719#else
1720 const pid_t pid = 0;
1721#endif
1722
1723 /* It should not be possible to crash the nscd with a silly
1724 request (i.e., a terribly large key). We limit the size to 1kb. */
1725 if (__builtin_expect (req.key_len, 1) < 0
1726 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1727 {
1728 if (debug_level > 0)
1729 dbg_log (_("key length in request too long: %d"), req.key_len);
1730 }
1731 else
1732 {
1733 /* Get the key. */
1734 char keybuf[MAXKEYLEN + 1];
1735
1736 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1737 req.key_len))
1738 != req.key_len, 0))
1739 {
1740 /* Again, this can also mean we would have blocked. */
1741 if (debug_level > 0)
1742 dbg_log (_("short read while reading request key: %s"),
1743 strerror_r (errno, buf, sizeof (buf)));
1744 goto close_and_out;
1745 }
1746 keybuf[req.key_len] = '\0';
1747
1748 if (__builtin_expect (debug_level, 0) > 0)
1749 {
1750#ifdef SO_PEERCRED
1751 if (pid != 0)
1752 dbg_log (_("\
1753handle_request: request received (Version = %d) from PID %ld"),
1754 req.version, (long int) pid);
1755 else
1756#endif
1757 dbg_log (_("\
1758handle_request: request received (Version = %d)"), req.version);
1759 }
1760
1761 /* Phew, we got all the data, now process it. */
1762 handle_request (fd, &req, keybuf, uid, pid);
1763 }
1764
1765 close_and_out:
1766 /* We are done. */
1767 close (fd);
1768
1769 /* Re-locking. */
1770 pthread_mutex_lock (&readylist_lock);
1771
1772 /* One more thread available. */
1773 ++nready;
1774 }
1775 /* NOTREACHED */
1776}
1777
1778
1779static unsigned int nconns;
1780
1781static void
1782fd_ready (int fd)
1783{
1784 pthread_mutex_lock (&readylist_lock);
1785
1786 /* Find an empty entry in FDLIST. */
1787 size_t inner;
1788 for (inner = 0; inner < nconns; ++inner)
1789 if (fdlist[inner].next == NULL)
1790 break;
1791 assert (inner < nconns);
1792
1793 fdlist[inner].fd = fd;
1794
1795 if (readylist == NULL)
1796 readylist = fdlist[inner].next = &fdlist[inner];
1797 else
1798 {
1799 fdlist[inner].next = readylist->next;
1800 readylist = readylist->next = &fdlist[inner];
1801 }
1802
1803 bool do_signal = true;
1804 if (__glibc_unlikely (nready == 0))
1805 {
1806 ++client_queued;
1807 do_signal = false;
1808
1809 /* Try to start another thread to help out. */
1810 pthread_t th;
1811 if (nthreads < max_nthreads
1812 && pthread_create (&th, &attr, nscd_run_worker,
1813 (void *) (long int) nthreads) == 0)
1814 {
1815 /* We got another thread. */
1816 ++nthreads;
1817 /* The new thread might need a kick. */
1818 do_signal = true;
1819 }
1820
1821 }
1822
1823 pthread_mutex_unlock (&readylist_lock);
1824
1825 /* Tell one of the worker threads there is work to do. */
1826 if (do_signal)
1827 pthread_cond_signal (&readylist_cond);
1828}
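
/* Illustrative sketch (not part of the original source): READYLIST is a
   circular singly-linked list in which the global pointer designates the
   newest entry and newest->next is the oldest, giving FIFO behaviour with
   O(1) insertion (fd_ready) and removal (nscd_run_worker).  Standalone
   version of the two operations, with made-up names:  */
#if 0
struct ring { int fd; struct ring *next; };

static void
ring_push (struct ring **tail, struct ring *elem)
{
  if (*tail == NULL)
    *tail = elem->next = elem;		/* Single self-linked entry.  */
  else
    {
      elem->next = (*tail)->next;	/* New entry points at the oldest.  */
      *tail = (*tail)->next = elem;	/* Old tail points at the new tail.  */
    }
}

static struct ring *
ring_pop (struct ring **tail)
{
  struct ring *oldest = (*tail)->next;
  if (oldest == *tail)
    *tail = NULL;			/* It was the only entry.  */
  else
    (*tail)->next = oldest->next;	/* Unlink the oldest entry.  */
  oldest->next = NULL;
  return oldest;
}
#endif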
1829
1830
1831/* Check whether restarting should happen. */
1832static bool
1833restart_p (time_t now)
1834{
1835 return (paranoia && readylist == NULL && nready == nthreads
1836 && now >= restart_time);
1837}
1838
1839
1840/* Array for times a connection was accepted. */
1841static time_t *starttime;
1842
1843#ifdef HAVE_INOTIFY
1844/* Inotify event for changed file. */
1845union __inev
1846{
1847 struct inotify_event i;
1848# ifndef PATH_MAX
1849# define PATH_MAX 1024
1850# endif
1851 char buf[sizeof (struct inotify_event) + PATH_MAX];
1852};
1853
1854/* Returns 0 if the file is there otherwise -1. */
1855int
1856check_file (struct traced_file *finfo)
1857{
1858 struct stat64 st;
1859 /* We could check mtime and if different re-add
1860 the watches, and invalidate the database, but we
1861 don't because we are called from inotify_check_files
1862 which should be doing that work. If sufficient inotify
1863 events were lost then the next pruning or invalidation
1864 will do the stat and mtime check. We don't do it here to
1865 keep the logic simple. */
1866 if (stat64 (finfo->fname, &st) < 0)
1867 return -1;
1868 return 0;
1869}
1870
1871/* Process the inotify event in INEV. If the event matches any of the files
1872 registered with a database then mark that database as requiring its cache
1873 to be cleared. We indicate the cache needs clearing by setting
1874 TO_CLEAR[DBCNT] to true for the matching database. */
1875static void
1876inotify_check_files (bool *to_clear, union __inev *inev)
1877{
1878 /* Check which of the files changed. */
1879 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1880 {
1881 struct traced_file *finfo = dbs[dbcnt].traced_files;
1882
1883 while (finfo != NULL)
1884 {
1885 /* The configuration file was moved or deleted.
1886 We stop watching it at that point, and reinitialize. */
1887 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1888 && ((inev->i.mask & IN_MOVE_SELF)
1889 || (inev->i.mask & IN_DELETE_SELF)
1890 || (inev->i.mask & IN_IGNORED)))
1891 {
1892 int ret;
1893 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1894
1895 if (check_file (finfo) == 0)
1896 {
1897 dbg_log (_("ignored inotify event for `%s` (file exists)"),
1898 finfo->fname);
1899 return;
1900 }
1901
1902 dbg_log (_("monitored file `%s` was %s, removing watch"),
1903 finfo->fname, moved ? "moved" : "deleted");
1904 /* File was moved out, remove the watch. Watches are
1905 automatically removed when the file is deleted. */
1906 if (moved)
1907 {
1908 ret = inotify_rm_watch (inotify_fd, inev->i.wd);
1909 if (ret < 0)
1910 dbg_log (_("failed to remove file watch `%s`: %s"),
1911 finfo->fname, strerror (errno));
1912 }
1913 finfo->inotify_descr[TRACED_FILE] = -1;
1914 to_clear[dbcnt] = true;
1915 if (finfo->call_res_init)
1916 res_init ();
1917 return;
1918 }
1919 /* The configuration file was open for writing and has just closed.
1920 We reset the cache and reinitialize. */
1921 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1922 && inev->i.mask & IN_CLOSE_WRITE)
1923 {
1924 /* Mark cache as needing to be cleared and reinitialize. */
1925 dbg_log (_("monitored file `%s` was written to"), finfo->fname);
1926 to_clear[dbcnt] = true;
1927 if (finfo->call_res_init)
1928 res_init ();
1929 return;
1930 }
1931 /* The parent directory was moved or deleted. We trigger one last
1932 invalidation. At the next pruning or invalidation we may add
1933 this watch back if the file is present again. */
1934 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1935 && ((inev->i.mask & IN_DELETE_SELF)
1936 || (inev->i.mask & IN_MOVE_SELF)
1937 || (inev->i.mask & IN_IGNORED)))
1938 {
1939 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1940 /* The directory watch may have already been removed
1941 but we don't know so we just remove it again and
1942 ignore the error. Then we remove the file watch.
1943 Note: watches are automatically removed for deleted
1944 files. */
1945 if (moved)
1946 inotify_rm_watch (inotify_fd, inev->i.wd);
1947 if (finfo->inotify_descr[TRACED_FILE] != -1)
1948 {
1949 dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1950 finfo->dname, moved ? "moved" : "deleted", finfo->fname);
1951 if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
1952 dbg_log (_("failed to remove file watch `%s`: %s"),
1953 finfo->dname, strerror (errno));
1954 }
1955 finfo->inotify_descr[TRACED_FILE] = -1;
1956 finfo->inotify_descr[TRACED_DIR] = -1;
1957 to_clear[dbcnt] = true;
1958 if (finfo->call_res_init)
1959 res_init ();
1960 /* Continue to the next entry since this might be the
1961 parent directory for multiple registered files and
1962 we want to remove watches for all registered files. */
1963 continue;
1964 }
1965 /* The parent directory had a create or moved to event. */
1966 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1967 && ((inev->i.mask & IN_MOVED_TO)
1968 || (inev->i.mask & IN_CREATE))
1969 && strcmp (inev->i.name, finfo->sfname) == 0)
1970 {
1971 /* We detected a directory change. We look for the creation
1972 of the file we are tracking or the move of the same file
1973 into the directory. */
1974 int ret;
1975 dbg_log (_("monitored file `%s` was %s, adding watch"),
1976 finfo->fname,
1977 inev->i.mask & IN_CREATE ? "created" : "moved into place");
1978 /* File was moved in or created. Regenerate the watch. */
1979 if (finfo->inotify_descr[TRACED_FILE] != -1)
1980 inotify_rm_watch (inotify_fd,
1981 finfo->inotify_descr[TRACED_FILE]);
1982
1983 ret = inotify_add_watch (inotify_fd,
1984 finfo->fname,
1985 TRACED_FILE_MASK);
1986 if (ret < 0)
1987 dbg_log (_("failed to add file watch `%s`: %s"),
1988 finfo->fname, strerror (errno));
1989
1990 finfo->inotify_descr[TRACED_FILE] = ret;
1991
1992 /* The file is new or moved so mark cache as needing to
1993 be cleared and reinitialize. */
1994 to_clear[dbcnt] = true;
1995 if (finfo->call_res_init)
1996 res_init ();
1997
1998	      /* Done re-adding the watch.  Don't return: other files may be
1999	         registered in this same directory (same watch descriptor)
2000	         and still need to be processed.  */
2001 }
2002 /* Other events are ignored, and we move on to the next file. */
2003 finfo = finfo->next;
2004 }
2005 }
2006}
2007
2008/* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
2009 for the associated database, otherwise do nothing. The TO_CLEAR array must
2010 have LASTDB entries. */
2011static inline void
2012clear_db_cache (bool *to_clear)
2013{
2014 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
2015 if (to_clear[dbcnt])
2016 {
2017 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
2018 dbs[dbcnt].clear_cache = 1;
2019 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
2020 pthread_cond_signal (&dbs[dbcnt].prune_cond);
2021 }
2022}
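/* Illustrative sketch (not part of nscd): the consumer side of the
   flag-plus-condition-variable hand-off used by clear_db_cache above.
   Assuming a pruning thread that sleeps on prune_cond and acts on the
   clear_cache flag, the protocol looks roughly like this; the function
   name is hypothetical, only the dbs[] field names are taken from this
   file.  */
#if 0
static void
example_prune_consumer (struct database_dyn *db)
{
  pthread_mutex_lock (&db->prune_lock);
  while (!db->clear_cache)
    /* Releases the mutex while waiting, re-acquires it on wakeup.  */
    pthread_cond_wait (&db->prune_cond, &db->prune_lock);
  db->clear_cache = 0;
  pthread_mutex_unlock (&db->prune_lock);
  /* ... actually clear/prune the cache here ... */
}
#endif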
2023
2024int
2025handle_inotify_events (void)
2026{
2027 bool to_clear[lastdb] = { false, };
2028 union __inev inev;
2029
2030 /* Read all inotify events for files registered via
2031 register_traced_file(). */
2032 while (1)
2033 {
2034 /* Potentially read multiple events into buf. */
2035 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
2036 &inev.buf,
2037 sizeof (inev)));
2038 if (nb < (ssize_t) sizeof (struct inotify_event))
2039 {
2040 /* Not even 1 event. */
2041 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
2042 return -1;
2043 /* Done reading events that are ready. */
2044 break;
2045 }
2046 /* Process all events. The normal inotify interface delivers
2047 complete events on a read and never a partial event. */
2048 char *eptr = &inev.buf[0];
2049 ssize_t count;
2050 while (1)
2051 {
2052 /* Check which of the files changed. */
2053 inotify_check_files (to_clear, &inev);
2054 count = sizeof (struct inotify_event) + inev.i.len;
2055 eptr += count;
2056 nb -= count;
2057	  if (nb >= (ssize_t) sizeof (struct inotify_event))
2058	    memmove (&inev, eptr, nb);	/* The regions may overlap.  */
2059 else
2060 break;
2061 }
2062 continue;
2063 }
2064 /* Actually perform the cache clearing. */
2065 clear_db_cache (to_clear);
2066 return 0;
2067}
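/* Illustrative sketch (not part of nscd): the generic pattern for walking
   the variable-length inotify events packed into a single read() buffer,
   which is what handle_inotify_events does above.  A minimal,
   self-contained variant, with a hypothetical function name, might look
   like this.  */
#if 0
# include <stdio.h>
# include <unistd.h>
# include <sys/inotify.h>

static void
example_drain_inotify (int ifd)
{
  char buf[4096]
    __attribute__ ((aligned (__alignof__ (struct inotify_event))));
  ssize_t nb;
  while ((nb = read (ifd, buf, sizeof (buf))) > 0)
    for (char *p = buf; p < buf + nb; )
      {
	struct inotify_event *ev = (struct inotify_event *) p;
	printf ("wd=%d mask=%#x name=%s\n",
		ev->wd, (unsigned int) ev->mask, ev->len ? ev->name : "");
	/* Each event occupies sizeof (*ev) plus ev->len name bytes.  */
	p += sizeof (struct inotify_event) + ev->len;
      }
}
#endif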
2068
2069#endif
2070
2071static void
2072__attribute__ ((__noreturn__))
2073main_loop_poll (void)
2074{
2075 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
2076 * sizeof (conns[0]));
2077
2078 conns[0].fd = sock;
2079 conns[0].events = POLLRDNORM;
2080 size_t nused = 1;
2081 size_t firstfree = 1;
2082
2083#ifdef HAVE_INOTIFY
2084 if (inotify_fd != -1)
2085 {
2086 conns[1].fd = inotify_fd;
2087 conns[1].events = POLLRDNORM;
2088 nused = 2;
2089 firstfree = 2;
2090 }
2091#endif
2092
2093#ifdef HAVE_NETLINK
2094 size_t idx_nl_status_fd = 0;
2095 if (nl_status_fd != -1)
2096 {
2097 idx_nl_status_fd = nused;
2098 conns[nused].fd = nl_status_fd;
2099 conns[nused].events = POLLRDNORM;
2100 ++nused;
2101 firstfree = nused;
2102 }
2103#endif
2104
2105 while (1)
2106 {
2107      /* Wait for any event.  We wait at most MAIN_THREAD_TIMEOUT
2108         milliseconds so that we can periodically close accepted
2109         connections on which no request has arrived.  */
2110#define MAX_ACCEPT_TIMEOUT 30
2111#define MIN_ACCEPT_TIMEOUT 5
2112#define MAIN_THREAD_TIMEOUT \
2113 (MAX_ACCEPT_TIMEOUT * 1000 \
2114 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
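      /* Worked example (illustrative; nconns depends on the system): with
	 MAX_ACCEPT_TIMEOUT == 30 and MIN_ACCEPT_TIMEOUT == 5, a nearly
	 idle daemon with nused == 2 and, say, nconns == 4096 polls for
	 30000 - (25 * 1000 * 2) / (2 * 4096) = 29994 ms, while a full
	 connection table (nused == nconns) gives 30000 - 25000 / 2
	 = 17500 ms.  The timeout thus shrinks linearly from 30 s towards
	 the midpoint of the two limits as more descriptors are in use.  */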
2115
2116 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
2117
2118 time_t now = time (NULL);
2119
2120 /* If there is a descriptor ready for reading or there is a new
2121 connection, process this now. */
2122 if (n > 0)
2123 {
2124 if (conns[0].revents != 0)
2125 {
2126 /* We have a new incoming connection. Accept the connection. */
2127 int fd;
2128
2129#ifndef __ASSUME_ACCEPT4
2130 fd = -1;
2131 if (have_accept4 >= 0)
2132#endif
2133 {
2134 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2135 SOCK_NONBLOCK));
2136#ifndef __ASSUME_ACCEPT4
2137 if (have_accept4 == 0)
2138 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
2139#endif
2140 }
2141#ifndef __ASSUME_ACCEPT4
2142 if (have_accept4 < 0)
2143 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2144#endif
2145
2146 /* Use the descriptor if we have not reached the limit. */
2147 if (fd >= 0)
2148 {
2149 if (firstfree < nconns)
2150 {
2151 conns[firstfree].fd = fd;
2152 conns[firstfree].events = POLLRDNORM;
2153 starttime[firstfree] = now;
2154 if (firstfree >= nused)
2155 nused = firstfree + 1;
2156
2157 do
2158 ++firstfree;
2159 while (firstfree < nused && conns[firstfree].fd != -1);
2160 }
2161 else
2162 /* We cannot use the connection so close it. */
2163 close (fd);
2164 }
2165
2166 --n;
2167 }
2168
2169 size_t first = 1;
2170#ifdef HAVE_INOTIFY
2171 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2172 {
2173 if (conns[1].revents != 0)
2174 {
2175 int ret;
2176 ret = handle_inotify_events ();
2177 if (ret == -1)
2178 {
2179 /* Something went wrong when reading the inotify
2180 data. Better disable inotify. */
2181 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2182 conns[1].fd = -1;
2183 firstfree = 1;
2184 if (nused == 2)
2185 nused = 1;
2186 close (inotify_fd);
2187 inotify_fd = -1;
2188 }
2189 --n;
2190 }
2191
2192 first = 2;
2193 }
2194#endif
2195
2196#ifdef HAVE_NETLINK
2197 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2198 {
2199 char buf[4096];
2200 /* Read all the data. We do not interpret it here. */
2201 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2202 sizeof (buf))) != -1)
2203 ;
2204
2205 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2206 = __bump_nl_timestamp ();
2207 }
2208#endif
2209
2210 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2211 if (conns[cnt].revents != 0)
2212 {
2213 fd_ready (conns[cnt].fd);
2214
2215 /* Clean up the CONNS array. */
2216 conns[cnt].fd = -1;
2217 if (cnt < firstfree)
2218 firstfree = cnt;
2219 if (cnt == nused - 1)
2220 do
2221 --nused;
2222 while (conns[nused - 1].fd == -1);
2223
2224 --n;
2225 }
2226 }
2227
2228 /* Now find entries which have timed out. */
2229 assert (nused > 0);
2230
2231 /* We make the timeout length depend on the number of file
2232 descriptors currently used. */
2233#define ACCEPT_TIMEOUT \
2234 (MAX_ACCEPT_TIMEOUT \
2235 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
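      /* Worked example (illustrative; nconns depends on the system): with
	 nused == 2 and nconns == 4096 the idle limit is
	 30 - (25 * 2) / 4096 = 30 s; with a full table (nused == nconns)
	 it drops to 30 - 25 = 5 s, so a busy daemon reaps idle client
	 connections much more aggressively.  */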
2236 time_t laststart = now - ACCEPT_TIMEOUT;
2237
2238 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2239 {
2240 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2241 {
2242 /* Remove the entry, it timed out. */
2243 (void) close (conns[cnt].fd);
2244 conns[cnt].fd = -1;
2245
2246 if (cnt < firstfree)
2247 firstfree = cnt;
2248 if (cnt == nused - 1)
2249 do
2250 --nused;
2251 while (conns[nused - 1].fd == -1);
2252 }
2253 }
2254
2255 if (restart_p (now))
2256 restart ();
2257 }
2258}
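/* Illustrative sketch (not part of nscd): the accept4-with-fallback idiom
   used in both main loops above when __ASSUME_ACCEPT4 is not defined.  On
   kernels lacking accept4 the call fails with ENOSYS exactly once; after
   that the result is remembered and plain accept is used directly.  The
   static variable and function names here are hypothetical.  */
#if 0
# include <errno.h>
# include <sys/socket.h>

static int example_have_accept4;	/* 0 = unknown, 1 = yes, -1 = no.  */

static int
example_accept_once (int lsock)
{
  int fd = -1;
  if (example_have_accept4 >= 0)
    {
      fd = accept4 (lsock, NULL, NULL, SOCK_NONBLOCK);
      /* ENOSYS on the very first try means the kernel has no accept4.  */
      if (example_have_accept4 == 0)
	example_have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
    }
  if (example_have_accept4 < 0)
    /* Fall back to plain accept, as the loops above do.  */
    fd = accept (lsock, NULL, NULL);
  return fd;
}
#endif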
2259
2260
2261#ifdef HAVE_EPOLL
2262static void
2263main_loop_epoll (int efd)
2264{
2265 struct epoll_event ev = { 0, };
2266 int nused = 1;
2267 size_t highest = 0;
2268
2269 /* Add the socket. */
2270 ev.events = EPOLLRDNORM;
2271 ev.data.fd = sock;
2272 if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
2273 /* We cannot use epoll. */
2274 return;
2275
2276# ifdef HAVE_INOTIFY
2277 if (inotify_fd != -1)
2278 {
2279 ev.events = EPOLLRDNORM;
2280 ev.data.fd = inotify_fd;
2281 if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
2282 /* We cannot use epoll. */
2283 return;
2284 nused = 2;
2285 }
2286# endif
2287
2288# ifdef HAVE_NETLINK
2289 if (nl_status_fd != -1)
2290 {
2291 ev.events = EPOLLRDNORM;
2292 ev.data.fd = nl_status_fd;
2293 if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
2294 /* We cannot use epoll. */
2295 return;
2296 }
2297# endif
2298
2299 while (1)
2300 {
2301 struct epoll_event revs[100];
2302# define nrevs (sizeof (revs) / sizeof (revs[0]))
2303
2304 int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);
2305
2306 time_t now = time (NULL);
2307
2308 for (int cnt = 0; cnt < n; ++cnt)
2309 if (revs[cnt].data.fd == sock)
2310 {
2311 /* A new connection. */
2312 int fd;
2313
2314# ifndef __ASSUME_ACCEPT4
2315 fd = -1;
2316 if (have_accept4 >= 0)
2317# endif
2318 {
2319 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2320 SOCK_NONBLOCK));
2321# ifndef __ASSUME_ACCEPT4
2322 if (have_accept4 == 0)
2323 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
2324# endif
2325 }
2326# ifndef __ASSUME_ACCEPT4
2327 if (have_accept4 < 0)
2328 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2329# endif
2330
2331 /* Use the descriptor if we have not reached the limit. */
2332 if (fd >= 0)
2333 {
2334 /* Try to add the new descriptor. */
2335 ev.data.fd = fd;
2336 if (fd >= nconns
2337 || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
2338 /* The descriptor is too large or something went
2339 wrong. Close the descriptor. */
2340 close (fd);
2341 else
2342 {
2343 /* Remember when we accepted the connection. */
2344 starttime[fd] = now;
2345
2346 if (fd > highest)
2347 highest = fd;
2348
2349 ++nused;
2350 }
2351 }
2352 }
2353# ifdef HAVE_INOTIFY
2354 else if (revs[cnt].data.fd == inotify_fd)
2355 {
2356 int ret;
2357 ret = handle_inotify_events ();
2358 if (ret == -1)
2359 {
2360 /* Something went wrong when reading the inotify
2361 data. Better disable inotify. */
2362 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2363 (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
2364 close (inotify_fd);
2365 inotify_fd = -1;
2366 break;
2367 }
2368 }
2369# endif
2370# ifdef HAVE_NETLINK
2371 else if (revs[cnt].data.fd == nl_status_fd)
2372 {
2373 char buf[4096];
2374 /* Read all the data. We do not interpret it here. */
2375 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2376 sizeof (buf))) != -1)
2377 ;
2378
2379 __bump_nl_timestamp ();
2380 }
2381# endif
2382 else
2383 {
2384 /* Remove the descriptor from the epoll descriptor. */
2385 (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);
2386
2387 /* Get a worker to handle the request. */
2388 fd_ready (revs[cnt].data.fd);
2389
2390 /* Reset the time. */
2391 starttime[revs[cnt].data.fd] = 0;
2392 if (revs[cnt].data.fd == highest)
2393 do
2394 --highest;
2395 while (highest > 0 && starttime[highest] == 0);
2396
2397 --nused;
2398 }
2399
2400      /* Now look for descriptors of accepted connections which have
2401	 gone without a request for too long.  */
2402 time_t laststart = now - ACCEPT_TIMEOUT;
2403 assert (starttime[sock] == 0);
2404# ifdef HAVE_INOTIFY
2405 assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
2406# endif
2407 assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
2408 for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
2409 if (starttime[cnt] != 0 && starttime[cnt] < laststart)
2410 {
2411 /* We are waiting for this one for too long. Close it. */
2412 (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);
2413
2414 (void) close (cnt);
2415
2416 starttime[cnt] = 0;
2417 if (cnt == highest)
2418 --highest;
2419 }
2420 else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
2421 --highest;
2422
2423 if (restart_p (now))
2424 restart ();
2425 }
2426}
2427#endif
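/* Illustrative sketch (not part of nscd): the bare epoll skeleton that
   main_loop_epoll builds on -- register the listening socket, loop over
   epoll_wait results, accept on the listener, and stop watching any other
   ready descriptor before handing it off.  All names are hypothetical and
   a real server would dispatch instead of closing.  */
#if 0
# include <sys/epoll.h>
# include <sys/socket.h>
# include <unistd.h>

static void
example_epoll_skeleton (int lsock)
{
  int efd = epoll_create1 (0);
  if (efd == -1)
    return;
  struct epoll_event ev = { .events = EPOLLIN, .data.fd = lsock };
  if (epoll_ctl (efd, EPOLL_CTL_ADD, lsock, &ev) == -1)
    {
      close (efd);
      return;
    }
  for (;;)
    {
      struct epoll_event revs[16];
      int n = epoll_wait (efd, revs, 16, 30 * 1000);
      for (int i = 0; i < n; ++i)
	if (revs[i].data.fd == lsock)
	  {
	    int fd = accept4 (lsock, NULL, NULL, SOCK_NONBLOCK);
	    if (fd >= 0)
	      {
		ev.data.fd = fd;
		epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev);
	      }
	  }
	else
	  {
	    /* Mirror the EPOLL_CTL_DEL + fd_ready step above.  */
	    epoll_ctl (efd, EPOLL_CTL_DEL, revs[i].data.fd, NULL);
	    close (revs[i].data.fd);
	  }
    }
}
#endif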
2428
2429
2430/* Start all the threads we want. The initial process is thread no. 1. */
2431void
2432start_threads (void)
2433{
2434  /* Initialize the condition variable attribute we will use.  The only
2435     non-standard attribute we might use is the clock selection.  */
2436 pthread_condattr_t condattr;
2437 pthread_condattr_init (&condattr);
2438
2439#if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2440 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2441  /* Determine whether the monotonic clock is available.  */
2442 struct timespec dummy;
2443# if _POSIX_MONOTONIC_CLOCK == 0
2444 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2445# endif
2446# if _POSIX_CLOCK_SELECTION == 0
2447 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2448# endif
2449 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2450 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2451 timeout_clock = CLOCK_MONOTONIC;
2452#endif
2453
2454 /* Create the attribute for the threads. They are all created
2455 detached. */
2456 pthread_attr_init (&attr);
2457 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2458 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2459 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2460
2461  /* We allow fewer than LASTDB threads only for debugging.  */
2462 if (debug_level == 0)
2463 nthreads = MAX (nthreads, lastdb);
2464
2465 /* Create the threads which prune the databases. */
2466 // XXX Ideally this work would be done by some of the worker threads.
2467 // XXX But this is problematic since we would need to be able to wake
2468 // XXX them up explicitly as well as part of the group handling the
2469 // XXX ready-list. This requires an operation where we can wait on
2470  // XXX two condition variables at the same time.  This operation
2471 // XXX does not exist (yet).
2472 for (long int i = 0; i < lastdb; ++i)
2473 {
2474      /* Initialize the condition variable.  */
2475 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2476 {
2477 dbg_log (_("could not initialize conditional variable"));
2478 do_exit (1, 0, NULL);
2479 }
2480
2481 pthread_t th;
2482 if (dbs[i].enabled
2483 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2484 {
2485 dbg_log (_("could not start clean-up thread; terminating"));
2486 do_exit (1, 0, NULL);
2487 }
2488 }
2489
2490 pthread_condattr_destroy (&condattr);
2491
2492 for (long int i = 0; i < nthreads; ++i)
2493 {
2494 pthread_t th;
2495 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2496 {
2497 if (i == 0)
2498 {
2499 dbg_log (_("could not start any worker thread; terminating"));
2500 do_exit (1, 0, NULL);
2501 }
2502
2503 break;
2504 }
2505 }
2506
2507 /* Now it is safe to let the parent know that we're doing fine and it can
2508 exit. */
2509 notify_parent (0);
2510
2511 /* Determine how much room for descriptors we should initially
2512 allocate. This might need to change later if we cap the number
2513 with MAXCONN. */
2514 const long int nfds = sysconf (_SC_OPEN_MAX);
2515#define MINCONN 32
2516#define MAXCONN 16384
2517 if (nfds == -1 || nfds > MAXCONN)
2518 nconns = MAXCONN;
2519 else if (nfds < MINCONN)
2520 nconns = MINCONN;
2521 else
2522 nconns = nfds;
2523
2524 /* We need memory to pass descriptors on to the worker threads. */
2525 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2526 /* Array to keep track when connection was accepted. */
2527 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2528
2529 /* In the main thread we execute the loop which handles incoming
2530 connections. */
2531#ifdef HAVE_EPOLL
2532 int efd = epoll_create (100);
2533 if (efd != -1)
2534 {
2535 main_loop_epoll (efd);
2536 close (efd);
2537 }
2538#endif
2539
2540 main_loop_poll ();
2541}
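/* Illustrative sketch (not part of nscd): how a condition variable bound
   to CLOCK_MONOTONIC, as set up via pthread_condattr_setclock in
   start_threads, is typically used for an absolute timed wait.  The
   function name is hypothetical; the caller must hold MUTEX as usual.  */
#if 0
# include <pthread.h>
# include <time.h>

static int
example_wait_monotonic (pthread_cond_t *cond, pthread_mutex_t *mutex,
			int seconds)
{
  struct timespec abstime;
  /* The deadline must be expressed against the same clock that was
     selected with pthread_condattr_setclock.  */
  clock_gettime (CLOCK_MONOTONIC, &abstime);
  abstime.tv_sec += seconds;
  return pthread_cond_timedwait (cond, mutex, &abstime);
}
#endif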
2542
2543
2544/* Look up the uid, gid, and supplementary groups to run nscd as.  When
2545   this function is called, we are not listening on the nscd socket yet,
2546   so we can just use the ordinary lookup functions without causing a
     deadlock.  */
2547static void
2548begin_drop_privileges (void)
2549{
2550 struct passwd *pwd = getpwnam (server_user);
2551
2552 if (pwd == NULL)
2553 {
2554 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2555 do_exit (EXIT_FAILURE, 0,
2556 _("Failed to run nscd as user '%s'"), server_user);
2557 }
2558
2559 server_uid = pwd->pw_uid;
2560 server_gid = pwd->pw_gid;
2561
2562 /* Save the old UID/GID if we have to change back. */
2563 if (paranoia)
2564 {
2565 old_uid = getuid ();
2566 old_gid = getgid ();
2567 }
2568
2569 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2570 {
2571 /* This really must never happen. */
2572 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2573 do_exit (EXIT_FAILURE, errno,
2574 _("initial getgrouplist failed"));
2575 }
2576
2577 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2578
2579 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2580 == -1)
2581 {
2582 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2583 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2584 }
2585}
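/* Illustrative sketch (not part of nscd): the two-call getgrouplist
   pattern used by begin_drop_privileges above -- first query the required
   size with a NULL buffer and a zero count, then fetch the list into a
   correctly sized allocation.  The function name is hypothetical.  */
#if 0
# include <grp.h>
# include <stdlib.h>

static gid_t *
example_get_groups (const char *user, gid_t gid, int *ngroups)
{
  *ngroups = 0;
  /* With *ngroups == 0 this call fails and stores the needed count;
     success here should not happen.  */
  if (getgrouplist (user, gid, NULL, ngroups) == 0)
    return NULL;
  gid_t *groups = malloc (*ngroups * sizeof (gid_t));
  if (groups == NULL
      || getgrouplist (user, gid, groups, ngroups) == -1)
    {
      free (groups);
      return NULL;
    }
  return groups;
}
#endif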
2586
2587
2588/* Call setgroups(), setgid(), and setuid() to drop root privileges and
2589 run nscd as the user specified in the configuration file. */
2590static void
2591finish_drop_privileges (void)
2592{
2593#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2594 /* We need to preserve the capabilities to connect to the audit daemon. */
2595 cap_t new_caps = preserve_capabilities ();
2596#endif
2597
2598 if (setgroups (server_ngroups, server_groups) == -1)
2599 {
2600 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2601 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2602 }
2603
2604 int res;
2605 if (paranoia)
2606 res = setresgid (server_gid, server_gid, old_gid);
2607 else
2608 res = setgid (server_gid);
2609 if (res == -1)
2610 {
2611 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2612 do_exit (4, errno, "setgid");
2613 }
2614
2615 if (paranoia)
2616 res = setresuid (server_uid, server_uid, old_uid);
2617 else
2618 res = setuid (server_uid);
2619 if (res == -1)
2620 {
2621 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2622 do_exit (4, errno, "setuid");
2623 }
2624
2625#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2626 /* Remove the temporary capabilities. */
2627 install_real_capabilities (new_caps);
2628#endif
2629}
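/* Illustrative sketch (not part of nscd): the general privilege-dropping
   order mirrored by finish_drop_privileges -- supplementary groups first,
   then the gid, then the uid, checking every step, since once setuid
   succeeds the other two can no longer be changed.  Names here are
   hypothetical.  */
#if 0
# include <grp.h>
# include <unistd.h>

static int
example_drop_to (uid_t uid, gid_t gid, const gid_t *groups, int ngroups)
{
  if (setgroups (ngroups, groups) == -1)
    return -1;
  if (setgid (gid) == -1)
    return -1;
  if (setuid (uid) == -1)
    return -1;
  return 0;
}
#endif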
2630