1/* Inner loops of cache daemon.
2 Copyright (C) 1998-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
18
19#include <alloca.h>
20#include <assert.h>
21#include <atomic.h>
22#include <error.h>
23#include <errno.h>
24#include <fcntl.h>
25#include <grp.h>
26#include <ifaddrs.h>
27#include <libintl.h>
28#include <pthread.h>
29#include <pwd.h>
30#include <resolv.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <unistd.h>
34#include <stdint.h>
35#include <arpa/inet.h>
36#ifdef HAVE_NETLINK
37# include <linux/netlink.h>
38# include <linux/rtnetlink.h>
39#endif
40#ifdef HAVE_EPOLL
41# include <sys/epoll.h>
42#endif
43#ifdef HAVE_INOTIFY
44# include <sys/inotify.h>
45#endif
46#include <sys/mman.h>
47#include <sys/param.h>
48#include <sys/poll.h>
49#include <sys/socket.h>
50#include <sys/stat.h>
51#include <sys/un.h>
52
53#include "nscd.h"
54#include "dbg_log.h"
55#include "selinux.h"
56#include <resolv/resolv.h>
57
58#include <kernel-features.h>
59#include <libc-diag.h>
60
61
/* Support for running nscd as an unprivileged user.  */
63const char *server_user;
64static uid_t server_uid;
65static gid_t server_gid;
66const char *stat_user;
67uid_t stat_uid;
68static gid_t *server_groups;
69#ifndef NGROUPS
70# define NGROUPS 32
71#endif
72static int server_ngroups;
73
74static pthread_attr_t attr;
75
76static void begin_drop_privileges (void);
77static void finish_drop_privileges (void);
78
79/* Map request type to a string. */
80const char *const serv2str[LASTREQ] =
81{
82 [GETPWBYNAME] = "GETPWBYNAME",
83 [GETPWBYUID] = "GETPWBYUID",
84 [GETGRBYNAME] = "GETGRBYNAME",
85 [GETGRBYGID] = "GETGRBYGID",
86 [GETHOSTBYNAME] = "GETHOSTBYNAME",
87 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
88 [GETHOSTBYADDR] = "GETHOSTBYADDR",
89 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
90 [SHUTDOWN] = "SHUTDOWN",
91 [GETSTAT] = "GETSTAT",
92 [INVALIDATE] = "INVALIDATE",
93 [GETFDPW] = "GETFDPW",
94 [GETFDGR] = "GETFDGR",
95 [GETFDHST] = "GETFDHST",
96 [GETAI] = "GETAI",
97 [INITGROUPS] = "INITGROUPS",
98 [GETSERVBYNAME] = "GETSERVBYNAME",
99 [GETSERVBYPORT] = "GETSERVBYPORT",
100 [GETFDSERV] = "GETFDSERV",
101 [GETNETGRENT] = "GETNETGRENT",
102 [INNETGR] = "INNETGR",
103 [GETFDNETGR] = "GETFDNETGR"
104};
105
106#ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
107# define RWLOCK_INITIALIZER PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
108#else
109# define RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER
110#endif
111
112/* The control data structures for the services. */
113struct database_dyn dbs[lastdb] =
114{
115 [pwddb] = {
116 .lock = RWLOCK_INITIALIZER,
117 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
118 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
119 .enabled = 0,
120 .check_file = 1,
121 .persistent = 0,
122 .propagate = 1,
123 .shared = 0,
124 .max_db_size = DEFAULT_MAX_DB_SIZE,
125 .suggested_module = DEFAULT_SUGGESTED_MODULE,
126 .db_filename = _PATH_NSCD_PASSWD_DB,
127 .disabled_iov = &pwd_iov_disabled,
128 .postimeout = 3600,
129 .negtimeout = 20,
130 .wr_fd = -1,
131 .ro_fd = -1,
132 .mmap_used = false
133 },
134 [grpdb] = {
135 .lock = RWLOCK_INITIALIZER,
136 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
137 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
138 .enabled = 0,
139 .check_file = 1,
140 .persistent = 0,
141 .propagate = 1,
142 .shared = 0,
143 .max_db_size = DEFAULT_MAX_DB_SIZE,
144 .suggested_module = DEFAULT_SUGGESTED_MODULE,
145 .db_filename = _PATH_NSCD_GROUP_DB,
146 .disabled_iov = &grp_iov_disabled,
147 .postimeout = 3600,
148 .negtimeout = 60,
149 .wr_fd = -1,
150 .ro_fd = -1,
151 .mmap_used = false
152 },
153 [hstdb] = {
154 .lock = RWLOCK_INITIALIZER,
155 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
156 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
157 .enabled = 0,
158 .check_file = 1,
159 .persistent = 0,
160 .propagate = 0, /* Not used. */
161 .shared = 0,
162 .max_db_size = DEFAULT_MAX_DB_SIZE,
163 .suggested_module = DEFAULT_SUGGESTED_MODULE,
164 .db_filename = _PATH_NSCD_HOSTS_DB,
165 .disabled_iov = &hst_iov_disabled,
166 .postimeout = 3600,
167 .negtimeout = 20,
168 .wr_fd = -1,
169 .ro_fd = -1,
170 .mmap_used = false
171 },
172 [servdb] = {
173 .lock = RWLOCK_INITIALIZER,
174 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
175 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
176 .enabled = 0,
177 .check_file = 1,
178 .persistent = 0,
179 .propagate = 0, /* Not used. */
180 .shared = 0,
181 .max_db_size = DEFAULT_MAX_DB_SIZE,
182 .suggested_module = DEFAULT_SUGGESTED_MODULE,
183 .db_filename = _PATH_NSCD_SERVICES_DB,
184 .disabled_iov = &serv_iov_disabled,
185 .postimeout = 28800,
186 .negtimeout = 20,
187 .wr_fd = -1,
188 .ro_fd = -1,
189 .mmap_used = false
190 },
191 [netgrdb] = {
192 .lock = RWLOCK_INITIALIZER,
193 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
194 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
195 .enabled = 0,
196 .check_file = 1,
197 .persistent = 0,
198 .propagate = 0, /* Not used. */
199 .shared = 0,
200 .max_db_size = DEFAULT_MAX_DB_SIZE,
201 .suggested_module = DEFAULT_SUGGESTED_MODULE,
202 .db_filename = _PATH_NSCD_NETGROUP_DB,
203 .disabled_iov = &netgroup_iov_disabled,
204 .postimeout = 28800,
205 .negtimeout = 20,
206 .wr_fd = -1,
207 .ro_fd = -1,
208 .mmap_used = false
209 }
210};
211
212
213/* Mapping of request type to database. */
214static struct
215{
216 bool data_request;
217 struct database_dyn *db;
218} const reqinfo[LASTREQ] =
219{
220 [GETPWBYNAME] = { true, &dbs[pwddb] },
221 [GETPWBYUID] = { true, &dbs[pwddb] },
222 [GETGRBYNAME] = { true, &dbs[grpdb] },
223 [GETGRBYGID] = { true, &dbs[grpdb] },
224 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
225 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
226 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
227 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
228 [SHUTDOWN] = { false, NULL },
229 [GETSTAT] = { false, NULL },
  [INVALIDATE] = { false, NULL },
231 [GETFDPW] = { false, &dbs[pwddb] },
232 [GETFDGR] = { false, &dbs[grpdb] },
233 [GETFDHST] = { false, &dbs[hstdb] },
234 [GETAI] = { true, &dbs[hstdb] },
235 [INITGROUPS] = { true, &dbs[grpdb] },
236 [GETSERVBYNAME] = { true, &dbs[servdb] },
237 [GETSERVBYPORT] = { true, &dbs[servdb] },
238 [GETFDSERV] = { false, &dbs[servdb] },
239 [GETNETGRENT] = { true, &dbs[netgrdb] },
240 [INNETGR] = { true, &dbs[netgrdb] },
241 [GETFDNETGR] = { false, &dbs[netgrdb] }
242};
243
244
245/* Initial number of threads to use. */
246int nthreads = -1;
247/* Maximum number of threads to use. */
248int max_nthreads = 32;
249
250/* Socket for incoming connections. */
251static int sock;
252
253#ifdef HAVE_INOTIFY
254/* Inotify descriptor. */
255int inotify_fd = -1;
256#endif
257
258#ifdef HAVE_NETLINK
259/* Descriptor for netlink status updates. */
260static int nl_status_fd = -1;
261#endif
262
263/* Number of times clients had to wait. */
264unsigned long int client_queued;
265
266
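/* Send LEN bytes starting at BUF to FD, restarting after EINTR.  Returns
   LEN on success, a smaller count if sending stopped early, or a negative
   value on error.  */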
267ssize_t
268writeall (int fd, const void *buf, size_t len)
269{
270 size_t n = len;
271 ssize_t ret;
272 do
273 {
274 ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
275 if (ret <= 0)
276 break;
277 buf = (const char *) buf + ret;
278 n -= ret;
279 }
280 while (n > 0);
281 return ret < 0 ? ret : len - n;
282}
283
284
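/* Per-byte usage markers for verify_persistent_db.  The low bits encode the
   object type (hash entry or data record); use_begin and use_end are OR'ed
   into an object's first and last byte, and use_first tags a data record
   referenced by a hash entry whose FIRST flag is set.  */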
285enum usekey
286 {
287 use_not = 0,
288 /* The following three are not really used, they are symbolic constants. */
289 use_first = 16,
290 use_begin = 32,
291 use_end = 64,
292
293 use_he = 1,
294 use_he_begin = use_he | use_begin,
295 use_he_end = use_he | use_end,
296 use_data = 3,
297 use_data_begin = use_data | use_begin,
298 use_data_end = use_data | use_end,
299 use_data_first = use_data_begin | use_first
300 };
301
302
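/* Record in USEMAP that the LEN bytes at offset START (which must lie below
   FIRST_FREE) are used by an object of type USE, or, if the range was
   already claimed, verify that the existing markers are consistent with
   USE.  The first and last byte of each object carry begin/end markers so
   that overlapping or misaligned references are detected.  Returns 1 if the
   reference is valid, 0 otherwise.  */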
303static int
304check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
305 enum usekey use, ref_t start, size_t len)
306{
307 if (len < 2)
308 return 0;
309
310 if (start > first_free || start + len > first_free
311 || (start & BLOCK_ALIGN_M1))
312 return 0;
313
314 if (usemap[start] == use_not)
315 {
316 /* Add the start marker. */
317 usemap[start] = use | use_begin;
318 use &= ~use_first;
319
320 while (--len > 0)
321 if (usemap[++start] != use_not)
322 return 0;
323 else
324 usemap[start] = use;
325
326 /* Add the end marker. */
327 usemap[start] = use | use_end;
328 }
329 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
330 {
331 /* Hash entries can't be shared. */
332 if (use == use_he)
333 return 0;
334
335 usemap[start] |= (use & use_first);
336 use &= ~use_first;
337
338 while (--len > 1)
339 if (usemap[++start] != use)
340 return 0;
341
342 if (usemap[++start] != (use | use_end))
343 return 0;
344 }
345 else
346 /* Points to a wrong object or somewhere in the middle. */
347 return 0;
348
349 return 1;
350}
351
352
353/* Verify data in persistent database. */
354static int
355verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
356{
357 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
358 || dbnr == netgrdb);
359
360 time_t now = time (NULL);
361
362 struct database_pers_head *head = mem;
363 struct database_pers_head head_copy = *head;
364
365 /* Check that the header that was read matches the head in the database. */
366 if (memcmp (head, readhead, sizeof (*head)) != 0)
367 return 0;
368
369 /* First some easy tests: make sure the database header is sane. */
370 if (head->version != DB_VERSION
371 || head->header_size != sizeof (*head)
      /* We allow a timestamp to be up to one hour (plus a minute of
	 slack) ahead of the current time.  This should cover daylight
	 saving time changes.  */
      || head->timestamp > now + 60 * 60 + 60
375 || (head->gc_cycle & 1)
376 || head->module == 0
377 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
378 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
379 || head->first_free < 0
380 || head->first_free > head->data_size
381 || (head->first_free & BLOCK_ALIGN_M1) != 0
382 || head->maxnentries < 0
383 || head->maxnsearched < 0)
384 return 0;
385
386 uint8_t *usemap = calloc (head->first_free, 1);
387 if (usemap == NULL)
388 return 0;
389
390 const char *data = (char *) &head->array[roundup (head->module,
391 ALIGN / sizeof (ref_t))];
392
393 nscd_ssize_t he_cnt = 0;
394 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
395 {
396 ref_t trail = head->array[cnt];
397 ref_t work = trail;
398 int tick = 0;
399
400 while (work != ENDREF)
401 {
402 if (! check_use (data, head->first_free, usemap, use_he, work,
403 sizeof (struct hashentry)))
404 goto fail;
405
406 /* Now we know we can dereference the record. */
407 struct hashentry *here = (struct hashentry *) (data + work);
408
409 ++he_cnt;
410
411 /* Make sure the record is for this type of service. */
412 if (here->type >= LASTREQ
413 || reqinfo[here->type].db != &dbs[dbnr])
414 goto fail;
415
416 /* Validate boolean field value. */
417 if (here->first != false && here->first != true)
418 goto fail;
419
420 if (here->len < 0)
421 goto fail;
422
423 /* Now the data. */
424 if (here->packet < 0
425 || here->packet > head->first_free
426 || here->packet + sizeof (struct datahead) > head->first_free)
427 goto fail;
428
429 struct datahead *dh = (struct datahead *) (data + here->packet);
430
431 if (! check_use (data, head->first_free, usemap,
432 use_data | (here->first ? use_first : 0),
433 here->packet, dh->allocsize))
434 goto fail;
435
436 if (dh->allocsize < sizeof (struct datahead)
437 || dh->recsize > dh->allocsize
438 || (dh->notfound != false && dh->notfound != true)
439 || (dh->usable != false && dh->usable != true))
440 goto fail;
441
442 if (here->key < here->packet + sizeof (struct datahead)
443 || here->key > here->packet + dh->allocsize
444 || here->key + here->len > here->packet + dh->allocsize)
445 goto fail;
446
447 work = here->next;
448
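	  /* TRAIL advances at half the speed of WORK (tortoise and hare),
	     so a corrupted, circular chain makes the two meet and is
	     detected instead of looping forever.  */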
449 if (work == trail)
450 /* A circular list, this must not happen. */
451 goto fail;
452 if (tick)
453 trail = ((struct hashentry *) (data + trail))->next;
454 tick = 1 - tick;
455 }
456 }
457
458 if (he_cnt != head->nentries)
459 goto fail;
460
461 /* See if all data and keys had at least one reference from
462 he->first == true hashentry. */
463 for (ref_t idx = 0; idx < head->first_free; ++idx)
464 {
465 if (usemap[idx] == use_data_begin)
466 goto fail;
467 }
468
469 /* Finally, make sure the database hasn't changed since the first test. */
470 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
471 goto fail;
472
473 free (usemap);
474 return 1;
475
476fail:
477 free (usemap);
478 return 0;
479}
480
481
482/* Initialize database information structures. */
483void
484nscd_init (void)
485{
  /* Look up the unprivileged uid/gid/groups before we start listening on
     the socket.  */
488 if (server_user != NULL)
489 begin_drop_privileges ();
490
491 if (nthreads == -1)
492 /* No configuration for this value, assume a default. */
493 nthreads = 4;
494
495 for (size_t cnt = 0; cnt < lastdb; ++cnt)
496 if (dbs[cnt].enabled)
497 {
498 pthread_rwlock_init (&dbs[cnt].lock, NULL);
499 pthread_mutex_init (&dbs[cnt].memlock, NULL);
500
501 if (dbs[cnt].persistent)
502 {
503 /* Try to open the appropriate file on disk. */
504 int fd = open (dbs[cnt].db_filename, O_RDWR | O_CLOEXEC);
505 if (fd != -1)
506 {
507 char *msg = NULL;
508 struct stat64 st;
509 void *mem;
510 size_t total;
511 struct database_pers_head head;
512 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
513 sizeof (head)));
514 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
515 {
516 fail_db_errno:
517 /* The code is single-threaded at this point so
518 using strerror is just fine. */
519 msg = strerror (errno);
520 fail_db:
521 dbg_log (_("invalid persistent database file \"%s\": %s"),
522 dbs[cnt].db_filename, msg);
523 unlink (dbs[cnt].db_filename);
524 }
525 else if (head.module == 0 && head.data_size == 0)
526 {
527 /* The file has been created, but the head has not
528 been initialized yet. */
529 msg = _("uninitialized header");
530 goto fail_db;
531 }
532 else if (head.header_size != (int) sizeof (head))
533 {
534 msg = _("header size does not match");
535 goto fail_db;
536 }
537 else if ((total = (sizeof (head)
538 + roundup (head.module * sizeof (ref_t),
539 ALIGN)
540 + head.data_size))
541 > st.st_size
542 || total < sizeof (head))
543 {
544 msg = _("file size does not match");
545 goto fail_db;
546 }
	      /* Note we map with the maximum size allowed for the
		 database.  This is likely much larger than the
		 actual file size.  This is OK on most OSes since
		 extending the underlying file automatically makes
		 the additional pages available for memory access.  */
553 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
554 PROT_READ | PROT_WRITE,
555 MAP_SHARED, fd, 0))
556 == MAP_FAILED)
557 goto fail_db_errno;
558 else if (!verify_persistent_db (mem, &head, cnt))
559 {
560 munmap (mem, total);
561 msg = _("verification failed");
562 goto fail_db;
563 }
564 else
565 {
566 /* Success. We have the database. */
567 dbs[cnt].head = mem;
568 dbs[cnt].memsize = total;
569 dbs[cnt].data = (char *)
570 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
571 ALIGN / sizeof (ref_t))];
572 dbs[cnt].mmap_used = true;
573
574 if (dbs[cnt].suggested_module > head.module)
575 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
576 dbnames[cnt]);
577
578 dbs[cnt].wr_fd = fd;
579 fd = -1;
580 /* We also need a read-only descriptor. */
581 if (dbs[cnt].shared)
582 {
583 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
584 O_RDONLY | O_CLOEXEC);
585 if (dbs[cnt].ro_fd == -1)
586 dbg_log (_("\
587cannot create read-only descriptor for \"%s\"; no mmap"),
588 dbs[cnt].db_filename);
589 }
590
591 // XXX Shall we test whether the descriptors actually
592 // XXX point to the same file?
593 }
594
	      /* Close the file descriptor in case something went
		 wrong, in which case the variable has not been
		 assigned -1.  */
598 if (fd != -1)
599 close (fd);
600 }
601 else if (errno == EACCES)
602 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
603 dbs[cnt].db_filename);
604 }
605
606 if (dbs[cnt].head == NULL)
607 {
608 /* No database loaded. Allocate the data structure,
609 possibly on disk. */
610 struct database_pers_head head;
611 size_t total = (sizeof (head)
612 + roundup (dbs[cnt].suggested_module
613 * sizeof (ref_t), ALIGN)
614 + (dbs[cnt].suggested_module
615 * DEFAULT_DATASIZE_PER_BUCKET));
616
617 /* Try to create the database. If we do not need a
618 persistent database create a temporary file. */
619 int fd;
620 int ro_fd = -1;
621 if (dbs[cnt].persistent)
622 {
623 fd = open (dbs[cnt].db_filename,
624 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC,
625 S_IRUSR | S_IWUSR);
626 if (fd != -1 && dbs[cnt].shared)
627 ro_fd = open (dbs[cnt].db_filename,
628 O_RDONLY | O_CLOEXEC);
629 }
630 else
631 {
632 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
633 fd = mkostemp (fname, O_CLOEXEC);
634
635 /* We do not need the file name anymore after we
636 opened another file descriptor in read-only mode. */
637 if (fd != -1)
638 {
639 if (dbs[cnt].shared)
640 ro_fd = open (fname, O_RDONLY | O_CLOEXEC);
641
642 unlink (fname);
643 }
644 }
645
646 if (fd == -1)
647 {
648 if (errno == EEXIST)
649 {
650 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
651 dbnames[cnt], dbs[cnt].db_filename);
652 do_exit (1, 0, NULL);
653 }
654
655 if (dbs[cnt].persistent)
656 dbg_log (_("cannot create %s; no persistent database used"),
657 dbs[cnt].db_filename);
658 else
659 dbg_log (_("cannot create %s; no sharing possible"),
660 dbs[cnt].db_filename);
661
662 dbs[cnt].persistent = 0;
663 // XXX remember: no mmap
664 }
665 else
666 {
667 /* Tell the user if we could not create the read-only
668 descriptor. */
669 if (ro_fd == -1 && dbs[cnt].shared)
670 dbg_log (_("\
671cannot create read-only descriptor for \"%s\"; no mmap"),
672 dbs[cnt].db_filename);
673
674 /* Before we create the header, initialize the hash
675 table. That way if we get interrupted while writing
676 the header we can recognize a partially initialized
677 database. */
678 size_t ps = sysconf (_SC_PAGESIZE);
679 char tmpbuf[ps];
680 assert (~ENDREF == 0);
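		  /* ENDREF is the all-ones value, so filling the table
		     with 0xff bytes marks every hash bucket as empty.  */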
681 memset (tmpbuf, '\xff', ps);
682
683 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
684 off_t offset = sizeof (head);
685
686 size_t towrite;
687 if (offset % ps != 0)
688 {
689 towrite = MIN (remaining, ps - (offset % ps));
690 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
691 goto write_fail;
692 offset += towrite;
693 remaining -= towrite;
694 }
695
696 while (remaining > ps)
697 {
698 if (pwrite (fd, tmpbuf, ps, offset) == -1)
699 goto write_fail;
700 offset += ps;
701 remaining -= ps;
702 }
703
704 if (remaining > 0
705 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
706 goto write_fail;
707
708 /* Create the header of the file. */
709 struct database_pers_head head =
710 {
711 .version = DB_VERSION,
712 .header_size = sizeof (head),
713 .module = dbs[cnt].suggested_module,
714 .data_size = (dbs[cnt].suggested_module
715 * DEFAULT_DATASIZE_PER_BUCKET),
716 .first_free = 0
717 };
718 void *mem;
719
720 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
721 != sizeof (head))
722 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
723 != 0)
724 || (mem = mmap (NULL, dbs[cnt].max_db_size,
725 PROT_READ | PROT_WRITE,
726 MAP_SHARED, fd, 0)) == MAP_FAILED)
727 {
728 write_fail:
729 unlink (dbs[cnt].db_filename);
730 dbg_log (_("cannot write to database file %s: %s"),
731 dbs[cnt].db_filename, strerror (errno));
732 dbs[cnt].persistent = 0;
733 }
734 else
735 {
736 /* Success. */
737 dbs[cnt].head = mem;
738 dbs[cnt].data = (char *)
739 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
740 ALIGN / sizeof (ref_t))];
741 dbs[cnt].memsize = total;
742 dbs[cnt].mmap_used = true;
743
744 /* Remember the descriptors. */
745 dbs[cnt].wr_fd = fd;
746 dbs[cnt].ro_fd = ro_fd;
747 fd = -1;
748 ro_fd = -1;
749 }
750
751 if (fd != -1)
752 close (fd);
753 if (ro_fd != -1)
754 close (ro_fd);
755 }
756 }
757
758 if (dbs[cnt].head == NULL)
759 {
760 /* We do not use the persistent database. Just
761 create an in-memory data structure. */
762 assert (! dbs[cnt].persistent);
763
764 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
765 + (dbs[cnt].suggested_module
766 * sizeof (ref_t)));
767 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
768 assert (~ENDREF == 0);
769 memset (dbs[cnt].head->array, '\xff',
770 dbs[cnt].suggested_module * sizeof (ref_t));
771 dbs[cnt].head->module = dbs[cnt].suggested_module;
772 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
773 * dbs[cnt].head->module);
774 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
775 dbs[cnt].head->first_free = 0;
776
777 dbs[cnt].shared = 0;
778 assert (dbs[cnt].ro_fd == -1);
779 }
780 }
781
782 /* Create the socket. */
783 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
784 if (sock < 0)
785 {
786 dbg_log (_("cannot open socket: %s"), strerror (errno));
787 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
788 }
789 /* Bind a name to the socket. */
790 struct sockaddr_un sock_addr;
791 sock_addr.sun_family = AF_UNIX;
792 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
793 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
794 {
795 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
796 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
797 }
798
799 /* Set permissions for the socket. */
800 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
801
802 /* Set the socket up to accept connections. */
803 if (listen (sock, SOMAXCONN) < 0)
804 {
805 dbg_log (_("cannot enable socket to accept connections: %s"),
806 strerror (errno));
807 do_exit (1, 0, NULL);
808 }
809
810#ifdef HAVE_NETLINK
811 if (dbs[hstdb].enabled)
812 {
813 /* Try to open netlink socket to monitor network setting changes. */
814 nl_status_fd = socket (AF_NETLINK,
815 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
816 NETLINK_ROUTE);
817 if (nl_status_fd != -1)
818 {
819 struct sockaddr_nl snl;
820 memset (&snl, '\0', sizeof (snl));
821 snl.nl_family = AF_NETLINK;
822 /* XXX Is this the best set to use? */
823 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
824 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
825 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
826 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
827 | RTMGRP_IPV6_PREFIX);
828
829 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
830 {
831 close (nl_status_fd);
832 nl_status_fd = -1;
833 }
834 else
835 {
836 /* Start the timestamp process. */
837 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
838 = __bump_nl_timestamp ();
839 }
840 }
841 }
842#endif
843
  /* Change to the unprivileged uid/gid/groups if specified in the
     config file.  */
845 if (server_user != NULL)
846 finish_drop_privileges ();
847}
848
849#ifdef HAVE_INOTIFY
850#define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
851#define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
852void
853install_watches (struct traced_file *finfo)
854{
855 /* Use inotify support if we have it. */
856 if (finfo->inotify_descr[TRACED_FILE] < 0)
857 finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
858 finfo->fname,
859 TRACED_FILE_MASK);
860 if (finfo->inotify_descr[TRACED_FILE] < 0)
861 {
862 dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
863 finfo->fname, strerror (errno));
864 return;
865 }
866 dbg_log (_("monitoring file `%s` (%d)"),
867 finfo->fname, finfo->inotify_descr[TRACED_FILE]);
  /* Additionally listen for events in the file's parent directory.
     We do this because the file to be watched might be deleted and
     later created again; when that happens we must re-add the watch.
     We must also cover IN_MOVED_TO to detect a file being moved into
     the directory.  */
873 if (finfo->inotify_descr[TRACED_DIR] < 0)
874 finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
875 finfo->dname,
876 TRACED_DIR_MASK);
877 if (finfo->inotify_descr[TRACED_DIR] < 0)
878 {
      dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
	       finfo->dname, strerror (errno));
881 return;
882 }
883 dbg_log (_("monitoring directory `%s` (%d)"),
884 finfo->dname, finfo->inotify_descr[TRACED_DIR]);
885}
886#endif
887
888/* Register the file in FINFO as a traced file for the database DBS[DBIX].
889
890 We support registering multiple files per database. Each call to
891 register_traced_file adds to the list of registered files.
892
893 When we prune the database, either through timeout or a request to
894 invalidate, we will check to see if any of the registered files has changed.
895 When we accept new connections to handle a cache request we will also
896 check to see if any of the registered files has changed.
897
898 If we have inotify support then we install an inotify fd to notify us of
899 file deletion or modification, both of which will require we invalidate
900 the cache for the database. Without inotify support we stat the file and
901 store st_mtime to determine if the file has been modified. */
902void
903register_traced_file (size_t dbidx, struct traced_file *finfo)
904{
905 /* If the database is disabled or file checking is disabled
906 then ignore the registration. */
907 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
908 return;
909
910 if (__glibc_unlikely (debug_level > 0))
911 dbg_log (_("monitoring file %s for database %s"),
912 finfo->fname, dbnames[dbidx]);
913
914#ifdef HAVE_INOTIFY
915 install_watches (finfo);
916#endif
917 struct stat64 st;
918 if (stat64 (finfo->fname, &st) < 0)
919 {
920 /* We cannot stat() the file. Set mtime to zero and try again later. */
921 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
922 finfo->fname, strerror (errno));
923 finfo->mtime = 0;
924 }
925 else
926 finfo->mtime = st.st_mtime;
927
928 /* Queue up the file name. */
929 finfo->next = dbs[dbidx].traced_files;
930 dbs[dbidx].traced_files = finfo;
931}
932
933
934/* Close the connections. */
935void
936close_sockets (void)
937{
938 close (sock);
939}
940
941
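/* Handle an INVALIDATE request for the database named KEY, replying on FD.
   Every traced file of the database has its recorded mtime reset (and,
   with inotify, its watches reinstalled) so the data is reread, and the
   whole cache is then pruned.  An unknown database name is answered with
   EINVAL, a disabled database with 0.  */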
942static void
943invalidate_cache (char *key, int fd)
944{
945 dbtype number;
946 int32_t resp;
947
948 for (number = pwddb; number < lastdb; ++number)
949 if (strcmp (key, dbnames[number]) == 0)
950 {
951 struct traced_file *runp = dbs[number].traced_files;
952 while (runp != NULL)
953 {
954 /* Make sure we reload from file when checking mtime. */
955 runp->mtime = 0;
956#ifdef HAVE_INOTIFY
957 /* During an invalidation we try to reload the traced
958 file watches. This allows the user to re-sync if
959 inotify events were lost. Similar to what we do during
960 pruning. */
961 install_watches (runp);
962#endif
963 if (runp->call_res_init)
964 {
965 res_init ();
966 break;
967 }
968 runp = runp->next;
969 }
970 break;
971 }
972
973 if (number == lastdb)
974 {
975 resp = EINVAL;
976 writeall (fd, &resp, sizeof (resp));
977 return;
978 }
979
980 if (dbs[number].enabled)
981 {
982 pthread_mutex_lock (&dbs[number].prune_run_lock);
983 prune_cache (&dbs[number], LONG_MAX, fd);
984 pthread_mutex_unlock (&dbs[number].prune_run_lock);
985 }
986 else
987 {
988 resp = 0;
989 writeall (fd, &resp, sizeof (resp));
990 }
991}
992
993
994#ifdef SCM_RIGHTS
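/* Send the read-only file descriptor of database DB to the client on FD
   using an SCM_RIGHTS control message.  The database name KEY and the size
   of the memory mapping are sent along as ordinary data so the client can
   mmap the cache read-only.  */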
995static void
996send_ro_fd (struct database_dyn *db, char *key, int fd)
997{
  /* If we do not have a read-only file descriptor, do nothing.  */
999 if (db->ro_fd == -1)
1000 return;
1001
1002 /* We need to send some data along with the descriptor. */
1003 uint64_t mapsize = (db->head->data_size
1004 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1005 + sizeof (struct database_pers_head));
1006 struct iovec iov[2];
1007 iov[0].iov_base = key;
1008 iov[0].iov_len = strlen (key) + 1;
1009 iov[1].iov_base = &mapsize;
1010 iov[1].iov_len = sizeof (mapsize);
1011
1012 /* Prepare the control message to transfer the descriptor. */
1013 union
1014 {
1015 struct cmsghdr hdr;
1016 char bytes[CMSG_SPACE (sizeof (int))];
1017 } buf;
1018 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1019 .msg_control = buf.bytes,
1020 .msg_controllen = sizeof (buf) };
1021 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1022
1023 cmsg->cmsg_level = SOL_SOCKET;
1024 cmsg->cmsg_type = SCM_RIGHTS;
1025 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1026
1027 int *ip = (int *) CMSG_DATA (cmsg);
1028 *ip = db->ro_fd;
1029
1030 msg.msg_controllen = cmsg->cmsg_len;
1031
1032 /* Send the control message. We repeat when we are interrupted but
1033 everything else is ignored. */
1034#ifndef MSG_NOSIGNAL
1035# define MSG_NOSIGNAL 0
1036#endif
1037 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1038
1039 if (__glibc_unlikely (debug_level > 0))
1040 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1041}
1042#endif /* SCM_RIGHTS */
1043
1044
1045/* Handle new request. */
1046static void
1047handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1048{
1049 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1050 {
1051 if (debug_level > 0)
1052 dbg_log (_("\
1053cannot handle old request version %d; current version is %d"),
1054 req->version, NSCD_VERSION);
1055 return;
1056 }
1057
1058 /* Perform the SELinux check before we go on to the standard checks. */
1059 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1060 {
1061 if (debug_level > 0)
1062 {
1063#ifdef SO_PEERCRED
1064 char pbuf[sizeof ("/proc//exe") + 3 * sizeof (long int)];
1065# ifdef PATH_MAX
1066 char buf[PATH_MAX];
1067# else
1068 char buf[4096];
1069# endif
1070
1071 snprintf (pbuf, sizeof (pbuf), "/proc/%ld/exe", (long int) pid);
1072 ssize_t n = readlink (pbuf, buf, sizeof (buf) - 1);
1073
1074 if (n <= 0)
1075 dbg_log (_("\
1076request from %ld not handled due to missing permission"), (long int) pid);
1077 else
1078 {
1079 buf[n] = '\0';
1080 dbg_log (_("\
1081request from '%s' [%ld] not handled due to missing permission"),
1082 buf, (long int) pid);
1083 }
1084#else
1085 dbg_log (_("request not handled due to missing permission"));
1086#endif
1087 }
1088 return;
1089 }
1090
1091 struct database_dyn *db = reqinfo[req->type].db;
1092
1093 /* See whether we can service the request from the cache. */
1094 if (__builtin_expect (reqinfo[req->type].data_request, true))
1095 {
1096 if (__builtin_expect (debug_level, 0) > 0)
1097 {
1098 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1099 {
1100 char buf[INET6_ADDRSTRLEN];
1101
1102 dbg_log ("\t%s (%s)", serv2str[req->type],
1103 inet_ntop (req->type == GETHOSTBYADDR
1104 ? AF_INET : AF_INET6,
1105 key, buf, sizeof (buf)));
1106 }
1107 else
1108 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1109 }
1110
1111 /* Is this service enabled? */
1112 if (__glibc_unlikely (!db->enabled))
1113 {
	  /* No, send the prepared record.  */
1115 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1116 db->disabled_iov->iov_len,
1117 MSG_NOSIGNAL))
1118 != (ssize_t) db->disabled_iov->iov_len
1119 && __builtin_expect (debug_level, 0) > 0)
1120 {
1121 /* We have problems sending the result. */
1122 char buf[256];
1123 dbg_log (_("cannot write result: %s"),
1124 strerror_r (errno, buf, sizeof (buf)));
1125 }
1126
1127 return;
1128 }
1129
1130 /* Be sure we can read the data. */
1131 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1132 {
1133 ++db->head->rdlockdelayed;
1134 pthread_rwlock_rdlock (&db->lock);
1135 }
1136
1137 /* See whether we can handle it from the cache. */
1138 struct datahead *cached;
1139 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1140 db, uid);
1141 if (cached != NULL)
1142 {
1143 /* Hurray it's in the cache. */
1144 if (writeall (fd, cached->data, cached->recsize) != cached->recsize
1145 && __glibc_unlikely (debug_level > 0))
1146 {
1147 /* We have problems sending the result. */
1148 char buf[256];
1149 dbg_log (_("cannot write result: %s"),
1150 strerror_r (errno, buf, sizeof (buf)));
1151 }
1152
1153 pthread_rwlock_unlock (&db->lock);
1154
1155 return;
1156 }
1157
1158 pthread_rwlock_unlock (&db->lock);
1159 }
1160 else if (__builtin_expect (debug_level, 0) > 0)
1161 {
1162 if (req->type == INVALIDATE)
1163 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1164 else
1165 dbg_log ("\t%s", serv2str[req->type]);
1166 }
1167
1168 /* Handle the request. */
1169 switch (req->type)
1170 {
1171 case GETPWBYNAME:
1172 addpwbyname (db, fd, req, key, uid);
1173 break;
1174
1175 case GETPWBYUID:
1176 addpwbyuid (db, fd, req, key, uid);
1177 break;
1178
1179 case GETGRBYNAME:
1180 addgrbyname (db, fd, req, key, uid);
1181 break;
1182
1183 case GETGRBYGID:
1184 addgrbygid (db, fd, req, key, uid);
1185 break;
1186
1187 case GETHOSTBYNAME:
1188 addhstbyname (db, fd, req, key, uid);
1189 break;
1190
1191 case GETHOSTBYNAMEv6:
1192 addhstbynamev6 (db, fd, req, key, uid);
1193 break;
1194
1195 case GETHOSTBYADDR:
1196 addhstbyaddr (db, fd, req, key, uid);
1197 break;
1198
1199 case GETHOSTBYADDRv6:
1200 addhstbyaddrv6 (db, fd, req, key, uid);
1201 break;
1202
1203 case GETAI:
1204 addhstai (db, fd, req, key, uid);
1205 break;
1206
1207 case INITGROUPS:
1208 addinitgroups (db, fd, req, key, uid);
1209 break;
1210
1211 case GETSERVBYNAME:
1212 addservbyname (db, fd, req, key, uid);
1213 break;
1214
1215 case GETSERVBYPORT:
1216 addservbyport (db, fd, req, key, uid);
1217 break;
1218
1219 case GETNETGRENT:
1220 addgetnetgrent (db, fd, req, key, uid);
1221 break;
1222
1223 case INNETGR:
1224 addinnetgr (db, fd, req, key, uid);
1225 break;
1226
1227 case GETSTAT:
1228 case SHUTDOWN:
1229 case INVALIDATE:
1230 {
	  /* Get the caller's credentials.  */
1232#ifdef SO_PEERCRED
1233 struct ucred caller;
1234 socklen_t optlen = sizeof (caller);
1235
1236 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1237 {
1238 char buf[256];
1239
1240 dbg_log (_("error getting caller's id: %s"),
1241 strerror_r (errno, buf, sizeof (buf)));
1242 break;
1243 }
1244
1245 uid = caller.uid;
1246#else
	  /* Some systems have no SO_PEERCRED implementation.  They don't
	     care about security so we don't either.  */
1249 uid = 0;
1250#endif
1251 }
1252
1253 /* Accept shutdown, getstat and invalidate only from root. For
1254 the stat call also allow the user specified in the config file. */
1255 if (req->type == GETSTAT)
1256 {
1257 if (uid == 0 || uid == stat_uid)
1258 send_stats (fd, dbs);
1259 }
1260 else if (uid == 0)
1261 {
1262 if (req->type == INVALIDATE)
1263 invalidate_cache (key, fd);
1264 else
1265 termination_handler (0);
1266 }
1267 break;
1268
1269 case GETFDPW:
1270 case GETFDGR:
1271 case GETFDHST:
1272 case GETFDSERV:
1273 case GETFDNETGR:
1274#ifdef SCM_RIGHTS
1275 send_ro_fd (reqinfo[req->type].db, key, fd);
1276#endif
1277 break;
1278
1279 default:
1280 /* Ignore the command, it's nothing we know. */
1281 break;
1282 }
1283}
1284
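/* Read /proc/self/cmdline into a malloc'd buffer, growing it as needed.
   On success the buffer is returned and its length is stored in *SIZE;
   on failure NULL is returned with errno set.  */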
1285static char *
1286read_cmdline (size_t *size)
1287{
1288 int fd = open ("/proc/self/cmdline", O_RDONLY);
1289 if (fd < 0)
1290 return NULL;
1291 size_t current = 0;
1292 size_t limit = 1024;
1293 char *buffer = malloc (limit);
1294 if (buffer == NULL)
1295 {
1296 close (fd);
1297 errno = ENOMEM;
1298 return NULL;
1299 }
1300 while (1)
1301 {
1302 if (current == limit)
1303 {
1304 char *newptr;
1305 if (2 * limit < limit
1306 || (newptr = realloc (buffer, 2 * limit)) == NULL)
1307 {
1308 free (buffer);
1309 close (fd);
1310 errno = ENOMEM;
1311 return NULL;
1312 }
1313 buffer = newptr;
1314 limit *= 2;
1315 }
1316
1317 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buffer + current,
1318 limit - current));
1319 if (n == -1)
1320 {
1321 int e = errno;
1322 free (buffer);
1323 close (fd);
1324 errno = e;
1325 return NULL;
1326 }
1327 if (n == 0)
1328 break;
1329 current += n;
1330 }
1331
1332 close (fd);
1333 *size = current;
1334 return buffer;
1335}
1336
1337
1338/* Restart the process. */
1339static void
1340restart (void)
1341{
1342 /* First determine the parameters. We do not use the parameters
1343 passed to main because then nscd would use the system libc after
1344 restarting even if it was started by a non-system dynamic linker
1345 during glibc testing. */
1346 size_t readlen;
1347 char *cmdline = read_cmdline (&readlen);
1348 if (cmdline == NULL)
1349 {
1350 dbg_log (_("\
1351cannot open /proc/self/cmdline: %m; disabling paranoia mode"));
1352 paranoia = 0;
1353 return;
1354 }
1355
1356 /* Parse the command line. Worst case scenario: every two
1357 characters form one parameter (one character plus NUL). */
1358 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1359 int argc = 0;
1360
1361 for (char *cp = cmdline; cp < cmdline + readlen;)
1362 {
1363 argv[argc++] = cp;
1364 cp = (char *) rawmemchr (cp, '\0') + 1;
1365 }
1366 argv[argc] = NULL;
1367
1368 /* Second, change back to the old user if we changed it. */
1369 if (server_user != NULL)
1370 {
1371 if (setresuid (old_uid, old_uid, old_uid) != 0)
1372 {
1373 dbg_log (_("\
1374cannot change to old UID: %s; disabling paranoia mode"),
1375 strerror (errno));
1376
1377 paranoia = 0;
1378 free (cmdline);
1379 return;
1380 }
1381
1382 if (setresgid (old_gid, old_gid, old_gid) != 0)
1383 {
1384 dbg_log (_("\
1385cannot change to old GID: %s; disabling paranoia mode"),
1386 strerror (errno));
1387
1388 ignore_value (setuid (server_uid));
1389 paranoia = 0;
1390 free (cmdline);
1391 return;
1392 }
1393 }
1394
1395 /* Next change back to the old working directory. */
1396 if (chdir (oldcwd) == -1)
1397 {
1398 dbg_log (_("\
1399cannot change to old working directory: %s; disabling paranoia mode"),
1400 strerror (errno));
1401
1402 if (server_user != NULL)
1403 {
1404 ignore_value (setuid (server_uid));
1405 ignore_value (setgid (server_gid));
1406 }
1407 paranoia = 0;
1408 free (cmdline);
1409 return;
1410 }
1411
1412 /* Synchronize memory. */
1413 int32_t certainly[lastdb];
1414 for (int cnt = 0; cnt < lastdb; ++cnt)
1415 if (dbs[cnt].enabled)
1416 {
1417 /* Make sure nobody keeps using the database. */
1418 dbs[cnt].head->timestamp = 0;
1419 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1420 dbs[cnt].head->nscd_certainly_running = 0;
1421
1422 if (dbs[cnt].persistent)
1423 // XXX async OK?
1424 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1425 }
1426
1427 /* The preparations are done. */
1428#ifdef PATH_MAX
1429 char pathbuf[PATH_MAX];
1430#else
1431 char pathbuf[256];
1432#endif
1433 /* Try to exec the real nscd program so the process name (as reported
1434 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1435 if readlink or the exec with the result of the readlink call fails. */
1436 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1437 if (n != -1)
1438 {
1439 pathbuf[n] = '\0';
1440 execv (pathbuf, argv);
1441 }
1442 execv ("/proc/self/exe", argv);
1443
1444 /* If we come here, we will never be able to re-exec. */
1445 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1446 strerror (errno));
1447
1448 if (server_user != NULL)
1449 {
1450 ignore_value (setuid (server_uid));
1451 ignore_value (setgid (server_gid));
1452 }
1453 if (chdir ("/") != 0)
1454 dbg_log (_("cannot change current working directory to \"/\": %s"),
1455 strerror (errno));
1456 paranoia = 0;
1457 free (cmdline);
1458
1459 /* Reenable the databases. */
1460 time_t now = time (NULL);
1461 for (int cnt = 0; cnt < lastdb; ++cnt)
1462 if (dbs[cnt].enabled)
1463 {
1464 dbs[cnt].head->timestamp = now;
1465 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1466 }
1467}
1468
1469
1470/* List of file descriptors. */
1471struct fdlist
1472{
1473 int fd;
1474 struct fdlist *next;
1475};
1476/* Memory allocated for the list. */
1477static struct fdlist *fdlist;
1478/* List of currently ready-to-read file descriptors. */
1479static struct fdlist *readylist;
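/* READYLIST points to the most recently queued element of a circular,
   singly-linked list, so READYLIST->next is the oldest entry and the one
   the worker threads dequeue first.  */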
1480
1481/* Conditional variable and mutex to signal availability of entries in
1482 READYLIST. The condvar is initialized dynamically since we might
1483 use a different clock depending on availability. */
1484static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1485static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1486
1487/* The clock to use with the condvar. */
1488static clockid_t timeout_clock = CLOCK_REALTIME;
1489
1490/* Number of threads ready to handle the READYLIST. */
1491static unsigned long int nready;
1492
1493
1494/* Function for the clean-up threads. */
1495static void *
1496__attribute__ ((__noreturn__))
1497nscd_run_prune (void *p)
1498{
1499 const long int my_number = (long int) p;
1500 assert (dbs[my_number].enabled);
1501
1502 int dont_need_update = setup_thread (&dbs[my_number]);
1503
1504 time_t now = time (NULL);
1505
1506 /* We are running. */
1507 dbs[my_number].head->timestamp = now;
1508
1509 struct timespec prune_ts;
1510 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1511 /* Should never happen. */
1512 abort ();
1513
  /* Compute the initial timeout time.  Prevent all the timers from
     going off at the same time by adding a database-specific offset.  */
1516 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1517 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1518
1519 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1520 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1521 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1522
1523 pthread_mutex_lock (prune_lock);
1524 while (1)
1525 {
1526 /* Wait, but not forever. */
1527 int e = 0;
1528 if (! dbs[my_number].clear_cache)
1529 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1530 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1531
1532 time_t next_wait;
1533 now = time (NULL);
1534 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1535 || dbs[my_number].clear_cache)
1536 {
	  /* We will determine the new timeout values based on the
	     cache content.  Should there be concurrent additions to
	     the cache which are not accounted for in the cache
	     pruning we want to know about it.  Therefore set the
	     timeout to the maximum.  It will be decreased when adding
	     new entries to the cache, if necessary.  */
1543 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1544
1545 /* Unconditionally reset the flag. */
1546 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1547 dbs[my_number].clear_cache = 0;
1548
1549 pthread_mutex_unlock (prune_lock);
1550
1551 /* We use a separate lock for running the prune function (instead
1552 of keeping prune_lock locked) because this enables concurrent
1553 invocations of cache_add which might modify the timeout value. */
1554 pthread_mutex_lock (prune_run_lock);
1555 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1556 pthread_mutex_unlock (prune_run_lock);
1557
1558 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1559 /* If clients cannot determine for sure whether nscd is running
1560 we need to wake up occasionally to update the timestamp.
1561 Wait 90% of the update period. */
1562#define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1563 if (__glibc_unlikely (! dont_need_update))
1564 {
1565 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1566 dbs[my_number].head->timestamp = now;
1567 }
1568
1569 pthread_mutex_lock (prune_lock);
1570
1571 /* Make it known when we will wake up again. */
1572 if (now + next_wait < dbs[my_number].wakeup_time)
1573 dbs[my_number].wakeup_time = now + next_wait;
1574 else
1575 next_wait = dbs[my_number].wakeup_time - now;
1576 }
1577 else
1578 /* The cache was just pruned. Do not do it again now. Just
1579 use the new timeout value. */
1580 next_wait = dbs[my_number].wakeup_time - now;
1581
1582 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1583 /* Should never happen. */
1584 abort ();
1585
1586 /* Compute next timeout time. */
1587 prune_ts.tv_sec += next_wait;
1588 }
1589}
1590
1591
1592/* This is the main loop. It is replicated in different threads but
1593 the use of the ready list makes sure only one thread handles an
1594 incoming connection. */
1595static void *
1596__attribute__ ((__noreturn__))
1597nscd_run_worker (void *p)
1598{
1599 char buf[256];
1600
1601 /* Initial locking. */
1602 pthread_mutex_lock (&readylist_lock);
1603
1604 /* One more thread available. */
1605 ++nready;
1606
1607 while (1)
1608 {
1609 while (readylist == NULL)
1610 pthread_cond_wait (&readylist_cond, &readylist_lock);
1611
1612 struct fdlist *it = readylist->next;
1613 if (readylist->next == readylist)
1614 /* Just one entry on the list. */
1615 readylist = NULL;
1616 else
1617 readylist->next = it->next;
1618
1619 /* Extract the information and mark the record ready to be used
1620 again. */
1621 int fd = it->fd;
1622 it->next = NULL;
1623
      /* One fewer thread available.  */
1625 --nready;
1626
1627 /* We are done with the list. */
1628 pthread_mutex_unlock (&readylist_lock);
1629
1630 /* Now read the request. */
1631 request_header req;
1632 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1633 != sizeof (req), 0))
1634 {
1635 /* We failed to read data. Note that this also might mean we
1636 failed because we would have blocked. */
1637 if (debug_level > 0)
1638 dbg_log (_("short read while reading request: %s"),
1639 strerror_r (errno, buf, sizeof (buf)));
1640 goto close_and_out;
1641 }
1642
1643 /* Check whether this is a valid request type. */
1644 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1645 goto close_and_out;
1646
      /* Some systems have no SO_PEERCRED implementation.  They don't
	 care about security so we don't either.  */
1649 uid_t uid = -1;
1650#ifdef SO_PEERCRED
1651 pid_t pid = 0;
1652
1653 if (__glibc_unlikely (debug_level > 0))
1654 {
1655 struct ucred caller;
1656 socklen_t optlen = sizeof (caller);
1657
1658 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1659 pid = caller.pid;
1660 }
1661#else
1662 const pid_t pid = 0;
1663#endif
1664
1665 /* It should not be possible to crash the nscd with a silly
1666 request (i.e., a terribly large key). We limit the size to 1kb. */
1667 if (__builtin_expect (req.key_len, 1) < 0
1668 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1669 {
1670 if (debug_level > 0)
1671 dbg_log (_("key length in request too long: %d"), req.key_len);
1672 }
1673 else
1674 {
1675 /* Get the key. */
1676 char keybuf[MAXKEYLEN + 1];
1677
1678 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1679 req.key_len))
1680 != req.key_len, 0))
1681 {
1682 /* Again, this can also mean we would have blocked. */
1683 if (debug_level > 0)
1684 dbg_log (_("short read while reading request key: %s"),
1685 strerror_r (errno, buf, sizeof (buf)));
1686 goto close_and_out;
1687 }
1688 keybuf[req.key_len] = '\0';
1689
1690 if (__builtin_expect (debug_level, 0) > 0)
1691 {
1692#ifdef SO_PEERCRED
1693 if (pid != 0)
1694 dbg_log (_("\
1695handle_request: request received (Version = %d) from PID %ld"),
1696 req.version, (long int) pid);
1697 else
1698#endif
1699 dbg_log (_("\
1700handle_request: request received (Version = %d)"), req.version);
1701 }
1702
1703 /* Phew, we got all the data, now process it. */
1704 handle_request (fd, &req, keybuf, uid, pid);
1705 }
1706
1707 close_and_out:
1708 /* We are done. */
1709 close (fd);
1710
1711 /* Re-locking. */
1712 pthread_mutex_lock (&readylist_lock);
1713
1714 /* One more thread available. */
1715 ++nready;
1716 }
1717 /* NOTREACHED */
1718}
1719
1720
1721static unsigned int nconns;
1722
1723static void
1724fd_ready (int fd)
1725{
1726 pthread_mutex_lock (&readylist_lock);
1727
1728 /* Find an empty entry in FDLIST. */
1729 size_t inner;
1730 for (inner = 0; inner < nconns; ++inner)
1731 if (fdlist[inner].next == NULL)
1732 break;
1733 assert (inner < nconns);
1734
1735 fdlist[inner].fd = fd;
1736
1737 if (readylist == NULL)
1738 readylist = fdlist[inner].next = &fdlist[inner];
1739 else
1740 {
1741 fdlist[inner].next = readylist->next;
1742 readylist = readylist->next = &fdlist[inner];
1743 }
1744
1745 bool do_signal = true;
1746 if (__glibc_unlikely (nready == 0))
1747 {
1748 ++client_queued;
1749 do_signal = false;
1750
1751 /* Try to start another thread to help out. */
1752 pthread_t th;
1753 if (nthreads < max_nthreads
1754 && pthread_create (&th, &attr, nscd_run_worker,
1755 (void *) (long int) nthreads) == 0)
1756 {
1757 /* We got another thread. */
1758 ++nthreads;
1759 /* The new thread might need a kick. */
1760 do_signal = true;
1761 }
1762
1763 }
1764
1765 pthread_mutex_unlock (&readylist_lock);
1766
1767 /* Tell one of the worker threads there is work to do. */
1768 if (do_signal)
1769 pthread_cond_signal (&readylist_cond);
1770}
1771
1772
1773/* Check whether restarting should happen. */
1774static bool
1775restart_p (time_t now)
1776{
1777 return (paranoia && readylist == NULL && nready == nthreads
1778 && now >= restart_time);
1779}
1780
1781
1782/* Array for times a connection was accepted. */
1783static time_t *starttime;
1784
1785#ifdef HAVE_INOTIFY
1786/* Inotify event for changed file. */
1787union __inev
1788{
1789 struct inotify_event i;
1790# ifndef PATH_MAX
1791# define PATH_MAX 1024
1792# endif
1793 char buf[sizeof (struct inotify_event) + PATH_MAX];
1794};
1795
/* Return 0 if the file is there, otherwise -1.  */
1797int
1798check_file (struct traced_file *finfo)
1799{
1800 struct stat64 st;
1801 /* We could check mtime and if different re-add
1802 the watches, and invalidate the database, but we
1803 don't because we are called from inotify_check_files
1804 which should be doing that work. If sufficient inotify
1805 events were lost then the next pruning or invalidation
1806 will do the stat and mtime check. We don't do it here to
1807 keep the logic simple. */
1808 if (stat64 (finfo->fname, &st) < 0)
1809 return -1;
1810 return 0;
1811}
1812
1813/* Process the inotify event in INEV. If the event matches any of the files
1814 registered with a database then mark that database as requiring its cache
1815 to be cleared. We indicate the cache needs clearing by setting
1816 TO_CLEAR[DBCNT] to true for the matching database. */
1817static void
1818inotify_check_files (bool *to_clear, union __inev *inev)
1819{
1820 /* Check which of the files changed. */
1821 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1822 {
1823 struct traced_file *finfo = dbs[dbcnt].traced_files;
1824
1825 while (finfo != NULL)
1826 {
1827 /* The configuration file was moved or deleted.
1828 We stop watching it at that point, and reinitialize. */
1829 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1830 && ((inev->i.mask & IN_MOVE_SELF)
1831 || (inev->i.mask & IN_DELETE_SELF)
1832 || (inev->i.mask & IN_IGNORED)))
1833 {
1834 int ret;
1835 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1836
1837 if (check_file (finfo) == 0)
1838 {
1839 dbg_log (_("ignored inotify event for `%s` (file exists)"),
1840 finfo->fname);
1841 return;
1842 }
1843
1844 dbg_log (_("monitored file `%s` was %s, removing watch"),
1845 finfo->fname, moved ? "moved" : "deleted");
1846 /* File was moved out, remove the watch. Watches are
1847 automatically removed when the file is deleted. */
1848 if (moved)
1849 {
1850 ret = inotify_rm_watch (inotify_fd, inev->i.wd);
1851 if (ret < 0)
1852 dbg_log (_("failed to remove file watch `%s`: %s"),
1853 finfo->fname, strerror (errno));
1854 }
1855 finfo->inotify_descr[TRACED_FILE] = -1;
1856 to_clear[dbcnt] = true;
1857 if (finfo->call_res_init)
1858 res_init ();
1859 return;
1860 }
1861 /* The configuration file was open for writing and has just closed.
1862 We reset the cache and reinitialize. */
1863 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1864 && inev->i.mask & IN_CLOSE_WRITE)
1865 {
1866 /* Mark cache as needing to be cleared and reinitialize. */
1867 dbg_log (_("monitored file `%s` was written to"), finfo->fname);
1868 to_clear[dbcnt] = true;
1869 if (finfo->call_res_init)
1870 res_init ();
1871 return;
1872 }
1873 /* The parent directory was moved or deleted. We trigger one last
1874 invalidation. At the next pruning or invalidation we may add
1875 this watch back if the file is present again. */
1876 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1877 && ((inev->i.mask & IN_DELETE_SELF)
1878 || (inev->i.mask & IN_MOVE_SELF)
1879 || (inev->i.mask & IN_IGNORED)))
1880 {
1881 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1882 /* The directory watch may have already been removed
1883 but we don't know so we just remove it again and
1884 ignore the error. Then we remove the file watch.
1885 Note: watches are automatically removed for deleted
1886 files. */
1887 if (moved)
1888 inotify_rm_watch (inotify_fd, inev->i.wd);
1889 if (finfo->inotify_descr[TRACED_FILE] != -1)
1890 {
1891 dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1892 finfo->dname, moved ? "moved" : "deleted", finfo->fname);
1893 if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
		    dbg_log (_("failed to remove file watch `%s`: %s"),
			     finfo->fname, strerror (errno));
1896 }
1897 finfo->inotify_descr[TRACED_FILE] = -1;
1898 finfo->inotify_descr[TRACED_DIR] = -1;
1899 to_clear[dbcnt] = true;
1900 if (finfo->call_res_init)
1901 res_init ();
1902 /* Continue to the next entry since this might be the
1903 parent directory for multiple registered files and
1904 we want to remove watches for all registered files. */
1905 continue;
1906 }
1907 /* The parent directory had a create or moved to event. */
1908 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1909 && ((inev->i.mask & IN_MOVED_TO)
1910 || (inev->i.mask & IN_CREATE))
1911 && strcmp (inev->i.name, finfo->sfname) == 0)
1912 {
1913 /* We detected a directory change. We look for the creation
1914 of the file we are tracking or the move of the same file
1915 into the directory. */
1916 int ret;
1917 dbg_log (_("monitored file `%s` was %s, adding watch"),
1918 finfo->fname,
1919 inev->i.mask & IN_CREATE ? "created" : "moved into place");
1920 /* File was moved in or created. Regenerate the watch. */
1921 if (finfo->inotify_descr[TRACED_FILE] != -1)
1922 inotify_rm_watch (inotify_fd,
1923 finfo->inotify_descr[TRACED_FILE]);
1924
1925 ret = inotify_add_watch (inotify_fd,
1926 finfo->fname,
1927 TRACED_FILE_MASK);
1928 if (ret < 0)
1929 dbg_log (_("failed to add file watch `%s`: %s"),
1930 finfo->fname, strerror (errno));
1931
1932 finfo->inotify_descr[TRACED_FILE] = ret;
1933
1934 /* The file is new or moved so mark cache as needing to
1935 be cleared and reinitialize. */
1936 to_clear[dbcnt] = true;
1937 if (finfo->call_res_init)
1938 res_init ();
1939
1940 /* Done re-adding the watch. Don't return, we may still
1941 have other files in this same directory, same watch
1942 descriptor, and need to process them. */
1943 }
1944 /* Other events are ignored, and we move on to the next file. */
1945 finfo = finfo->next;
1946 }
1947 }
1948}
1949
1950/* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1951 for the associated database, otherwise do nothing. The TO_CLEAR array must
1952 have LASTDB entries. */
1953static inline void
1954clear_db_cache (bool *to_clear)
1955{
1956 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1957 if (to_clear[dbcnt])
1958 {
1959 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1960 dbs[dbcnt].clear_cache = 1;
1961 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1962 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1963 }
1964}
1965
1966int
1967handle_inotify_events (void)
1968{
1969 bool to_clear[lastdb] = { false, };
1970 union __inev inev;
1971
1972 /* Read all inotify events for files registered via
1973 register_traced_file(). */
1974 while (1)
1975 {
1976 /* Potentially read multiple events into buf. */
1977 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
1978 &inev.buf,
1979 sizeof (inev)));
1980 if (nb < (ssize_t) sizeof (struct inotify_event))
1981 {
1982 /* Not even 1 event. */
1983 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
1984 return -1;
1985 /* Done reading events that are ready. */
1986 break;
1987 }
1988 /* Process all events. The normal inotify interface delivers
1989 complete events on a read and never a partial event. */
      ssize_t count;
      while (1)
        {
          /* Check which of the files changed.  The event to examine is
             always at the start of the buffer.  */
          inotify_check_files (to_clear, &inev);
          count = sizeof (struct inotify_event) + inev.i.len;
          nb -= count;
          if (nb >= (ssize_t) sizeof (struct inotify_event))
            /* Shift the remaining complete events to the front of the
               buffer.  The source and destination regions can overlap,
               so memmove rather than memcpy must be used.  */
            memmove (&inev, &inev.buf[count], nb);
2001 else
2002 break;
2003 }
2004 continue;
2005 }
2006 /* Actually perform the cache clearing. */
2007 clear_db_cache (to_clear);
2008 return 0;
2009}
2010
2011#endif
2012
2013static void
2014__attribute__ ((__noreturn__))
2015main_loop_poll (void)
2016{
2017 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
2018 * sizeof (conns[0]));
2019
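  /* Slot 0 of CONNS always holds the listening socket; the optional
     inotify and netlink descriptors follow.  NUSED is the number of
     leading slots handed to poll and FIRSTFREE the lowest index whose
     slot is currently unused.  */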
2020 conns[0].fd = sock;
2021 conns[0].events = POLLRDNORM;
2022 size_t nused = 1;
2023 size_t firstfree = 1;
2024
2025#ifdef HAVE_INOTIFY
2026 if (inotify_fd != -1)
2027 {
2028 conns[1].fd = inotify_fd;
2029 conns[1].events = POLLRDNORM;
2030 nused = 2;
2031 firstfree = 2;
2032 }
2033#endif
2034
2035#ifdef HAVE_NETLINK
2036 size_t idx_nl_status_fd = 0;
2037 if (nl_status_fd != -1)
2038 {
2039 idx_nl_status_fd = nused;
2040 conns[nused].fd = nl_status_fd;
2041 conns[nused].events = POLLRDNORM;
2042 ++nused;
2043 firstfree = nused;
2044 }
2045#endif
2046
2047 while (1)
2048 {
      /* Wait for any event.  The timeout is at most MAX_ACCEPT_TIMEOUT
         seconds (and shrinks as the connection table fills up) so that
         we periodically get a chance to close accepted connections on
         which no request has arrived.  */
2052#define MAX_ACCEPT_TIMEOUT 30
2053#define MIN_ACCEPT_TIMEOUT 5
2054#define MAIN_THREAD_TIMEOUT \
2055 (MAX_ACCEPT_TIMEOUT * 1000 \
2056 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
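      /* With the values above the timeout shrinks linearly as the
         connection table fills: roughly 30000 ms (MAX_ACCEPT_TIMEOUT)
         when almost no slots are in use, down to 17500 ms, i.e.
         (MAX_ACCEPT_TIMEOUT + MIN_ACCEPT_TIMEOUT) / 2 seconds, when
         NUSED equals NCONNS.  */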
2057
2058 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
2059
2060 time_t now = time (NULL);
2061
2062 /* If there is a descriptor ready for reading or there is a new
2063 connection, process this now. */
2064 if (n > 0)
2065 {
2066 if (conns[0].revents != 0)
2067 {
2068 /* We have a new incoming connection. Accept the connection. */
2069 int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2070 SOCK_NONBLOCK));
2071
2072 /* Use the descriptor if we have not reached the limit. */
2073 if (fd >= 0)
2074 {
2075 if (firstfree < nconns)
2076 {
2077 conns[firstfree].fd = fd;
2078 conns[firstfree].events = POLLRDNORM;
2079 starttime[firstfree] = now;
2080 if (firstfree >= nused)
2081 nused = firstfree + 1;
2082
2083 do
2084 ++firstfree;
2085 while (firstfree < nused && conns[firstfree].fd != -1);
2086 }
2087 else
2088 /* We cannot use the connection so close it. */
2089 close (fd);
2090 }
2091
2092 --n;
2093 }
2094
2095 size_t first = 1;
2096#ifdef HAVE_INOTIFY
2097 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2098 {
2099 if (conns[1].revents != 0)
2100 {
2101 int ret;
2102 ret = handle_inotify_events ();
2103 if (ret == -1)
2104 {
2105 /* Something went wrong when reading the inotify
2106 data. Better disable inotify. */
2107 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2108 conns[1].fd = -1;
2109 firstfree = 1;
2110 if (nused == 2)
2111 nused = 1;
2112 close (inotify_fd);
2113 inotify_fd = -1;
2114 }
2115 --n;
2116 }
2117
2118 first = 2;
2119 }
2120#endif
2121
2122#ifdef HAVE_NETLINK
2123 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2124 {
2125 char buf[4096];
2126 /* Read all the data. We do not interpret it here. */
2127 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2128 sizeof (buf))) != -1)
2129 ;
2130
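              /* The data itself is irrelevant; a readable netlink socket
                 only signals that the network configuration may have
                 changed.  Publishing the new timestamp in the mapped hst
                 database header lets client processes notice the change
                 and stop trusting cached host data.  */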
2131 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2132 = __bump_nl_timestamp ();
2133 }
2134#endif
2135
2136 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2137 if (conns[cnt].revents != 0)
2138 {
2139 fd_ready (conns[cnt].fd);
2140
2141 /* Clean up the CONNS array. */
2142 conns[cnt].fd = -1;
2143 if (cnt < firstfree)
2144 firstfree = cnt;
2145 if (cnt == nused - 1)
2146 do
2147 --nused;
2148 while (conns[nused - 1].fd == -1);
2149
2150 --n;
2151 }
2152 }
2153
2154 /* Now find entries which have timed out. */
2155 assert (nused > 0);
2156
2157 /* We make the timeout length depend on the number of file
2158 descriptors currently used. */
2159#define ACCEPT_TIMEOUT \
2160 (MAX_ACCEPT_TIMEOUT \
2161 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
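      /* For example, with the defaults above an idle daemon times
         connections out after close to MAX_ACCEPT_TIMEOUT (30) seconds,
         while a completely full connection table (NUSED == NCONNS)
         shortens this to MIN_ACCEPT_TIMEOUT (5) seconds.  */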
2162 time_t laststart = now - ACCEPT_TIMEOUT;
2163
2164 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2165 {
2166 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2167 {
2168 /* Remove the entry, it timed out. */
2169 (void) close (conns[cnt].fd);
2170 conns[cnt].fd = -1;
2171
2172 if (cnt < firstfree)
2173 firstfree = cnt;
2174 if (cnt == nused - 1)
2175 do
2176 --nused;
2177 while (conns[nused - 1].fd == -1);
2178 }
2179 }
2180
2181 if (restart_p (now))
2182 restart ();
2183 }
2184}
2185
2186
2187#ifdef HAVE_EPOLL
2188static void
2189main_loop_epoll (int efd)
2190{
2191 struct epoll_event ev = { 0, };
2192 int nused = 1;
2193 size_t highest = 0;
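  /* NUSED counts the descriptors registered with epoll; HIGHEST tracks
     the largest accepted connection descriptor so the timeout scan at
     the bottom of the loop knows how far down to iterate.  */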
2194
2195 /* Add the socket. */
2196 ev.events = EPOLLRDNORM;
2197 ev.data.fd = sock;
2198 if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
2199 /* We cannot use epoll. */
2200 return;
2201
2202# ifdef HAVE_INOTIFY
2203 if (inotify_fd != -1)
2204 {
2205 ev.events = EPOLLRDNORM;
2206 ev.data.fd = inotify_fd;
2207 if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
2208 /* We cannot use epoll. */
2209 return;
2210 nused = 2;
2211 }
2212# endif
2213
2214# ifdef HAVE_NETLINK
2215 if (nl_status_fd != -1)
2216 {
2217 ev.events = EPOLLRDNORM;
2218 ev.data.fd = nl_status_fd;
2219 if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
2220 /* We cannot use epoll. */
2221 return;
2222 }
2223# endif
2224
2225 while (1)
2226 {
2227 struct epoll_event revs[100];
2228# define nrevs (sizeof (revs) / sizeof (revs[0]))
2229
2230 int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);
2231
2232 time_t now = time (NULL);
2233
2234 for (int cnt = 0; cnt < n; ++cnt)
2235 if (revs[cnt].data.fd == sock)
2236 {
2237 /* A new connection. */
2238 int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2239 SOCK_NONBLOCK));
2240
2241 /* Use the descriptor if we have not reached the limit. */
2242 if (fd >= 0)
2243 {
2244 /* Try to add the new descriptor. */
2245 ev.data.fd = fd;
2246 if (fd >= nconns
2247 || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                /* The descriptor is too large to index the STARTTIME
                   array (which has room for NCONNS entries) or something
                   went wrong.  Close the descriptor.  */
2250 close (fd);
2251 else
2252 {
2253 /* Remember when we accepted the connection. */
2254 starttime[fd] = now;
2255
2256 if (fd > highest)
2257 highest = fd;
2258
2259 ++nused;
2260 }
2261 }
2262 }
2263# ifdef HAVE_INOTIFY
2264 else if (revs[cnt].data.fd == inotify_fd)
2265 {
2266 int ret;
2267 ret = handle_inotify_events ();
2268 if (ret == -1)
2269 {
2270 /* Something went wrong when reading the inotify
2271 data. Better disable inotify. */
2272 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2273 (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
2274 close (inotify_fd);
2275 inotify_fd = -1;
2276 break;
2277 }
2278 }
2279# endif
2280# ifdef HAVE_NETLINK
2281 else if (revs[cnt].data.fd == nl_status_fd)
2282 {
2283 char buf[4096];
2284 /* Read all the data. We do not interpret it here. */
2285 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2286 sizeof (buf))) != -1)
2287 ;
2288
2289 __bump_nl_timestamp ();
2290 }
2291# endif
2292 else
2293 {
2294 /* Remove the descriptor from the epoll descriptor. */
2295 (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);
2296
2297 /* Get a worker to handle the request. */
2298 fd_ready (revs[cnt].data.fd);
2299
2300 /* Reset the time. */
2301 starttime[revs[cnt].data.fd] = 0;
2302 if (revs[cnt].data.fd == highest)
2303 do
2304 --highest;
2305 while (highest > 0 && starttime[highest] == 0);
2306
2307 --nused;
2308 }
2309
      /* Now look for accepted connections on which no request has
         arrived for too long; they have timed out.  */
2312 time_t laststart = now - ACCEPT_TIMEOUT;
2313 assert (starttime[sock] == 0);
2314# ifdef HAVE_INOTIFY
2315 assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
2316# endif
# ifdef HAVE_NETLINK
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
# endif
2318 for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
2319 if (starttime[cnt] != 0 && starttime[cnt] < laststart)
2320 {
2321 /* We are waiting for this one for too long. Close it. */
2322 (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);
2323
2324 (void) close (cnt);
2325
2326 starttime[cnt] = 0;
2327 if (cnt == highest)
2328 --highest;
2329 }
2330 else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
2331 --highest;
2332
2333 if (restart_p (now))
2334 restart ();
2335 }
2336}
2337#endif
2338
2339
2340/* Start all the threads we want. The initial process is thread no. 1. */
2341void
2342start_threads (void)
2343{
  /* Initialize the attributes for the condition variables we will use.
     The only non-standard attribute we might set is the clock
     selection.  */
2346 pthread_condattr_t condattr;
2347 pthread_condattr_init (&condattr);
2348
2349#if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2350 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
  /* Determine whether the monotonic clock is available.  */
2352 struct timespec dummy;
2353# if _POSIX_MONOTONIC_CLOCK == 0
2354 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2355# endif
2356# if _POSIX_CLOCK_SELECTION == 0
2357 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2358# endif
2359 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2360 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2361 timeout_clock = CLOCK_MONOTONIC;
2362#endif
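  /* If the monotonic clock is usable, the timed waits on the prune
     condition variables are measured against CLOCK_MONOTONIC and are
     therefore unaffected by changes to the wall clock.  */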
2363
2364 /* Create the attribute for the threads. They are all created
2365 detached. */
2366 pthread_attr_init (&attr);
2367 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2368 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2369 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2370
  /* We allow fewer than LASTDB threads only for debugging.  */
2372 if (debug_level == 0)
2373 nthreads = MAX (nthreads, lastdb);
2374
2375 /* Create the threads which prune the databases. */
2376 // XXX Ideally this work would be done by some of the worker threads.
2377 // XXX But this is problematic since we would need to be able to wake
2378 // XXX them up explicitly as well as part of the group handling the
2379 // XXX ready-list. This requires an operation where we can wait on
  // XXX two condition variables at the same time.  This operation
2381 // XXX does not exist (yet).
2382 for (long int i = 0; i < lastdb; ++i)
2383 {
      /* Initialize the condition variable.  */
2385 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2386 {
2387 dbg_log (_("could not initialize conditional variable"));
2388 do_exit (1, 0, NULL);
2389 }
2390
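      /* Start the prune thread for this database if it is enabled; the
         database index is passed as the thread argument.  */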
2391 pthread_t th;
2392 if (dbs[i].enabled
2393 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2394 {
2395 dbg_log (_("could not start clean-up thread; terminating"));
2396 do_exit (1, 0, NULL);
2397 }
2398 }
2399
2400 pthread_condattr_destroy (&condattr);
2401
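  /* Create the worker threads which handle the client requests.  It is
     not fatal if some of them cannot be created as long as at least one
     worker is running.  */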
2402 for (long int i = 0; i < nthreads; ++i)
2403 {
2404 pthread_t th;
2405 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2406 {
2407 if (i == 0)
2408 {
2409 dbg_log (_("could not start any worker thread; terminating"));
2410 do_exit (1, 0, NULL);
2411 }
2412
2413 break;
2414 }
2415 }
2416
2417 /* Now it is safe to let the parent know that we're doing fine and it can
2418 exit. */
2419 notify_parent (0);
2420
  /* Determine how much room for descriptors we should allocate.  The
     value is clamped to the range [MINCONN, MAXCONN] below.  */
2424 const long int nfds = sysconf (_SC_OPEN_MAX);
2425#define MINCONN 32
2426#define MAXCONN 16384
2427 if (nfds == -1 || nfds > MAXCONN)
2428 nconns = MAXCONN;
2429 else if (nfds < MINCONN)
2430 nconns = MINCONN;
2431 else
2432 nconns = nfds;
2433
2434 /* We need memory to pass descriptors on to the worker threads. */
2435 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
  /* Array to keep track of when each connection was accepted.  */
2437 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2438
2439 /* In the main thread we execute the loop which handles incoming
2440 connections. */
2441#ifdef HAVE_EPOLL
2442 int efd = epoll_create (100);
2443 if (efd != -1)
2444 {
2445 main_loop_epoll (efd);
2446 close (efd);
2447 }
2448#endif
2449
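  /* We get here only if epoll support is not compiled in or the epoll
     setup failed; fall back to the poll-based main loop.  */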
2450 main_loop_poll ();
2451}
2452
2453
/* Look up the uid, gid, and supplementary groups to run nscd as.  When
   this function is called, we are not listening on the nscd socket yet,
   so we can just use the ordinary lookup functions without causing a
   lockup.  */
2457static void
2458begin_drop_privileges (void)
2459{
2460 struct passwd *pwd = getpwnam (server_user);
2461
2462 if (pwd == NULL)
2463 {
2464 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2465 do_exit (EXIT_FAILURE, 0,
2466 _("Failed to run nscd as user '%s'"), server_user);
2467 }
2468
2469 server_uid = pwd->pw_uid;
2470 server_gid = pwd->pw_gid;
2471
2472 /* Save the old UID/GID if we have to change back. */
2473 if (paranoia)
2474 {
2475 old_uid = getuid ();
2476 old_gid = getgid ();
2477 }
2478
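  /* The first call is made with a null group buffer and a count of zero
     solely to learn the number of supplementary groups, which
     getgrouplist stores in SERVER_NGROUPS while failing with -1.  A
     return value of 0 should never occur because the list always
     contains at least SERVER_GID.  */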
2479 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2480 {
2481 /* This really must never happen. */
2482 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2483 do_exit (EXIT_FAILURE, errno,
2484 _("initial getgrouplist failed"));
2485 }
2486
2487 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2488
2489 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2490 == -1)
2491 {
2492 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2493 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2494 }
2495}
2496
2497
2498/* Call setgroups(), setgid(), and setuid() to drop root privileges and
2499 run nscd as the user specified in the configuration file. */
2500static void
2501finish_drop_privileges (void)
2502{
2503#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2504 /* We need to preserve the capabilities to connect to the audit daemon. */
2505 cap_t new_caps = preserve_capabilities ();
2506#endif
2507
2508 if (setgroups (server_ngroups, server_groups) == -1)
2509 {
2510 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2511 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2512 }
2513
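  /* In paranoia mode the old IDs are kept as the saved set-group-ID and
     set-user-ID so that a later restart of the daemon can regain them;
     otherwise the privileges are dropped permanently.  */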
2514 int res;
2515 if (paranoia)
2516 res = setresgid (server_gid, server_gid, old_gid);
2517 else
2518 res = setgid (server_gid);
2519 if (res == -1)
2520 {
2521 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2522 do_exit (4, errno, "setgid");
2523 }
2524
2525 if (paranoia)
2526 res = setresuid (server_uid, server_uid, old_uid);
2527 else
2528 res = setuid (server_uid);
2529 if (res == -1)
2530 {
2531 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2532 do_exit (4, errno, "setuid");
2533 }
2534
2535#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2536 /* Remove the temporary capabilities. */
2537 install_real_capabilities (new_caps);
2538#endif
2539}
2540