1/* Inner loops of cache daemon.
2 Copyright (C) 1998-2020 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <https://www.gnu.org/licenses/>. */
18
19#include <alloca.h>
20#include <assert.h>
21#include <atomic.h>
22#include <error.h>
23#include <errno.h>
24#include <fcntl.h>
25#include <grp.h>
26#include <ifaddrs.h>
27#include <libintl.h>
28#include <pthread.h>
29#include <pwd.h>
30#include <resolv.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <unistd.h>
34#include <stdint.h>
35#include <arpa/inet.h>
36#ifdef HAVE_NETLINK
37# include <linux/netlink.h>
38# include <linux/rtnetlink.h>
39#endif
40#ifdef HAVE_EPOLL
41# include <sys/epoll.h>
42#endif
43#ifdef HAVE_INOTIFY
44# include <sys/inotify.h>
45#endif
46#include <sys/mman.h>
47#include <sys/param.h>
48#include <sys/poll.h>
49#include <sys/socket.h>
50#include <sys/stat.h>
51#include <sys/un.h>
52
53#include "nscd.h"
54#include "dbg_log.h"
55#include "selinux.h"
56#include <resolv/resolv.h>
57
58#include <kernel-features.h>
59#include <libc-diag.h>
60
61
/* Support to run nscd as an unprivileged user.  */

/* Account name to switch to.  nscd_init only calls the privilege-drop
   helpers below when this is non-NULL.  */
const char *server_user;
/* NOTE(review): presumably the uid/gid resolved for server_user by
   begin_drop_privileges; the code that fills them is not visible here.  */
static uid_t server_uid;
static gid_t server_gid;
/* NOTE(review): presumably identify the user permitted to query
   statistics; their use is not visible in this part of the file.  */
const char *stat_user;
uid_t stat_uid;
/* NOTE(review): presumably the supplementary group list for server_user
   and its length; confirm against begin_drop_privileges.  */
static gid_t *server_groups;
/* Fallback for platforms whose headers do not provide NGROUPS.  */
#ifndef NGROUPS
# define NGROUPS	32
#endif
static int server_ngroups;

/* Thread attributes shared by this file; initialization is not visible
   in this part of the file.  */
static pthread_attr_t attr;

/* Both are called from nscd_init: the first before the databases and the
   listening socket are set up, the second once everything is in place.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
78
/* Map request type to a string.  The table is indexed by the request
   type value and is used by handle_request when writing debug log
   lines.  Request types without an initializer here are NULL.  */
const char *const serv2str[LASTREQ] =
{
  [GETPWBYNAME] = "GETPWBYNAME",
  [GETPWBYUID] = "GETPWBYUID",
  [GETGRBYNAME] = "GETGRBYNAME",
  [GETGRBYGID] = "GETGRBYGID",
  [GETHOSTBYNAME] = "GETHOSTBYNAME",
  [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
  [GETHOSTBYADDR] = "GETHOSTBYADDR",
  [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
  [SHUTDOWN] = "SHUTDOWN",
  [GETSTAT] = "GETSTAT",
  [INVALIDATE] = "INVALIDATE",
  [GETFDPW] = "GETFDPW",
  [GETFDGR] = "GETFDGR",
  [GETFDHST] = "GETFDHST",
  [GETAI] = "GETAI",
  [INITGROUPS] = "INITGROUPS",
  [GETSERVBYNAME] = "GETSERVBYNAME",
  [GETSERVBYPORT] = "GETSERVBYPORT",
  [GETFDSERV] = "GETFDSERV",
  [GETNETGRENT] = "GETNETGRENT",
  [INNETGR] = "INNETGR",
  [GETFDNETGR] = "GETFDNETGR"
};
105
/* Static initializer for the per-database rwlock.  The
   writer-nonrecursive variant is used whenever the platform headers
   define it; otherwise the plain default initializer is used.  */
#ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
# define RWLOCK_INITIALIZER PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
#else
# define RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER
#endif

/* The control data structures for the services.  One entry per
   database, indexed by database type.  Every database starts out
   disabled, non-persistent and non-shared, with file checking turned on
   and no descriptors open (wr_fd/ro_fd == -1); nscd_init later fills in
   head, data, memsize and the descriptors for the enabled ones.
   NOTE(review): postimeout/negtimeout are presumably in seconds; the
   unit is not established in this file.  */
struct database_dyn dbs[lastdb] =
{
  /* Password database.  */
  [pwddb] = {
    .lock = RWLOCK_INITIALIZER,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 1,
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .db_filename = _PATH_NSCD_PASSWD_DB,
    .disabled_iov = &pwd_iov_disabled,
    .postimeout = 3600,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* Group database; note the longer negative timeout.  */
  [grpdb] = {
    .lock = RWLOCK_INITIALIZER,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 1,
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .db_filename = _PATH_NSCD_GROUP_DB,
    .disabled_iov = &grp_iov_disabled,
    .postimeout = 3600,
    .negtimeout = 60,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* Hosts database.  */
  [hstdb] = {
    .lock = RWLOCK_INITIALIZER,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 0,		/* Not used.  */
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .db_filename = _PATH_NSCD_HOSTS_DB,
    .disabled_iov = &hst_iov_disabled,
    .postimeout = 3600,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* Services database; positive entries are kept much longer.  */
  [servdb] = {
    .lock = RWLOCK_INITIALIZER,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 0,		/* Not used.  */
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .db_filename = _PATH_NSCD_SERVICES_DB,
    .disabled_iov = &serv_iov_disabled,
    .postimeout = 28800,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* Netgroup database; same timeouts as the services database.  */
  [netgrdb] = {
    .lock = RWLOCK_INITIALIZER,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 0,		/* Not used.  */
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .db_filename = _PATH_NSCD_NETGROUP_DB,
    .disabled_iov = &netgroup_iov_disabled,
    .postimeout = 28800,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  }
};
211
212
/* Mapping of request type to database.

   DATA_REQUEST is true for requests that handle_request first tries to
   answer from the cache; DB is the database the request operates on, or
   NULL when the request is not tied to one.  verify_persistent_db also
   uses DB to check that a stored hash entry belongs to the database
   being verified.  Request types without an initializer (INVALIDATE,
   for example) are zero-initialized, i.e. { false, NULL }.  */
static struct
{
  bool data_request;
  struct database_dyn *db;
} const reqinfo[LASTREQ] =
{
  [GETPWBYNAME] = { true, &dbs[pwddb] },
  [GETPWBYUID] = { true, &dbs[pwddb] },
  [GETGRBYNAME] = { true, &dbs[grpdb] },
  [GETGRBYGID] = { true, &dbs[grpdb] },
  [GETHOSTBYNAME] = { true, &dbs[hstdb] },
  [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
  [GETHOSTBYADDR] = { true, &dbs[hstdb] },
  [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
  [SHUTDOWN] = { false, NULL },
  [GETSTAT] = { false, NULL },
  /* The GETFD* requests name a database but are not cache lookups.  */
  [GETFDPW] = { false, &dbs[pwddb] },
  [GETFDGR] = { false, &dbs[grpdb] },
  [GETFDHST] = { false, &dbs[hstdb] },
  [GETAI] = { true, &dbs[hstdb] },
  [INITGROUPS] = { true, &dbs[grpdb] },
  [GETSERVBYNAME] = { true, &dbs[servdb] },
  [GETSERVBYPORT] = { true, &dbs[servdb] },
  [GETFDSERV] = { false, &dbs[servdb] },
  [GETNETGRENT] = { true, &dbs[netgrdb] },
  [INNETGR] = { true, &dbs[netgrdb] },
  [GETFDNETGR] = { false, &dbs[netgrdb] }
};
242
243
/* Initial number of threads to use.  -1 means "not configured";
   nscd_init replaces it with the default of 4.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  Created, bound and put into
   listening state by nscd_init; closed by close_sockets.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor.  install_watches adds its watches to this
   descriptor; -1 until it has been opened.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  Opened by nscd_init when the
   hosts database is enabled; stays -1 if the socket cannot be set up.  */
static int nl_status_fd = -1;
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
264
265
/* Send the LEN bytes starting at BUF over the socket FD.  Short writes
   are continued and calls interrupted by a signal are restarted.

   Returns the negative result of send if a call failed.  Otherwise the
   number of bytes actually handed to the socket is returned; this is
   smaller than LEN only if send reported that it transferred nothing.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;
  ssize_t rc;

  for (;;)
    {
      /* Restart the call for as long as it fails with EINTR.  */
      do
        rc = send (fd, cursor, left, MSG_NOSIGNAL);
      while (rc == -1 && errno == EINTR);

      if (rc <= 0)
        break;

      cursor += rc;
      left -= rc;
      if (left == 0)
        break;
    }

  if (rc < 0)
    return rc;
  return len - left;
}
282
283
/* Tags recorded per byte in the use map that check_use maintains while
   a persistent database is verified.  A tag is the kind of object that
   owns the byte, optionally combined with position flags.  */
enum usekey
  {
    /* The byte is not claimed by any object.  */
    use_not = 0x00,

    /* Kind of the owning object: hash entry or data record.  */
    use_he = 0x01,
    use_data = 0x03,

    /* Flag bits.  These three are symbolic constants that only ever
       appear OR-ed onto one of the kinds above.  */
    use_first = 0x10,
    use_begin = 0x20,
    use_end = 0x40,

    /* First and last byte of a hash entry.  */
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,

    /* First and last byte of a data record; the _first variant marks a
       record that has been referenced by a "first" hash entry.  */
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
300
301
302static int
303check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
304 enum usekey use, ref_t start, size_t len)
305{
306 if (len < 2)
307 return 0;
308
309 if (start > first_free || start + len > first_free
310 || (start & BLOCK_ALIGN_M1))
311 return 0;
312
313 if (usemap[start] == use_not)
314 {
315 /* Add the start marker. */
316 usemap[start] = use | use_begin;
317 use &= ~use_first;
318
319 while (--len > 0)
320 if (usemap[++start] != use_not)
321 return 0;
322 else
323 usemap[start] = use;
324
325 /* Add the end marker. */
326 usemap[start] = use | use_end;
327 }
328 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
329 {
330 /* Hash entries can't be shared. */
331 if (use == use_he)
332 return 0;
333
334 usemap[start] |= (use & use_first);
335 use &= ~use_first;
336
337 while (--len > 1)
338 if (usemap[++start] != use)
339 return 0;
340
341 if (usemap[++start] != (use | use_end))
342 return 0;
343 }
344 else
345 /* Points to a wrong object or somewhere in the middle. */
346 return 0;
347
348 return 1;
349}
350
351
352/* Verify data in persistent database. */
353static int
354verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
355{
356 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
357 || dbnr == netgrdb);
358
359 time_t now = time (NULL);
360
361 struct database_pers_head *head = mem;
362 struct database_pers_head head_copy = *head;
363
364 /* Check that the header that was read matches the head in the database. */
365 if (memcmp (head, readhead, sizeof (*head)) != 0)
366 return 0;
367
368 /* First some easy tests: make sure the database header is sane. */
369 if (head->version != DB_VERSION
370 || head->header_size != sizeof (*head)
371 /* We allow a timestamp to be one hour ahead of the current time.
372 This should cover daylight saving time changes. */
373 || head->timestamp > now + 60 * 60 + 60
374 || (head->gc_cycle & 1)
375 || head->module == 0
376 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
377 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
378 || head->first_free < 0
379 || head->first_free > head->data_size
380 || (head->first_free & BLOCK_ALIGN_M1) != 0
381 || head->maxnentries < 0
382 || head->maxnsearched < 0)
383 return 0;
384
385 uint8_t *usemap = calloc (head->first_free, 1);
386 if (usemap == NULL)
387 return 0;
388
389 const char *data = (char *) &head->array[roundup (head->module,
390 ALIGN / sizeof (ref_t))];
391
392 nscd_ssize_t he_cnt = 0;
393 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
394 {
395 ref_t trail = head->array[cnt];
396 ref_t work = trail;
397 int tick = 0;
398
399 while (work != ENDREF)
400 {
401 if (! check_use (data, head->first_free, usemap, use_he, work,
402 sizeof (struct hashentry)))
403 goto fail;
404
405 /* Now we know we can dereference the record. */
406 struct hashentry *here = (struct hashentry *) (data + work);
407
408 ++he_cnt;
409
410 /* Make sure the record is for this type of service. */
411 if (here->type >= LASTREQ
412 || reqinfo[here->type].db != &dbs[dbnr])
413 goto fail;
414
415 /* Validate boolean field value. */
416 if (here->first != false && here->first != true)
417 goto fail;
418
419 if (here->len < 0)
420 goto fail;
421
422 /* Now the data. */
423 if (here->packet < 0
424 || here->packet > head->first_free
425 || here->packet + sizeof (struct datahead) > head->first_free)
426 goto fail;
427
428 struct datahead *dh = (struct datahead *) (data + here->packet);
429
430 if (! check_use (data, head->first_free, usemap,
431 use_data | (here->first ? use_first : 0),
432 here->packet, dh->allocsize))
433 goto fail;
434
435 if (dh->allocsize < sizeof (struct datahead)
436 || dh->recsize > dh->allocsize
437 || (dh->notfound != false && dh->notfound != true)
438 || (dh->usable != false && dh->usable != true))
439 goto fail;
440
441 if (here->key < here->packet + sizeof (struct datahead)
442 || here->key > here->packet + dh->allocsize
443 || here->key + here->len > here->packet + dh->allocsize)
444 goto fail;
445
446 work = here->next;
447
448 if (work == trail)
449 /* A circular list, this must not happen. */
450 goto fail;
451 if (tick)
452 trail = ((struct hashentry *) (data + trail))->next;
453 tick = 1 - tick;
454 }
455 }
456
457 if (he_cnt != head->nentries)
458 goto fail;
459
460 /* See if all data and keys had at least one reference from
461 he->first == true hashentry. */
462 for (ref_t idx = 0; idx < head->first_free; ++idx)
463 {
464 if (usemap[idx] == use_data_begin)
465 goto fail;
466 }
467
468 /* Finally, make sure the database hasn't changed since the first test. */
469 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
470 goto fail;
471
472 free (usemap);
473 return 1;
474
475fail:
476 free (usemap);
477 return 0;
478}
479
480
481/* Initialize database information structures. */
482void
483nscd_init (void)
484{
485 /* Look up unprivileged uid/gid/groups before we start listening on the
486 socket */
487 if (server_user != NULL)
488 begin_drop_privileges ();
489
490 if (nthreads == -1)
491 /* No configuration for this value, assume a default. */
492 nthreads = 4;
493
494 for (size_t cnt = 0; cnt < lastdb; ++cnt)
495 if (dbs[cnt].enabled)
496 {
497 pthread_rwlock_init (&dbs[cnt].lock, NULL);
498 pthread_mutex_init (&dbs[cnt].memlock, NULL);
499
500 if (dbs[cnt].persistent)
501 {
502 /* Try to open the appropriate file on disk. */
503 int fd = open (dbs[cnt].db_filename, O_RDWR | O_CLOEXEC);
504 if (fd != -1)
505 {
506 char *msg = NULL;
507 struct stat64 st;
508 void *mem;
509 size_t total;
510 struct database_pers_head head;
511 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
512 sizeof (head)));
513 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
514 {
515 fail_db_errno:
516 /* The code is single-threaded at this point so
517 using strerror is just fine. */
518 msg = strerror (errno);
519 fail_db:
520 dbg_log (_("invalid persistent database file \"%s\": %s"),
521 dbs[cnt].db_filename, msg);
522 unlink (dbs[cnt].db_filename);
523 }
524 else if (head.module == 0 && head.data_size == 0)
525 {
526 /* The file has been created, but the head has not
527 been initialized yet. */
528 msg = _("uninitialized header");
529 goto fail_db;
530 }
531 else if (head.header_size != (int) sizeof (head))
532 {
533 msg = _("header size does not match");
534 goto fail_db;
535 }
536 else if ((total = (sizeof (head)
537 + roundup (head.module * sizeof (ref_t),
538 ALIGN)
539 + head.data_size))
540 > st.st_size
541 || total < sizeof (head))
542 {
543 msg = _("file size does not match");
544 goto fail_db;
545 }
546 /* Note we map with the maximum size allowed for the
547 database. This is likely much larger than the
548 actual file size. This is OK on most OSes since
549 extensions of the underlying file will
550 automatically translate more pages available for
551 memory access. */
552 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
553 PROT_READ | PROT_WRITE,
554 MAP_SHARED, fd, 0))
555 == MAP_FAILED)
556 goto fail_db_errno;
557 else if (!verify_persistent_db (mem, &head, cnt))
558 {
559 munmap (mem, total);
560 msg = _("verification failed");
561 goto fail_db;
562 }
563 else
564 {
565 /* Success. We have the database. */
566 dbs[cnt].head = mem;
567 dbs[cnt].memsize = total;
568 dbs[cnt].data = (char *)
569 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
570 ALIGN / sizeof (ref_t))];
571 dbs[cnt].mmap_used = true;
572
573 if (dbs[cnt].suggested_module > head.module)
574 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
575 dbnames[cnt]);
576
577 dbs[cnt].wr_fd = fd;
578 fd = -1;
579 /* We also need a read-only descriptor. */
580 if (dbs[cnt].shared)
581 {
582 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
583 O_RDONLY | O_CLOEXEC);
584 if (dbs[cnt].ro_fd == -1)
585 dbg_log (_("\
586cannot create read-only descriptor for \"%s\"; no mmap"),
587 dbs[cnt].db_filename);
588 }
589
590 // XXX Shall we test whether the descriptors actually
591 // XXX point to the same file?
592 }
593
594 /* Close the file descriptors in case something went
595 wrong in which case the variable have not been
596 assigned -1. */
597 if (fd != -1)
598 close (fd);
599 }
600 else if (errno == EACCES)
601 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
602 dbs[cnt].db_filename);
603 }
604
605 if (dbs[cnt].head == NULL)
606 {
607 /* No database loaded. Allocate the data structure,
608 possibly on disk. */
609 struct database_pers_head head;
610 size_t total = (sizeof (head)
611 + roundup (dbs[cnt].suggested_module
612 * sizeof (ref_t), ALIGN)
613 + (dbs[cnt].suggested_module
614 * DEFAULT_DATASIZE_PER_BUCKET));
615
616 /* Try to create the database. If we do not need a
617 persistent database create a temporary file. */
618 int fd;
619 int ro_fd = -1;
620 if (dbs[cnt].persistent)
621 {
622 fd = open (dbs[cnt].db_filename,
623 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC,
624 S_IRUSR | S_IWUSR);
625 if (fd != -1 && dbs[cnt].shared)
626 ro_fd = open (dbs[cnt].db_filename,
627 O_RDONLY | O_CLOEXEC);
628 }
629 else
630 {
631 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
632 fd = mkostemp (fname, O_CLOEXEC);
633
634 /* We do not need the file name anymore after we
635 opened another file descriptor in read-only mode. */
636 if (fd != -1)
637 {
638 if (dbs[cnt].shared)
639 ro_fd = open (fname, O_RDONLY | O_CLOEXEC);
640
641 unlink (fname);
642 }
643 }
644
645 if (fd == -1)
646 {
647 if (errno == EEXIST)
648 {
649 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
650 dbnames[cnt], dbs[cnt].db_filename);
651 do_exit (1, 0, NULL);
652 }
653
654 if (dbs[cnt].persistent)
655 dbg_log (_("cannot create %s; no persistent database used"),
656 dbs[cnt].db_filename);
657 else
658 dbg_log (_("cannot create %s; no sharing possible"),
659 dbs[cnt].db_filename);
660
661 dbs[cnt].persistent = 0;
662 // XXX remember: no mmap
663 }
664 else
665 {
666 /* Tell the user if we could not create the read-only
667 descriptor. */
668 if (ro_fd == -1 && dbs[cnt].shared)
669 dbg_log (_("\
670cannot create read-only descriptor for \"%s\"; no mmap"),
671 dbs[cnt].db_filename);
672
673 /* Before we create the header, initialize the hash
674 table. That way if we get interrupted while writing
675 the header we can recognize a partially initialized
676 database. */
677 size_t ps = sysconf (_SC_PAGESIZE);
678 char tmpbuf[ps];
679 assert (~ENDREF == 0);
680 memset (tmpbuf, '\xff', ps);
681
682 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
683 off_t offset = sizeof (head);
684
685 size_t towrite;
686 if (offset % ps != 0)
687 {
688 towrite = MIN (remaining, ps - (offset % ps));
689 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
690 goto write_fail;
691 offset += towrite;
692 remaining -= towrite;
693 }
694
695 while (remaining > ps)
696 {
697 if (pwrite (fd, tmpbuf, ps, offset) == -1)
698 goto write_fail;
699 offset += ps;
700 remaining -= ps;
701 }
702
703 if (remaining > 0
704 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
705 goto write_fail;
706
707 /* Create the header of the file. */
708 struct database_pers_head head =
709 {
710 .version = DB_VERSION,
711 .header_size = sizeof (head),
712 .module = dbs[cnt].suggested_module,
713 .data_size = (dbs[cnt].suggested_module
714 * DEFAULT_DATASIZE_PER_BUCKET),
715 .first_free = 0
716 };
717 void *mem;
718
719 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
720 != sizeof (head))
721 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
722 != 0)
723 || (mem = mmap (NULL, dbs[cnt].max_db_size,
724 PROT_READ | PROT_WRITE,
725 MAP_SHARED, fd, 0)) == MAP_FAILED)
726 {
727 write_fail:
728 unlink (dbs[cnt].db_filename);
729 dbg_log (_("cannot write to database file %s: %s"),
730 dbs[cnt].db_filename, strerror (errno));
731 dbs[cnt].persistent = 0;
732 }
733 else
734 {
735 /* Success. */
736 dbs[cnt].head = mem;
737 dbs[cnt].data = (char *)
738 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
739 ALIGN / sizeof (ref_t))];
740 dbs[cnt].memsize = total;
741 dbs[cnt].mmap_used = true;
742
743 /* Remember the descriptors. */
744 dbs[cnt].wr_fd = fd;
745 dbs[cnt].ro_fd = ro_fd;
746 fd = -1;
747 ro_fd = -1;
748 }
749
750 if (fd != -1)
751 close (fd);
752 if (ro_fd != -1)
753 close (ro_fd);
754 }
755 }
756
757 if (dbs[cnt].head == NULL)
758 {
759 /* We do not use the persistent database. Just
760 create an in-memory data structure. */
761 assert (! dbs[cnt].persistent);
762
763 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
764 + (dbs[cnt].suggested_module
765 * sizeof (ref_t)));
766 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
767 assert (~ENDREF == 0);
768 memset (dbs[cnt].head->array, '\xff',
769 dbs[cnt].suggested_module * sizeof (ref_t));
770 dbs[cnt].head->module = dbs[cnt].suggested_module;
771 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
772 * dbs[cnt].head->module);
773 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
774 dbs[cnt].head->first_free = 0;
775
776 dbs[cnt].shared = 0;
777 assert (dbs[cnt].ro_fd == -1);
778 }
779 }
780
781 /* Create the socket. */
782 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
783 if (sock < 0)
784 {
785 dbg_log (_("cannot open socket: %s"), strerror (errno));
786 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
787 }
788 /* Bind a name to the socket. */
789 struct sockaddr_un sock_addr;
790 sock_addr.sun_family = AF_UNIX;
791 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
792 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
793 {
794 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
795 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
796 }
797
798 /* Set permissions for the socket. */
799 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
800
801 /* Set the socket up to accept connections. */
802 if (listen (sock, SOMAXCONN) < 0)
803 {
804 dbg_log (_("cannot enable socket to accept connections: %s"),
805 strerror (errno));
806 do_exit (1, 0, NULL);
807 }
808
809#ifdef HAVE_NETLINK
810 if (dbs[hstdb].enabled)
811 {
812 /* Try to open netlink socket to monitor network setting changes. */
813 nl_status_fd = socket (AF_NETLINK,
814 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
815 NETLINK_ROUTE);
816 if (nl_status_fd != -1)
817 {
818 struct sockaddr_nl snl;
819 memset (&snl, '\0', sizeof (snl));
820 snl.nl_family = AF_NETLINK;
821 /* XXX Is this the best set to use? */
822 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
823 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
824 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
825 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
826 | RTMGRP_IPV6_PREFIX);
827
828 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
829 {
830 close (nl_status_fd);
831 nl_status_fd = -1;
832 }
833 else
834 {
835 /* Start the timestamp process. */
836 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
837 = __bump_nl_timestamp ();
838 }
839 }
840 }
841#endif
842
843 /* Change to unprivileged uid/gid/groups if specified in config file */
844 if (server_user != NULL)
845 finish_drop_privileges ();
846}
847
#ifdef HAVE_INOTIFY
/* Events watched on a traced file and on its parent directory.  */
#define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
#define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)

/* Make sure the inotify watches for the traced file FINFO exist: one on
   the file itself and one on the directory containing it.  A watch whose
   descriptor is already non-negative is left alone, so the function can
   be called repeatedly.  If the file watch cannot be installed the
   directory watch is not attempted.  Failures are only logged.  */
void
install_watches (struct traced_file *finfo)
{
  /* Use inotify support if we have it.  */
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
                                                           finfo->fname,
                                                           TRACED_FILE_MASK);
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    {
      dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
               finfo->fname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring file `%s` (%d)"),
           finfo->fname, finfo->inotify_descr[TRACED_FILE]);
  /* Additionally listen for events in the file's parent directory.
     We do this because the file to be watched might be
     deleted and then added back again.  When it is added back again
     we must re-add the watch.  We must also cover IN_MOVED_TO to
     detect a file being moved into the directory.  */
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
                                                          finfo->dname,
                                                          TRACED_DIR_MASK);
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    {
      /* Report the directory, which is the object the watch was
         requested for, not the file name.  */
      dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
               finfo->dname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring directory `%s` (%d)"),
           finfo->dname, finfo->inotify_descr[TRACED_DIR]);
}
#endif
886
887/* Register the file in FINFO as a traced file for the database DBS[DBIX].
888
889 We support registering multiple files per database. Each call to
890 register_traced_file adds to the list of registered files.
891
892 When we prune the database, either through timeout or a request to
893 invalidate, we will check to see if any of the registered files has changed.
894 When we accept new connections to handle a cache request we will also
895 check to see if any of the registered files has changed.
896
897 If we have inotify support then we install an inotify fd to notify us of
898 file deletion or modification, both of which will require we invalidate
899 the cache for the database. Without inotify support we stat the file and
900 store st_mtime to determine if the file has been modified. */
901void
902register_traced_file (size_t dbidx, struct traced_file *finfo)
903{
904 /* If the database is disabled or file checking is disabled
905 then ignore the registration. */
906 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
907 return;
908
909 if (__glibc_unlikely (debug_level > 0))
910 dbg_log (_("monitoring file %s for database %s"),
911 finfo->fname, dbnames[dbidx]);
912
913#ifdef HAVE_INOTIFY
914 install_watches (finfo);
915#endif
916 struct stat64 st;
917 if (stat64 (finfo->fname, &st) < 0)
918 {
919 /* We cannot stat() the file. Set mtime to zero and try again later. */
920 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
921 finfo->fname, strerror (errno));
922 finfo->mtime = 0;
923 }
924 else
925 finfo->mtime = st.st_mtime;
926
927 /* Queue up the file name. */
928 finfo->next = dbs[dbidx].traced_files;
929 dbs[dbidx].traced_files = finfo;
930}
931
932
933/* Close the connections. */
934void
935close_sockets (void)
936{
937 close (sock);
938}
939
940
941static void
942invalidate_cache (char *key, int fd)
943{
944 dbtype number;
945 int32_t resp;
946
947 for (number = pwddb; number < lastdb; ++number)
948 if (strcmp (key, dbnames[number]) == 0)
949 {
950 struct traced_file *runp = dbs[number].traced_files;
951 while (runp != NULL)
952 {
953 /* Make sure we reload from file when checking mtime. */
954 runp->mtime = 0;
955#ifdef HAVE_INOTIFY
956 /* During an invalidation we try to reload the traced
957 file watches. This allows the user to re-sync if
958 inotify events were lost. Similar to what we do during
959 pruning. */
960 install_watches (runp);
961#endif
962 if (runp->call_res_init)
963 {
964 res_init ();
965 break;
966 }
967 runp = runp->next;
968 }
969 break;
970 }
971
972 if (number == lastdb)
973 {
974 resp = EINVAL;
975 writeall (fd, &resp, sizeof (resp));
976 return;
977 }
978
979 if (dbs[number].enabled)
980 {
981 pthread_mutex_lock (&dbs[number].prune_run_lock);
982 prune_cache (&dbs[number], LONG_MAX, fd);
983 pthread_mutex_unlock (&dbs[number].prune_run_lock);
984 }
985 else
986 {
987 resp = 0;
988 writeall (fd, &resp, sizeof (resp));
989 }
990}
991
992
993#ifdef SCM_RIGHTS
994static void
995send_ro_fd (struct database_dyn *db, char *key, int fd)
996{
997 /* If we do not have an read-only file descriptor do nothing. */
998 if (db->ro_fd == -1)
999 return;
1000
1001 /* We need to send some data along with the descriptor. */
1002 uint64_t mapsize = (db->head->data_size
1003 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1004 + sizeof (struct database_pers_head));
1005 struct iovec iov[2];
1006 iov[0].iov_base = key;
1007 iov[0].iov_len = strlen (key) + 1;
1008 iov[1].iov_base = &mapsize;
1009 iov[1].iov_len = sizeof (mapsize);
1010
1011 /* Prepare the control message to transfer the descriptor. */
1012 union
1013 {
1014 struct cmsghdr hdr;
1015 char bytes[CMSG_SPACE (sizeof (int))];
1016 } buf;
1017 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1018 .msg_control = buf.bytes,
1019 .msg_controllen = sizeof (buf) };
1020 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1021
1022 cmsg->cmsg_level = SOL_SOCKET;
1023 cmsg->cmsg_type = SCM_RIGHTS;
1024 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1025
1026 int *ip = (int *) CMSG_DATA (cmsg);
1027 *ip = db->ro_fd;
1028
1029 msg.msg_controllen = cmsg->cmsg_len;
1030
1031 /* Send the control message. We repeat when we are interrupted but
1032 everything else is ignored. */
1033#ifndef MSG_NOSIGNAL
1034# define MSG_NOSIGNAL 0
1035#endif
1036 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1037
1038 if (__glibc_unlikely (debug_level > 0))
1039 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1040}
1041#endif /* SCM_RIGHTS */
1042
1043
1044/* Handle new request. */
1045static void
1046handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1047{
1048 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1049 {
1050 if (debug_level > 0)
1051 dbg_log (_("\
1052cannot handle old request version %d; current version is %d"),
1053 req->version, NSCD_VERSION);
1054 return;
1055 }
1056
1057 /* Perform the SELinux check before we go on to the standard checks. */
1058 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1059 {
1060 if (debug_level > 0)
1061 {
1062#ifdef SO_PEERCRED
1063 char pbuf[sizeof ("/proc//exe") + 3 * sizeof (long int)];
1064# ifdef PATH_MAX
1065 char buf[PATH_MAX];
1066# else
1067 char buf[4096];
1068# endif
1069
1070 snprintf (pbuf, sizeof (pbuf), "/proc/%ld/exe", (long int) pid);
1071 ssize_t n = readlink (pbuf, buf, sizeof (buf) - 1);
1072
1073 if (n <= 0)
1074 dbg_log (_("\
1075request from %ld not handled due to missing permission"), (long int) pid);
1076 else
1077 {
1078 buf[n] = '\0';
1079 dbg_log (_("\
1080request from '%s' [%ld] not handled due to missing permission"),
1081 buf, (long int) pid);
1082 }
1083#else
1084 dbg_log (_("request not handled due to missing permission"));
1085#endif
1086 }
1087 return;
1088 }
1089
1090 struct database_dyn *db = reqinfo[req->type].db;
1091
1092 /* See whether we can service the request from the cache. */
1093 if (__builtin_expect (reqinfo[req->type].data_request, true))
1094 {
1095 if (__builtin_expect (debug_level, 0) > 0)
1096 {
1097 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1098 {
1099 char buf[INET6_ADDRSTRLEN];
1100
1101 dbg_log ("\t%s (%s)", serv2str[req->type],
1102 inet_ntop (req->type == GETHOSTBYADDR
1103 ? AF_INET : AF_INET6,
1104 key, buf, sizeof (buf)));
1105 }
1106 else
1107 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1108 }
1109
1110 /* Is this service enabled? */
1111 if (__glibc_unlikely (!db->enabled))
1112 {
1113 /* No, sent the prepared record. */
1114 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1115 db->disabled_iov->iov_len,
1116 MSG_NOSIGNAL))
1117 != (ssize_t) db->disabled_iov->iov_len
1118 && __builtin_expect (debug_level, 0) > 0)
1119 {
1120 /* We have problems sending the result. */
1121 char buf[256];
1122 dbg_log (_("cannot write result: %s"),
1123 strerror_r (errno, buf, sizeof (buf)));
1124 }
1125
1126 return;
1127 }
1128
1129 /* Be sure we can read the data. */
1130 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1131 {
1132 ++db->head->rdlockdelayed;
1133 pthread_rwlock_rdlock (&db->lock);
1134 }
1135
1136 /* See whether we can handle it from the cache. */
1137 struct datahead *cached;
1138 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1139 db, uid);
1140 if (cached != NULL)
1141 {
1142 /* Hurray it's in the cache. */
1143 if (writeall (fd, cached->data, cached->recsize) != cached->recsize
1144 && __glibc_unlikely (debug_level > 0))
1145 {
1146 /* We have problems sending the result. */
1147 char buf[256];
1148 dbg_log (_("cannot write result: %s"),
1149 strerror_r (errno, buf, sizeof (buf)));
1150 }
1151
1152 pthread_rwlock_unlock (&db->lock);
1153
1154 return;
1155 }
1156
1157 pthread_rwlock_unlock (&db->lock);
1158 }
1159 else if (__builtin_expect (debug_level, 0) > 0)
1160 {
1161 if (req->type == INVALIDATE)
1162 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1163 else
1164 dbg_log ("\t%s", serv2str[req->type]);
1165 }
1166
1167 /* Handle the request. */
1168 switch (req->type)
1169 {
1170 case GETPWBYNAME:
1171 addpwbyname (db, fd, req, key, uid);
1172 break;
1173
1174 case GETPWBYUID:
1175 addpwbyuid (db, fd, req, key, uid);
1176 break;
1177
1178 case GETGRBYNAME:
1179 addgrbyname (db, fd, req, key, uid);
1180 break;
1181
1182 case GETGRBYGID:
1183 addgrbygid (db, fd, req, key, uid);
1184 break;
1185
1186 case GETHOSTBYNAME:
1187 addhstbyname (db, fd, req, key, uid);
1188 break;
1189
1190 case GETHOSTBYNAMEv6:
1191 addhstbynamev6 (db, fd, req, key, uid);
1192 break;
1193
1194 case GETHOSTBYADDR:
1195 addhstbyaddr (db, fd, req, key, uid);
1196 break;
1197
1198 case GETHOSTBYADDRv6:
1199 addhstbyaddrv6 (db, fd, req, key, uid);
1200 break;
1201
1202 case GETAI:
1203 addhstai (db, fd, req, key, uid);
1204 break;
1205
1206 case INITGROUPS:
1207 addinitgroups (db, fd, req, key, uid);
1208 break;
1209
1210 case GETSERVBYNAME:
1211 addservbyname (db, fd, req, key, uid);
1212 break;
1213
1214 case GETSERVBYPORT:
1215 addservbyport (db, fd, req, key, uid);
1216 break;
1217
1218 case GETNETGRENT:
1219 addgetnetgrent (db, fd, req, key, uid);
1220 break;
1221
1222 case INNETGR:
1223 addinnetgr (db, fd, req, key, uid);
1224 break;
1225
1226 case GETSTAT:
1227 case SHUTDOWN:
1228 case INVALIDATE:
1229 {
1230 /* Get the callers credentials. */
1231#ifdef SO_PEERCRED
1232 struct ucred caller;
1233 socklen_t optlen = sizeof (caller);
1234
1235 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1236 {
1237 char buf[256];
1238
1239 dbg_log (_("error getting caller's id: %s"),
1240 strerror_r (errno, buf, sizeof (buf)));
1241 break;
1242 }
1243
1244 uid = caller.uid;
1245#else
1246 /* Some systems have no SO_PEERCRED implementation. They don't
1247 care about security so we don't as well. */
1248 uid = 0;
1249#endif
1250 }
1251
1252 /* Accept shutdown, getstat and invalidate only from root. For
1253 the stat call also allow the user specified in the config file. */
1254 if (req->type == GETSTAT)
1255 {
1256 if (uid == 0 || uid == stat_uid)
1257 send_stats (fd, dbs);
1258 }
1259 else if (uid == 0)
1260 {
1261 if (req->type == INVALIDATE)
1262 invalidate_cache (key, fd);
1263 else
1264 termination_handler (0);
1265 }
1266 break;
1267
1268 case GETFDPW:
1269 case GETFDGR:
1270 case GETFDHST:
1271 case GETFDSERV:
1272 case GETFDNETGR:
1273#ifdef SCM_RIGHTS
1274 send_ro_fd (reqinfo[req->type].db, key, fd);
1275#endif
1276 break;
1277
1278 default:
1279 /* Ignore the command, it's nothing we know. */
1280 break;
1281 }
1282}
1283
/* Read the complete contents of /proc/self/cmdline into a buffer
   obtained from malloc.

   On success the buffer is returned and the number of bytes read is
   stored in *SIZE; the caller owns the buffer and must free it.  On
   failure NULL is returned, errno describes the problem, and *SIZE is
   left untouched.  */
static char *
read_cmdline (size_t *size)
{
  int fd = open ("/proc/self/cmdline", O_RDONLY);
  if (fd < 0)
    return NULL;

  size_t capacity = 1024;
  size_t used = 0;
  /* Error code reported by the common failure exit.  Allocation
     problems are always reported as ENOMEM.  */
  int err = ENOMEM;
  char *data = malloc (capacity);
  if (data == NULL)
    goto fail;

  for (;;)
    {
      if (used == capacity)
        {
          /* The buffer is full: double it, treating wrap-around of the
             new size like an allocation failure.  */
          size_t doubled = 2 * capacity;
          char *grown = doubled < capacity ? NULL : realloc (data, doubled);
          if (grown == NULL)
            goto fail;
          data = grown;
          capacity = doubled;
        }

      /* Retry the read for as long as it is interrupted by a signal.  */
      ssize_t got;
      do
        got = read (fd, data + used, capacity - used);
      while (got == -1 && errno == EINTR);

      if (got == -1)
        {
          /* Remember the reason before free/close can clobber errno.  */
          err = errno;
          goto fail;
        }
      if (got == 0)
        /* End of file.  */
        break;
      used += got;
    }

  close (fd);
  *size = used;
  return data;

 fail:
  free (data);
  close (fd);
  errno = err;
  return NULL;
}
1335
1336
1337/* Restart the process. */
1338static void
1339restart (void)
1340{
1341 /* First determine the parameters. We do not use the parameters
1342 passed to main because then nscd would use the system libc after
1343 restarting even if it was started by a non-system dynamic linker
1344 during glibc testing. */
1345 size_t readlen;
1346 char *cmdline = read_cmdline (&readlen);
1347 if (cmdline == NULL)
1348 {
1349 dbg_log (_("\
1350cannot open /proc/self/cmdline: %m; disabling paranoia mode"));
1351 paranoia = 0;
1352 return;
1353 }
1354
1355 /* Parse the command line. Worst case scenario: every two
1356 characters form one parameter (one character plus NUL). */
1357 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1358 int argc = 0;
1359
1360 for (char *cp = cmdline; cp < cmdline + readlen;)
1361 {
1362 argv[argc++] = cp;
1363 cp = (char *) rawmemchr (cp, '\0') + 1;
1364 }
1365 argv[argc] = NULL;
1366
1367 /* Second, change back to the old user if we changed it. */
1368 if (server_user != NULL)
1369 {
1370 if (setresuid (old_uid, old_uid, old_uid) != 0)
1371 {
1372 dbg_log (_("\
1373cannot change to old UID: %s; disabling paranoia mode"),
1374 strerror (errno));
1375
1376 paranoia = 0;
1377 free (cmdline);
1378 return;
1379 }
1380
1381 if (setresgid (old_gid, old_gid, old_gid) != 0)
1382 {
1383 dbg_log (_("\
1384cannot change to old GID: %s; disabling paranoia mode"),
1385 strerror (errno));
1386
1387 ignore_value (setuid (server_uid));
1388 paranoia = 0;
1389 free (cmdline);
1390 return;
1391 }
1392 }
1393
1394 /* Next change back to the old working directory. */
1395 if (chdir (oldcwd) == -1)
1396 {
1397 dbg_log (_("\
1398cannot change to old working directory: %s; disabling paranoia mode"),
1399 strerror (errno));
1400
1401 if (server_user != NULL)
1402 {
1403 ignore_value (setuid (server_uid));
1404 ignore_value (setgid (server_gid));
1405 }
1406 paranoia = 0;
1407 free (cmdline);
1408 return;
1409 }
1410
1411 /* Synchronize memory. */
1412 int32_t certainly[lastdb];
1413 for (int cnt = 0; cnt < lastdb; ++cnt)
1414 if (dbs[cnt].enabled)
1415 {
1416 /* Make sure nobody keeps using the database. */
1417 dbs[cnt].head->timestamp = 0;
1418 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1419 dbs[cnt].head->nscd_certainly_running = 0;
1420
1421 if (dbs[cnt].persistent)
1422 // XXX async OK?
1423 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1424 }
1425
1426 /* The preparations are done. */
1427#ifdef PATH_MAX
1428 char pathbuf[PATH_MAX];
1429#else
1430 char pathbuf[256];
1431#endif
1432 /* Try to exec the real nscd program so the process name (as reported
1433 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1434 if readlink or the exec with the result of the readlink call fails. */
1435 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1436 if (n != -1)
1437 {
1438 pathbuf[n] = '\0';
1439 execv (pathbuf, argv);
1440 }
1441 execv ("/proc/self/exe", argv);
1442
1443 /* If we come here, we will never be able to re-exec. */
1444 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1445 strerror (errno));
1446
1447 if (server_user != NULL)
1448 {
1449 ignore_value (setuid (server_uid));
1450 ignore_value (setgid (server_gid));
1451 }
1452 if (chdir ("/") != 0)
1453 dbg_log (_("cannot change current working directory to \"/\": %s"),
1454 strerror (errno));
1455 paranoia = 0;
1456 free (cmdline);
1457
1458 /* Reenable the databases. */
1459 time_t now = time (NULL);
1460 for (int cnt = 0; cnt < lastdb; ++cnt)
1461 if (dbs[cnt].enabled)
1462 {
1463 dbs[cnt].head->timestamp = now;
1464 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1465 }
1466}
1467
1468
/* List of file descriptors.  One entry describes one accepted
   connection that is waiting for a worker thread.  */
struct fdlist
{
  /* The connection's descriptor.  */
  int fd;
  /* Next entry on the ready list; NULL marks the entry as unused (this
     is what fd_ready looks for when it needs a free entry).  */
  struct fdlist *next;
};
/* Memory allocated for the list.  fd_ready treats it as an array of
   NCONNS entries.  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  The list is
   circular: READYLIST points to the most recently queued entry and
   READYLIST->next is the entry a worker dequeues next.  NULL means the
   list is empty.  */
static struct fdlist *readylist;

/* Conditional variable and mutex to signal availability of entries in
   READYLIST.
   NOTE(review): an earlier comment said the condvar is initialized
   dynamically to select a clock, but it is statically initialized here
   and the workers wait on it without a timeout -- confirm nothing else
   reinitializes it.  */
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock to use with the condvar.  nscd_run_prune passes it to
   clock_gettime when computing its wait deadlines.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  Incremented and
   decremented by the workers while holding READYLIST_LOCK.  */
static unsigned long int nready;
1491
1492
1493/* Function for the clean-up threads. */
1494static void *
1495__attribute__ ((__noreturn__))
1496nscd_run_prune (void *p)
1497{
1498 const long int my_number = (long int) p;
1499 assert (dbs[my_number].enabled);
1500
1501 int dont_need_update = setup_thread (&dbs[my_number]);
1502
1503 time_t now = time (NULL);
1504
1505 /* We are running. */
1506 dbs[my_number].head->timestamp = now;
1507
1508 struct timespec prune_ts;
1509 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1510 /* Should never happen. */
1511 abort ();
1512
1513 /* Compute the initial timeout time. Prevent all the timers to go
1514 off at the same time by adding a db-based value. */
1515 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1516 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1517
1518 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1519 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1520 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1521
1522 pthread_mutex_lock (prune_lock);
1523 while (1)
1524 {
1525 /* Wait, but not forever. */
1526 int e = 0;
1527 if (! dbs[my_number].clear_cache)
1528 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1529 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1530
1531 time_t next_wait;
1532 now = time (NULL);
1533 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1534 || dbs[my_number].clear_cache)
1535 {
1536 /* We will determine the new timout values based on the
1537 cache content. Should there be concurrent additions to
1538 the cache which are not accounted for in the cache
1539 pruning we want to know about it. Therefore set the
1540 timeout to the maximum. It will be descreased when adding
1541 new entries to the cache, if necessary. */
1542 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1543
1544 /* Unconditionally reset the flag. */
1545 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1546 dbs[my_number].clear_cache = 0;
1547
1548 pthread_mutex_unlock (prune_lock);
1549
1550 /* We use a separate lock for running the prune function (instead
1551 of keeping prune_lock locked) because this enables concurrent
1552 invocations of cache_add which might modify the timeout value. */
1553 pthread_mutex_lock (prune_run_lock);
1554 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1555 pthread_mutex_unlock (prune_run_lock);
1556
1557 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1558 /* If clients cannot determine for sure whether nscd is running
1559 we need to wake up occasionally to update the timestamp.
1560 Wait 90% of the update period. */
1561#define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1562 if (__glibc_unlikely (! dont_need_update))
1563 {
1564 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1565 dbs[my_number].head->timestamp = now;
1566 }
1567
1568 pthread_mutex_lock (prune_lock);
1569
1570 /* Make it known when we will wake up again. */
1571 if (now + next_wait < dbs[my_number].wakeup_time)
1572 dbs[my_number].wakeup_time = now + next_wait;
1573 else
1574 next_wait = dbs[my_number].wakeup_time - now;
1575 }
1576 else
1577 /* The cache was just pruned. Do not do it again now. Just
1578 use the new timeout value. */
1579 next_wait = dbs[my_number].wakeup_time - now;
1580
1581 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1582 /* Should never happen. */
1583 abort ();
1584
1585 /* Compute next timeout time. */
1586 prune_ts.tv_sec += next_wait;
1587 }
1588}
1589
1590
1591/* This is the main loop. It is replicated in different threads but
1592 the use of the ready list makes sure only one thread handles an
1593 incoming connection. */
1594static void *
1595__attribute__ ((__noreturn__))
1596nscd_run_worker (void *p)
1597{
1598 char buf[256];
1599
1600 /* Initial locking. */
1601 pthread_mutex_lock (&readylist_lock);
1602
1603 /* One more thread available. */
1604 ++nready;
1605
1606 while (1)
1607 {
1608 while (readylist == NULL)
1609 pthread_cond_wait (&readylist_cond, &readylist_lock);
1610
1611 struct fdlist *it = readylist->next;
1612 if (readylist->next == readylist)
1613 /* Just one entry on the list. */
1614 readylist = NULL;
1615 else
1616 readylist->next = it->next;
1617
1618 /* Extract the information and mark the record ready to be used
1619 again. */
1620 int fd = it->fd;
1621 it->next = NULL;
1622
1623 /* One more thread available. */
1624 --nready;
1625
1626 /* We are done with the list. */
1627 pthread_mutex_unlock (&readylist_lock);
1628
1629 /* Now read the request. */
1630 request_header req;
1631 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1632 != sizeof (req), 0))
1633 {
1634 /* We failed to read data. Note that this also might mean we
1635 failed because we would have blocked. */
1636 if (debug_level > 0)
1637 dbg_log (_("short read while reading request: %s"),
1638 strerror_r (errno, buf, sizeof (buf)));
1639 goto close_and_out;
1640 }
1641
1642 /* Check whether this is a valid request type. */
1643 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1644 goto close_and_out;
1645
1646 /* Some systems have no SO_PEERCRED implementation. They don't
1647 care about security so we don't as well. */
1648 uid_t uid = -1;
1649#ifdef SO_PEERCRED
1650 pid_t pid = 0;
1651
1652 if (__glibc_unlikely (debug_level > 0))
1653 {
1654 struct ucred caller;
1655 socklen_t optlen = sizeof (caller);
1656
1657 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1658 pid = caller.pid;
1659 }
1660#else
1661 const pid_t pid = 0;
1662#endif
1663
1664 /* It should not be possible to crash the nscd with a silly
1665 request (i.e., a terribly large key). We limit the size to 1kb. */
1666 if (__builtin_expect (req.key_len, 1) < 0
1667 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1668 {
1669 if (debug_level > 0)
1670 dbg_log (_("key length in request too long: %d"), req.key_len);
1671 }
1672 else
1673 {
1674 /* Get the key. */
1675 char keybuf[MAXKEYLEN + 1];
1676
1677 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1678 req.key_len))
1679 != req.key_len, 0))
1680 {
1681 /* Again, this can also mean we would have blocked. */
1682 if (debug_level > 0)
1683 dbg_log (_("short read while reading request key: %s"),
1684 strerror_r (errno, buf, sizeof (buf)));
1685 goto close_and_out;
1686 }
1687 keybuf[req.key_len] = '\0';
1688
1689 if (__builtin_expect (debug_level, 0) > 0)
1690 {
1691#ifdef SO_PEERCRED
1692 if (pid != 0)
1693 dbg_log (_("\
1694handle_request: request received (Version = %d) from PID %ld"),
1695 req.version, (long int) pid);
1696 else
1697#endif
1698 dbg_log (_("\
1699handle_request: request received (Version = %d)"), req.version);
1700 }
1701
1702 /* Phew, we got all the data, now process it. */
1703 handle_request (fd, &req, keybuf, uid, pid);
1704 }
1705
1706 close_and_out:
1707 /* We are done. */
1708 close (fd);
1709
1710 /* Re-locking. */
1711 pthread_mutex_lock (&readylist_lock);
1712
1713 /* One more thread available. */
1714 ++nready;
1715 }
1716 /* NOTREACHED */
1717}
1718
1719
1720static unsigned int nconns;
1721
1722static void
1723fd_ready (int fd)
1724{
1725 pthread_mutex_lock (&readylist_lock);
1726
1727 /* Find an empty entry in FDLIST. */
1728 size_t inner;
1729 for (inner = 0; inner < nconns; ++inner)
1730 if (fdlist[inner].next == NULL)
1731 break;
1732 assert (inner < nconns);
1733
1734 fdlist[inner].fd = fd;
1735
1736 if (readylist == NULL)
1737 readylist = fdlist[inner].next = &fdlist[inner];
1738 else
1739 {
1740 fdlist[inner].next = readylist->next;
1741 readylist = readylist->next = &fdlist[inner];
1742 }
1743
1744 bool do_signal = true;
1745 if (__glibc_unlikely (nready == 0))
1746 {
1747 ++client_queued;
1748 do_signal = false;
1749
1750 /* Try to start another thread to help out. */
1751 pthread_t th;
1752 if (nthreads < max_nthreads
1753 && pthread_create (&th, &attr, nscd_run_worker,
1754 (void *) (long int) nthreads) == 0)
1755 {
1756 /* We got another thread. */
1757 ++nthreads;
1758 /* The new thread might need a kick. */
1759 do_signal = true;
1760 }
1761
1762 }
1763
1764 pthread_mutex_unlock (&readylist_lock);
1765
1766 /* Tell one of the worker threads there is work to do. */
1767 if (do_signal)
1768 pthread_cond_signal (&readylist_cond);
1769}
1770
1771
1772/* Check whether restarting should happen. */
1773static bool
1774restart_p (time_t now)
1775{
1776 return (paranoia && readylist == NULL && nready == nthreads
1777 && now >= restart_time);
1778}
1779
1780
/* Array for times a connection was accepted.  main_loop_poll indexes
   it by slot in its pollfd array, main_loop_epoll by descriptor
   number (there a value of 0 means no connection is pending).  */
static time_t *starttime;

#ifdef HAVE_INOTIFY
/* Inotify event for changed file.  The union lets the raw bytes read
   from the inotify descriptor (BUF) be interpreted as an event (I).  */
union __inev
{
  /* View of the first event stored in BUF.  */
  struct inotify_event i;
  /* Fallback for systems whose headers do not define PATH_MAX.  */
# ifndef PATH_MAX
#  define PATH_MAX 1024
# endif
  /* Room for the event header plus a name of up to PATH_MAX bytes.  */
  char buf[sizeof (struct inotify_event) + PATH_MAX];
};
1794
1795/* Returns 0 if the file is there otherwise -1. */
1796int
1797check_file (struct traced_file *finfo)
1798{
1799 struct stat64 st;
1800 /* We could check mtime and if different re-add
1801 the watches, and invalidate the database, but we
1802 don't because we are called from inotify_check_files
1803 which should be doing that work. If sufficient inotify
1804 events were lost then the next pruning or invalidation
1805 will do the stat and mtime check. We don't do it here to
1806 keep the logic simple. */
1807 if (stat64 (finfo->fname, &st) < 0)
1808 return -1;
1809 return 0;
1810}
1811
1812/* Process the inotify event in INEV. If the event matches any of the files
1813 registered with a database then mark that database as requiring its cache
1814 to be cleared. We indicate the cache needs clearing by setting
1815 TO_CLEAR[DBCNT] to true for the matching database. */
1816static void
1817inotify_check_files (bool *to_clear, union __inev *inev)
1818{
1819 /* Check which of the files changed. */
1820 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1821 {
1822 struct traced_file *finfo = dbs[dbcnt].traced_files;
1823
1824 while (finfo != NULL)
1825 {
1826 /* The configuration file was moved or deleted.
1827 We stop watching it at that point, and reinitialize. */
1828 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1829 && ((inev->i.mask & IN_MOVE_SELF)
1830 || (inev->i.mask & IN_DELETE_SELF)
1831 || (inev->i.mask & IN_IGNORED)))
1832 {
1833 int ret;
1834 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1835
1836 if (check_file (finfo) == 0)
1837 {
1838 dbg_log (_("ignored inotify event for `%s` (file exists)"),
1839 finfo->fname);
1840 return;
1841 }
1842
1843 dbg_log (_("monitored file `%s` was %s, removing watch"),
1844 finfo->fname, moved ? "moved" : "deleted");
1845 /* File was moved out, remove the watch. Watches are
1846 automatically removed when the file is deleted. */
1847 if (moved)
1848 {
1849 ret = inotify_rm_watch (inotify_fd, inev->i.wd);
1850 if (ret < 0)
1851 dbg_log (_("failed to remove file watch `%s`: %s"),
1852 finfo->fname, strerror (errno));
1853 }
1854 finfo->inotify_descr[TRACED_FILE] = -1;
1855 to_clear[dbcnt] = true;
1856 if (finfo->call_res_init)
1857 res_init ();
1858 return;
1859 }
1860 /* The configuration file was open for writing and has just closed.
1861 We reset the cache and reinitialize. */
1862 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1863 && inev->i.mask & IN_CLOSE_WRITE)
1864 {
1865 /* Mark cache as needing to be cleared and reinitialize. */
1866 dbg_log (_("monitored file `%s` was written to"), finfo->fname);
1867 to_clear[dbcnt] = true;
1868 if (finfo->call_res_init)
1869 res_init ();
1870 return;
1871 }
1872 /* The parent directory was moved or deleted. We trigger one last
1873 invalidation. At the next pruning or invalidation we may add
1874 this watch back if the file is present again. */
1875 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1876 && ((inev->i.mask & IN_DELETE_SELF)
1877 || (inev->i.mask & IN_MOVE_SELF)
1878 || (inev->i.mask & IN_IGNORED)))
1879 {
1880 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1881 /* The directory watch may have already been removed
1882 but we don't know so we just remove it again and
1883 ignore the error. Then we remove the file watch.
1884 Note: watches are automatically removed for deleted
1885 files. */
1886 if (moved)
1887 inotify_rm_watch (inotify_fd, inev->i.wd);
1888 if (finfo->inotify_descr[TRACED_FILE] != -1)
1889 {
1890 dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1891 finfo->dname, moved ? "moved" : "deleted", finfo->fname);
1892 if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
1893 dbg_log (_("failed to remove file watch `%s`: %s"),
1894 finfo->dname, strerror (errno));
1895 }
1896 finfo->inotify_descr[TRACED_FILE] = -1;
1897 finfo->inotify_descr[TRACED_DIR] = -1;
1898 to_clear[dbcnt] = true;
1899 if (finfo->call_res_init)
1900 res_init ();
1901 /* Continue to the next entry since this might be the
1902 parent directory for multiple registered files and
1903 we want to remove watches for all registered files. */
1904 continue;
1905 }
1906 /* The parent directory had a create or moved to event. */
1907 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1908 && ((inev->i.mask & IN_MOVED_TO)
1909 || (inev->i.mask & IN_CREATE))
1910 && strcmp (inev->i.name, finfo->sfname) == 0)
1911 {
1912 /* We detected a directory change. We look for the creation
1913 of the file we are tracking or the move of the same file
1914 into the directory. */
1915 int ret;
1916 dbg_log (_("monitored file `%s` was %s, adding watch"),
1917 finfo->fname,
1918 inev->i.mask & IN_CREATE ? "created" : "moved into place");
1919 /* File was moved in or created. Regenerate the watch. */
1920 if (finfo->inotify_descr[TRACED_FILE] != -1)
1921 inotify_rm_watch (inotify_fd,
1922 finfo->inotify_descr[TRACED_FILE]);
1923
1924 ret = inotify_add_watch (inotify_fd,
1925 finfo->fname,
1926 TRACED_FILE_MASK);
1927 if (ret < 0)
1928 dbg_log (_("failed to add file watch `%s`: %s"),
1929 finfo->fname, strerror (errno));
1930
1931 finfo->inotify_descr[TRACED_FILE] = ret;
1932
1933 /* The file is new or moved so mark cache as needing to
1934 be cleared and reinitialize. */
1935 to_clear[dbcnt] = true;
1936 if (finfo->call_res_init)
1937 res_init ();
1938
1939 /* Done re-adding the watch. Don't return, we may still
1940 have other files in this same directory, same watch
1941 descriptor, and need to process them. */
1942 }
1943 /* Other events are ignored, and we move on to the next file. */
1944 finfo = finfo->next;
1945 }
1946 }
1947}
1948
1949/* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1950 for the associated database, otherwise do nothing. The TO_CLEAR array must
1951 have LASTDB entries. */
1952static inline void
1953clear_db_cache (bool *to_clear)
1954{
1955 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1956 if (to_clear[dbcnt])
1957 {
1958 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1959 dbs[dbcnt].clear_cache = 1;
1960 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1961 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1962 }
1963}
1964
1965int
1966handle_inotify_events (void)
1967{
1968 bool to_clear[lastdb] = { false, };
1969 union __inev inev;
1970
1971 /* Read all inotify events for files registered via
1972 register_traced_file(). */
1973 while (1)
1974 {
1975 /* Potentially read multiple events into buf. */
1976 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
1977 &inev.buf,
1978 sizeof (inev)));
1979 if (nb < (ssize_t) sizeof (struct inotify_event))
1980 {
1981 /* Not even 1 event. */
1982 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
1983 return -1;
1984 /* Done reading events that are ready. */
1985 break;
1986 }
1987 /* Process all events. The normal inotify interface delivers
1988 complete events on a read and never a partial event. */
1989 char *eptr = &inev.buf[0];
1990 ssize_t count;
1991 while (1)
1992 {
1993 /* Check which of the files changed. */
1994 inotify_check_files (to_clear, &inev);
1995 count = sizeof (struct inotify_event) + inev.i.len;
1996 eptr += count;
1997 nb -= count;
1998 if (nb >= (ssize_t) sizeof (struct inotify_event))
1999 memcpy (&inev, eptr, nb);
2000 else
2001 break;
2002 }
2003 continue;
2004 }
2005 /* Actually perform the cache clearing. */
2006 clear_db_cache (to_clear);
2007 return 0;
2008}
2009
2010#endif
2011
2012static void
2013__attribute__ ((__noreturn__))
2014main_loop_poll (void)
2015{
2016 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
2017 * sizeof (conns[0]));
2018
2019 conns[0].fd = sock;
2020 conns[0].events = POLLRDNORM;
2021 size_t nused = 1;
2022 size_t firstfree = 1;
2023
2024#ifdef HAVE_INOTIFY
2025 if (inotify_fd != -1)
2026 {
2027 conns[1].fd = inotify_fd;
2028 conns[1].events = POLLRDNORM;
2029 nused = 2;
2030 firstfree = 2;
2031 }
2032#endif
2033
2034#ifdef HAVE_NETLINK
2035 size_t idx_nl_status_fd = 0;
2036 if (nl_status_fd != -1)
2037 {
2038 idx_nl_status_fd = nused;
2039 conns[nused].fd = nl_status_fd;
2040 conns[nused].events = POLLRDNORM;
2041 ++nused;
2042 firstfree = nused;
2043 }
2044#endif
2045
2046 while (1)
2047 {
2048 /* Wait for any event. We wait at most a couple of seconds so
2049 that we can check whether we should close any of the accepted
2050 connections since we have not received a request. */
2051#define MAX_ACCEPT_TIMEOUT 30
2052#define MIN_ACCEPT_TIMEOUT 5
2053#define MAIN_THREAD_TIMEOUT \
2054 (MAX_ACCEPT_TIMEOUT * 1000 \
2055 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2056
2057 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
2058
2059 time_t now = time (NULL);
2060
2061 /* If there is a descriptor ready for reading or there is a new
2062 connection, process this now. */
2063 if (n > 0)
2064 {
2065 if (conns[0].revents != 0)
2066 {
2067 /* We have a new incoming connection. Accept the connection. */
2068 int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2069 SOCK_NONBLOCK));
2070
2071 /* Use the descriptor if we have not reached the limit. */
2072 if (fd >= 0)
2073 {
2074 if (firstfree < nconns)
2075 {
2076 conns[firstfree].fd = fd;
2077 conns[firstfree].events = POLLRDNORM;
2078 starttime[firstfree] = now;
2079 if (firstfree >= nused)
2080 nused = firstfree + 1;
2081
2082 do
2083 ++firstfree;
2084 while (firstfree < nused && conns[firstfree].fd != -1);
2085 }
2086 else
2087 /* We cannot use the connection so close it. */
2088 close (fd);
2089 }
2090
2091 --n;
2092 }
2093
2094 size_t first = 1;
2095#ifdef HAVE_INOTIFY
2096 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2097 {
2098 if (conns[1].revents != 0)
2099 {
2100 int ret;
2101 ret = handle_inotify_events ();
2102 if (ret == -1)
2103 {
2104 /* Something went wrong when reading the inotify
2105 data. Better disable inotify. */
2106 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2107 conns[1].fd = -1;
2108 firstfree = 1;
2109 if (nused == 2)
2110 nused = 1;
2111 close (inotify_fd);
2112 inotify_fd = -1;
2113 }
2114 --n;
2115 }
2116
2117 first = 2;
2118 }
2119#endif
2120
2121#ifdef HAVE_NETLINK
2122 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2123 {
2124 char buf[4096];
2125 /* Read all the data. We do not interpret it here. */
2126 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2127 sizeof (buf))) != -1)
2128 ;
2129
2130 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2131 = __bump_nl_timestamp ();
2132 }
2133#endif
2134
2135 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2136 if (conns[cnt].revents != 0)
2137 {
2138 fd_ready (conns[cnt].fd);
2139
2140 /* Clean up the CONNS array. */
2141 conns[cnt].fd = -1;
2142 if (cnt < firstfree)
2143 firstfree = cnt;
2144 if (cnt == nused - 1)
2145 do
2146 --nused;
2147 while (conns[nused - 1].fd == -1);
2148
2149 --n;
2150 }
2151 }
2152
2153 /* Now find entries which have timed out. */
2154 assert (nused > 0);
2155
2156 /* We make the timeout length depend on the number of file
2157 descriptors currently used. */
2158#define ACCEPT_TIMEOUT \
2159 (MAX_ACCEPT_TIMEOUT \
2160 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2161 time_t laststart = now - ACCEPT_TIMEOUT;
2162
2163 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2164 {
2165 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2166 {
2167 /* Remove the entry, it timed out. */
2168 (void) close (conns[cnt].fd);
2169 conns[cnt].fd = -1;
2170
2171 if (cnt < firstfree)
2172 firstfree = cnt;
2173 if (cnt == nused - 1)
2174 do
2175 --nused;
2176 while (conns[nused - 1].fd == -1);
2177 }
2178 }
2179
2180 if (restart_p (now))
2181 restart ();
2182 }
2183}
2184
2185
#ifdef HAVE_EPOLL
/* Main loop of the main thread, based on the epoll descriptor EFD.
   The listening socket and, when in use, the inotify and netlink
   descriptors are registered with EFD.  Accepted connections are
   registered as well and handed to a worker through fd_ready once they
   become readable; connections which stay silent for too long are
   closed.  STARTTIME is indexed by descriptor number here, zero
   meaning "no connection pending".

   The function only returns if one of the initial registrations
   fails, i.e. when epoll cannot be used.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* NUSED is referenced by the MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT
     macros.  */
  int nused = 1;
  /* Highest descriptor number which may have a connection pending.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                  SOCK_NONBLOCK));

            /* Use the descriptor if we have not reached the limit.  */
            if (fd >= 0)
              {
                /* Try to add the new descriptor.  EV.events still holds
                   EPOLLRDNORM from the registrations above.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
# ifdef HAVE_INOTIFY
        else if (revs[cnt].data.fd == inotify_fd)
          {
            int ret;
            ret = handle_inotify_events ();
            if (ret == -1)
              {
                /* Something went wrong when reading the inotify
                   data.  Better disable inotify.  */
                dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
                (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
                close (inotify_fd);
                inotify_fd = -1;
                break;
              }
          }
# endif
# ifdef HAVE_NETLINK
        else if (revs[cnt].data.fd == nl_status_fd)
          {
            char buf[4096];
            /* Read all the data.  We do not interpret it here.  */
            while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                             sizeof (buf))) != -1)
              ;

            /* Record the new timestamp in the hosts database header,
               exactly as main_loop_poll does; without the store the
               bumped value never reaches that header.  */
            dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
              = __bump_nl_timestamp ();
          }
# endif
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
# ifdef HAVE_INOTIFY
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
# endif
# ifdef HAVE_NETLINK
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
# endif
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          /* Nothing pending at the top of the range, shrink it.  */
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
2337
2338
2339/* Start all the threads we want. The initial process is thread no. 1. */
2340void
2341start_threads (void)
2342{
2343 /* Initialize the conditional variable we will use. The only
2344 non-standard attribute we might use is the clock selection. */
2345 pthread_condattr_t condattr;
2346 pthread_condattr_init (&condattr);
2347
2348#if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2349 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2350 /* Determine whether the monotonous clock is available. */
2351 struct timespec dummy;
2352# if _POSIX_MONOTONIC_CLOCK == 0
2353 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2354# endif
2355# if _POSIX_CLOCK_SELECTION == 0
2356 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2357# endif
2358 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2359 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2360 timeout_clock = CLOCK_MONOTONIC;
2361#endif
2362
2363 /* Create the attribute for the threads. They are all created
2364 detached. */
2365 pthread_attr_init (&attr);
2366 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2367 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2368 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2369
2370 /* We allow less than LASTDB threads only for debugging. */
2371 if (debug_level == 0)
2372 nthreads = MAX (nthreads, lastdb);
2373
2374 /* Create the threads which prune the databases. */
2375 // XXX Ideally this work would be done by some of the worker threads.
2376 // XXX But this is problematic since we would need to be able to wake
2377 // XXX them up explicitly as well as part of the group handling the
2378 // XXX ready-list. This requires an operation where we can wait on
2379 // XXX two conditional variables at the same time. This operation
2380 // XXX does not exist (yet).
2381 for (long int i = 0; i < lastdb; ++i)
2382 {
2383 /* Initialize the conditional variable. */
2384 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2385 {
2386 dbg_log (_("could not initialize conditional variable"));
2387 do_exit (1, 0, NULL);
2388 }
2389
2390 pthread_t th;
2391 if (dbs[i].enabled
2392 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2393 {
2394 dbg_log (_("could not start clean-up thread; terminating"));
2395 do_exit (1, 0, NULL);
2396 }
2397 }
2398
2399 pthread_condattr_destroy (&condattr);
2400
2401 for (long int i = 0; i < nthreads; ++i)
2402 {
2403 pthread_t th;
2404 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2405 {
2406 if (i == 0)
2407 {
2408 dbg_log (_("could not start any worker thread; terminating"));
2409 do_exit (1, 0, NULL);
2410 }
2411
2412 break;
2413 }
2414 }
2415
2416 /* Now it is safe to let the parent know that we're doing fine and it can
2417 exit. */
2418 notify_parent (0);
2419
2420 /* Determine how much room for descriptors we should initially
2421 allocate. This might need to change later if we cap the number
2422 with MAXCONN. */
2423 const long int nfds = sysconf (_SC_OPEN_MAX);
2424#define MINCONN 32
2425#define MAXCONN 16384
2426 if (nfds == -1 || nfds > MAXCONN)
2427 nconns = MAXCONN;
2428 else if (nfds < MINCONN)
2429 nconns = MINCONN;
2430 else
2431 nconns = nfds;
2432
2433 /* We need memory to pass descriptors on to the worker threads. */
2434 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2435 /* Array to keep track when connection was accepted. */
2436 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2437
2438 /* In the main thread we execute the loop which handles incoming
2439 connections. */
2440#ifdef HAVE_EPOLL
2441 int efd = epoll_create (100);
2442 if (efd != -1)
2443 {
2444 main_loop_epoll (efd);
2445 close (efd);
2446 }
2447#endif
2448
2449 main_loop_poll ();
2450}
2451
2452
2453/* Look up the uid, gid, and supplementary groups to run nscd as. When
2454 this function is called, we are not listening on the nscd socket yet so
2455 we can just use the ordinary lookup functions without causing a lockup */
2456static void
2457begin_drop_privileges (void)
2458{
2459 struct passwd *pwd = getpwnam (server_user);
2460
2461 if (pwd == NULL)
2462 {
2463 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2464 do_exit (EXIT_FAILURE, 0,
2465 _("Failed to run nscd as user '%s'"), server_user);
2466 }
2467
2468 server_uid = pwd->pw_uid;
2469 server_gid = pwd->pw_gid;
2470
2471 /* Save the old UID/GID if we have to change back. */
2472 if (paranoia)
2473 {
2474 old_uid = getuid ();
2475 old_gid = getgid ();
2476 }
2477
2478 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2479 {
2480 /* This really must never happen. */
2481 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2482 do_exit (EXIT_FAILURE, errno,
2483 _("initial getgrouplist failed"));
2484 }
2485
2486 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2487
2488 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2489 == -1)
2490 {
2491 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2492 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2493 }
2494}
2495
2496
2497/* Call setgroups(), setgid(), and setuid() to drop root privileges and
2498 run nscd as the user specified in the configuration file. */
2499static void
2500finish_drop_privileges (void)
2501{
2502#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2503 /* We need to preserve the capabilities to connect to the audit daemon. */
2504 cap_t new_caps = preserve_capabilities ();
2505#endif
2506
2507 if (setgroups (server_ngroups, server_groups) == -1)
2508 {
2509 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2510 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2511 }
2512
2513 int res;
2514 if (paranoia)
2515 res = setresgid (server_gid, server_gid, old_gid);
2516 else
2517 res = setgid (server_gid);
2518 if (res == -1)
2519 {
2520 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2521 do_exit (4, errno, "setgid");
2522 }
2523
2524 if (paranoia)
2525 res = setresuid (server_uid, server_uid, old_uid);
2526 else
2527 res = setuid (server_uid);
2528 if (res == -1)
2529 {
2530 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2531 do_exit (4, errno, "setuid");
2532 }
2533
2534#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2535 /* Remove the temporary capabilities. */
2536 install_real_capabilities (new_caps);
2537#endif
2538}
2539