Back to index

glibc  2.9
connections.c
Go to the documentation of this file.
00001 /* Inner loops of cache daemon.
00002    Copyright (C) 1998-2007, 2008 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
00005 
00006    This program is free software; you can redistribute it and/or modify
00007    it under the terms of the GNU General Public License as published
00008    by the Free Software Foundation; version 2 of the License, or
00009    (at your option) any later version.
00010 
00011    This program is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014    GNU General Public License for more details.
00015 
00016    You should have received a copy of the GNU General Public License
00017    along with this program; if not, write to the Free Software Foundation,
00018    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
00019 
00020 #include <alloca.h>
00021 #include <assert.h>
00022 #include <atomic.h>
00023 #include <error.h>
00024 #include <errno.h>
00025 #include <fcntl.h>
00026 #include <grp.h>
00027 #include <libintl.h>
00028 #include <pthread.h>
00029 #include <pwd.h>
00030 #include <resolv.h>
00031 #include <stdio.h>
00032 #include <stdlib.h>
00033 #include <unistd.h>
00034 #include <arpa/inet.h>
00035 #ifdef HAVE_EPOLL
00036 # include <sys/epoll.h>
00037 #endif
00038 #ifdef HAVE_INOTIFY
00039 # include <sys/inotify.h>
00040 #endif
00041 #include <sys/mman.h>
00042 #include <sys/param.h>
00043 #include <sys/poll.h>
00044 #ifdef HAVE_SENDFILE
00045 # include <sys/sendfile.h>
00046 #endif
00047 #include <sys/socket.h>
00048 #include <sys/stat.h>
00049 #include <sys/un.h>
00050 
00051 #include "nscd.h"
00052 #include "dbg_log.h"
00053 #include "selinux.h"
00054 #include <resolv/resolv.h>
00055 #ifdef HAVE_SENDFILE
00056 # include <kernel-features.h>
00057 #endif
00058 
00059 
/* Error-checking allocation wrappers; they are defined outside this
   file.  */
extern void *xmalloc (size_t size);
extern void *xcalloc (size_t nmemb, size_t size);
extern void *xrealloc (void *old, size_t size);

/* Fallback for systems whose headers do not provide NGROUPS.  */
#ifndef NGROUPS
# define NGROUPS 32
#endif

/* State needed to run nscd as an unprivileged user.  SERVER_USER being
   non-NULL is what makes nscd_init start dropping privileges.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
static gid_t *server_groups;
static int server_ngroups;

/* User name and ID related to statistics requests.
   NOTE(review): presumably the user allowed to query statistics --
   confirm against the request handling code.  */
const char *stat_user;
uid_t stat_uid;

/* Thread attribute object shared by this file.  */
static pthread_attr_t attr;

/* The two halves of the privilege drop, defined later in this file.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
00081 
00082 /* Map request type to a string.  */
00083 const char *const serv2str[LASTREQ] =
00084 {
00085   [GETPWBYNAME] = "GETPWBYNAME",
00086   [GETPWBYUID] = "GETPWBYUID",
00087   [GETGRBYNAME] = "GETGRBYNAME",
00088   [GETGRBYGID] = "GETGRBYGID",
00089   [GETHOSTBYNAME] = "GETHOSTBYNAME",
00090   [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
00091   [GETHOSTBYADDR] = "GETHOSTBYADDR",
00092   [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
00093   [SHUTDOWN] = "SHUTDOWN",
00094   [GETSTAT] = "GETSTAT",
00095   [INVALIDATE] = "INVALIDATE",
00096   [GETFDPW] = "GETFDPW",
00097   [GETFDGR] = "GETFDGR",
00098   [GETFDHST] = "GETFDHST",
00099   [GETAI] = "GETAI",
00100   [INITGROUPS] = "INITGROUPS",
00101   [GETSERVBYNAME] = "GETSERVBYNAME",
00102   [GETSERVBYPORT] = "GETSERVBYPORT",
00103   [GETFDSERV] = "GETFDSERV"
00104 };
00105 
00106 /* The control data structures for the services.  */
00107 struct database_dyn dbs[lastdb] =
00108 {
00109   [pwddb] = {
00110     .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
00111     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
00112     .enabled = 0,
00113     .check_file = 1,
00114     .persistent = 0,
00115     .propagate = 1,
00116     .shared = 0,
00117     .max_db_size = DEFAULT_MAX_DB_SIZE,
00118     .suggested_module = DEFAULT_SUGGESTED_MODULE,
00119    .reset_res = 0,
00120     .filename = "/etc/passwd",
00121     .db_filename = _PATH_NSCD_PASSWD_DB,
00122     .disabled_iov = &pwd_iov_disabled,
00123     .postimeout = 3600,
00124     .negtimeout = 20,
00125     .wr_fd = -1,
00126     .ro_fd = -1,
00127     .mmap_used = false
00128   },
00129   [grpdb] = {
00130     .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
00131     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
00132     .enabled = 0,
00133     .check_file = 1,
00134     .persistent = 0,
00135     .propagate = 1,
00136     .shared = 0,
00137     .max_db_size = DEFAULT_MAX_DB_SIZE,
00138     .suggested_module = DEFAULT_SUGGESTED_MODULE,
00139     .reset_res = 0,
00140     .filename = "/etc/group",
00141     .db_filename = _PATH_NSCD_GROUP_DB,
00142     .disabled_iov = &grp_iov_disabled,
00143     .postimeout = 3600,
00144     .negtimeout = 60,
00145     .wr_fd = -1,
00146     .ro_fd = -1,
00147     .mmap_used = false
00148   },
00149   [hstdb] = {
00150     .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
00151     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
00152     .enabled = 0,
00153     .check_file = 1,
00154     .persistent = 0,
00155     .propagate = 0,         /* Not used.  */
00156     .shared = 0,
00157     .max_db_size = DEFAULT_MAX_DB_SIZE,
00158     .suggested_module = DEFAULT_SUGGESTED_MODULE,
00159     .reset_res = 1,
00160     .filename = "/etc/hosts",
00161     .db_filename = _PATH_NSCD_HOSTS_DB,
00162     .disabled_iov = &hst_iov_disabled,
00163     .postimeout = 3600,
00164     .negtimeout = 20,
00165     .wr_fd = -1,
00166     .ro_fd = -1,
00167     .mmap_used = false
00168   },
00169   [servdb] = {
00170     .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
00171     .prune_lock = PTHREAD_MUTEX_INITIALIZER,
00172     .enabled = 0,
00173     .check_file = 1,
00174     .persistent = 0,
00175     .propagate = 0,         /* Not used.  */
00176     .shared = 0,
00177     .max_db_size = DEFAULT_MAX_DB_SIZE,
00178     .suggested_module = DEFAULT_SUGGESTED_MODULE,
00179     .reset_res = 0,
00180     .filename = "/etc/services",
00181     .db_filename = _PATH_NSCD_SERVICES_DB,
00182     .disabled_iov = &serv_iov_disabled,
00183     .postimeout = 28800,
00184     .negtimeout = 20,
00185     .wr_fd = -1,
00186     .ro_fd = -1,
00187     .mmap_used = false
00188   }
00189 };
00190 
00191 
00192 /* Mapping of request type to database.  */
00193 static struct
00194 {
00195   bool data_request;
00196   struct database_dyn *db;
00197 } const reqinfo[LASTREQ] =
00198 {
00199   [GETPWBYNAME] = { true, &dbs[pwddb] },
00200   [GETPWBYUID] = { true, &dbs[pwddb] },
00201   [GETGRBYNAME] = { true, &dbs[grpdb] },
00202   [GETGRBYGID] = { true, &dbs[grpdb] },
00203   [GETHOSTBYNAME] = { true, &dbs[hstdb] },
00204   [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
00205   [GETHOSTBYADDR] = { true, &dbs[hstdb] },
00206   [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
00207   [SHUTDOWN] = { false, NULL },
00208   [GETSTAT] = { false, NULL },
00209   [SHUTDOWN] = { false, NULL },
00210   [GETFDPW] = { false, &dbs[pwddb] },
00211   [GETFDGR] = { false, &dbs[grpdb] },
00212   [GETFDHST] = { false, &dbs[hstdb] },
00213   [GETAI] = { true, &dbs[hstdb] },
00214   [INITGROUPS] = { true, &dbs[grpdb] },
00215   [GETSERVBYNAME] = { true, &dbs[servdb] },
00216   [GETSERVBYPORT] = { true, &dbs[servdb] },
00217   [GETFDSERV] = { false, &dbs[servdb] }
00218 };
00219 
00220 
00221 /* Initial number of threads to use.  */
00222 int nthreads = -1;
00223 /* Maximum number of threads to use.  */
00224 int max_nthreads = 32;
00225 
00226 /* Socket for incoming connections.  */
00227 static int sock;
00228 
00229 #ifdef HAVE_INOTIFY
00230 /* Inotify descriptor.  */
00231 static int inotify_fd = -1;
00232 
00233 /* Watch descriptor for resolver configuration file.  */
00234 static int resolv_conf_descr = -1;
00235 #endif
00236 
00237 #ifndef __ASSUME_SOCK_CLOEXEC
00238 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
00239    before be know the result.  */
00240 static int have_sock_cloexec;
00241 /* The paccept syscall was introduced at the same time as SOCK_CLOEXEC.  */
00242 # define have_paccept -1    // XXX For the time being there is no such call
00243 #endif
00244 
00245 /* Number of times clients had to wait.  */
00246 unsigned long int client_queued;
00247 
00248 /* Data structure for recording in-flight memory allocation.  */
00249 __thread struct mem_in_flight mem_in_flight attribute_tls_model_ie;
00250 /* Global list of the mem_in_flight variables of all the threads.  */
00251 struct mem_in_flight *mem_in_flight_list;
00252 
00253 
/* Send all LEN bytes of BUF over the socket FD.

   send is repeated until everything has been transmitted, restarting
   calls that were interrupted by a signal.  MSG_NOSIGNAL keeps a vanished
   peer from raising SIGPIPE.  Returns the number of bytes sent, which is
   less than LEN only if send reported zero progress, or the negative
   result of the failing send call.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;

  for (;;)
    {
      ssize_t sent;

      /* Retry for as long as the call is merely interrupted.  */
      do
        sent = send (fd, cursor, left, MSG_NOSIGNAL);
      while (sent == -1 && errno == EINTR);

      if (sent < 0)
        return sent;
      if (sent == 0)
        break;

      cursor += sent;
      left -= sent;
      if (left == 0)
        break;
    }

  return len - left;
}
00270 
00271 
00272 #ifdef HAVE_SENDFILE
/* Transfer LEN bytes from FROMFD, starting at file offset OFF, to TOFD
   using sendfile.

   The call is repeated until the requested amount has been transferred,
   restarting calls that were interrupted by a signal.  Returns the number
   of bytes transferred, which is less than LEN only if sendfile reported
   zero progress (e.g. at end of file), or the negative result of the
   failing sendfile call.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t remaining = len;

  for (;;)
    {
      ssize_t chunk;

      /* Retry for as long as the call is merely interrupted.  sendfile
         advances OFF itself.  */
      do
        chunk = sendfile (tofd, fromfd, &off, remaining);
      while (chunk == -1 && errno == EINTR);

      if (chunk < 0)
        return chunk;
      if (chunk == 0)
        break;

      remaining -= chunk;
      if (remaining <= 0)
        break;
    }

  return len - remaining;
}
00289 #endif
00290 
00291 
/* Tags stored in the byte-per-offset usage map that check_use fills in
   while a persistent database is verified.  The low bits name the kind of
   object occupying a byte; the marker bits are OR-ed on top.  */
enum usekey
  {
    /* The byte is not claimed by any object.  */
    use_not = 0,

    /* Kinds of objects: hash entry, separately stored key, data record.  */
    use_he = 1,
#if SEPARATE_KEY
    use_key = 2,
#endif
    use_data = 3,

    /* Marker bits.  These three are symbolic constants that are only
       used combined with a kind: first byte of an object, last byte of
       an object, and "referenced by a hash entry whose `first' flag is
       set".  */
    use_begin = 32,
    use_end = 64,
    use_first = 16,

    /* Combinations for hash entries.  */
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,

#if SEPARATE_KEY
    /* Combinations for keys.  */
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif

    /* Combinations for data records.  */
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
00314 
00315 
00316 static int
00317 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
00318           enum usekey use, ref_t start, size_t len)
00319 {
00320   assert (len >= 2);
00321 
00322   if (start > first_free || start + len > first_free
00323       || (start & BLOCK_ALIGN_M1))
00324     return 0;
00325 
00326   if (usemap[start] == use_not)
00327     {
00328       /* Add the start marker.  */
00329       usemap[start] = use | use_begin;
00330       use &= ~use_first;
00331 
00332       while (--len > 0)
00333        if (usemap[++start] != use_not)
00334          return 0;
00335        else
00336          usemap[start] = use;
00337 
00338       /* Add the end marker.  */
00339       usemap[start] = use | use_end;
00340     }
00341   else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
00342     {
00343       /* Hash entries can't be shared.  */
00344       if (use == use_he)
00345        return 0;
00346 
00347       usemap[start] |= (use & use_first);
00348       use &= ~use_first;
00349 
00350       while (--len > 1)
00351        if (usemap[++start] != use)
00352          return 0;
00353 
00354       if (usemap[++start] != (use | use_end))
00355        return 0;
00356     }
00357   else
00358     /* Points to a wrong object or somewhere in the middle.  */
00359     return 0;
00360 
00361   return 1;
00362 }
00363 
00364 
00365 /* Verify data in persistent database.  */
00366 static int
00367 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
00368 {
00369   assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
00370 
00371   time_t now = time (NULL);
00372 
00373   struct database_pers_head *head = mem;
00374   struct database_pers_head head_copy = *head;
00375 
00376   /* Check that the header that was read matches the head in the database.  */
00377   if (memcmp (head, readhead, sizeof (*head)) != 0)
00378     return 0;
00379 
00380   /* First some easy tests: make sure the database header is sane.  */
00381   if (head->version != DB_VERSION
00382       || head->header_size != sizeof (*head)
00383       /* We allow a timestamp to be one hour ahead of the current time.
00384         This should cover daylight saving time changes.  */
00385       || head->timestamp > now + 60 * 60 + 60
00386       || (head->gc_cycle & 1)
00387       || head->module == 0
00388       || (size_t) head->module > INT32_MAX / sizeof (ref_t)
00389       || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
00390       || head->first_free < 0
00391       || head->first_free > head->data_size
00392       || (head->first_free & BLOCK_ALIGN_M1) != 0
00393       || head->maxnentries < 0
00394       || head->maxnsearched < 0)
00395     return 0;
00396 
00397   uint8_t *usemap = calloc (head->first_free, 1);
00398   if (usemap == NULL)
00399     return 0;
00400 
00401   const char *data = (char *) &head->array[roundup (head->module,
00402                                               ALIGN / sizeof (ref_t))];
00403 
00404   nscd_ssize_t he_cnt = 0;
00405   for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
00406     {
00407       ref_t trail = head->array[cnt];
00408       ref_t work = trail;
00409       int tick = 0;
00410 
00411       while (work != ENDREF)
00412        {
00413          if (! check_use (data, head->first_free, usemap, use_he, work,
00414                         sizeof (struct hashentry)))
00415            goto fail;
00416 
00417          /* Now we know we can dereference the record.  */
00418          struct hashentry *here = (struct hashentry *) (data + work);
00419 
00420          ++he_cnt;
00421 
00422          /* Make sure the record is for this type of service.  */
00423          if (here->type >= LASTREQ
00424              || reqinfo[here->type].db != &dbs[dbnr])
00425            goto fail;
00426 
00427          /* Validate boolean field value.  */
00428          if (here->first != false && here->first != true)
00429            goto fail;
00430 
00431          if (here->len < 0)
00432            goto fail;
00433 
00434          /* Now the data.  */
00435          if (here->packet < 0
00436              || here->packet > head->first_free
00437              || here->packet + sizeof (struct datahead) > head->first_free)
00438            goto fail;
00439 
00440          struct datahead *dh = (struct datahead *) (data + here->packet);
00441 
00442          if (! check_use (data, head->first_free, usemap,
00443                         use_data | (here->first ? use_first : 0),
00444                         here->packet, dh->allocsize))
00445            goto fail;
00446 
00447          if (dh->allocsize < sizeof (struct datahead)
00448              || dh->recsize > dh->allocsize
00449              || (dh->notfound != false && dh->notfound != true)
00450              || (dh->usable != false && dh->usable != true))
00451            goto fail;
00452 
00453          if (here->key < here->packet + sizeof (struct datahead)
00454              || here->key > here->packet + dh->allocsize
00455              || here->key + here->len > here->packet + dh->allocsize)
00456            {
00457 #if SEPARATE_KEY
00458              /* If keys can appear outside of data, this should be done
00459                instead.  But gc doesn't mark the data in that case.  */
00460              if (! check_use (data, head->first_free, usemap,
00461                             use_key | (here->first ? use_first : 0),
00462                             here->key, here->len))
00463 #endif
00464               goto fail;
00465            }
00466 
00467          work = here->next;
00468 
00469          if (work == trail)
00470            /* A circular list, this must not happen.  */
00471            goto fail;
00472          if (tick)
00473            trail = ((struct hashentry *) (data + trail))->next;
00474          tick = 1 - tick;
00475        }
00476     }
00477 
00478   if (he_cnt != head->nentries)
00479     goto fail;
00480 
00481   /* See if all data and keys had at least one reference from
00482      he->first == true hashentry.  */
00483   for (ref_t idx = 0; idx < head->first_free; ++idx)
00484     {
00485 #if SEPARATE_KEY
00486       if (usemap[idx] == use_key_begin)
00487        goto fail;
00488 #endif
00489       if (usemap[idx] == use_data_begin)
00490        goto fail;
00491     }
00492 
00493   /* Finally, make sure the database hasn't changed since the first test.  */
00494   if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
00495     goto fail;
00496 
00497   free (usemap);
00498   return 1;
00499 
00500 fail:
00501   free (usemap);
00502   return 0;
00503 }
00504 
00505 
00506 #ifdef O_CLOEXEC
00507 # define EXTRA_O_FLAGS O_CLOEXEC
00508 #else
00509 # define EXTRA_O_FLAGS 0
00510 #endif
00511 
00512 
00513 /* Initialize database information structures.  */
00514 void
00515 nscd_init (void)
00516 {
00517   /* Look up unprivileged uid/gid/groups before we start listening on the
00518      socket  */
00519   if (server_user != NULL)
00520     begin_drop_privileges ();
00521 
00522   if (nthreads == -1)
00523     /* No configuration for this value, assume a default.  */
00524     nthreads = 4;
00525 
00526 #ifdef HAVE_INOTIFY
00527   /* Use inotify to recognize changed files.  */
00528   inotify_fd = inotify_init1 (IN_NONBLOCK);
00529 # ifndef __ASSUME_IN_NONBLOCK
00530   if (inotify_fd == -1 && errno == ENOSYS)
00531     {
00532       inotify_fd = inotify_init ();
00533       if (inotify_fd != -1)
00534        fcntl (inotify_fd, F_SETFL, O_RDONLY | O_NONBLOCK);
00535     }
00536 # endif
00537 #endif
00538 
00539   for (size_t cnt = 0; cnt < lastdb; ++cnt)
00540     if (dbs[cnt].enabled)
00541       {
00542        pthread_rwlock_init (&dbs[cnt].lock, NULL);
00543        pthread_mutex_init (&dbs[cnt].memlock, NULL);
00544 
00545        if (dbs[cnt].persistent)
00546          {
00547            /* Try to open the appropriate file on disk.  */
00548            int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
00549            if (fd != -1)
00550              {
00551               char *msg = NULL;
00552               struct stat64 st;
00553               void *mem;
00554               size_t total;
00555               struct database_pers_head head;
00556               ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
00557                                                 sizeof (head)));
00558               if (n != sizeof (head) || fstat64 (fd, &st) != 0)
00559                 {
00560                 fail_db_errno:
00561                   /* The code is single-threaded at this point so
00562                      using strerror is just fine.  */
00563                   msg = strerror (errno);
00564                 fail_db:
00565                   dbg_log (_("invalid persistent database file \"%s\": %s"),
00566                           dbs[cnt].db_filename, msg);
00567                   unlink (dbs[cnt].db_filename);
00568                 }
00569               else if (head.module == 0 && head.data_size == 0)
00570                 {
00571                   /* The file has been created, but the head has not
00572                      been initialized yet.  */
00573                   msg = _("uninitialized header");
00574                   goto fail_db;
00575                 }
00576               else if (head.header_size != (int) sizeof (head))
00577                 {
00578                   msg = _("header size does not match");
00579                   goto fail_db;
00580                 }
00581               else if ((total = (sizeof (head)
00582                                + roundup (head.module * sizeof (ref_t),
00583                                          ALIGN)
00584                                + head.data_size))
00585                       > st.st_size
00586                       || total < sizeof (head))
00587                 {
00588                   msg = _("file size does not match");
00589                   goto fail_db;
00590                 }
00591               /* Note we map with the maximum size allowed for the
00592                  database.  This is likely much larger than the
00593                  actual file size.  This is OK on most OSes since
00594                  extensions of the underlying file will
00595                  automatically translate more pages available for
00596                  memory access.  */
00597               else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
00598                                   PROT_READ | PROT_WRITE,
00599                                   MAP_SHARED, fd, 0))
00600                       == MAP_FAILED)
00601                 goto fail_db_errno;
00602               else if (!verify_persistent_db (mem, &head, cnt))
00603                 {
00604                   munmap (mem, total);
00605                   msg = _("verification failed");
00606                   goto fail_db;
00607                 }
00608               else
00609                 {
00610                   /* Success.  We have the database.  */
00611                   dbs[cnt].head = mem;
00612                   dbs[cnt].memsize = total;
00613                   dbs[cnt].data = (char *)
00614                     &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
00615                                                ALIGN / sizeof (ref_t))];
00616                   dbs[cnt].mmap_used = true;
00617 
00618                   if (dbs[cnt].suggested_module > head.module)
00619                     dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
00620                             dbnames[cnt]);
00621 
00622                   dbs[cnt].wr_fd = fd;
00623                   fd = -1;
00624                   /* We also need a read-only descriptor.  */
00625                   if (dbs[cnt].shared)
00626                     {
00627                      dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
00628                                           O_RDONLY | EXTRA_O_FLAGS);
00629                      if (dbs[cnt].ro_fd == -1)
00630                        dbg_log (_("\
00631 cannot create read-only descriptor for \"%s\"; no mmap"),
00632                                dbs[cnt].db_filename);
00633                     }
00634 
00635                   // XXX Shall we test whether the descriptors actually
00636                   // XXX point to the same file?
00637                 }
00638 
00639               /* Close the file descriptors in case something went
00640                  wrong in which case the variable have not been
00641                  assigned -1.  */
00642               if (fd != -1)
00643                 close (fd);
00644              }
00645          }
00646 
00647        if (dbs[cnt].head == NULL)
00648          {
00649            /* No database loaded.  Allocate the data structure,
00650               possibly on disk.  */
00651            struct database_pers_head head;
00652            size_t total = (sizeof (head)
00653                          + roundup (dbs[cnt].suggested_module
00654                                    * sizeof (ref_t), ALIGN)
00655                          + (dbs[cnt].suggested_module
00656                             * DEFAULT_DATASIZE_PER_BUCKET));
00657 
00658            /* Try to create the database.  If we do not need a
00659               persistent database create a temporary file.  */
00660            int fd;
00661            int ro_fd = -1;
00662            if (dbs[cnt].persistent)
00663              {
00664               fd = open (dbs[cnt].db_filename,
00665                         O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
00666                         S_IRUSR | S_IWUSR);
00667               if (fd != -1 && dbs[cnt].shared)
00668                 ro_fd = open (dbs[cnt].db_filename,
00669                             O_RDONLY | EXTRA_O_FLAGS);
00670              }
00671            else
00672              {
00673               char fname[] = _PATH_NSCD_XYZ_DB_TMP;
00674               fd = mkostemp (fname, EXTRA_O_FLAGS);
00675 
00676               /* We do not need the file name anymore after we
00677                  opened another file descriptor in read-only mode.  */
00678               if (fd != -1)
00679                 {
00680                   if (dbs[cnt].shared)
00681                     ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
00682 
00683                   unlink (fname);
00684                 }
00685              }
00686 
00687            if (fd == -1)
00688              {
00689               if (errno == EEXIST)
00690                 {
00691                   dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
00692                           dbnames[cnt], dbs[cnt].db_filename);
00693                   // XXX Correct way to terminate?
00694                   exit (1);
00695                 }
00696 
00697               if  (dbs[cnt].persistent)
00698                 dbg_log (_("cannot create %s; no persistent database used"),
00699                         dbs[cnt].db_filename);
00700               else
00701                 dbg_log (_("cannot create %s; no sharing possible"),
00702                         dbs[cnt].db_filename);
00703 
00704               dbs[cnt].persistent = 0;
00705               // XXX remember: no mmap
00706              }
00707            else
00708              {
00709               /* Tell the user if we could not create the read-only
00710                  descriptor.  */
00711               if (ro_fd == -1 && dbs[cnt].shared)
00712                 dbg_log (_("\
00713 cannot create read-only descriptor for \"%s\"; no mmap"),
00714                         dbs[cnt].db_filename);
00715 
00716               /* Before we create the header, initialize the hash
00717                  table, so that if we get interrupted while writing
00718                  the header we can recognize a partially initialized
00719                  database.  */
00720               size_t ps = sysconf (_SC_PAGESIZE);
00721               char tmpbuf[ps];
00722               assert (~ENDREF == 0);
00723               memset (tmpbuf, '\xff', ps);
00724 
00725               size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
00726               off_t offset = sizeof (head);
00727 
00728               size_t towrite;
00729               if (offset % ps != 0)
00730                 {
00731                   towrite = MIN (remaining, ps - (offset % ps));
00732                   if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
00733                     goto write_fail;
00734                   offset += towrite;
00735                   remaining -= towrite;
00736                 }
00737 
00738               while (remaining > ps)
00739                 {
00740                   if (pwrite (fd, tmpbuf, ps, offset) == -1)
00741                     goto write_fail;
00742                   offset += ps;
00743                   remaining -= ps;
00744                 }
00745 
00746               if (remaining > 0
00747                   && pwrite (fd, tmpbuf, remaining, offset) != remaining)
00748                 goto write_fail;
00749 
00750               /* Create the header of the file.  */
00751               struct database_pers_head head =
00752                 {
00753                   .version = DB_VERSION,
00754                   .header_size = sizeof (head),
00755                   .module = dbs[cnt].suggested_module,
00756                   .data_size = (dbs[cnt].suggested_module
00757                               * DEFAULT_DATASIZE_PER_BUCKET),
00758                   .first_free = 0
00759                 };
00760               void *mem;
00761 
00762               if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
00763                    != sizeof (head))
00764                   || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
00765                      != 0)
00766                   || (mem = mmap (NULL, dbs[cnt].max_db_size,
00767                                 PROT_READ | PROT_WRITE,
00768                                 MAP_SHARED, fd, 0)) == MAP_FAILED)
00769                 {
00770                 write_fail:
00771                   unlink (dbs[cnt].db_filename);
00772                   dbg_log (_("cannot write to database file %s: %s"),
00773                           dbs[cnt].db_filename, strerror (errno));
00774                   dbs[cnt].persistent = 0;
00775                 }
00776               else
00777                 {
00778                   /* Success.  */
00779                   dbs[cnt].head = mem;
00780                   dbs[cnt].data = (char *)
00781                     &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
00782                                                ALIGN / sizeof (ref_t))];
00783                   dbs[cnt].memsize = total;
00784                   dbs[cnt].mmap_used = true;
00785 
00786                   /* Remember the descriptors.  */
00787                   dbs[cnt].wr_fd = fd;
00788                   dbs[cnt].ro_fd = ro_fd;
00789                   fd = -1;
00790                   ro_fd = -1;
00791                 }
00792 
00793               if (fd != -1)
00794                 close (fd);
00795               if (ro_fd != -1)
00796                 close (ro_fd);
00797              }
00798          }
00799 
00800 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
00801        /* We do not check here whether the O_CLOEXEC provided to the
00802           open call was successful or not.  The two fcntl calls are
00803           only performed once each per process start-up and therefore
00804           is not noticeable at all.  */
00805        if (paranoia
00806            && ((dbs[cnt].wr_fd != -1
00807                && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
00808               || (dbs[cnt].ro_fd != -1
00809                   && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
00810          {
00811            dbg_log (_("\
00812 cannot set socket to close on exec: %s; disabling paranoia mode"),
00813                    strerror (errno));
00814            paranoia = 0;
00815          }
00816 #endif
00817 
00818        if (dbs[cnt].head == NULL)
00819          {
00820            /* We do not use the persistent database.  Just
00821               create an in-memory data structure.  */
00822            assert (! dbs[cnt].persistent);
00823 
00824            dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
00825                                  + (dbs[cnt].suggested_module
00826                                    * sizeof (ref_t)));
00827            memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
00828            assert (~ENDREF == 0);
00829            memset (dbs[cnt].head->array, '\xff',
00830                   dbs[cnt].suggested_module * sizeof (ref_t));
00831            dbs[cnt].head->module = dbs[cnt].suggested_module;
00832            dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
00833                                    * dbs[cnt].head->module);
00834            dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
00835            dbs[cnt].head->first_free = 0;
00836 
00837            dbs[cnt].shared = 0;
00838            assert (dbs[cnt].ro_fd == -1);
00839          }
00840 
00841        dbs[cnt].inotify_descr = -1;
00842        if (dbs[cnt].check_file)
00843          {
00844 #ifdef HAVE_INOTIFY
00845            if (inotify_fd < 0
00846               || (dbs[cnt].inotify_descr
00847                   = inotify_add_watch (inotify_fd, dbs[cnt].filename,
00848                                     IN_DELETE_SELF | IN_MODIFY)) < 0)
00849              /* We cannot notice changes in the main thread.  */
00850 #endif
00851              {
00852               /* We need the modification date of the file.  */
00853               struct stat64 st;
00854 
00855               if (stat64 (dbs[cnt].filename, &st) < 0)
00856                 {
00857                   /* We cannot stat() the file, disable file checking.  */
00858                   dbg_log (_("cannot stat() file `%s': %s"),
00859                           dbs[cnt].filename, strerror (errno));
00860                   dbs[cnt].check_file = 0;
00861                 }
00862               else
00863                 dbs[cnt].file_mtime = st.st_mtime;
00864              }
00865          }
00866 
00867 #ifdef HAVE_INOTIFY
00868        if (cnt == hstdb && inotify_fd >= -1)
00869          /* We also monitor the resolver configuration file.  */
00870          resolv_conf_descr = inotify_add_watch (inotify_fd,
00871                                            _PATH_RESCONF,
00872                                            IN_DELETE_SELF | IN_MODIFY);
00873 #endif
00874       }
00875 
00876   /* Create the socket.  */
00877 #ifndef __ASSUME_SOCK_CLOEXEC
00878   sock = -1;
00879   if (have_sock_cloexec >= 0)
00880 #endif
00881     {
00882       sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
00883 #ifndef __ASSUME_SOCK_CLOEXEC
00884       if (have_sock_cloexec == 0)
00885        have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
00886 #endif
00887     }
00888 #ifndef __ASSUME_SOCK_CLOEXEC
00889   if (have_sock_cloexec < 0)
00890     sock = socket (AF_UNIX, SOCK_STREAM, 0);
00891 #endif
00892   if (sock < 0)
00893     {
00894       dbg_log (_("cannot open socket: %s"), strerror (errno));
00895       exit (errno == EACCES ? 4 : 1);
00896     }
00897   /* Bind a name to the socket.  */
00898   struct sockaddr_un sock_addr;
00899   sock_addr.sun_family = AF_UNIX;
00900   strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
00901   if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
00902     {
00903       dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
00904       exit (errno == EACCES ? 4 : 1);
00905     }
00906 
00907 #ifndef __ASSUME_SOCK_CLOEXEC
00908   if (have_sock_cloexec < 0)
00909     {
00910       /* We don't want to get stuck on accept.  */
00911       int fl = fcntl (sock, F_GETFL);
00912       if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
00913        {
00914          dbg_log (_("cannot change socket to nonblocking mode: %s"),
00915                  strerror (errno));
00916          exit (1);
00917        }
00918 
00919       /* The descriptor needs to be closed on exec.  */
00920       if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
00921        {
00922          dbg_log (_("cannot set socket to close on exec: %s"),
00923                  strerror (errno));
00924          exit (1);
00925        }
00926     }
00927 #endif
00928 
00929   /* Set permissions for the socket.  */
00930   chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
00931 
00932   /* Set the socket up to accept connections.  */
00933   if (listen (sock, SOMAXCONN) < 0)
00934     {
00935       dbg_log (_("cannot enable socket to accept connections: %s"),
00936               strerror (errno));
00937       exit (1);
00938     }
00939 
00940   /* Change to unprivileged uid/gid/groups if specified in config file */
00941   if (server_user != NULL)
00942     finish_drop_privileges ();
00943 }
00944 
00945 
00946 /* Close the connections.  */
00947 void
00948 close_sockets (void)
00949 {
00950   close (sock);
00951 }
00952 
00953 
00954 static void
00955 invalidate_cache (char *key, int fd)
00956 {
00957   dbtype number;
00958   int32_t resp;
00959 
00960   for (number = pwddb; number < lastdb; ++number)
00961     if (strcmp (key, dbnames[number]) == 0)
00962       {
00963        if (dbs[number].reset_res)
00964          res_init ();
00965 
00966        break;
00967       }
00968 
00969   if (number == lastdb)
00970     {
00971       resp = EINVAL;
00972       writeall (fd, &resp, sizeof (resp));
00973       return;
00974     }
00975 
00976   if (dbs[number].enabled)
00977     {
00978       pthread_mutex_lock (&dbs[number].prune_lock);
00979       prune_cache (&dbs[number], LONG_MAX, fd);
00980       pthread_mutex_unlock (&dbs[number].prune_lock);
00981     }
00982   else
00983     {
00984       resp = 0;
00985       writeall (fd, &resp, sizeof (resp));
00986     }
00987 }
00988 
00989 
00990 #ifdef SCM_RIGHTS
00991 static void
00992 send_ro_fd (struct database_dyn *db, char *key, int fd)
00993 {
00994   /* If we do not have an read-only file descriptor do nothing.  */
00995   if (db->ro_fd == -1)
00996     return;
00997 
00998   /* We need to send some data along with the descriptor.  */
00999   uint64_t mapsize = (db->head->data_size
01000                     + roundup (db->head->module * sizeof (ref_t), ALIGN)
01001                     + sizeof (struct database_pers_head));
01002   struct iovec iov[2];
01003   iov[0].iov_base = key;
01004   iov[0].iov_len = strlen (key) + 1;
01005   iov[1].iov_base = &mapsize;
01006   iov[1].iov_len = sizeof (mapsize);
01007 
01008   /* Prepare the control message to transfer the descriptor.  */
01009   union
01010   {
01011     struct cmsghdr hdr;
01012     char bytes[CMSG_SPACE (sizeof (int))];
01013   } buf;
01014   struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
01015                      .msg_control = buf.bytes,
01016                      .msg_controllen = sizeof (buf) };
01017   struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
01018 
01019   cmsg->cmsg_level = SOL_SOCKET;
01020   cmsg->cmsg_type = SCM_RIGHTS;
01021   cmsg->cmsg_len = CMSG_LEN (sizeof (int));
01022 
01023   *(int *) CMSG_DATA (cmsg) = db->ro_fd;
01024 
01025   msg.msg_controllen = cmsg->cmsg_len;
01026 
01027   /* Send the control message.  We repeat when we are interrupted but
01028      everything else is ignored.  */
01029 #ifndef MSG_NOSIGNAL
01030 # define MSG_NOSIGNAL 0
01031 #endif
01032   (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
01033 
01034   if (__builtin_expect (debug_level > 0, 0))
01035     dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
01036 }
01037 #endif /* SCM_RIGHTS */
01038 
01039 
01040 /* Handle new request.  */
01041 static void
01042 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
01043 {
01044   if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
01045     {
01046       if (debug_level > 0)
01047        dbg_log (_("\
01048 cannot handle old request version %d; current version is %d"),
01049                req->version, NSCD_VERSION);
01050       return;
01051     }
01052 
01053   /* Perform the SELinux check before we go on to the standard checks.  */
01054   if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
01055     {
01056       if (debug_level > 0)
01057        {
01058 #ifdef SO_PEERCRED
01059 # ifdef PATH_MAX
01060          char buf[PATH_MAX];
01061 # else
01062          char buf[4096];
01063 # endif
01064 
01065          snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
01066          ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
01067 
01068          if (n <= 0)
01069            dbg_log (_("\
01070 request from %ld not handled due to missing permission"), (long int) pid);
01071          else
01072            {
01073              buf[n] = '\0';
01074              dbg_log (_("\
01075 request from '%s' [%ld] not handled due to missing permission"),
01076                      buf, (long int) pid);
01077            }
01078 #else
01079          dbg_log (_("request not handled due to missing permission"));
01080 #endif
01081        }
01082       return;
01083     }
01084 
01085   struct database_dyn *db = reqinfo[req->type].db;
01086 
01087   /* See whether we can service the request from the cache.  */
01088   if (__builtin_expect (reqinfo[req->type].data_request, true))
01089     {
01090       if (__builtin_expect (debug_level, 0) > 0)
01091        {
01092          if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
01093            {
01094              char buf[INET6_ADDRSTRLEN];
01095 
01096              dbg_log ("\t%s (%s)", serv2str[req->type],
01097                      inet_ntop (req->type == GETHOSTBYADDR
01098                               ? AF_INET : AF_INET6,
01099                               key, buf, sizeof (buf)));
01100            }
01101          else
01102            dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
01103        }
01104 
01105       /* Is this service enabled?  */
01106       if (__builtin_expect (!db->enabled, 0))
01107        {
01108          /* No, sent the prepared record.  */
01109          if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
01110                                    db->disabled_iov->iov_len,
01111                                    MSG_NOSIGNAL))
01112              != (ssize_t) db->disabled_iov->iov_len
01113              && __builtin_expect (debug_level, 0) > 0)
01114            {
01115              /* We have problems sending the result.  */
01116              char buf[256];
01117              dbg_log (_("cannot write result: %s"),
01118                      strerror_r (errno, buf, sizeof (buf)));
01119            }
01120 
01121          return;
01122        }
01123 
01124       /* Be sure we can read the data.  */
01125       if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
01126        {
01127          ++db->head->rdlockdelayed;
01128          pthread_rwlock_rdlock (&db->lock);
01129        }
01130 
01131       /* See whether we can handle it from the cache.  */
01132       struct datahead *cached;
01133       cached = (struct datahead *) cache_search (req->type, key, req->key_len,
01134                                            db, uid);
01135       if (cached != NULL)
01136        {
01137          /* Hurray it's in the cache.  */
01138          ssize_t nwritten;
01139 
01140 #ifdef HAVE_SENDFILE
01141          if (__builtin_expect (db->mmap_used, 1))
01142            {
01143              assert (db->wr_fd != -1);
01144              assert ((char *) cached->data > (char *) db->data);
01145              assert ((char *) cached->data - (char *) db->head
01146                     + cached->recsize
01147                     <= (sizeof (struct database_pers_head)
01148                        + db->head->module * sizeof (ref_t)
01149                        + db->head->data_size));
01150              nwritten = sendfileall (fd, db->wr_fd,
01151                                   (char *) cached->data
01152                                   - (char *) db->head, cached->recsize);
01153 # ifndef __ASSUME_SENDFILE
01154              if (nwritten == -1 && errno == ENOSYS)
01155               goto use_write;
01156 # endif
01157            }
01158          else
01159 # ifndef __ASSUME_SENDFILE
01160          use_write:
01161 # endif
01162 #endif
01163            nwritten = writeall (fd, cached->data, cached->recsize);
01164 
01165          if (nwritten != cached->recsize
01166              && __builtin_expect (debug_level, 0) > 0)
01167            {
01168              /* We have problems sending the result.  */
01169              char buf[256];
01170              dbg_log (_("cannot write result: %s"),
01171                      strerror_r (errno, buf, sizeof (buf)));
01172            }
01173 
01174          pthread_rwlock_unlock (&db->lock);
01175 
01176          return;
01177        }
01178 
01179       pthread_rwlock_unlock (&db->lock);
01180     }
01181   else if (__builtin_expect (debug_level, 0) > 0)
01182     {
01183       if (req->type == INVALIDATE)
01184        dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
01185       else
01186        dbg_log ("\t%s", serv2str[req->type]);
01187     }
01188 
01189   /* Handle the request.  */
01190   switch (req->type)
01191     {
01192     case GETPWBYNAME:
01193       addpwbyname (db, fd, req, key, uid);
01194       break;
01195 
01196     case GETPWBYUID:
01197       addpwbyuid (db, fd, req, key, uid);
01198       break;
01199 
01200     case GETGRBYNAME:
01201       addgrbyname (db, fd, req, key, uid);
01202       break;
01203 
01204     case GETGRBYGID:
01205       addgrbygid (db, fd, req, key, uid);
01206       break;
01207 
01208     case GETHOSTBYNAME:
01209       addhstbyname (db, fd, req, key, uid);
01210       break;
01211 
01212     case GETHOSTBYNAMEv6:
01213       addhstbynamev6 (db, fd, req, key, uid);
01214       break;
01215 
01216     case GETHOSTBYADDR:
01217       addhstbyaddr (db, fd, req, key, uid);
01218       break;
01219 
01220     case GETHOSTBYADDRv6:
01221       addhstbyaddrv6 (db, fd, req, key, uid);
01222       break;
01223 
01224     case GETAI:
01225       addhstai (db, fd, req, key, uid);
01226       break;
01227 
01228     case INITGROUPS:
01229       addinitgroups (db, fd, req, key, uid);
01230       break;
01231 
01232     case GETSERVBYNAME:
01233       addservbyname (db, fd, req, key, uid);
01234       break;
01235 
01236     case GETSERVBYPORT:
01237       addservbyport (db, fd, req, key, uid);
01238       break;
01239 
01240     case GETSTAT:
01241     case SHUTDOWN:
01242     case INVALIDATE:
01243       {
01244        /* Get the callers credentials.  */
01245 #ifdef SO_PEERCRED
01246        struct ucred caller;
01247        socklen_t optlen = sizeof (caller);
01248 
01249        if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
01250          {
01251            char buf[256];
01252 
01253            dbg_log (_("error getting caller's id: %s"),
01254                    strerror_r (errno, buf, sizeof (buf)));
01255            break;
01256          }
01257 
01258        uid = caller.uid;
01259 #else
01260        /* Some systems have no SO_PEERCRED implementation.  They don't
01261           care about security so we don't as well.  */
01262        uid = 0;
01263 #endif
01264       }
01265 
01266       /* Accept shutdown, getstat and invalidate only from root.  For
01267         the stat call also allow the user specified in the config file.  */
01268       if (req->type == GETSTAT)
01269        {
01270          if (uid == 0 || uid == stat_uid)
01271            send_stats (fd, dbs);
01272        }
01273       else if (uid == 0)
01274        {
01275          if (req->type == INVALIDATE)
01276            invalidate_cache (key, fd);
01277          else
01278            termination_handler (0);
01279        }
01280       break;
01281 
01282     case GETFDPW:
01283     case GETFDGR:
01284     case GETFDHST:
01285     case GETFDSERV:
01286 #ifdef SCM_RIGHTS
01287       send_ro_fd (reqinfo[req->type].db, key, fd);
01288 #endif
01289       break;
01290 
01291     default:
01292       /* Ignore the command, it's nothing we know.  */
01293       break;
01294     }
01295 }
01296 
01297 
01298 /* Restart the process.  */
01299 static void
01300 restart (void)
01301 {
01302   /* First determine the parameters.  We do not use the parameters
01303      passed to main() since in case nscd is started by running the
01304      dynamic linker this will not work.  Yes, this is not the usual
01305      case but nscd is part of glibc and we occasionally do this.  */
01306   size_t buflen = 1024;
01307   char *buf = alloca (buflen);
01308   size_t readlen = 0;
01309   int fd = open ("/proc/self/cmdline", O_RDONLY);
01310   if (fd == -1)
01311     {
01312       dbg_log (_("\
01313 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
01314               strerror (errno));
01315 
01316       paranoia = 0;
01317       return;
01318     }
01319 
01320   while (1)
01321     {
01322       ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
01323                                        buflen - readlen));
01324       if (n == -1)
01325        {
01326          dbg_log (_("\
01327 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
01328                  strerror (errno));
01329 
01330          close (fd);
01331          paranoia = 0;
01332          return;
01333        }
01334 
01335       readlen += n;
01336 
01337       if (readlen < buflen)
01338        break;
01339 
01340       /* We might have to extend the buffer.  */
01341       size_t old_buflen = buflen;
01342       char *newp = extend_alloca (buf, buflen, 2 * buflen);
01343       buf = memmove (newp, buf, old_buflen);
01344     }
01345 
01346   close (fd);
01347 
01348   /* Parse the command line.  Worst case scenario: every two
01349      characters form one parameter (one character plus NUL).  */
01350   char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
01351   int argc = 0;
01352 
01353   char *cp = buf;
01354   while (cp < buf + readlen)
01355     {
01356       argv[argc++] = cp;
01357       cp = (char *) rawmemchr (cp, '\0') + 1;
01358     }
01359   argv[argc] = NULL;
01360 
01361   /* Second, change back to the old user if we changed it.  */
01362   if (server_user != NULL)
01363     {
01364       if (setresuid (old_uid, old_uid, old_uid) != 0)
01365        {
01366          dbg_log (_("\
01367 cannot change to old UID: %s; disabling paranoia mode"),
01368                  strerror (errno));
01369 
01370          paranoia = 0;
01371          return;
01372        }
01373 
01374       if (setresgid (old_gid, old_gid, old_gid) != 0)
01375        {
01376          dbg_log (_("\
01377 cannot change to old GID: %s; disabling paranoia mode"),
01378                  strerror (errno));
01379 
01380          setuid (server_uid);
01381          paranoia = 0;
01382          return;
01383        }
01384     }
01385 
01386   /* Next change back to the old working directory.  */
01387   if (chdir (oldcwd) == -1)
01388     {
01389       dbg_log (_("\
01390 cannot change to old working directory: %s; disabling paranoia mode"),
01391               strerror (errno));
01392 
01393       if (server_user != NULL)
01394        {
01395          setuid (server_uid);
01396          setgid (server_gid);
01397        }
01398       paranoia = 0;
01399       return;
01400     }
01401 
01402   /* Synchronize memory.  */
01403   int32_t certainly[lastdb];
01404   for (int cnt = 0; cnt < lastdb; ++cnt)
01405     if (dbs[cnt].enabled)
01406       {
01407        /* Make sure nobody keeps using the database.  */
01408        dbs[cnt].head->timestamp = 0;
01409        certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
01410        dbs[cnt].head->nscd_certainly_running = 0;
01411 
01412        if (dbs[cnt].persistent)
01413          // XXX async OK?
01414          msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
01415       }
01416 
01417   /* The preparations are done.  */
01418   execv ("/proc/self/exe", argv);
01419 
01420   /* If we come here, we will never be able to re-exec.  */
01421   dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
01422           strerror (errno));
01423 
01424   if (server_user != NULL)
01425     {
01426       setuid (server_uid);
01427       setgid (server_gid);
01428     }
01429   if (chdir ("/") != 0)
01430     dbg_log (_("cannot change current working directory to \"/\": %s"),
01431             strerror (errno));
01432   paranoia = 0;
01433 
01434   /* Reenable the databases.  */
01435   time_t now = time (NULL);
01436   for (int cnt = 0; cnt < lastdb; ++cnt)
01437     if (dbs[cnt].enabled)
01438       {
01439        dbs[cnt].head->timestamp = now;
01440        dbs[cnt].head->nscd_certainly_running = certainly[cnt];
01441       }
01442 }
01443 
01444 
/* One queue entry for a descriptor which is waiting for a worker.  An
   entry whose NEXT member is NULL is not in use.  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};
/* The memory backing all the entries, used as an array.  */
static struct fdlist *fdlist;
/* Descriptors which are ready to be read.  This is a circular list:
   READYLIST points to the entry queued last, and its NEXT member to
   the entry the workers take next.  NULL means the list is empty.  */
static struct fdlist *readylist;

/* Condition variable and mutex used to announce new READYLIST entries.
   NOTE(review): the original comment says the condvar is initialized
   dynamically because a different clock might be used; only the static
   initializer is visible in this part of the file.  */
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* Clock the timed waits are based on.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* How many threads are currently available to serve READYLIST.  */
static unsigned long int nready;
01467 
01468 
01469 /* Function for the clean-up threads.  */
01470 static void *
01471 __attribute__ ((__noreturn__))
01472 nscd_run_prune (void *p)
01473 {
01474   const long int my_number = (long int) p;
01475   assert (dbs[my_number].enabled);
01476 
01477   int dont_need_update = setup_thread (&dbs[my_number]);
01478 
01479   time_t now = time (NULL);
01480 
01481   /* We are running.  */
01482   dbs[my_number].head->timestamp = now;
01483 
01484   struct timespec prune_ts;
01485   if (__builtin_expect (clock_gettime (timeout_clock, &prune_ts) == -1, 0))
01486     /* Should never happen.  */
01487     abort ();
01488 
01489   /* Compute the initial timeout time.  Prevent all the timers to go
01490      off at the same time by adding a db-based value.  */
01491   prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
01492   dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
01493 
01494   pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
01495   pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
01496 
01497   pthread_mutex_lock (prune_lock);
01498   while (1)
01499     {
01500       /* Wait, but not forever.  */
01501       int e = 0;
01502       if (! dbs[my_number].clear_cache)
01503        e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
01504       assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
01505 
01506       time_t next_wait;
01507       now = time (NULL);
01508       if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
01509          || dbs[my_number].clear_cache)
01510        {
01511          /* We will determine the new timout values based on the
01512             cache content.  Should there be concurrent additions to
01513             the cache which are not accounted for in the cache
01514             pruning we want to know about it.  Therefore set the
01515             timeout to the maximum.  It will be descreased when adding
01516             new entries to the cache, if necessary.  */
01517          if (sizeof (time_t) == sizeof (long int))
01518            dbs[my_number].wakeup_time = LONG_MAX;
01519          else
01520            dbs[my_number].wakeup_time = INT_MAX;
01521 
01522          /* Unconditionally reset the flag.  */
01523          time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
01524          dbs[my_number].clear_cache = 0;
01525 
01526          pthread_mutex_unlock (prune_lock);
01527 
01528          next_wait = prune_cache (&dbs[my_number], prune_now, -1);
01529 
01530          next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
01531          /* If clients cannot determine for sure whether nscd is running
01532             we need to wake up occasionally to update the timestamp.
01533             Wait 90% of the update period.  */
01534 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
01535          if (__builtin_expect (! dont_need_update, 0))
01536            {
01537              next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
01538              dbs[my_number].head->timestamp = now;
01539            }
01540 
01541          pthread_mutex_lock (prune_lock);
01542 
01543          /* Make it known when we will wake up again.  */
01544          if (now + next_wait < dbs[my_number].wakeup_time)
01545            dbs[my_number].wakeup_time = now + next_wait;
01546          else
01547            next_wait = dbs[my_number].wakeup_time - now;
01548        }
01549       else
01550        /* The cache was just pruned.  Do not do it again now.  Just
01551           use the new timeout value.  */
01552        next_wait = dbs[my_number].wakeup_time - now;
01553 
01554       if (clock_gettime (timeout_clock, &prune_ts) == -1)
01555        /* Should never happen.  */
01556        abort ();
01557 
01558       /* Compute next timeout time.  */
01559       prune_ts.tv_sec += next_wait;
01560     }
01561 }
01562 
01563 
01564 /* This is the main loop.  It is replicated in different threads but
01565    the the use of the ready list makes sure only one thread handles an
01566    incoming connection.  */
01567 static void *
01568 __attribute__ ((__noreturn__))
01569 nscd_run_worker (void *p)
01570 {
01571   char buf[256];
01572 
01573   /* Initialize the memory-in-flight list.  */
01574   for (enum in_flight idx = 0; idx < IDX_last; ++idx)
01575     mem_in_flight.block[idx].dbidx = -1;
01576   /* And queue this threads structure.  */
01577   do
01578     mem_in_flight.next = mem_in_flight_list;
01579   while (atomic_compare_and_exchange_bool_acq (&mem_in_flight_list,
01580                                           &mem_in_flight,
01581                                           mem_in_flight.next) != 0);
01582 
01583   /* Initial locking.  */
01584   pthread_mutex_lock (&readylist_lock);
01585 
01586   /* One more thread available.  */
01587   ++nready;
01588 
01589   while (1)
01590     {
01591       while (readylist == NULL)
01592        pthread_cond_wait (&readylist_cond, &readylist_lock);
01593 
01594       struct fdlist *it = readylist->next;
01595       if (readylist->next == readylist)
01596        /* Just one entry on the list.  */
01597        readylist = NULL;
01598       else
01599        readylist->next = it->next;
01600 
01601       /* Extract the information and mark the record ready to be used
01602         again.  */
01603       int fd = it->fd;
01604       it->next = NULL;
01605 
01606       /* One more thread available.  */
01607       --nready;
01608 
01609       /* We are done with the list.  */
01610       pthread_mutex_unlock (&readylist_lock);
01611 
01612 #ifndef __ASSUME_SOCK_CLOEXEC
01613       if (have_sock_cloexec < 0)
01614        {
01615          /* We do not want to block on a short read or so.  */
01616          int fl = fcntl (fd, F_GETFL);
01617          if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
01618            goto close_and_out;
01619        }
01620 #endif
01621 
01622       /* Now read the request.  */
01623       request_header req;
01624       if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
01625                          != sizeof (req), 0))
01626        {
01627          /* We failed to read data.  Note that this also might mean we
01628             failed because we would have blocked.  */
01629          if (debug_level > 0)
01630            dbg_log (_("short read while reading request: %s"),
01631                    strerror_r (errno, buf, sizeof (buf)));
01632          goto close_and_out;
01633        }
01634 
01635       /* Check whether this is a valid request type.  */
01636       if (req.type < GETPWBYNAME || req.type >= LASTREQ)
01637        goto close_and_out;
01638 
01639       /* Some systems have no SO_PEERCRED implementation.  They don't
01640         care about security so we don't as well.  */
01641       uid_t uid = -1;
01642 #ifdef SO_PEERCRED
01643       pid_t pid = 0;
01644 
01645       if (__builtin_expect (debug_level > 0, 0))
01646        {
01647          struct ucred caller;
01648          socklen_t optlen = sizeof (caller);
01649 
01650          if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
01651            pid = caller.pid;
01652        }
01653 #else
01654       const pid_t pid = 0;
01655 #endif
01656 
01657       /* It should not be possible to crash the nscd with a silly
01658         request (i.e., a terribly large key).  We limit the size to 1kb.  */
01659       if (__builtin_expect (req.key_len, 1) < 0
01660          || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
01661        {
01662          if (debug_level > 0)
01663            dbg_log (_("key length in request too long: %d"), req.key_len);
01664        }
01665       else
01666        {
01667          /* Get the key.  */
01668          char keybuf[MAXKEYLEN];
01669 
01670          if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
01671                                                    req.key_len))
01672                             != req.key_len, 0))
01673            {
01674              /* Again, this can also mean we would have blocked.  */
01675              if (debug_level > 0)
01676               dbg_log (_("short read while reading request key: %s"),
01677                       strerror_r (errno, buf, sizeof (buf)));
01678              goto close_and_out;
01679            }
01680 
01681          if (__builtin_expect (debug_level, 0) > 0)
01682            {
01683 #ifdef SO_PEERCRED
01684              if (pid != 0)
01685               dbg_log (_("\
01686 handle_request: request received (Version = %d) from PID %ld"),
01687                       req.version, (long int) pid);
01688              else
01689 #endif
01690               dbg_log (_("\
01691 handle_request: request received (Version = %d)"), req.version);
01692            }
01693 
01694          /* Phew, we got all the data, now process it.  */
01695          handle_request (fd, &req, keybuf, uid, pid);
01696        }
01697 
01698     close_and_out:
01699       /* We are done.  */
01700       close (fd);
01701 
01702       /* Re-locking.  */
01703       pthread_mutex_lock (&readylist_lock);
01704 
01705       /* One more thread available.  */
01706       ++nready;
01707     }
01708 }
01709 
01710 
01711 static unsigned int nconns;
01712 
01713 static void
01714 fd_ready (int fd)
01715 {
01716   pthread_mutex_lock (&readylist_lock);
01717 
01718   /* Find an empty entry in FDLIST.  */
01719   size_t inner;
01720   for (inner = 0; inner < nconns; ++inner)
01721     if (fdlist[inner].next == NULL)
01722       break;
01723   assert (inner < nconns);
01724 
01725   fdlist[inner].fd = fd;
01726 
01727   if (readylist == NULL)
01728     readylist = fdlist[inner].next = &fdlist[inner];
01729   else
01730     {
01731       fdlist[inner].next = readylist->next;
01732       readylist = readylist->next = &fdlist[inner];
01733     }
01734 
01735   bool do_signal = true;
01736   if (__builtin_expect (nready == 0, 0))
01737     {
01738       ++client_queued;
01739       do_signal = false;
01740 
01741       /* Try to start another thread to help out.  */
01742       pthread_t th;
01743       if (nthreads < max_nthreads
01744          && pthread_create (&th, &attr, nscd_run_worker,
01745                           (void *) (long int) nthreads) == 0)
01746        {
01747          /* We got another thread.  */
01748          ++nthreads;
01749          /* The new thread might need a kick.  */
01750          do_signal = true;
01751        }
01752 
01753     }
01754 
01755   pthread_mutex_unlock (&readylist_lock);
01756 
01757   /* Tell one of the worker threads there is work to do.  */
01758   if (do_signal)
01759     pthread_cond_signal (&readylist_cond);
01760 }
01761 
01762 
01763 /* Check whether restarting should happen.  */
01764 static inline int
01765 restart_p (time_t now)
01766 {
01767   return (paranoia && readylist == NULL && nready == nthreads
01768          && now >= restart_time);
01769 }
01770 
01771 
01772 /* Array for times a connection was accepted.  */
01773 static time_t *starttime;
01774 
01775 
01776 static void
01777 __attribute__ ((__noreturn__))
01778 main_loop_poll (void)
01779 {
01780   struct pollfd *conns = (struct pollfd *) xmalloc (nconns
01781                                               * sizeof (conns[0]));
01782 
01783   conns[0].fd = sock;
01784   conns[0].events = POLLRDNORM;
01785   size_t nused = 1;
01786   size_t firstfree = 1;
01787 
01788 #ifdef HAVE_INOTIFY
01789   if (inotify_fd != -1)
01790     {
01791       conns[1].fd = inotify_fd;
01792       conns[1].events = POLLRDNORM;
01793       nused = 2;
01794       firstfree = 2;
01795     }
01796 #endif
01797 
01798   while (1)
01799     {
01800       /* Wait for any event.  We wait at most a couple of seconds so
01801         that we can check whether we should close any of the accepted
01802         connections since we have not received a request.  */
01803 #define MAX_ACCEPT_TIMEOUT 30
01804 #define MIN_ACCEPT_TIMEOUT 5
01805 #define MAIN_THREAD_TIMEOUT \
01806   (MAX_ACCEPT_TIMEOUT * 1000                                                \
01807    - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
01808 
01809       int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
01810 
01811       time_t now = time (NULL);
01812 
01813       /* If there is a descriptor ready for reading or there is a new
01814         connection, process this now.  */
01815       if (n > 0)
01816        {
01817          if (conns[0].revents != 0)
01818            {
01819              /* We have a new incoming connection.  Accept the connection.  */
01820              int fd;
01821 
01822 #ifndef __ASSUME_PACCEPT
01823              fd = -1;
01824              if (have_paccept >= 0)
01825 #endif
01826               {
01827 #if 0
01828                 fd = TEMP_FAILURE_RETRY (paccept (sock, NULL, NULL, NULL,
01829                                               SOCK_NONBLOCK));
01830 #ifndef __ASSUME_PACCEPT
01831                 if (have_paccept == 0)
01832                   have_paccept = fd != -1 || errno != ENOSYS ? 1 : -1;
01833 #endif
01834 #endif
01835               }
01836 #ifndef __ASSUME_PACCEPT
01837              if (have_paccept < 0)
01838               fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
01839 #endif
01840 
01841              /* Use the descriptor if we have not reached the limit.  */
01842              if (fd >= 0)
01843               {
01844                 if (firstfree < nconns)
01845                   {
01846                     conns[firstfree].fd = fd;
01847                     conns[firstfree].events = POLLRDNORM;
01848                     starttime[firstfree] = now;
01849                     if (firstfree >= nused)
01850                      nused = firstfree + 1;
01851 
01852                     do
01853                      ++firstfree;
01854                     while (firstfree < nused && conns[firstfree].fd != -1);
01855                   }
01856                 else
01857                   /* We cannot use the connection so close it.  */
01858                   close (fd);
01859               }
01860 
01861              --n;
01862            }
01863 
01864          size_t first = 1;
01865 #ifdef HAVE_INOTIFY
01866          if (inotify_fd != -1 && conns[1].fd == inotify_fd)
01867            {
01868              if (conns[1].revents != 0)
01869               {
01870                 bool to_clear[lastdb] = { false, };
01871                 union
01872                 {
01873 # ifndef PATH_MAX
01874 #  define PATH_MAX 1024
01875 # endif
01876                   struct inotify_event i;
01877                   char buf[sizeof (struct inotify_event) + PATH_MAX];
01878                 } inev;
01879 
01880                 while (1)
01881                   {
01882                     ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
01883                                                       sizeof (inev)));
01884                     if (nb < (ssize_t) sizeof (struct inotify_event))
01885                      {
01886                        if (__builtin_expect (nb == -1 && errno != EAGAIN,
01887                                           0))
01888                          {
01889                            /* Something went wrong when reading the inotify
01890                              data.  Better disable inotify.  */
01891                            dbg_log (_("\
01892 disabled inotify after read error %d"),
01893                                    errno);
01894                            conns[1].fd = -1;
01895                            firstfree = 1;
01896                            if (nused == 2)
01897                             nused = 1;
01898                            close (inotify_fd);
01899                            inotify_fd = -1;
01900                          }
01901                        break;
01902                      }
01903 
01904                     /* Check which of the files changed.  */
01905                     for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
01906                      if (inev.i.wd == dbs[dbcnt].inotify_descr)
01907                        {
01908                          to_clear[dbcnt] = true;
01909                          goto next;
01910                        }
01911 
01912                     if (inev.i.wd == resolv_conf_descr)
01913                      {
01914                        res_init ();
01915                        to_clear[hstdb] = true;
01916                      }
01917                   next:;
01918                   }
01919 
01920                 /* Actually perform the cache clearing.  */
01921                 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
01922                   if (to_clear[dbcnt])
01923                     {
01924                      pthread_mutex_lock (&dbs[dbcnt].prune_lock);
01925                      dbs[dbcnt].clear_cache = 1;
01926                      pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
01927                      pthread_cond_signal (&dbs[dbcnt].prune_cond);
01928                     }
01929 
01930                 --n;
01931               }
01932 
01933              first = 2;
01934            }
01935 #endif
01936 
01937          for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
01938            if (conns[cnt].revents != 0)
01939              {
01940               fd_ready (conns[cnt].fd);
01941 
01942               /* Clean up the CONNS array.  */
01943               conns[cnt].fd = -1;
01944               if (cnt < firstfree)
01945                 firstfree = cnt;
01946               if (cnt == nused - 1)
01947                 do
01948                   --nused;
01949                 while (conns[nused - 1].fd == -1);
01950 
01951               --n;
01952              }
01953        }
01954 
01955       /* Now find entries which have timed out.  */
01956       assert (nused > 0);
01957 
01958       /* We make the timeout length depend on the number of file
01959         descriptors currently used.  */
01960 #define ACCEPT_TIMEOUT \
01961   (MAX_ACCEPT_TIMEOUT                                                       \
01962    - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
01963       time_t laststart = now - ACCEPT_TIMEOUT;
01964 
01965       for (size_t cnt = nused - 1; cnt > 0; --cnt)
01966        {
01967          if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
01968            {
01969              /* Remove the entry, it timed out.  */
01970              (void) close (conns[cnt].fd);
01971              conns[cnt].fd = -1;
01972 
01973              if (cnt < firstfree)
01974               firstfree = cnt;
01975              if (cnt == nused - 1)
01976               do
01977                 --nused;
01978               while (conns[nused - 1].fd == -1);
01979            }
01980        }
01981 
01982       if (restart_p (now))
01983        restart ();
01984     }
01985 }
01986 
01987 
01988 #ifdef HAVE_EPOLL
01989 static void
01990 main_loop_epoll (int efd)
01991 {
01992   struct epoll_event ev = { 0, };
01993   int nused = 1;
01994   size_t highest = 0;
01995 
01996   /* Add the socket.  */
01997   ev.events = EPOLLRDNORM;
01998   ev.data.fd = sock;
01999   if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
02000     /* We cannot use epoll.  */
02001     return;
02002 
02003 #ifdef HAVE_INOTIFY
02004   if (inotify_fd != -1)
02005     {
02006       ev.events = EPOLLRDNORM;
02007       ev.data.fd = inotify_fd;
02008       if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
02009        /* We cannot use epoll.  */
02010        return;
02011       nused = 2;
02012     }
02013 #endif
02014 
02015   while (1)
02016     {
02017       struct epoll_event revs[100];
02018 # define nrevs (sizeof (revs) / sizeof (revs[0]))
02019 
02020       int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);
02021 
02022       time_t now = time (NULL);
02023 
02024       for (int cnt = 0; cnt < n; ++cnt)
02025        if (revs[cnt].data.fd == sock)
02026          {
02027            /* A new connection.  */
02028            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
02029 
02030            if (fd >= 0)
02031              {
02032               /* Try to add the  new descriptor.  */
02033               ev.data.fd = fd;
02034               if (fd >= nconns
02035                   || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
02036                 /* The descriptor is too large or something went
02037                    wrong.  Close the descriptor.  */
02038                 close (fd);
02039               else
02040                 {
02041                   /* Remember when we accepted the connection.  */
02042                   starttime[fd] = now;
02043 
02044                   if (fd > highest)
02045                     highest = fd;
02046 
02047                   ++nused;
02048                 }
02049              }
02050          }
02051 #ifdef HAVE_INOTIFY
02052        else if (revs[cnt].data.fd == inotify_fd)
02053          {
02054            bool to_clear[lastdb] = { false, };
02055            union
02056            {
02057              struct inotify_event i;
02058              char buf[sizeof (struct inotify_event) + PATH_MAX];
02059            } inev;
02060 
02061            while (1)
02062              {
02063               ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
02064                                            sizeof (inev)));
02065               if (nb < (ssize_t) sizeof (struct inotify_event))
02066                 {
02067                   if (__builtin_expect (nb == -1 && errno != EAGAIN, 0))
02068                     {
02069                      /* Something went wrong when reading the inotify
02070                         data.  Better disable inotify.  */
02071                      dbg_log (_("disabled inotify after read error %d"),
02072                              errno);
02073                      (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
02074                                      NULL);
02075                      close (inotify_fd);
02076                      inotify_fd = -1;
02077                     }
02078                   break;
02079                 }
02080 
02081               /* Check which of the files changed.  */
02082               for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
02083                 if (inev.i.wd == dbs[dbcnt].inotify_descr)
02084                   {
02085                     to_clear[dbcnt] = true;
02086                     goto next;
02087                   }
02088 
02089               if (inev.i.wd == resolv_conf_descr)
02090                 {
02091                   res_init ();
02092                   to_clear[hstdb] = true;
02093                 }
02094              next:;
02095              }
02096 
02097            /* Actually perform the cache clearing.  */
02098            for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
02099              if (to_clear[dbcnt])
02100               {
02101                 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
02102                 dbs[dbcnt].clear_cache = 1;
02103                 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
02104                 pthread_cond_signal (&dbs[dbcnt].prune_cond);
02105               }
02106          }
02107 #endif
02108        else
02109          {
02110            /* Remove the descriptor from the epoll descriptor.  */
02111            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);
02112 
02113            /* Get a worker to handle the request.  */
02114            fd_ready (revs[cnt].data.fd);
02115 
02116            /* Reset the time.  */
02117            starttime[revs[cnt].data.fd] = 0;
02118            if (revs[cnt].data.fd == highest)
02119              do
02120               --highest;
02121              while (highest > 0 && starttime[highest] == 0);
02122 
02123            --nused;
02124          }
02125 
02126       /*  Now look for descriptors for accepted connections which have
02127          no reply in too long of a time.  */
02128       time_t laststart = now - ACCEPT_TIMEOUT;
02129       assert (starttime[sock] == 0);
02130       assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
02131       for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
02132        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
02133          {
02134            /* We are waiting for this one for too long.  Close it.  */
02135            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);
02136 
02137            (void) close (cnt);
02138 
02139            starttime[cnt] = 0;
02140            if (cnt == highest)
02141              --highest;
02142          }
02143        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
02144          --highest;
02145 
02146       if (restart_p (now))
02147        restart ();
02148     }
02149 }
02150 #endif
02151 
02152 
02153 /* Start all the threads we want.  The initial process is thread no. 1.  */
02154 void
02155 start_threads (void)
02156 {
02157   /* Initialize the conditional variable we will use.  The only
02158      non-standard attribute we might use is the clock selection.  */
02159   pthread_condattr_t condattr;
02160   pthread_condattr_init (&condattr);
02161 
02162 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
02163     && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
02164   /* Determine whether the monotonous clock is available.  */
02165   struct timespec dummy;
02166 # if _POSIX_MONOTONIC_CLOCK == 0
02167   if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
02168 # endif
02169 # if _POSIX_CLOCK_SELECTION == 0
02170     if (sysconf (_SC_CLOCK_SELECTION) > 0)
02171 # endif
02172       if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
02173          && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
02174        timeout_clock = CLOCK_MONOTONIC;
02175 #endif
02176 
02177   /* Create the attribute for the threads.  They are all created
02178      detached.  */
02179   pthread_attr_init (&attr);
02180   pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
02181   /* Use 1MB stacks, twice as much for 64-bit architectures.  */
02182   pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
02183 
02184   /* We allow less than LASTDB threads only for debugging.  */
02185   if (debug_level == 0)
02186     nthreads = MAX (nthreads, lastdb);
02187 
02188   /* Create the threads which prune the databases.  */
02189   // XXX Ideally this work would be done by some of the worker threads.
02190   // XXX But this is problematic since we would need to be able to wake
02191   // XXX them up explicitly as well as part of the group handling the
02192   // XXX ready-list.  This requires an operation where we can wait on
02193   // XXX two conditional variables at the same time.  This operation
02194   // XXX does not exist (yet).
02195   for (long int i = 0; i < lastdb; ++i)
02196     {
02197       /* Initialize the conditional variable.  */
02198       if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
02199        {
02200          dbg_log (_("could not initialize conditional variable"));
02201          exit (1);
02202        }
02203 
02204       pthread_t th;
02205       if (dbs[i].enabled
02206          && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
02207        {
02208          dbg_log (_("could not start clean-up thread; terminating"));
02209          exit (1);
02210        }
02211     }
02212 
02213   pthread_condattr_destroy (&condattr);
02214 
02215   for (long int i = 0; i < nthreads; ++i)
02216     {
02217       pthread_t th;
02218       if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
02219        {
02220          if (i == 0)
02221            {
02222              dbg_log (_("could not start any worker thread; terminating"));
02223              exit (1);
02224            }
02225 
02226          break;
02227        }
02228     }
02229 
02230   /* Determine how much room for descriptors we should initially
02231      allocate.  This might need to change later if we cap the number
02232      with MAXCONN.  */
02233   const long int nfds = sysconf (_SC_OPEN_MAX);
02234 #define MINCONN 32
02235 #define MAXCONN 16384
02236   if (nfds == -1 || nfds > MAXCONN)
02237     nconns = MAXCONN;
02238   else if (nfds < MINCONN)
02239     nconns = MINCONN;
02240   else
02241     nconns = nfds;
02242 
02243   /* We need memory to pass descriptors on to the worker threads.  */
02244   fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
02245   /* Array to keep track when connection was accepted.  */
02246   starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
02247 
02248   /* In the main thread we execute the loop which handles incoming
02249      connections.  */
02250 #ifdef HAVE_EPOLL
02251   int efd = epoll_create (100);
02252   if (efd != -1)
02253     {
02254       main_loop_epoll (efd);
02255       close (efd);
02256     }
02257 #endif
02258 
02259   main_loop_poll ();
02260 }
02261 
02262 
02263 /* Look up the uid, gid, and supplementary groups to run nscd as. When
02264    this function is called, we are not listening on the nscd socket yet so
02265    we can just use the ordinary lookup functions without causing a lockup  */
02266 static void
02267 begin_drop_privileges (void)
02268 {
02269   struct passwd *pwd = getpwnam (server_user);
02270 
02271   if (pwd == NULL)
02272     {
02273       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
02274       error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
02275             server_user);
02276     }
02277 
02278   server_uid = pwd->pw_uid;
02279   server_gid = pwd->pw_gid;
02280 
02281   /* Save the old UID/GID if we have to change back.  */
02282   if (paranoia)
02283     {
02284       old_uid = getuid ();
02285       old_gid = getgid ();
02286     }
02287 
02288   if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
02289     {
02290       /* This really must never happen.  */
02291       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
02292       error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
02293     }
02294 
02295   server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
02296 
02297   if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
02298       == -1)
02299     {
02300       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
02301       error (EXIT_FAILURE, errno, _("getgrouplist failed"));
02302     }
02303 }
02304 
02305 
02306 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
02307    run nscd as the user specified in the configuration file.  */
02308 static void
02309 finish_drop_privileges (void)
02310 {
02311 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
02312   /* We need to preserve the capabilities to connect to the audit daemon.  */
02313   cap_t new_caps = preserve_capabilities ();
02314 #endif
02315 
02316   if (setgroups (server_ngroups, server_groups) == -1)
02317     {
02318       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
02319       error (EXIT_FAILURE, errno, _("setgroups failed"));
02320     }
02321 
02322   int res;
02323   if (paranoia)
02324     res = setresgid (server_gid, server_gid, old_gid);
02325   else
02326     res = setgid (server_gid);
02327   if (res == -1)
02328     {
02329       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
02330       perror ("setgid");
02331       exit (4);
02332     }
02333 
02334   if (paranoia)
02335     res = setresuid (server_uid, server_uid, old_uid);
02336   else
02337     res = setuid (server_uid);
02338   if (res == -1)
02339     {
02340       dbg_log (_("Failed to run nscd as user '%s'"), server_user);
02341       perror ("setuid");
02342       exit (4);
02343     }
02344 
02345 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
02346   /* Remove the temporary capabilities.  */
02347   install_real_capabilities (new_caps);
02348 #endif
02349 }