Back to index

glibc  2.9
cache.c
Go to the documentation of this file.
00001 /* Copyright (c) 1998, 1999, 2003-2007, 2008 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
00004 
00005    This program is free software; you can redistribute it and/or modify
00006    it under the terms of the GNU General Public License as published
00007    by the Free Software Foundation; version 2 of the License, or
00008    (at your option) any later version.
00009 
00010    This program is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013    GNU General Public License for more details.
00014 
00015    You should have received a copy of the GNU General Public License
00016    along with this program; if not, write to the Free Software Foundation,
00017    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
00018 
00019 #include <assert.h>
00020 #include <atomic.h>
00021 #include <errno.h>
00022 #include <error.h>
00023 #include <inttypes.h>
00024 #include <limits.h>
00025 #include <stdlib.h>
00026 #include <string.h>
00027 #include <libintl.h>
00028 #include <arpa/inet.h>
00029 #include <rpcsvc/nis.h>
00030 #include <sys/mman.h>
00031 #include <sys/param.h>
00032 #include <sys/stat.h>
00033 #include <sys/uio.h>
00034 
00035 #include "nscd.h"
00036 #include "dbg_log.h"
00037 
00038 
/* Wrapper functions with error checking for standard functions.  */
extern void *xcalloc (size_t n, size_t s);


/* Number of times a value is reloaded without being used.  UINT_MAX
   means unlimited.  Consulted by prune_cache when deciding whether a
   timed-out entry is refreshed in place or discarded.
   NOTE(review): presumably set from the nscd configuration file's
   reload-count option — confirm against the config parser.  */
unsigned int reload_count = DEFAULT_RELOAD_LIMIT;
00046 
00047 
/* Dispatch table of re-add ("refresh") functions, indexed by request
   type.  prune_cache calls through this table to reload a timed-out
   entry in place instead of discarding it.  Request types without an
   initializer here are implicitly NULL, which is why prune_cache
   asserts readdfcts[runp->type] != NULL before calling.  */
static void (*const readdfcts[LASTREQ]) (struct database_dyn *,
                                    struct hashentry *,
                                    struct datahead *) =
{
  [GETPWBYNAME] = readdpwbyname,
  [GETPWBYUID] = readdpwbyuid,
  [GETGRBYNAME] = readdgrbyname,
  [GETGRBYGID] = readdgrbygid,
  [GETHOSTBYNAME] = readdhstbyname,
  [GETHOSTBYNAMEv6] = readdhstbynamev6,
  [GETHOSTBYADDR] = readdhstbyaddr,
  [GETHOSTBYADDRv6] = readdhstbyaddrv6,
  [GETAI] = readdhstai,
  [INITGROUPS] = readdinitgroups,
  [GETSERVBYNAME] = readdservbyname,
  [GETSERVBYPORT] = readdservbyport
};
00065 
00066 
00067 /* Search the cache for a matching entry and return it when found.  If
00068    this fails search the negative cache and return (void *) -1 if this
00069    search was successful.  Otherwise return NULL.
00070 
00071    This function must be called with the read-lock held.  */
00072 struct datahead *
00073 cache_search (request_type type, void *key, size_t len,
00074              struct database_dyn *table, uid_t owner)
00075 {
00076   unsigned long int hash = __nis_hash (key, len) % table->head->module;
00077 
00078   unsigned long int nsearched = 0;
00079   struct datahead *result = NULL;
00080 
00081   ref_t work = table->head->array[hash];
00082   while (work != ENDREF)
00083     {
00084       ++nsearched;
00085 
00086       struct hashentry *here = (struct hashentry *) (table->data + work);
00087 
00088       if (type == here->type && len == here->len
00089          && memcmp (key, table->data + here->key, len) == 0
00090          && here->owner == owner)
00091        {
00092          /* We found the entry.  Increment the appropriate counter.  */
00093          struct datahead *dh
00094            = (struct datahead *) (table->data + here->packet);
00095 
00096          /* See whether we must ignore the entry.  */
00097          if (dh->usable)
00098            {
00099              /* We do not synchronize the memory here.  The statistics
00100                data is not crucial, we synchronize only once in a while
00101                in the cleanup threads.  */
00102              if (dh->notfound)
00103               ++table->head->neghit;
00104              else
00105               {
00106                 ++table->head->poshit;
00107 
00108                 if (dh->nreloads != 0)
00109                   dh->nreloads = 0;
00110               }
00111 
00112              result = dh;
00113              break;
00114            }
00115        }
00116 
00117       work = here->next;
00118     }
00119 
00120   if (nsearched > table->head->maxnsearched)
00121     table->head->maxnsearched = nsearched;
00122 
00123   return result;
00124 }
00125 
00126 /* Add a new entry to the cache.  The return value is zero if the function
00127    call was successful.
00128 
00129    This function must be called with the read-lock held.
00130 
00131    We modify the table but we nevertheless only acquire a read-lock.
00132    This is ok since we use operations which would be safe even without
00133    locking, given that the `prune_cache' function never runs.  Using
00134    the readlock reduces the chance of conflicts.  */
int
cache_add (int type, const void *key, size_t len, struct datahead *packet,
          bool first, struct database_dyn *table,
          uid_t owner, bool prune_wakeup)
{
  /* Optional trace output.  For address lookups the binary key is
     rendered with inet_ntop; otherwise the key is printed as a
     string.  */
  if (__builtin_expect (debug_level >= 2, 0))
    {
      const char *str;
      char buf[INET6_ADDRSTRLEN + 1];
      if (type == GETHOSTBYADDR || type == GETHOSTBYADDRv6)
       str = inet_ntop (type == GETHOSTBYADDR ? AF_INET : AF_INET6,
                      key, buf, sizeof (buf));
      else
       str = key;

      dbg_log (_("add new entry \"%s\" of type %s for %s to cache%s"),
              str, serv2str[type], dbnames[table - dbs],
              first ? _(" (first)") : "");
    }

  unsigned long int hash = __nis_hash (key, len) % table->head->module;
  struct hashentry *newp;

  /* Allocate the hash entry from the database's memory pool.  */
  newp = mempool_alloc (table, sizeof (struct hashentry), IDX_record_data);
  /* If we cannot allocate memory, just do not do anything.  */
  if (newp == NULL)
    {
      ++table->head->addfailed;

      /* If necessary mark the entry as unusable so that lookups will
        not use it.  */
      if (first)
       packet->usable = false;

      /* Mark the in-flight memory as unused.
        NOTE(review): this failure path clears only indices below
        IDX_record_data while the success path at the end clears all
        of IDX_last — looks intentional (the record-data block itself
        was not reserved when allocation failed), but confirm against
        mempool_alloc's bookkeeping.  */
      for (enum in_flight idx = 0; idx < IDX_record_data; ++idx)
       mem_in_flight.block[idx].dbidx = -1;

      return -1;
    }

  /* Fill in the entry.  KEY and PACKET already live inside the mapped
     data area, so they are stored as offsets relative to
     table->data.  */
  newp->type = type;
  newp->first = first;
  newp->len = len;
  newp->key = (char *) key - table->data;
  assert (newp->key + newp->len <= table->head->first_free);
  newp->owner = owner;
  newp->packet = (char *) packet - table->data;
  assert ((newp->packet & BLOCK_ALIGN_M1) == 0);

  /* Put the new entry in the first position.  Lock-free push onto the
     bucket chain: retry the CAS until no concurrent cache_add has
     changed the bucket head between the read and the exchange.  */
  do
    newp->next = table->head->array[hash];
  while (atomic_compare_and_exchange_bool_acq (&table->head->array[hash],
                                          (ref_t) ((char *) newp
                                                 - table->data),
                                          (ref_t) newp->next));

  /* Update the statistics.  */
  if (packet->notfound)
    ++table->head->negmiss;
  else if (first)
    ++table->head->posmiss;

  /* We depend on this value being correct and at least as high as the
     real number of entries.  */
  atomic_increment (&table->head->nentries);

  /* It does not matter that we are not loading the just increment
     value, this is just for statistics.  */
  unsigned long int nentries = table->head->nentries;
  if (nentries > table->head->maxnentries)
    table->head->maxnentries = nentries;

  /* For persistent (file-backed) databases schedule a write-back of
     the header up to and including the modified bucket slot.  */
  if (table->persistent)
    // XXX async OK?
    msync ((void *) table->head,
          (char *) &table->head->array[hash] - (char *) table->head
          + sizeof (ref_t), MS_ASYNC);

  /* We do not have to worry about the pruning thread if we are
     re-adding the data since this is done by the pruning thread.  We
     also do not have to do anything in case this is not the first
     time the data is entered since different data heads all have the
     same timeout.  */
  if (first && prune_wakeup)
    {
      /* Perhaps the prune thread for the table is not running in a long
        time.  Wake it if necessary.  */
      pthread_mutex_lock (&table->prune_lock);
      time_t next_wakeup = table->wakeup_time;
      bool do_wakeup = false;
      /* Only wake the prune thread early if the new entry expires
        before its currently scheduled wakeup.  */
      if (next_wakeup > packet->timeout + CACHE_PRUNE_INTERVAL)
       {
         table->wakeup_time = packet->timeout;
         do_wakeup = true;
       }
      pthread_mutex_unlock (&table->prune_lock);
      if (do_wakeup)
       pthread_cond_signal (&table->prune_cond);
    }

  /* Mark the in-flight memory as unused.  */
  for (enum in_flight idx = 0; idx < IDX_last; ++idx)
    mem_in_flight.block[idx].dbidx = -1;

  return 0;
}
00243 
00244 /* Walk through the table and remove all entries which lifetime ended.
00245 
00246    We have a problem here.  To actually remove the entries we must get
00247    the write-lock.  But since we want to keep the time we have the
00248    lock as short as possible we cannot simply acquire the lock when we
00249    start looking for timedout entries.
00250 
00251    Therefore we do it in two stages: first we look for entries which
00252    must be invalidated and remember them.  Then we get the lock and
00253    actually remove them.  This is complicated by the way we have to
00254    free the data structures since some hash table entries share the same
00255    data.  */
time_t
prune_cache (struct database_dyn *table, time_t now, int fd)
{
  size_t cnt = table->head->module;

  /* If this table is not actually used don't do anything.  */
  if (cnt == 0)
    {
      if (fd != -1)
       {
         /* Reply to the INVALIDATE initiator.  */
         int32_t resp = 0;
         writeall (fd, &resp, sizeof (resp));
       }

      /* No need to do this again anytime soon.  */
      return 24 * 60 * 60;
    }

  /* If we check for the modification of the underlying file we invalidate
     the entries also in this case.  A NOW of LONG_MAX means "invalidate
     everything" and skips the stat check.  */
  if (table->inotify_descr < 0 && table->check_file && now != LONG_MAX)
    {
      struct stat64 st;

      if (stat64 (table->filename, &st) < 0)
       {
         char buf[128];
         /* We cannot stat() the file; disable file checking if the
             file does not exist.  */
         dbg_log (_("cannot stat() file `%s': %s"),
                 table->filename, strerror_r (errno, buf, sizeof (buf)));
         if (errno == ENOENT)
           table->check_file = 0;
       }
      else
       {
         if (st.st_mtime != table->file_mtime)
           {
             /* The file changed.  Invalidate all entries.  */
             now = LONG_MAX;
             table->file_mtime = st.st_mtime;
           }
       }
    }

  /* Phase 1 (mark): run through the table and find values which are
     not valid anymore.

     Note that for this initial step, finding the entries to be removed,
     we don't need to get any lock.  It is at all times assured that the
     linked lists are set up correctly and that no second thread prunes
     the cache.  */
  bool *mark;
  size_t memory_needed = cnt * sizeof (bool);
  bool mark_use_alloca;
  /* One flag per hash bucket; small tables use the stack, large ones
     the heap.  */
  if (__builtin_expect (memory_needed <= MAX_STACK_USE, 1))
    {
      mark = alloca (cnt * sizeof (bool));
      memset (mark, '\0', memory_needed);
      mark_use_alloca = true;
    }
  else
    {
      mark = xcalloc (1, memory_needed);
      mark_use_alloca = false;
    }
  /* FIRST/LAST bound the range of marked buckets so phase 2 only
     walks that slice.  FIRST > LAST means nothing was marked.  */
  size_t first = cnt + 1;
  size_t last = 0;
  char *const data = table->data;
  bool any = false;

  if (__builtin_expect (debug_level > 2, 0))
    dbg_log (_("pruning %s cache; time %ld"),
            dbnames[table - dbs], (long int) now);

#define NO_TIMEOUT LONG_MAX
  time_t next_timeout = NO_TIMEOUT;
  do
    {
      ref_t run = table->head->array[--cnt];

      while (run != ENDREF)
       {
         struct hashentry *runp = (struct hashentry *) (data + run);
         struct datahead *dh = (struct datahead *) (data + runp->packet);

         /* Some debug support.  */
         if (__builtin_expect (debug_level > 2, 0))
           {
             char buf[INET6_ADDRSTRLEN];
             const char *str;

             if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6)
              {
                inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6,
                          data + runp->key, buf, sizeof (buf));
                str = buf;
              }
             else
              str = data + runp->key;

             dbg_log (_("considering %s entry \"%s\", timeout %" PRIu64),
                     serv2str[runp->type], str, dh->timeout);
           }

         /* Check whether the entry timed out.  */
         if (dh->timeout < now)
           {
             /* This hash bucket could contain entries which need to
               be looked at.  */
             mark[cnt] = true;

             first = MIN (first, cnt);
             last = MAX (last, cnt);

             /* We only have to look at the data of the first entries
               since the count information is kept in the data part
               which is shared.  */
             if (runp->first)
              {

                /* At this point there are two choices: we reload the
                   value or we discard it.  Do not change NRELOADS if
                   we do not reload the record.  */
                if ((reload_count != UINT_MAX
                     && __builtin_expect (dh->nreloads >= reload_count, 0))
                    /* We always remove negative entries.  */
                    || dh->notfound
                    /* Discard everything if the user explicitly
                      requests it.  */
                    || now == LONG_MAX)
                  {
                    /* Remove the value.  */
                    dh->usable = false;

                    /* We definitely have some garbage entries now.  */
                    any = true;
                  }
                else
                  {
                    /* Reload the value.  We do this only for the
                      initially used key, not the additionally
                      added derived value.  */
                    assert (runp->type < LASTREQ
                           && readdfcts[runp->type] != NULL);

                    readdfcts[runp->type] (table, runp, dh);

                    /* If the entry has been replaced, we might need
                      cleanup.  */
                    any |= !dh->usable;
                  }
              }
           }
         else
           {
             /* Still valid: remember the earliest future timeout so
               the caller knows when to prune next.  */
             assert (dh->usable);
             next_timeout = MIN (next_timeout, dh->timeout);
           }

         run = runp->next;
       }
    }
  while (cnt > 0);

  /* Answer the INVALIDATE request as soon as marking is done; the
     actual unlinking below need not delay the initiator.  */
  if (__builtin_expect (fd != -1, 0))
    {
      /* Reply to the INVALIDATE initiator that the cache has been
        invalidated.  */
      int32_t resp = 0;
      writeall (fd, &resp, sizeof (resp));
    }

  /* Phase 2 (sweep): unlink dead entries from the marked buckets.  */
  if (first <= last)
    {
      struct hashentry *head = NULL;

      /* Now we have to get the write lock since we are about to modify
        the table.  Try without blocking first, purely so the delay can
        be counted in the statistics.  */
      if (__builtin_expect (pthread_rwlock_trywrlock (&table->lock) != 0, 0))
       {
         ++table->head->wrlockdelayed;
         pthread_rwlock_wrlock (&table->lock);
       }

      while (first <= last)
       {
         if (mark[first])
           {
             /* OLD always points at the link that references RUN, so
               unlinking is a single store through OLD.  */
             ref_t *old = &table->head->array[first];
             ref_t run = table->head->array[first];

             assert (run != ENDREF);
             do
              {
                struct hashentry *runp = (struct hashentry *) (data + run);
                struct datahead *dh
                  = (struct datahead *) (data + runp->packet);

                if (! dh->usable)
                  {
                    /* We need the list only for debugging but it is
                      more costly to avoid creating the list than
                      doing it.  */
                    runp->dellist = head;
                    head = runp;

                    /* No need for an atomic operation, we have the
                      write lock.  */
                    --table->head->nentries;

                    run = *old = runp->next;
                  }
                else
                  {
                    old = &runp->next;
                    run = runp->next;
                  }
              }
             while (run != ENDREF);
           }

         ++first;
       }

      /* It's all done.  */
      pthread_rwlock_unlock (&table->lock);

      /* Make sure the data is saved to disk.  */
      if (table->persistent)
       msync (table->head,
              data + table->head->first_free - (char *) table->head,
              MS_ASYNC);

      /* One extra pass if we do debugging: log every removed entry via
        the dellist chain built above.  */
      if (__builtin_expect (debug_level > 0, 0))
       {
         struct hashentry *runp = head;

         while (runp != NULL)
           {
             char buf[INET6_ADDRSTRLEN];
             const char *str;

             if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6)
              {
                inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6,
                          data + runp->key, buf, sizeof (buf));
                str = buf;
              }
             else
              str = data + runp->key;

             dbg_log ("remove %s entry \"%s\"", serv2str[runp->type], str);

             runp = runp->dellist;
           }
       }
    }

  if (__builtin_expect (! mark_use_alloca, 0))
    free (mark);

  /* Run garbage collection if any entry has been removed or replaced.  */
  if (any)
    gc (table);

  /* Return the number of seconds until the next prune is due.  If there
     is no entry in the database and we therefore have no new timeout
     value, tell the caller to wake up in 24 hours.  */
  return next_timeout == NO_TIMEOUT ? 24 * 60 * 60 : next_timeout - now;
}