Back to index

glibc  2.9
loadarchive.c
Go to the documentation of this file.
00001 /* Code to load locale data from the locale archive file.
00002    Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004 
00005    The GNU C Library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Lesser General Public
00007    License as published by the Free Software Foundation; either
00008    version 2.1 of the License, or (at your option) any later version.
00009 
00010    The GNU C Library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Lesser General Public License for more details.
00014 
00015    You should have received a copy of the GNU Lesser General Public
00016    License along with the GNU C Library; if not, write to the Free
00017    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00018    02111-1307 USA.  */
00019 
00020 #include <locale.h>
00021 #include <stddef.h>
00022 #include <stdlib.h>
00023 #include <stdbool.h>
00024 #include <errno.h>
00025 #include <assert.h>
00026 #include <string.h>
00027 #include <fcntl.h>
00028 #include <unistd.h>
00029 #include <sys/mman.h>
00030 #include <sys/stat.h>
00031 #include <sys/param.h>
00032 
00033 #include "localeinfo.h"
00034 #include "locarchive.h"
00035 #include <not-cancel.h>
00036 
00037 /* Define the hash function.  We define the function as static inline.  */
00038 #define compute_hashval static inline compute_hashval
00039 #define hashval_t uint32_t
00040 #include "hashval.h"
00041 #undef compute_hashval
00042 
00043 
00044 /* Name of the locale archive file.  */
00045 static const char archfname[] = LOCALEDIR "/locale-archive";
00046 
/* Number of bytes (2 MiB) mapped from the start of the archive on first
   use when the whole file is not mapped at once.  The window works best
   when it is big enough to hold the header together with the data of
   the first locale that gets loaded.  */
#define ARCHIVE_MAPPING_WINDOW     (2 * 1024 * 1024)
00050 
#if !defined MAP_COPY
/* Fall back to MAP_PRIVATE where MAP_COPY does not exist.  This is a
   weaker substitute: pages we have not looked at yet can still change
   underneath us and yield inconsistent data, so we depend on nobody
   modifying the system's live archive.  MAP_SHARED is not used even
   though the mappings are only ever PROT_READ, because the system
   cannot know that we will never call mprotect later and so would not
   have enough freedom to, e.g., let the on-disk file go away.  */
# define MAP_COPY MAP_PRIVATE
#endif
#if !defined MAP_FILE
/* Not every system provides this flag, and it is superfluous anyway.  */
# define MAP_FILE 0
#endif
00064 
/* One node per contiguous run of pages that has been mapped from the
   locale archive.  The nodes form a singly linked list.  */
struct archmapped
{
  void *ptr;                    /* Address at which the run is mapped.  */
  uint32_t from;                /* File offset of the first mapped byte.  */
  uint32_t len;                 /* Number of bytes covered by the mapping.  */
  struct archmapped *next;      /* Following node, or NULL at the end.  */
};

/* Head of the list of mappings; NULL until the archive is first used.  */
static struct archmapped *archmapped;
00074 
00075 /* This describes the mapping at the beginning of the file that contains
00076    the header data.  There could be data in the following partial page,
00077    so this is searched like any other.  Once the archive has been used,
00078    ARCHMAPPED points to this; if mapping the archive header failed,
00079    then headmap.ptr is null.  */
00080 static struct archmapped headmap;
00081 static struct stat64 archive_stat; /* stat of archive when header mapped.  */
00082 
00083 /* Record of locales that we have already loaded from the archive.  */
00084 struct locale_in_archive
00085 {
00086   struct locale_in_archive *next;
00087   char *name;
00088   struct locale_data *data[__LC_LAST];
00089 };
00090 static struct locale_in_archive *archloaded;
00091 
00092 
/* Local structure and subroutine of _nl_load_locale_from_archive, see
   below.  Each element describes the piece of the archive file that
   holds the data of one locale category.  */
struct range
{
  uint32_t from;        /* File offset of the category data.  */
  uint32_t len;         /* Length of the category data in bytes.  */
  int category;         /* Category number the data belongs to.  */
  void *result;         /* Set to NULL when the range is set up.  */
};

/* qsort comparison function ordering `struct range' elements by
   ascending FROM offset.  Returns a negative value, zero, or a positive
   value when *P1 starts before, at, or after *P2 respectively.

   The offsets are compared explicitly rather than subtracted: the
   difference of two 32-bit unsigned offsets converted to `int' has the
   wrong sign whenever the offsets are more than INT_MAX apart, which
   would make qsort produce a mis-ordered array.  */
static int
rangecmp (const void *p1, const void *p2)
{
  uint32_t lhs = ((const struct range *) p1)->from;
  uint32_t rhs = ((const struct range *) p2)->from;

  return (lhs > rhs) - (lhs < rhs);
}
00107 
00108 
00109 /* Calculate the amount of space needed for all the tables described
00110    by the given header.  Note we do not include the empty table space
00111    that has been preallocated in the file, so our mapping may not be
00112    large enough if localedef adds data to the file in place.  However,
00113    doing that would permute the header fields while we are accessing
00114    them and thus not be safe anyway, so we don't allow for that.  */
00115 static inline off_t
00116 calculate_head_size (const struct locarhead *h)
00117 {
00118   off_t namehash_end = (h->namehash_offset
00119                      + h->namehash_size * sizeof (struct namehashent));
00120   off_t string_end =  h->string_offset + h->string_used;
00121   off_t locrectab_end = (h->locrectab_offset
00122                       + h->locrectab_used * sizeof (struct locrecent));
00123   return MAX (namehash_end, MAX (string_end, locrectab_end));
00124 }
00125 
00126 
00127 /* Find the locale *NAMEP in the locale archive, and return the
00128    internalized data structure for its CATEGORY data.  If this locale has
00129    already been loaded from the archive, just returns the existing data
00130    structure.  If successful, sets *NAMEP to point directly into the mapped
00131    archive string table; that way, the next call can short-circuit strcmp.  */
00132 struct locale_data *
00133 internal_function
00134 _nl_load_locale_from_archive (int category, const char **namep)
00135 {
00136   const char *name = *namep;
00137   struct
00138   {
00139     void *addr;
00140     size_t len;
00141   } results[__LC_LAST];
00142   struct locale_in_archive *lia;
00143   struct locarhead *head;
00144   struct namehashent *namehashtab;
00145   struct locrecent *locrec;
00146   struct archmapped *mapped;
00147   struct archmapped *last;
00148   unsigned long int hval;
00149   size_t idx;
00150   size_t incr;
00151   struct range ranges[__LC_LAST - 1];
00152   int nranges;
00153   int cnt;
00154   size_t ps = __sysconf (_SC_PAGE_SIZE);
00155   int fd = -1;
00156 
00157   /* Check if we have already loaded this locale from the archive.
00158      If we previously loaded the locale but found bogons in the data,
00159      then we will have stored a null pointer to return here.  */
00160   for (lia = archloaded; lia != NULL; lia = lia->next)
00161     if (name == lia->name || !strcmp (name, lia->name))
00162       {
00163        *namep = lia->name;
00164        return lia->data[category];
00165       }
00166 
00167   {
00168     /* If the name contains a codeset, then we normalize the name before
00169        doing the lookup.  */
00170     const char *p = strchr (name, '.');
00171     if (p != NULL && p[1] != '@' && p[1] != '\0')
00172       {
00173        const char *rest = __strchrnul (++p, '@');
00174        const char *normalized_codeset = _nl_normalize_codeset (p, rest - p);
00175        if (normalized_codeset == NULL)    /* malloc failure */
00176          return NULL;
00177        if (strncmp (normalized_codeset, p, rest - p) != 0
00178            || normalized_codeset[rest - p] != '\0')
00179          {
00180            /* There is a normalized codeset name that is different from
00181               what was specified; reconstruct a new locale name using it.  */
00182            size_t normlen = strlen (normalized_codeset);
00183            size_t restlen = strlen (rest) + 1;
00184            char *newname = alloca (p - name + normlen + restlen);
00185            memcpy (__mempcpy (__mempcpy (newname, name, p - name),
00186                             normalized_codeset, normlen),
00187                   rest, restlen);
00188            name = newname;
00189          }
00190        free ((char *) normalized_codeset);
00191       }
00192   }
00193 
00194   /* Make sure the archive is loaded.  */
00195   if (archmapped == NULL)
00196     {
00197       void *result;
00198       size_t headsize, mapsize;
00199 
00200       /* We do this early as a sign that we have tried to open the archive.
00201         If headmap.ptr remains null, that's an indication that we tried
00202         and failed, so we won't try again.  */
00203       archmapped = &headmap;
00204 
00205       /* The archive has never been opened.  */
00206       fd = open_not_cancel_2 (archfname, O_RDONLY|O_LARGEFILE);
00207       if (fd < 0)
00208        /* Cannot open the archive, for whatever reason.  */
00209        return NULL;
00210 
00211       if (__fxstat64 (_STAT_VER, fd, &archive_stat) == -1)
00212        {
00213          /* stat failed, very strange.  */
00214        close_and_out:
00215          if (fd >= 0)
00216            close_not_cancel_no_status (fd);
00217          return NULL;
00218        }
00219 
00220 
00221       /* Map an initial window probably large enough to cover the header
00222         and the first locale's data.  With a large address space, we can
00223         just map the whole file and be sure everything is covered.  */
00224 
00225       mapsize = (sizeof (void *) > 4 ? archive_stat.st_size
00226                : MIN (archive_stat.st_size, ARCHIVE_MAPPING_WINDOW));
00227 
00228       result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, fd, 0);
00229       if (result == MAP_FAILED)
00230        goto close_and_out;
00231 
00232       /* Check whether the file is large enough for the sizes given in
00233         the header.  Theoretically an archive could be so large that
00234         just the header fails to fit in our initial mapping window.  */
00235       headsize = calculate_head_size ((const struct locarhead *) result);
00236       if (headsize > mapsize)
00237        {
00238          (void) __munmap (result, mapsize);
00239          if (sizeof (void *) > 4 || headsize > archive_stat.st_size)
00240            /* The file is not big enough for the header.  Bogus.  */
00241            goto close_and_out;
00242 
00243          /* Freakishly long header.  */
00244          /* XXX could use mremap when available */
00245          mapsize = (headsize + ps - 1) & ~(ps - 1);
00246          result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY,
00247                           fd, 0);
00248          if (result == MAP_FAILED)
00249            goto close_and_out;
00250        }
00251 
00252       if (sizeof (void *) > 4 || mapsize >= archive_stat.st_size)
00253        {
00254          /* We've mapped the whole file already, so we can be
00255             sure we won't need this file descriptor later.  */
00256          close_not_cancel_no_status (fd);
00257          fd = -1;
00258        }
00259 
00260       headmap.ptr = result;
00261       /* headmap.from already initialized to zero.  */
00262       headmap.len = mapsize;
00263     }
00264 
00265   /* If there is no archive or it cannot be loaded for some reason fail.  */
00266   if (__builtin_expect (headmap.ptr == NULL, 0))
00267     goto close_and_out;
00268 
00269   /* We have the archive available.  To find the name we first have to
00270      determine its hash value.  */
00271   hval = compute_hashval (name, strlen (name));
00272 
00273   head = headmap.ptr;
00274   namehashtab = (struct namehashent *) ((char *) head
00275                                    + head->namehash_offset);
00276 
00277   idx = hval % head->namehash_size;
00278   incr = 1 + hval % (head->namehash_size - 2);
00279 
00280   /* If the name_offset field is zero this means this is a
00281      deleted entry and therefore no entry can be found.  */
00282   while (1)
00283     {
00284       if (namehashtab[idx].name_offset == 0)
00285        /* Not found.  */
00286        goto close_and_out;
00287 
00288       if (namehashtab[idx].hashval == hval
00289          && strcmp (name, headmap.ptr + namehashtab[idx].name_offset) == 0)
00290        /* Found the entry.  */
00291        break;
00292 
00293       idx += incr;
00294       if (idx >= head->namehash_size)
00295        idx -= head->namehash_size;
00296     }
00297 
00298   /* We found an entry.  It might be a placeholder for a removed one.  */
00299   if (namehashtab[idx].locrec_offset == 0)
00300     goto close_and_out;
00301 
00302   locrec = (struct locrecent *) (headmap.ptr + namehashtab[idx].locrec_offset);
00303 
00304   if (sizeof (void *) > 4 /* || headmap.len == archive_stat.st_size */)
00305     {
00306       /* We already have the whole locale archive mapped in.  */
00307       assert (headmap.len == archive_stat.st_size);
00308       for (cnt = 0; cnt < __LC_LAST; ++cnt)
00309        if (cnt != LC_ALL)
00310          {
00311            if (locrec->record[cnt].offset + locrec->record[cnt].len
00312               > headmap.len)
00313              /* The archive locrectab contains bogus offsets.  */
00314              goto close_and_out;
00315            results[cnt].addr = headmap.ptr + locrec->record[cnt].offset;
00316            results[cnt].len = locrec->record[cnt].len;
00317          }
00318     }
00319   else
00320     {
00321       /* Get the offsets of the data files and sort them.  */
00322       for (cnt = nranges = 0; cnt < __LC_LAST; ++cnt)
00323        if (cnt != LC_ALL)
00324          {
00325            ranges[nranges].from = locrec->record[cnt].offset;
00326            ranges[nranges].len = locrec->record[cnt].len;
00327            ranges[nranges].category = cnt;
00328            ranges[nranges].result = NULL;
00329 
00330            ++nranges;
00331          }
00332 
00333       qsort (ranges, nranges, sizeof (ranges[0]), rangecmp);
00334 
00335       /* The information about mmap'd blocks is kept in a list.
00336         Skip over the blocks which are before the data we need.  */
00337       last = mapped = archmapped;
00338       for (cnt = 0; cnt < nranges; ++cnt)
00339        {
00340          int upper;
00341          size_t from;
00342          size_t to;
00343          void *addr;
00344          struct archmapped *newp;
00345 
00346          /* Determine whether the appropriate page is already mapped.  */
00347          while (mapped != NULL
00348                && (mapped->from + mapped->len
00349                    <= ranges[cnt].from + ranges[cnt].len))
00350            {
00351              last = mapped;
00352              mapped = mapped->next;
00353            }
00354 
00355          /* Do we have a match?  */
00356          if (mapped != NULL
00357              && mapped->from <= ranges[cnt].from
00358              && (ranges[cnt].from + ranges[cnt].len
00359                 <= mapped->from + mapped->len))
00360            {
00361              /* Yep, already loaded.  */
00362              results[ranges[cnt].category].addr = ((char *) mapped->ptr
00363                                               + ranges[cnt].from
00364                                               - mapped->from);
00365              results[ranges[cnt].category].len = ranges[cnt].len;
00366              continue;
00367            }
00368 
00369          /* Map the range with the locale data from the file.  We will
00370             try to cover as much of the locale as possible.  I.e., if the
00371             next category (next as in "next offset") is on the current or
00372             immediately following page we use it as well.  */
00373          assert (powerof2 (ps));
00374          from = ranges[cnt].from & ~(ps - 1);
00375          upper = cnt;
00376          do
00377            {
00378              to = ranges[upper].from + ranges[upper].len;
00379              if (to > (size_t) archive_stat.st_size)
00380               /* The archive locrectab contains bogus offsets.  */
00381               goto close_and_out;
00382              to = (to + ps - 1) & ~(ps - 1);
00383 
00384              /* If a range is already mmaped in, stop.   */
00385              if (mapped != NULL && ranges[upper].from >= mapped->from)
00386               break;
00387 
00388              ++upper;
00389            }
00390          /* Loop while still in contiguous pages. */
00391          while (upper < nranges && ranges[upper].from < to + ps);
00392 
00393          /* Open the file if it hasn't happened yet.  */
00394          if (fd == -1)
00395            {
00396              struct stat64 st;
00397              fd = open_not_cancel_2 (archfname, O_RDONLY|O_LARGEFILE);
00398              if (fd == -1)
00399               /* Cannot open the archive, for whatever reason.  */
00400               return NULL;
00401              /* Now verify we think this is really the same archive file
00402                we opened before.  If it has been changed we cannot trust
00403                the header we read previously.  */
00404              if (__fxstat64 (_STAT_VER, fd, &st) < 0
00405                 || st.st_size != archive_stat.st_size
00406                 || st.st_mtime != archive_stat.st_mtime
00407                 || st.st_dev != archive_stat.st_dev
00408                 || st.st_ino != archive_stat.st_ino)
00409               goto close_and_out;
00410            }
00411 
00412          /* Map the range from the archive.  */
00413          addr = __mmap64 (NULL, to - from, PROT_READ, MAP_FILE|MAP_COPY,
00414                         fd, from);
00415          if (addr == MAP_FAILED)
00416            goto close_and_out;
00417 
00418          /* Allocate a record for this mapping.  */
00419          newp = (struct archmapped *) malloc (sizeof (struct archmapped));
00420          if (newp == NULL)
00421            {
00422              (void) __munmap (addr, to - from);
00423              goto close_and_out;
00424            }
00425 
00426          /* And queue it.  */
00427          newp->ptr = addr;
00428          newp->from = from;
00429          newp->len = to - from;
00430          assert (last->next == mapped);
00431          newp->next = mapped;
00432          last->next = newp;
00433          last = newp;
00434 
00435          /* Determine the load addresses for the category data.  */
00436          do
00437            {
00438              assert (ranges[cnt].from >= from);
00439              results[ranges[cnt].category].addr = ((char *) addr
00440                                               + ranges[cnt].from - from);
00441              results[ranges[cnt].category].len = ranges[cnt].len;
00442            }
00443          while (++cnt < upper);
00444          --cnt;             /* The 'for' will increase 'cnt' again.  */
00445        }
00446     }
00447 
00448   /* We don't need the file descriptor any longer.  */
00449   if (fd >= 0)
00450     close_not_cancel_no_status (fd);
00451   fd = -1;
00452 
00453   /* We succeeded in mapping all the necessary regions of the archive.
00454      Now we need the expected data structures to point into the data.  */
00455 
00456   lia = malloc (sizeof *lia);
00457   if (__builtin_expect (lia == NULL, 0))
00458     return NULL;
00459 
00460   lia->name = strdup (*namep);
00461   if (__builtin_expect (lia->name == NULL, 0))
00462     {
00463       free (lia);
00464       return NULL;
00465     }
00466 
00467   lia->next = archloaded;
00468   archloaded = lia;
00469 
00470   for (cnt = 0; cnt < __LC_LAST; ++cnt)
00471     if (cnt != LC_ALL)
00472       {
00473        lia->data[cnt] = _nl_intern_locale_data (cnt,
00474                                            results[cnt].addr,
00475                                            results[cnt].len);
00476        if (__builtin_expect (lia->data[cnt] != NULL, 1))
00477          {
00478            /* _nl_intern_locale_data leaves us these fields to initialize.  */
00479            lia->data[cnt]->alloc = ld_archive;
00480            lia->data[cnt]->name = lia->name;
00481 
00482            /* We do this instead of bumping the count each time we return
00483               this data because the mappings stay around forever anyway
00484               and we might as well hold on to a little more memory and not
00485               have to rebuild it on the next lookup of the same thing.
00486               If we were to maintain the usage_count normally and let the
00487               structures be freed, we would have to remove the elements
00488               from archloaded too.  */
00489            lia->data[cnt]->usage_count = UNDELETABLE;
00490          }
00491       }
00492 
00493   *namep = lia->name;
00494   return lia->data[category];
00495 }
00496 
00497 void __libc_freeres_fn_section
00498 _nl_archive_subfreeres (void)
00499 {
00500   struct locale_in_archive *lia;
00501   struct archmapped *am;
00502 
00503   /* Toss out our cached locales.  */
00504   lia = archloaded;
00505   while (lia != NULL)
00506     {
00507       int category;
00508       struct locale_in_archive *dead = lia;
00509       lia = lia->next;
00510 
00511       free (dead->name);
00512       for (category = 0; category < __LC_LAST; ++category)
00513        if (category != LC_ALL)
00514          {
00515            /* _nl_unload_locale just does this free for the archive case.  */
00516            if (dead->data[category]->private.cleanup)
00517              (*dead->data[category]->private.cleanup) (dead->data[category]);
00518 
00519            free (dead->data[category]);
00520          }
00521       free (dead);
00522     }
00523   archloaded = NULL;
00524 
00525   if (archmapped != NULL)
00526     {
00527       /* Now toss all the mapping windows, which we know nothing is using any
00528         more because we just tossed all the locales that point into them.  */
00529 
00530       assert (archmapped == &headmap);
00531       archmapped = NULL;
00532       (void) __munmap (headmap.ptr, headmap.len);
00533       am = headmap.next;
00534       while (am != NULL)
00535        {
00536          struct archmapped *dead = am;
00537          am = am->next;
00538          (void) __munmap (dead->ptr, dead->len);
00539          free (dead);
00540        }
00541     }
00542 }