Back to index

glibc  2.9
gconv_conf.c
Go to the documentation of this file.
00001 /* Handle configuration data.
00002    Copyright (C) 1997-2003, 2005, 2006 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 #include <assert.h>
00022 #include <ctype.h>
00023 #include <errno.h>
00024 #include <limits.h>
00025 #include <locale.h>
00026 #include <search.h>
00027 #include <stddef.h>
00028 #include <stdio.h>
00029 #include <stdio_ext.h>
00030 #include <stdlib.h>
00031 #include <string.h>
00032 #include <unistd.h>
00033 #include <sys/param.h>
00034 
00035 #include <bits/libc-lock.h>
00036 #include <gconv_int.h>
00037 
00038 
/* This is the default path where we look for module lists.  GCONV_PATH
   is not defined in this file; __gconv_get_path asserts that the value
   starts with a slash.  */
static const char default_gconv_path[] = GCONV_PATH;

/* The path elements, as determined by the __gconv_get_path function.
   All path elements end in a slash.  The array is terminated by an
   entry whose name is NULL.  */
struct path_elem *__gconv_path_elem;
/* Maximum length of a single path element in __gconv_path_elem.  */
size_t __gconv_max_path_elem_len;

/* We use the following struct if we couldn't allocate memory.  Its NULL
   name makes it look like an already terminated, empty list.  */
static const struct path_elem empty_path_elem = { NULL, 0 };

/* Name of the file containing the module information in the directories
   along the path.  */
static const char gconv_conf_filename[] = "gconv-modules";

/* Filename extension for the modules.  add_module appends it to module
   file names that do not already end in it.  */
#ifndef MODULE_EXT
# define MODULE_EXT ".so"
#endif
static const char gconv_module_ext[] = MODULE_EXT;
00060 
/* We have a few builtin transformations.  The table is generated by
   including gconv_builtin.h with BUILTIN_TRANSFORMATION expanding to one
   initializer per entry; BUILTIN_ALIAS expands to nothing here.  Only
   From, To, Cost and Name are used; the remaining macro arguments are
   ignored.  cost_lo is set to INT_MAX for every builtin entry.  */
static struct gconv_module builtin_modules[] =
{
#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
                            MinF, MaxF, MinT, MaxT) \
  {                                                                  \
    .from_string = From,                                             \
    .to_string = To,                                                 \
    .cost_hi = Cost,                                                 \
    .cost_lo = INT_MAX,                                                     \
    .module_name = Name                                                     \
  },
#define BUILTIN_ALIAS(From, To)

#include "gconv_builtin.h"

#undef BUILTIN_TRANSFORMATION
#undef BUILTIN_ALIAS
};
00080 
/* Aliases for the builtin conversions, stored as one string literal made
   of consecutive NUL-terminated `from', `to' pairs.  Here
   BUILTIN_TRANSFORMATION expands to nothing and every BUILTIN_ALIAS
   contributes one pair.  __gconv_read_conf walks the pairs and stops at
   the first pair that would start with a NUL byte.  */
static const char builtin_aliases[] =
{
#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
                            MinF, MaxF, MinT, MaxT)
#define BUILTIN_ALIAS(From, To) From "\0" To "\0"

#include "gconv_builtin.h"

#undef BUILTIN_TRANSFORMATION
#undef BUILTIN_ALIAS
};
00092 
/* When built with USE_IN_LIBIO, line reading in read_conf_file goes
   through _IO_getdelim.  */
#ifdef USE_IN_LIBIO
# include <libio/libioP.h>
# define __getdelim(line, len, c, fp) _IO_getdelim (line, len, c, fp)
#endif


/* Value of the GCONV_PATH environment variable.  It is not assigned in
   the visible part of this file; __gconv_get_path treats NULL as "no
   user-defined path".  */
const char *__gconv_path_envvar;
00101 
00102 
00103 /* Test whether there is already a matching module known.  */
00104 static int
00105 internal_function
00106 detect_conflict (const char *alias)
00107 {
00108   struct gconv_module *node = __gconv_modules_db;
00109 
00110   while (node != NULL)
00111     {
00112       int cmpres = strcmp (alias, node->from_string);
00113 
00114       if (cmpres == 0)
00115        /* We have a conflict.  */
00116        return 1;
00117       else if (cmpres < 0)
00118        node = node->left;
00119       else
00120        node = node->right;
00121     }
00122 
00123   return node != NULL;
00124 }
00125 
00126 
00127 /* The actual code to add aliases.  */
00128 static void
00129 add_alias2 (const char *from, const char *to, const char *wp, void *modules)
00130 {
00131   /* Test whether this alias conflicts with any available module.  */
00132   if (detect_conflict (from))
00133     /* It does conflict, don't add the alias.  */
00134     return;
00135 
00136   struct gconv_alias *new_alias = (struct gconv_alias *)
00137     malloc (sizeof (struct gconv_alias) + (wp - from));
00138   if (new_alias != NULL)
00139     {
00140       void **inserted;
00141 
00142       new_alias->fromname = memcpy ((char *) new_alias
00143                                 + sizeof (struct gconv_alias),
00144                                 from, wp - from);
00145       new_alias->toname = new_alias->fromname + (to - from);
00146 
00147       inserted = (void **) __tsearch (new_alias, &__gconv_alias_db,
00148                                   __gconv_alias_compare);
00149       if (inserted == NULL || *inserted != new_alias)
00150        /* Something went wrong, free this entry.  */
00151        free (new_alias);
00152     }
00153 }
00154 
00155 
00156 /* Add new alias.  */
00157 static void
00158 add_alias (char *rp, void *modules)
00159 {
00160   /* We now expect two more string.  The strings are normalized
00161      (converted to UPPER case) and strored in the alias database.  */
00162   char *from, *to, *wp;
00163 
00164   while (__isspace_l (*rp, _nl_C_locobj_ptr))
00165     ++rp;
00166   from = wp = rp;
00167   while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
00168     *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr);
00169   if (*rp == '\0')
00170     /* There is no `to' string on the line.  Ignore it.  */
00171     return;
00172   *wp++ = '\0';
00173   to = ++rp;
00174   while (__isspace_l (*rp, _nl_C_locobj_ptr))
00175     ++rp;
00176   while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
00177     *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr);
00178   if (to == wp)
00179     /* No `to' string, ignore the line.  */
00180     return;
00181   *wp++ = '\0';
00182 
00183   add_alias2 (from, to, wp, modules);
00184 }
00185 
00186 
00187 /* Insert a data structure for a new module in the search tree.  */
00188 static void
00189 internal_function
00190 insert_module (struct gconv_module *newp, int tobefreed)
00191 {
00192   struct gconv_module **rootp = &__gconv_modules_db;
00193 
00194   while (*rootp != NULL)
00195     {
00196       struct gconv_module *root = *rootp;
00197       int cmpres;
00198 
00199       cmpres = strcmp (newp->from_string, root->from_string);
00200       if (cmpres == 0)
00201        {
00202          /* Both strings are identical.  Insert the string at the
00203             end of the `same' list if it is not already there.  */
00204          while (strcmp (newp->from_string, root->from_string) != 0
00205                || strcmp (newp->to_string, root->to_string) != 0)
00206            {
00207              rootp = &root->same;
00208              root = *rootp;
00209              if (root == NULL)
00210               break;
00211            }
00212 
00213          if (root != NULL)
00214            {
00215              /* This is a no new conversion.  But maybe the cost is
00216                better.  */
00217              if (newp->cost_hi < root->cost_hi
00218                 || (newp->cost_hi == root->cost_hi
00219                     && newp->cost_lo < root->cost_lo))
00220               {
00221                 newp->left = root->left;
00222                 newp->right = root->right;
00223                 newp->same = root->same;
00224                 *rootp = newp;
00225 
00226                 free (root);
00227               }
00228              else if (tobefreed)
00229               free (newp);
00230              return;
00231            }
00232 
00233          break;
00234        }
00235       else if (cmpres < 0)
00236        rootp = &root->left;
00237       else
00238        rootp = &root->right;
00239     }
00240 
00241   /* Plug in the new node here.  */
00242   *rootp = newp;
00243 }
00244 
00245 
00246 /* Add new module.  */
00247 static void
00248 internal_function
00249 add_module (char *rp, const char *directory, size_t dir_len, void **modules,
00250            size_t *nmodules, int modcounter)
00251 {
00252   /* We expect now
00253      1. `from' name
00254      2. `to' name
00255      3. filename of the module
00256      4. an optional cost value
00257   */
00258   struct gconv_alias fake_alias;
00259   struct gconv_module *new_module;
00260   char *from, *to, *module, *wp;
00261   int need_ext;
00262   int cost_hi;
00263 
00264   while (__isspace_l (*rp, _nl_C_locobj_ptr))
00265     ++rp;
00266   from = rp;
00267   while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
00268     {
00269       *rp = __toupper_l (*rp, _nl_C_locobj_ptr);
00270       ++rp;
00271     }
00272   if (*rp == '\0')
00273     return;
00274   *rp++ = '\0';
00275   to = wp = rp;
00276   while (__isspace_l (*rp, _nl_C_locobj_ptr))
00277     ++rp;
00278   while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
00279     *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr);
00280   if (*rp == '\0')
00281     return;
00282   *wp++ = '\0';
00283   do
00284     ++rp;
00285   while (__isspace_l (*rp, _nl_C_locobj_ptr));
00286   module = wp;
00287   while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
00288     *wp++ = *rp++;
00289   if (*rp == '\0')
00290     {
00291       /* There is no cost, use one by default.  */
00292       *wp++ = '\0';
00293       cost_hi = 1;
00294     }
00295   else
00296     {
00297       /* There might be a cost value.  */
00298       char *endp;
00299 
00300       *wp++ = '\0';
00301       cost_hi = strtol (rp, &endp, 10);
00302       if (rp == endp || cost_hi < 1)
00303        /* No useful information.  */
00304        cost_hi = 1;
00305     }
00306 
00307   if (module[0] == '\0')
00308     /* No module name given.  */
00309     return;
00310   if (module[0] == '/')
00311     dir_len = 0;
00312 
00313   /* See whether we must add the ending.  */
00314   need_ext = 0;
00315   if (wp - module < (ptrdiff_t) sizeof (gconv_module_ext)
00316       || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext,
00317                sizeof (gconv_module_ext)) != 0)
00318     /* We must add the module extension.  */
00319     need_ext = sizeof (gconv_module_ext) - 1;
00320 
00321   /* See whether we have already an alias with this name defined.  */
00322   fake_alias.fromname = strndupa (from, to - from);
00323 
00324   if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) != NULL)
00325     /* This module duplicates an alias.  */
00326     return;
00327 
00328   new_module = (struct gconv_module *) calloc (1,
00329                                           sizeof (struct gconv_module)
00330                                           + (wp - from)
00331                                           + dir_len + need_ext);
00332   if (new_module != NULL)
00333     {
00334       char *tmp;
00335 
00336       new_module->from_string = tmp = (char *) (new_module + 1);
00337       tmp = __mempcpy (tmp, from, to - from);
00338 
00339       new_module->to_string = tmp;
00340       tmp = __mempcpy (tmp, to, module - to);
00341 
00342       new_module->cost_hi = cost_hi;
00343       new_module->cost_lo = modcounter;
00344 
00345       new_module->module_name = tmp;
00346 
00347       if (dir_len != 0)
00348        tmp = __mempcpy (tmp, directory, dir_len);
00349 
00350       tmp = __mempcpy (tmp, module, wp - module);
00351 
00352       if (need_ext)
00353        memcpy (tmp - 1, gconv_module_ext, sizeof (gconv_module_ext));
00354 
00355       /* Now insert the new module data structure in our search tree.  */
00356       insert_module (new_module, 1);
00357     }
00358 }
00359 
00360 
00361 /* Read the next configuration file.  */
00362 static void
00363 internal_function
00364 read_conf_file (const char *filename, const char *directory, size_t dir_len,
00365               void **modules, size_t *nmodules)
00366 {
00367   /* Note the file is opened with cancellation in the I/O functions
00368      disabled.  */
00369   FILE *fp = fopen (filename, "rc");
00370   char *line = NULL;
00371   size_t line_len = 0;
00372   static int modcounter;
00373 
00374   /* Don't complain if a file is not present or readable, simply silently
00375      ignore it.  */
00376   if (fp == NULL)
00377     return;
00378 
00379   /* No threads reading from this stream.  */
00380   __fsetlocking (fp, FSETLOCKING_BYCALLER);
00381 
00382   /* Process the known entries of the file.  Comments start with `#' and
00383      end with the end of the line.  Empty lines are ignored.  */
00384   while (!feof_unlocked (fp))
00385     {
00386       char *rp, *endp, *word;
00387       ssize_t n = __getdelim (&line, &line_len, '\n', fp);
00388       if (n < 0)
00389        /* An error occurred.  */
00390        break;
00391 
00392       rp = line;
00393       /* Terminate the line (excluding comments or newline) by an NUL byte
00394         to simplify the following code.  */
00395       endp = strchr (rp, '#');
00396       if (endp != NULL)
00397        *endp = '\0';
00398       else
00399        if (rp[n - 1] == '\n')
00400          rp[n - 1] = '\0';
00401 
00402       while (__isspace_l (*rp, _nl_C_locobj_ptr))
00403        ++rp;
00404 
00405       /* If this is an empty line go on with the next one.  */
00406       if (rp == endp)
00407        continue;
00408 
00409       word = rp;
00410       while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr))
00411        ++rp;
00412 
00413       if (rp - word == sizeof ("alias") - 1
00414          && memcmp (word, "alias", sizeof ("alias") - 1) == 0)
00415        add_alias (rp, *modules);
00416       else if (rp - word == sizeof ("module") - 1
00417               && memcmp (word, "module", sizeof ("module") - 1) == 0)
00418        add_module (rp, directory, dir_len, modules, nmodules, modcounter++);
00419       /* else */
00420        /* Otherwise ignore the line.  */
00421     }
00422 
00423   free (line);
00424 
00425   fclose (fp);
00426 }
00427 
00428 
00429 /* Determine the directories we are looking for data in.  */
00430 void
00431 internal_function
00432 __gconv_get_path (void)
00433 {
00434   struct path_elem *result;
00435   __libc_lock_define_initialized (static, lock);
00436 
00437   __libc_lock_lock (lock);
00438 
00439   /* Make sure there wasn't a second thread doing it already.  */
00440   result = (struct path_elem *) __gconv_path_elem;
00441   if (result == NULL)
00442     {
00443       /* Determine the complete path first.  */
00444       char *gconv_path;
00445       size_t gconv_path_len;
00446       char *elem;
00447       char *oldp;
00448       char *cp;
00449       int nelems;
00450       char *cwd;
00451       size_t cwdlen;
00452 
00453       if (__gconv_path_envvar == NULL)
00454        {
00455          /* No user-defined path.  Make a modifiable copy of the
00456             default path.  */
00457          gconv_path = strdupa (default_gconv_path);
00458          gconv_path_len = sizeof (default_gconv_path);
00459          cwd = NULL;
00460          cwdlen = 0;
00461        }
00462       else
00463        {
00464          /* Append the default path to the user-defined path.  */
00465          size_t user_len = strlen (__gconv_path_envvar);
00466 
00467          gconv_path_len = user_len + 1 + sizeof (default_gconv_path);
00468          gconv_path = alloca (gconv_path_len);
00469          __mempcpy (__mempcpy (__mempcpy (gconv_path, __gconv_path_envvar,
00470                                       user_len),
00471                             ":", 1),
00472                    default_gconv_path, sizeof (default_gconv_path));
00473          cwd = __getcwd (NULL, 0);
00474          cwdlen = strlen (cwd);
00475        }
00476       assert (default_gconv_path[0] == '/');
00477 
00478       /* In a first pass we calculate the number of elements.  */
00479       oldp = NULL;
00480       cp = strchr (gconv_path, ':');
00481       nelems = 1;
00482       while (cp != NULL)
00483        {
00484          if (cp != oldp + 1)
00485            ++nelems;
00486          oldp = cp;
00487          cp =  strchr (cp + 1, ':');
00488        }
00489 
00490       /* Allocate the memory for the result.  */
00491       result = (struct path_elem *) malloc ((nelems + 1)
00492                                        * sizeof (struct path_elem)
00493                                        + gconv_path_len + nelems
00494                                        + (nelems - 1) * (cwdlen + 1));
00495       if (result != NULL)
00496        {
00497          char *strspace = (char *) &result[nelems + 1];
00498          int n = 0;
00499 
00500          /* Separate the individual parts.  */
00501          __gconv_max_path_elem_len = 0;
00502          elem = __strtok_r (gconv_path, ":", &gconv_path);
00503          assert (elem != NULL);
00504          do
00505            {
00506              result[n].name = strspace;
00507              if (elem[0] != '/')
00508               {
00509                 assert (cwd != NULL);
00510                 strspace = __mempcpy (strspace, cwd, cwdlen);
00511                 *strspace++ = '/';
00512               }
00513              strspace = __stpcpy (strspace, elem);
00514              if (strspace[-1] != '/')
00515               *strspace++ = '/';
00516 
00517              result[n].len = strspace - result[n].name;
00518              if (result[n].len > __gconv_max_path_elem_len)
00519               __gconv_max_path_elem_len = result[n].len;
00520 
00521              *strspace++ = '\0';
00522              ++n;
00523            }
00524          while ((elem = __strtok_r (NULL, ":", &gconv_path)) != NULL);
00525 
00526          result[n].name = NULL;
00527          result[n].len = 0;
00528        }
00529 
00530       __gconv_path_elem = result ?: (struct path_elem *) &empty_path_elem;
00531 
00532       free (cwd);
00533     }
00534 
00535   __libc_lock_unlock (lock);
00536 }
00537 
00538 
00539 /* Read all configuration files found in the user-specified and the default
00540    path.  */
00541 void
00542 attribute_hidden
00543 __gconv_read_conf (void)
00544 {
00545   void *modules = NULL;
00546   size_t nmodules = 0;
00547   int save_errno = errno;
00548   size_t cnt;
00549 
00550   /* First see whether we should use the cache.  */
00551   if (__gconv_load_cache () == 0)
00552     {
00553       /* Yes, we are done.  */
00554       __set_errno (save_errno);
00555       return;
00556     }
00557 
00558 #ifndef STATIC_GCONV
00559   /* Find out where we have to look.  */
00560   if (__gconv_path_elem == NULL)
00561     __gconv_get_path ();
00562 
00563   for (cnt = 0; __gconv_path_elem[cnt].name != NULL; ++cnt)
00564     {
00565       const char *elem = __gconv_path_elem[cnt].name;
00566       size_t elem_len = __gconv_path_elem[cnt].len;
00567       char *filename;
00568 
00569       /* No slash needs to be inserted between elem and gconv_conf_filename;
00570         elem already ends in a slash.  */
00571       filename = alloca (elem_len + sizeof (gconv_conf_filename));
00572       __mempcpy (__mempcpy (filename, elem, elem_len),
00573                gconv_conf_filename, sizeof (gconv_conf_filename));
00574 
00575       /* Read the next configuration file.  */
00576       read_conf_file (filename, elem, elem_len, &modules, &nmodules);
00577     }
00578 #endif
00579 
00580   /* Add the internal modules.  */
00581   for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]);
00582        ++cnt)
00583     {
00584       struct gconv_alias fake_alias;
00585 
00586       fake_alias.fromname = (char *) builtin_modules[cnt].from_string;
00587 
00588       if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare)
00589          != NULL)
00590        /* It'll conflict so don't add it.  */
00591        continue;
00592 
00593       insert_module (&builtin_modules[cnt], 0);
00594     }
00595 
00596   /* Add aliases for builtin conversions.  */
00597   const char *cp = builtin_aliases;
00598   do
00599     {
00600       const char *from = cp;
00601       const char *to = __rawmemchr (from, '\0') + 1;
00602       cp = __rawmemchr (to, '\0') + 1;
00603 
00604       add_alias2 (from, to, cp, modules);
00605     }
00606   while (*cp != '\0');
00607 
00608   /* Restore the error number.  */
00609   __set_errno (save_errno);
00610 }
00611 
00612 
00613 
00614 /* Free all resources if necessary.  */
00615 libc_freeres_fn (free_mem)
00616 {
00617   if (__gconv_path_elem != NULL && __gconv_path_elem != &empty_path_elem)
00618     free ((void *) __gconv_path_elem);
00619 }