Back to index

glibc  2.9
gconv_trans.c
Go to the documentation of this file.
00001 /* Transliteration using the locale's data.
00002    Copyright (C) 2000 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 #include <assert.h>
00022 #include <dlfcn.h>
00023 #include <search.h>
00024 #include <stdint.h>
00025 #include <string.h>
00026 #include <stdlib.h>
00027 
00028 #include <bits/libc-lock.h>
00029 #include "gconv_int.h"
00030 #include "../locale/localeinfo.h"
00031 
00032 
00033 int
00034 __gconv_transliterate (struct __gconv_step *step,
00035                      struct __gconv_step_data *step_data,
00036                      void *trans_data __attribute__ ((unused)),
00037                      const unsigned char *inbufstart,
00038                      const unsigned char **inbufp,
00039                      const unsigned char *inbufend,
00040                      unsigned char **outbufstart, size_t *irreversible)
00041 {
00042   /* Find out about the locale's transliteration.  */
00043   uint_fast32_t size;
00044   const uint32_t *from_idx;
00045   const uint32_t *from_tbl;
00046   const uint32_t *to_idx;
00047   const uint32_t *to_tbl;
00048   const uint32_t *winbuf;
00049   const uint32_t *winbufend;
00050   uint_fast32_t low;
00051   uint_fast32_t high;
00052 
00053   /* The input buffer.  There are actually 4-byte values.  */
00054   winbuf = (const uint32_t *) *inbufp;
00055   winbufend = (const uint32_t *) inbufend;
00056 
00057   __gconv_fct fct = step->__fct;
00058 #ifdef PTR_DEMANGLE
00059   if (step->__shlib_handle != NULL)
00060     PTR_DEMANGLE (fct);
00061 #endif
00062 
00063   /* If there is no transliteration information in the locale don't do
00064      anything and return the error.  */
00065   size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE);
00066   if (size == 0)
00067     goto no_rules;
00068 
00069   /* Get the rest of the values.  */
00070   from_idx =
00071     (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
00072   from_tbl =
00073     (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
00074   to_idx =
00075     (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
00076   to_tbl =
00077     (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
00078 
00079   /* Test whether there is enough input.  */
00080   if (winbuf + 1 > winbufend)
00081     return (winbuf == winbufend
00082            ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
00083 
00084   /* The array starting at FROM_IDX contains indeces to the string table
00085      in FROM_TBL.  The indeces are sorted wrt to the strings.  I.e., we
00086      are doing binary search.  */
00087   low = 0;
00088   high = size;
00089   while (low < high)
00090     {
00091       uint_fast32_t med = (low + high) / 2;
00092       uint32_t idx;
00093       int cnt;
00094 
00095       /* Compare the string at this index with the string at the current
00096         position in the input buffer.  */
00097       idx = from_idx[med];
00098       cnt = 0;
00099       do
00100        {
00101          if (from_tbl[idx + cnt] != winbuf[cnt])
00102            /* Does not match.  */
00103            break;
00104          ++cnt;
00105        }
00106       while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
00107 
00108       if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
00109        {
00110          /* Found a matching input sequence.  Now try to convert the
00111             possible replacements.  */
00112          uint32_t idx2 = to_idx[med];
00113 
00114          do
00115            {
00116              /* Determine length of replacement.  */
00117              uint_fast32_t len = 0;
00118              int res;
00119              const unsigned char *toinptr;
00120              unsigned char *outptr;
00121 
00122              while (to_tbl[idx2 + len] != L'\0')
00123               ++len;
00124 
00125              /* Try this input text.  */
00126              toinptr = (const unsigned char *) &to_tbl[idx2];
00127              outptr = *outbufstart;
00128              res = DL_CALL_FCT (fct,
00129                              (step, step_data, &toinptr,
00130                               (const unsigned char *) &to_tbl[idx2 + len],
00131                               &outptr, NULL, 0, 0));
00132              if (res != __GCONV_ILLEGAL_INPUT)
00133               {
00134                 /* If the conversion succeeds we have to increment the
00135                    input buffer.  */
00136                 if (res == __GCONV_EMPTY_INPUT)
00137                   {
00138                     *inbufp += cnt * sizeof (uint32_t);
00139                     ++*irreversible;
00140                     res = __GCONV_OK;
00141                   }
00142                 *outbufstart = outptr;
00143 
00144                 return res;
00145               }
00146 
00147              /* Next replacement.  */
00148              idx2 += len + 1;
00149            }
00150          while (to_tbl[idx2] != L'\0');
00151 
00152          /* Nothing found, continue searching.  */
00153        }
00154       else if (cnt > 0)
00155        /* This means that the input buffer contents matches a prefix of
00156           an entry.  Since we cannot match it unless we get more input,
00157           we will tell the caller about it.  */
00158        return __GCONV_INCOMPLETE_INPUT;
00159 
00160       if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
00161        low = med + 1;
00162       else
00163        high = med;
00164     }
00165 
00166  no_rules:
00167   /* Maybe the character is supposed to be ignored.  */
00168   if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0)
00169     {
00170       int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN);
00171       const uint32_t *ranges =
00172        (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE);
00173       const uint32_t wc = *(const uint32_t *) (*inbufp);
00174       int i;
00175 
00176       /* Test whether there is enough input.  */
00177       if (winbuf + 1 > winbufend)
00178        return (winbuf == winbufend
00179               ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
00180 
00181       for (i = 0; i < n; ranges += 3, ++i)
00182        if (ranges[0] <= wc && wc <= ranges[1]
00183            && (wc - ranges[0]) % ranges[2] == 0)
00184          {
00185            /* Matches the range.  Ignore it.  */
00186            *inbufp += 4;
00187            ++*irreversible;
00188            return __GCONV_OK;
00189          }
00190        else if (wc < ranges[0])
00191          /* There cannot be any other matching range since they are
00192              sorted.  */
00193          break;
00194     }
00195 
00196   /* One last chance: use the default replacement.  */
00197   if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0)
00198     {
00199       const uint32_t *default_missing = (const uint32_t *)
00200        _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING);
00201       const unsigned char *toinptr = (const unsigned char *) default_missing;
00202       uint32_t len = _NL_CURRENT_WORD (LC_CTYPE,
00203                                    _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN);
00204       unsigned char *outptr;
00205       int res;
00206 
00207       /* Test whether there is enough input.  */
00208       if (winbuf + 1 > winbufend)
00209        return (winbuf == winbufend
00210               ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
00211 
00212       outptr = *outbufstart;
00213       res = DL_CALL_FCT (fct,
00214                       (step, step_data, &toinptr,
00215                        (const unsigned char *) (default_missing + len),
00216                        &outptr, NULL, 0, 0));
00217 
00218       if (res != __GCONV_ILLEGAL_INPUT)
00219        {
00220          /* If the conversion succeeds we have to increment the
00221             input buffer.  */
00222          if (res == __GCONV_EMPTY_INPUT)
00223            {
00224              /* This worked but is not reversible.  */
00225              ++*irreversible;
00226              *inbufp += 4;
00227              res = __GCONV_OK;
00228            }
00229          *outbufstart = outptr;
00230 
00231          return res;
00232        }
00233     }
00234 
00235   /* Haven't found a match.  */
00236   return __GCONV_ILLEGAL_INPUT;
00237 }
00238 
00239 
00240 /* Structure to represent results of found (or not) transliteration
00241    modules.  */
00242 struct known_trans
00243 {
00244   /* This structure must remain the first member.  */
00245   struct trans_struct info;
00246 
00247   char *fname;
00248   void *handle;
00249   int open_count;
00250 };
00251 
00252 
00253 /* Tree with results of previous calls to __gconv_translit_find.  */
00254 static void *search_tree;
00255 
00256 /* We modify global data.   */
00257 __libc_lock_define_initialized (static, lock);
00258 
00259 
00260 /* Compare two transliteration entries.  */
00261 static int
00262 trans_compare (const void *p1, const void *p2)
00263 {
00264   const struct known_trans *s1 = (const struct known_trans *) p1;
00265   const struct known_trans *s2 = (const struct known_trans *) p2;
00266 
00267   return strcmp (s1->info.name, s2->info.name);
00268 }
00269 
00270 
00271 /* Open (maybe reopen) the module named in the struct.  Get the function
00272    and data structure pointers we need.  */
00273 static int
00274 open_translit (struct known_trans *trans)
00275 {
00276   __gconv_trans_query_fct queryfct;
00277 
00278   trans->handle = __libc_dlopen (trans->fname);
00279   if (trans->handle == NULL)
00280     /* Not available.  */
00281     return 1;
00282 
00283   /* Find the required symbol.  */
00284   queryfct = __libc_dlsym (trans->handle, "gconv_trans_context");
00285   if (queryfct == NULL)
00286     {
00287       /* We cannot live with that.  */
00288     close_and_out:
00289       __libc_dlclose (trans->handle);
00290       trans->handle = NULL;
00291       return 1;
00292     }
00293 
00294   /* Get the context.  */
00295   if (queryfct (trans->info.name, &trans->info.csnames, &trans->info.ncsnames)
00296       != 0)
00297     goto close_and_out;
00298 
00299   /* Of course we also have to have the actual function.  */
00300   trans->info.trans_fct = __libc_dlsym (trans->handle, "gconv_trans");
00301   if (trans->info.trans_fct == NULL)
00302     goto close_and_out;
00303 
00304   /* Now the optional functions.  */
00305   trans->info.trans_init_fct =
00306     __libc_dlsym (trans->handle, "gconv_trans_init");
00307   trans->info.trans_context_fct =
00308     __libc_dlsym (trans->handle, "gconv_trans_context");
00309   trans->info.trans_end_fct =
00310     __libc_dlsym (trans->handle, "gconv_trans_end");
00311 
00312   trans->open_count = 1;
00313 
00314   return 0;
00315 }
00316 
00317 
00318 int
00319 internal_function
00320 __gconv_translit_find (struct trans_struct *trans)
00321 {
00322   struct known_trans **found;
00323   const struct path_elem *runp;
00324   int res = 1;
00325 
00326   /* We have to have a name.  */
00327   assert (trans->name != NULL);
00328 
00329   /* Acquire the lock.  */
00330   __libc_lock_lock (lock);
00331 
00332   /* See whether we know this module already.  */
00333   found = __tfind (trans, &search_tree, trans_compare);
00334   if (found != NULL)
00335     {
00336       /* Is this module available?  */
00337       if ((*found)->handle != NULL)
00338        {
00339          /* Maybe we have to reopen the file.  */
00340          if ((*found)->handle != (void *) -1)
00341            /* The object is not unloaded.  */
00342            res = 0;
00343          else if (open_translit (*found) == 0)
00344            {
00345              /* Copy the data.  */
00346              *trans = (*found)->info;
00347              (*found)->open_count++;
00348              res = 0;
00349            }
00350        }
00351     }
00352   else
00353     {
00354       size_t name_len = strlen (trans->name) + 1;
00355       int need_so = 0;
00356       struct known_trans *newp;
00357 
00358       /* We have to continue looking for the module.  */
00359       if (__gconv_path_elem == NULL)
00360        __gconv_get_path ();
00361 
00362       /* See whether we have to append .so.  */
00363       if (name_len <= 4 || memcmp (&trans->name[name_len - 4], ".so", 3) != 0)
00364        need_so = 1;
00365 
00366       /* Create a new entry.  */
00367       newp = (struct known_trans *) malloc (sizeof (struct known_trans)
00368                                        + (__gconv_max_path_elem_len
00369                                           + name_len + 3)
00370                                        + name_len);
00371       if (newp != NULL)
00372        {
00373          char *cp;
00374 
00375          /* Clear the struct.  */
00376          memset (newp, '\0', sizeof (struct known_trans));
00377 
00378          /* Store a copy of the module name.  */
00379          newp->info.name = cp = (char *) (newp + 1);
00380          cp = __mempcpy (cp, trans->name, name_len);
00381 
00382          newp->fname = cp;
00383 
00384          /* Search in all the directories.  */
00385          for (runp = __gconv_path_elem; runp->name != NULL; ++runp)
00386            {
00387              cp = __mempcpy (__stpcpy ((char *) newp->fname, runp->name),
00388                            trans->name, name_len);
00389              if (need_so)
00390               memcpy (cp, ".so", sizeof (".so"));
00391 
00392              if (open_translit (newp) == 0)
00393               {
00394                 /* We found a module.  */
00395                 res = 0;
00396                 break;
00397               }
00398            }
00399 
00400          if (res)
00401            newp->fname = NULL;
00402 
00403          /* In any case we'll add the entry to our search tree.  */
00404          if (__tsearch (newp, &search_tree, trans_compare) == NULL)
00405            {
00406              /* Yickes, this should not happen.  Unload the object.  */
00407              res = 1;
00408              /* XXX unload here.  */
00409            }
00410        }
00411     }
00412 
00413   __libc_lock_unlock (lock);
00414 
00415   return res;
00416 }