Back to index

glibc  2.9
stringprep.c
Go to the documentation of this file.
00001 /* stringprep.c --- Core stringprep implementation.
00002  * Copyright (C) 2002, 2003, 2004  Simon Josefsson
00003  *
00004  * This file is part of GNU Libidn.
00005  *
00006  * GNU Libidn is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * GNU Libidn is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with GNU Libidn; if not, write to the Free Software
00018  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019  *
00020  */
00021 
00022 #if HAVE_CONFIG_H
00023 # include "config.h"
00024 #endif
00025 
00026 #include <stdlib.h>
00027 #include <string.h>
00028 
00029 #include "stringprep.h"
00030 
00031 static ssize_t
00032 stringprep_find_character_in_table (uint32_t ucs4,
00033                                 const Stringprep_table_element * table)
00034 {
00035   ssize_t i;
00036 
00037   /* This is where typical uses of Libidn spends very close to all CPU
00038      time and causes most cache misses.  One could easily do a binary
00039      search instead.  Before rewriting this, I want hard evidence this
00040      slowness is at all relevant in typical applications.  (I don't
00041      dispute optimization may improve matters significantly, I'm
00042      mostly interested in having someone give real-world benchmark on
00043      the impact of libidn.) */
00044 
00045   for (i = 0; table[i].start || table[i].end; i++)
00046     if (ucs4 >= table[i].start &&
00047        ucs4 <= (table[i].end ? table[i].end : table[i].start))
00048       return i;
00049 
00050   return -1;
00051 }
00052 
00053 static ssize_t
00054 stringprep_find_string_in_table (uint32_t * ucs4,
00055                              size_t ucs4len,
00056                              size_t * tablepos,
00057                              const Stringprep_table_element * table)
00058 {
00059   size_t j;
00060   ssize_t pos;
00061 
00062   for (j = 0; j < ucs4len; j++)
00063     if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1)
00064       {
00065        if (tablepos)
00066          *tablepos = pos;
00067        return j;
00068       }
00069 
00070   return -1;
00071 }
00072 
00073 static int
00074 stringprep_apply_table_to_string (uint32_t * ucs4,
00075                               size_t * ucs4len,
00076                               size_t maxucs4len,
00077                               const Stringprep_table_element * table)
00078 {
00079   ssize_t pos;
00080   size_t i, maplen;
00081 
00082   while ((pos = stringprep_find_string_in_table (ucs4, *ucs4len,
00083                                            &i, table)) != -1)
00084     {
00085       for (maplen = STRINGPREP_MAX_MAP_CHARS;
00086           maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
00087        ;
00088 
00089       if (*ucs4len - 1 + maplen >= maxucs4len)
00090        return STRINGPREP_TOO_SMALL_BUFFER;
00091 
00092       memmove (&ucs4[pos + maplen], &ucs4[pos + 1],
00093               sizeof (uint32_t) * (*ucs4len - pos - 1));
00094       memcpy (&ucs4[pos], table[i].map, sizeof (uint32_t) * maplen);
00095       *ucs4len = *ucs4len - 1 + maplen;
00096     }
00097 
00098   return STRINGPREP_OK;
00099 }
00100 
/* INVERTED(x): X converted to unsigned long with the most significant
   bit masked off.  */
#define INVERTED(x) ((x) & ((~0UL) >> 1))

/* UNAPPLICAPLEFLAGS(flags, profileflags): non-zero when either
   - INVERTED(profileflags) is zero, PROFILEFLAGS is non-zero and
     shares no bit with FLAGS, or
   - INVERTED(profileflags) is non-zero and PROFILEFLAGS shares at
     least one bit with FLAGS.
   Both arguments are evaluated more than once, so they must be free
   of side effects.  Every use is parenthesized so that arguments
   containing low-precedence operators (e.g. `a | b') expand
   correctly.  */
#define UNAPPLICAPLEFLAGS(flags, profileflags) \
  ((!INVERTED(profileflags) && !((profileflags) & (flags)) && (profileflags)) || \
   ( INVERTED(profileflags) && ((profileflags) & (flags))))
00105 
00136 int
00137 stringprep_4i (uint32_t * ucs4, size_t * len, size_t maxucs4len,
00138               Stringprep_profile_flags flags,
00139               const Stringprep_profile * profile)
00140 {
00141   size_t i, j;
00142   ssize_t k;
00143   size_t ucs4len = *len;
00144   int rc;
00145 
00146   for (i = 0; profile[i].operation; i++)
00147     {
00148       switch (profile[i].operation)
00149        {
00150        case STRINGPREP_NFKC:
00151          {
00152            uint32_t *q = 0;
00153 
00154            if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
00155              break;
00156 
00157            if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
00158              /* Profile requires NFKC, but callee asked for no NFKC. */
00159              return STRINGPREP_FLAG_ERROR;
00160 
00161            q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
00162            if (!q)
00163              return STRINGPREP_NFKC_FAILED;
00164 
00165            for (ucs4len = 0; q[ucs4len]; ucs4len++)
00166              ;
00167 
00168            if (ucs4len >= maxucs4len)
00169              {
00170               free (q);
00171               return STRINGPREP_TOO_SMALL_BUFFER;
00172              }
00173 
00174            memcpy (ucs4, q, ucs4len * sizeof (ucs4[0]));
00175 
00176            free (q);
00177          }
00178          break;
00179 
00180        case STRINGPREP_PROHIBIT_TABLE:
00181          k = stringprep_find_string_in_table (ucs4, ucs4len,
00182                                           NULL, profile[i].table);
00183          if (k != -1)
00184            return STRINGPREP_CONTAINS_PROHIBITED;
00185          break;
00186 
00187        case STRINGPREP_UNASSIGNED_TABLE:
00188          if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
00189            break;
00190          if (flags & STRINGPREP_NO_UNASSIGNED)
00191            {
00192              k = stringprep_find_string_in_table
00193               (ucs4, ucs4len, NULL, profile[i].table);
00194              if (k != -1)
00195               return STRINGPREP_CONTAINS_UNASSIGNED;
00196            }
00197          break;
00198 
00199        case STRINGPREP_MAP_TABLE:
00200          if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
00201            break;
00202          rc = stringprep_apply_table_to_string
00203            (ucs4, &ucs4len, maxucs4len, profile[i].table);
00204          if (rc != STRINGPREP_OK)
00205            return rc;
00206          break;
00207 
00208        case STRINGPREP_BIDI_PROHIBIT_TABLE:
00209        case STRINGPREP_BIDI_RAL_TABLE:
00210        case STRINGPREP_BIDI_L_TABLE:
00211          break;
00212 
00213        case STRINGPREP_BIDI:
00214          {
00215            int done_prohibited = 0;
00216            int done_ral = 0;
00217            int done_l = 0;
00218            int contains_ral = -1;
00219            int contains_l = -1;
00220 
00221            for (j = 0; profile[j].operation; j++)
00222              if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
00223               {
00224                 done_prohibited = 1;
00225                 k = stringprep_find_string_in_table (ucs4, ucs4len,
00226                                                  NULL,
00227                                                  profile[j].table);
00228                 if (k != -1)
00229                   return STRINGPREP_BIDI_CONTAINS_PROHIBITED;
00230               }
00231              else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
00232               {
00233                 done_ral = 1;
00234                 if (stringprep_find_string_in_table
00235                     (ucs4, ucs4len, NULL, profile[j].table) != -1)
00236                   contains_ral = j;
00237               }
00238              else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
00239               {
00240                 done_l = 1;
00241                 if (stringprep_find_string_in_table
00242                     (ucs4, ucs4len, NULL, profile[j].table) != -1)
00243                   contains_l = j;
00244               }
00245 
00246            if (!done_prohibited || !done_ral || !done_l)
00247              return STRINGPREP_PROFILE_ERROR;
00248 
00249            if (contains_ral != -1 && contains_l != -1)
00250              return STRINGPREP_BIDI_BOTH_L_AND_RAL;
00251 
00252            if (contains_ral != -1)
00253              {
00254               if (!(stringprep_find_character_in_table
00255                     (ucs4[0], profile[contains_ral].table) != -1 &&
00256                     stringprep_find_character_in_table
00257                     (ucs4[ucs4len - 1], profile[contains_ral].table) != -1))
00258                 return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
00259              }
00260          }
00261          break;
00262 
00263        default:
00264          return STRINGPREP_PROFILE_ERROR;
00265          break;
00266        }
00267     }
00268 
00269   *len = ucs4len;
00270 
00271   return STRINGPREP_OK;
00272 }
00273 
00274 static int
00275 stringprep_4zi_1 (uint32_t * ucs4, size_t ucs4len, size_t maxucs4len,
00276                 Stringprep_profile_flags flags,
00277                 const Stringprep_profile * profile)
00278 {
00279   int rc;
00280 
00281   rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
00282   if (rc != STRINGPREP_OK)
00283     return rc;
00284 
00285   if (ucs4len >= maxucs4len)
00286     return STRINGPREP_TOO_SMALL_BUFFER;
00287 
00288   ucs4[ucs4len] = 0;
00289 
00290   return STRINGPREP_OK;
00291 }
00292 
00316 int
00317 stringprep_4zi (uint32_t * ucs4, size_t maxucs4len,
00318               Stringprep_profile_flags flags,
00319               const Stringprep_profile * profile)
00320 {
00321   size_t ucs4len;
00322 
00323   for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++)
00324     ;
00325 
00326   return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile);
00327 }
00328 
00356 int
00357 stringprep (char *in,
00358            size_t maxlen,
00359            Stringprep_profile_flags flags,
00360            const Stringprep_profile * profile)
00361 {
00362   int rc;
00363   char *utf8 = NULL;
00364   uint32_t *ucs4 = NULL;
00365   size_t ucs4len, maxucs4len, adducs4len = 50;
00366 
00367   do
00368     {
00369       free (ucs4);
00370       ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
00371       maxucs4len = ucs4len + adducs4len;
00372       uint32_t *newp = realloc (ucs4, maxucs4len * sizeof (uint32_t));
00373       if (!newp)
00374        {
00375          free (ucs4);
00376          return STRINGPREP_MALLOC_ERROR;
00377        }
00378       ucs4 = newp;
00379 
00380       rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
00381       adducs4len += 50;
00382     }
00383   while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00384   if (rc != STRINGPREP_OK)
00385     {
00386       free (ucs4);
00387       return rc;
00388     }
00389 
00390   utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
00391   free (ucs4);
00392   if (!utf8)
00393     return STRINGPREP_MALLOC_ERROR;
00394 
00395   if (strlen (utf8) >= maxlen)
00396     {
00397       free (utf8);
00398       return STRINGPREP_TOO_SMALL_BUFFER;
00399     }
00400 
00401   strcpy (in, utf8);        /* flawfinder: ignore */
00402 
00403   free (utf8);
00404 
00405   return STRINGPREP_OK;
00406 }
00407 
00432 int
00433 stringprep_profile (const char *in,
00434                   char **out,
00435                   const char *profile, Stringprep_profile_flags flags)
00436 {
00437   const Stringprep_profiles *p;
00438   char *str = NULL;
00439   size_t len = strlen (in) + 1;
00440   int rc;
00441 
00442   for (p = &stringprep_profiles[0]; p->name; p++)
00443     if (strcmp (p->name, profile) == 0)
00444       break;
00445 
00446   if (!p || !p->name || !p->tables)
00447     return STRINGPREP_UNKNOWN_PROFILE;
00448 
00449   do
00450     {
00451       free (str);
00452       str = (char *) malloc (len);
00453       if (str == NULL)
00454        return STRINGPREP_MALLOC_ERROR;
00455 
00456       strcpy (str, in);
00457 
00458       rc = stringprep (str, len, flags, p->tables);
00459       len += 50;
00460     }
00461   while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00462 
00463   if (rc == STRINGPREP_OK)
00464     *out = str;
00465   else
00466     free (str);
00467 
00468   return rc;
00469 }
00470