Back to index

lightning-sunbird  0.9+nobinonly
unistring.c
Go to the documentation of this file.
00001 /* ***** BEGIN LICENSE BLOCK *****
00002  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00003  *
00004  * The contents of this file are subject to the Mozilla Public License Version
00005  * 1.1 (the "License"); you may not use this file except in compliance with
00006  * the License. You may obtain a copy of the License at
00007  * http://www.mozilla.org/MPL/
00008  *
00009  * Software distributed under the License is distributed on an "AS IS" basis,
00010  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00011  * for the specific language governing rights and limitations under the
00012  * License.
00013  *
00014  * The Original Code is lineterm.
00015  *
00016  * The Initial Developer of the Original Code is
00017  * Ramalingam Saravanan.
00018  * Portions created by the Initial Developer are Copyright (C) 1999
00019  * the Initial Developer. All Rights Reserved.
00020  *
00021  * Contributor(s):
00022  *
00023  * Alternatively, the contents of this file may be used under the terms of
00024  * either the GNU General Public License Version 2 or later (the "GPL"), or
00025  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00026  * in which case the provisions of the GPL or the LGPL are applicable instead
00027  * of those above. If you wish to allow use of your version of this file only
00028  * under the terms of either the GPL or the LGPL, and not to allow others to
00029  * use your version of this file under the terms of the MPL, indicate your
00030  * decision by deleting the provisions above and replace them with the notice
00031  * and other provisions required by the GPL or the LGPL. If you do not delete
00032  * the provisions above, a recipient may use your version of this file under
00033  * the terms of any one of the MPL, the GPL or the LGPL.
00034  *
00035  * ***** END LICENSE BLOCK ***** */
00036 
00037 /* unistring.c: Unicode string operations implementation */
00038 
00039 /* public declarations */
00040 #include "unistring.h"
00041 
00042 /* private declarations */
00043 
00048 void ucstoutf8(const UNICHAR* us, int nus, char* s, int ns, 
00049                int* remaining, int* encoded)
00050 {
00051   int j, k;
00052 
00053   j = 0;
00054   k = 0;
00055   while ((j < ns) && (k < nus)) {
00056     UNICHAR uch = us[k++];
00057 
00058     if (uch < 0x0080) {
00059       s[j++] = uch;
00060 
00061     } else if (uch < 0x0800) {
00062       if (j >= ns-1) break;
00063       s[j++] = ((uch & 0x07C0) >>  6) | 0xC0;
00064       s[j++] =  (uch & 0x003F)        | 0x80;
00065 
00066     } else {
00067       if (j >= ns-2) break;
00068       s[j++] = ((uch & 0xF000) >> 12) | 0xE0;
00069       s[j++] = ((uch & 0x0FC0) >>  6) | 0x80;
00070       s[j++] =  (uch & 0x003F)        | 0x80;
00071     }
00072   }
00073 
00074   if (remaining)
00075     *remaining = nus - k;
00076 
00077   if (encoded)
00078     *encoded = j;
00079 }
00080 
00081 
00089 int utf8toucs(const char* s, int ns, UNICHAR* us, int nus,
00090               int skipNUL, int* remaining, int* decoded)
00091 {
00092   int j, k;
00093   int retcode = 0;
00094 
00095   j = 0;
00096   k = 0;
00097   while ((j < ns) && (k < nus)) {
00098     char ch = s[j];
00099 
00100     if (0x80 & ch) {
00101       if (0x40 & ch) {
00102         if (0x20 & ch) {
00103           /* consume 3 */
00104           if (j >= ns-2) break;
00105 
00106           if ( (s[j+1] & 0x40) || !(s[j+1] & 0x80) ||
00107                (s[j+2] & 0x40) || !(s[j+2] & 0x80) ) {
00108             retcode = -1;
00109           }
00110 
00111           us[k++] =   ((ch     & 0x0F) << 12)
00112                     | ((s[j+1] & 0x3F) << 6)
00113                     | ( s[j+2] & 0x3F);
00114 
00115           j += 3;
00116 
00117         } else {
00118           /* consume 2 */
00119           if (j >= ns-1) break;
00120 
00121           if ( (s[j+1] & 0x40) || !(s[j+1] & 0x80) ) {
00122             retcode = -1;
00123           }
00124 
00125           us[k++] =   ((ch     & 0x1F) << 6)
00126                     | ( s[j+1] & 0x3F);
00127           j += 2;
00128         }
00129 
00130       } else {
00131         /* consume 1 (error) */
00132         retcode = -1;
00133         j++;
00134       }
00135 
00136     } else {
00137       /* consume 1 */
00138       if (ch || !skipNUL) {
00139         us[k++] = ch;
00140       }
00141       j++;
00142     }
00143   }
00144 
00145   if (remaining)
00146     *remaining = ns - j;
00147 
00148   if (decoded)
00149     *decoded = k;
00150 
00151   return retcode;
00152 }
00153 
00154 
00158 void ucsprint(FILE* stream, const UNICHAR* us, int nus)
00159 {
00160   static const char hexDigits[17] = "0123456789abcdef";
00161   UNICHAR uch;
00162   int k;
00163 
00164   for (k=0; k<nus; k++) {
00165     uch = us[k];
00166 
00167     if (uch < (UNICHAR)U_SPACE) {
00168       /* ASCII control character */
00169       fprintf(stream, "^%c", (char) uch+U_ATSIGN);
00170 
00171     } else if (uch == (UNICHAR)U_CARET) {
00172       /* Caret */
00173       fprintf(stream, "^^");
00174 
00175     } else if (uch < (UNICHAR)U_DEL) {
00176       /* Printable ASCII character */
00177       fprintf(stream, "%c", (char) uch);
00178 
00179     } else {
00180       /* DEL or non-ASCII character */
00181       char esc_str[8]="&#0000;";
00182       int j;
00183       for (j=5; j>1; j--) {
00184         esc_str[j] = hexDigits[uch%16];
00185         uch = uch / 16;
00186       }
00187       fprintf(stream, "%s", esc_str);
00188     }
00189   }
00190 }
00191 
00192 
00197 UNICHAR* ucscopy(register UNICHAR* dest, register const char* srcplain,
00198                  size_t n)
00199 {
00200   register UNICHAR ch;
00201   register const UNICHAR* destmx = dest + n;
00202 
00203   /* Copy characters from source to destination, stopping at NUL */
00204   while (dest < destmx) {
00205     *dest++ = (ch = *srcplain++);
00206     if (ch == U_NUL)
00207       break;
00208   }
00209 
00210   /* Pad with NULs, if necessary */
00211   while (dest < destmx)
00212     *dest++ = U_NUL;
00213 
00214   return dest;
00215 }
00216     
00217 
00218 #ifndef USE_WCHAR
00219 
00222 UNICHAR* ucschr(register const UNICHAR* str, register const UNICHAR chr)
00223 {
00224   do {
00225     if (*str == chr)
00226       return (UNICHAR*) str;
00227   } while (*str++ != U_NUL);
00228 
00229   return NULL;
00230 }
00231 
00232 
00236 UNICHAR* ucsrchr(register const UNICHAR* str, register const UNICHAR chr)
00237 {
00238   const UNICHAR* retstr = NULL;
00239   do {
00240     if (*str == chr)
00241       retstr = str;
00242   } while (*str++ != U_NUL);
00243 
00244   return (UNICHAR*) retstr;
00245 }
00246 
00247 
00254 int ucscmp(register const UNICHAR* str1, register const UNICHAR* str2)
00255 {
00256   register UNICHAR ch1, ch2;
00257 
00258   do {
00259     if ((ch1 = *str1++) != (ch2 = *str2++))
00260       return ch1 - ch2;
00261 
00262   } while (ch1 != U_NUL);
00263 
00264   return 0;
00265 }
00266 
00267     
00274 int ucsncmp(register const UNICHAR* str1, register const UNICHAR* str2,
00275             size_t n)
00276 {
00277   register UNICHAR ch1, ch2;
00278   register const UNICHAR* str1mx = str1 + n;
00279 
00280   while (str1 < str1mx) {
00281     if ((ch1 = *str1++) != (ch2 = *str2++))
00282       return ch1 - ch2;
00283 
00284     if (ch1 == U_NUL)
00285       break;
00286   }
00287 
00288   return 0;
00289 }
00290 
00291     
00296 UNICHAR* ucsncpy(register UNICHAR* dest, register const UNICHAR* src,
00297                  size_t n)
00298 {
00299   register UNICHAR ch;
00300   register const UNICHAR* destmx = dest + n;
00301 
00302   /* Copy characters from source to destination, stopping at NUL */
00303   while (dest < destmx) {
00304     *dest++ = (ch = *src++);
00305     if (ch == U_NUL)
00306       break;
00307   }
00308 
00309   /* Pad with NULs, if necessary */
00310   while (dest < destmx)
00311     *dest++ = U_NUL;
00312 
00313   return dest;
00314 }
00315     
00316 
00319 size_t ucslen(const UNICHAR* str)
00320 {
00321   register const UNICHAR* strcp = str;
00322 
00323   while (*strcp++ != U_NUL);
00324 
00325   return strcp - str - 1;
00326 }
00327 
00328     
00333 UNICHAR* ucsstr(register const UNICHAR* str, const UNICHAR* substr)
00334 {
00335   register UNICHAR subch1, ch;
00336 
00337   /* If null substring, return string */
00338   if (*substr == U_NUL)
00339     return (UNICHAR*) str;
00340 
00341   /* First character of non-null substring */
00342   subch1 = *substr;
00343 
00344   if ((ch = *str) == U_NUL)
00345     return NULL;
00346 
00347   do {
00348 
00349     if (ch == subch1) {
00350       /* First character matches; check if rest of substring matches */
00351       register const UNICHAR* strcp = str;
00352       register const UNICHAR* substrcp = substr;
00353       do {
00354         substrcp++;
00355         strcp++;
00356         if (*substrcp == U_NUL)
00357           return (UNICHAR*) str;
00358       } while (*substrcp == *strcp);
00359     }
00360 
00361   } while ((ch = *(++str)) != U_NUL);
00362 
00363   return NULL;
00364 }
00365     
00366 
00370 size_t ucsspn(const UNICHAR* str, const UNICHAR* chars)
00371 {
00372   register UNICHAR strch, ch;
00373   register const UNICHAR* charscp;
00374   register const UNICHAR* strcp = str;
00375 
00376   while ((strch = *strcp++) != U_NUL) {
00377     charscp = chars;
00378 
00379     /* Check that it is one of the specified characters */
00380     while ((ch = *charscp++) != U_NUL) {
00381       if (strch == ch)
00382         break;
00383     }
00384     if (ch == U_NUL)
00385       return (size_t) (strcp - str - 1);
00386   }
00387 
00388   return (size_t) (strcp - str - 1);
00389 }
00390     
00391 
00395 size_t ucscspn(const UNICHAR* str, const UNICHAR* chars)
00396 {
00397   register UNICHAR strch, ch;
00398   register const UNICHAR* charscp;
00399   register const UNICHAR* strcp = str;
00400 
00401   while ((strch = *strcp++) != U_NUL) {
00402     charscp = chars;
00403 
00404     /* Check that it is not one of the specified characters */
00405     while ((ch = *charscp++) != U_NUL) {
00406       if (strch == ch)
00407         return (size_t) (strcp - str - 1);
00408     }
00409   }
00410 
00411   return (size_t) (strcp - str - 1);
00412 }
00413 #endif  /* !USE_WCHAR */