Back to index

lightning-sunbird  0.9+nobinonly
utf8.c
Go to the documentation of this file.
00001 /* ***** BEGIN LICENSE BLOCK *****
00002  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00003  *
00004  * The contents of this file are subject to the Mozilla Public License Version
00005  * 1.1 (the "License"); you may not use this file except in compliance with
00006  * the License. You may obtain a copy of the License at
00007  * http://www.mozilla.org/MPL/
00008  *
00009  * Software distributed under the License is distributed on an "AS IS" basis,
00010  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00011  * for the specific language governing rights and limitations under the
00012  * License.
00013  *
00014  * The Original Code is the Netscape security libraries.
00015  *
00016  * The Initial Developer of the Original Code is
00017  * Netscape Communications Corporation.
00018  * Portions created by the Initial Developer are Copyright (C) 1994-2000
00019  * the Initial Developer. All Rights Reserved.
00020  *
00021  * Contributor(s):
00022  *   John Gardiner Myers <jgmyers@speakeasy.net>
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either the GNU General Public License Version 2 or later (the "GPL"), or
00026  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
#ifdef DEBUG
/* Version-control identification string; only present in DEBUG builds. */
static const char CVS_ID[] =
  "@(#) $RCSfile: utf8.c,v $ $Revision: 1.12 $ $Date: 2005/01/20 02:25:50 $";
#endif /* DEBUG */
00041 
00042 #include "seccomon.h"
00043 #include "secport.h"
00044 
00045 #ifdef TEST_UTF8
00046 #include <assert.h>
00047 #undef PORT_Assert
00048 #define PORT_Assert assert
00049 #endif
00050 
00051 /*
00052  * From RFC 2044:
00053  *
00054  * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
00055  * 0000 0000-0000 007F   0xxxxxxx
00056  * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
00057  * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
00058  * 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
00059  * 0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
00060  * 0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx
00061  */  
00062 
00063 /*
00064  * From http://www.imc.org/draft-hoffman-utf16
00065  *
00066  * For U on [0x00010000,0x0010FFFF]:  Let U' = U - 0x00010000
00067  *
00068  * U' = yyyyyyyyyyxxxxxxxxxx
00069  * W1 = 110110yyyyyyyyyy
00070  * W2 = 110111xxxxxxxxxx
00071  */
00072 
00073 /*
00074  * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit
00075  * character values.  If you wish to use this code for working with
00076  * host byte order values, define the following:
00077  *
00078  * #if IS_BIG_ENDIAN
00079  * #define L_0 0
00080  * #define L_1 1
00081  * #define L_2 2
00082  * #define L_3 3
00083  * #define H_0 0
00084  * #define H_1 1
00085  * #else / * not everyone has elif * /
00086  * #if IS_LITTLE_ENDIAN
00087  * #define L_0 3
00088  * #define L_1 2
00089  * #define L_2 1
00090  * #define L_3 0
00091  * #define H_0 1
00092  * #define H_1 0
00093  * #else
00094  * #error "PDP and NUXI support deferred"
00095  * #endif / * IS_LITTLE_ENDIAN * /
00096  * #endif / * IS_BIG_ENDIAN * /
00097  */
00098 
/* Byte offsets inside a 4-byte character value; L_0 is the offset of the
 * most significant byte (network byte order). */
#define L_0   0
#define L_1   1
#define L_2   2
#define L_3   3

/* Byte offsets inside a 2-byte character value; H_0 is the offset of the
 * most significant byte (network byte order). */
#define H_0   0
#define H_1   1

/* Value returned by sec_port_read_utf8() when no character could be decoded. */
#define BAD_UTF8 ((PRUint32)-1)
00107 
00108 /*
00109  * Parse a single UTF-8 character per the spec. in section 3.9 (D36)
00110  * of Unicode 4.0.0.
00111  *
00112  * Parameters:
00113  * index - Points to the byte offset in inBuf of character to read.  On success,
00114  *         updated to the offset of the following character.
00115  * inBuf - Input buffer, UTF-8 encoded
00116  * inbufLen - Length of input buffer, in bytes.
00117  *
00118  * Returns:
00119  * Success - The UCS4 encoded character
00120  * Failure - BAD_UTF8
00121  */
00122 static PRUint32
00123 sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen)
00124 {
00125   PRUint32 result;
00126   unsigned int i = *index;
00127   int bytes_left;
00128   PRUint32 min_value;
00129 
00130   PORT_Assert(i < inBufLen);
00131 
00132   if ( (inBuf[i] & 0x80) == 0x00 ) {
00133     result = inBuf[i++];
00134     bytes_left = 0;
00135     min_value = 0;
00136   } else if ( (inBuf[i] & 0xE0) == 0xC0 ) {
00137     result = inBuf[i++] & 0x1F;
00138     bytes_left = 1;
00139     min_value = 0x80;
00140   } else if ( (inBuf[i] & 0xF0) == 0xE0) {
00141     result = inBuf[i++] & 0x0F;
00142     bytes_left = 2;
00143     min_value = 0x800;
00144   } else if ( (inBuf[i] & 0xF8) == 0xF0) {
00145     result = inBuf[i++] & 0x07;
00146     bytes_left = 3;
00147     min_value = 0x10000;
00148   } else {
00149     return BAD_UTF8;
00150   }
00151 
00152   while (bytes_left--) {
00153     if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8;
00154     result = (result << 6) | (inBuf[i++] & 0x3F);
00155   }
00156 
00157   /* Check for overlong sequences, surrogates, and outside unicode range */
00158   if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) {
00159     return BAD_UTF8;
00160   }
00161 
00162   *index = i;
00163   return result;
00164 }
00165 
00166 PR_IMPLEMENT(PRBool)
00167 sec_port_ucs4_utf8_conversion_function
00168 (
00169   PRBool toUnicode,
00170   unsigned char *inBuf,
00171   unsigned int inBufLen,
00172   unsigned char *outBuf,
00173   unsigned int maxOutBufLen,
00174   unsigned int *outBufLen
00175 )
00176 {
00177   PORT_Assert((unsigned int *)NULL != outBufLen);
00178 
00179   if( toUnicode ) {
00180     unsigned int i, len = 0;
00181 
00182     for( i = 0; i < inBufLen; ) {
00183       if( (inBuf[i] & 0x80) == 0x00 ) i += 1;
00184       else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2;
00185       else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3;
00186       else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4;
00187       else return PR_FALSE;
00188 
00189       len += 4;
00190     }
00191 
00192     if( len > maxOutBufLen ) {
00193       *outBufLen = len;
00194       return PR_FALSE;
00195     }
00196 
00197     len = 0;
00198 
00199     for( i = 0; i < inBufLen; ) {
00200       PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
00201 
00202       if (ucs4 == BAD_UTF8) return PR_FALSE;
00203            
00204       outBuf[len+L_0] = 0x00;
00205       outBuf[len+L_1] = (unsigned char)(ucs4 >> 16);
00206       outBuf[len+L_2] = (unsigned char)(ucs4 >> 8);
00207       outBuf[len+L_3] = (unsigned char)ucs4;
00208 
00209       len += 4;
00210     }
00211 
00212     *outBufLen = len;
00213     return PR_TRUE;
00214   } else {
00215     unsigned int i, len = 0;
00216     PORT_Assert((inBufLen % 4) == 0);
00217     if ((inBufLen % 4) != 0) {
00218       *outBufLen = 0;
00219       return PR_FALSE;
00220     }
00221 
00222     for( i = 0; i < inBufLen; i += 4 ) {
00223       if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) {
00224        *outBufLen = 0;
00225        return PR_FALSE;
00226       } else if( inBuf[i+L_1] >= 0x01 ) len += 4;
00227       else if( inBuf[i+L_2] >= 0x08 ) len += 3;
00228       else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2;
00229       else len += 1;
00230     }
00231 
00232     if( len > maxOutBufLen ) {
00233       *outBufLen = len;
00234       return PR_FALSE;
00235     }
00236 
00237     len = 0;
00238 
00239     for( i = 0; i < inBufLen; i += 4 ) {
00240       if( inBuf[i+L_1] >= 0x01 ) {
00241         /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
00242         /* 00000000 000abcde fghijklm nopqrstu ->
00243            11110abc 10defghi 10jklmno 10pqrstu */
00244 
00245         outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2);
00246         outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4)
00247                              | ((inBuf[i+L_2] & 0xF0) >> 4);
00248         outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
00249                              | ((inBuf[i+L_3] & 0xC0) >> 6);
00250         outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
00251 
00252         len += 4;
00253       } else if( inBuf[i+L_2] >= 0x08 ) {
00254         /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
00255         /* 00000000 00000000 abcdefgh ijklmnop ->
00256            1110abcd 10efghij 10klmnop */
00257 
00258         outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4);
00259         outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
00260                              | ((inBuf[i+L_3] & 0xC0) >> 6);
00261         outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
00262 
00263         len += 3;
00264       } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) {
00265         /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
00266         /* 00000000 00000000 00000abc defghijk ->
00267            110abcde 10fghijk */
00268 
00269         outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2)
00270                              | ((inBuf[i+L_3] & 0xC0) >> 6);
00271         outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
00272 
00273         len += 2;
00274       } else {
00275         /* 0000 0000-0000 007F -> 0xxxxxx */
00276         /* 00000000 00000000 00000000 0abcdefg ->
00277            0abcdefg */
00278 
00279         outBuf[len+0] = (inBuf[i+L_3] & 0x7F);
00280 
00281         len += 1;
00282       }
00283     }
00284                             
00285     *outBufLen = len;
00286     return PR_TRUE;
00287   }
00288 }
00289 
00290 PR_IMPLEMENT(PRBool)
00291 sec_port_ucs2_utf8_conversion_function
00292 (
00293   PRBool toUnicode,
00294   unsigned char *inBuf,
00295   unsigned int inBufLen,
00296   unsigned char *outBuf,
00297   unsigned int maxOutBufLen,
00298   unsigned int *outBufLen
00299 )
00300 {
00301   PORT_Assert((unsigned int *)NULL != outBufLen);
00302 
00303   if( toUnicode ) {
00304     unsigned int i, len = 0;
00305 
00306     for( i = 0; i < inBufLen; ) {
00307       if( (inBuf[i] & 0x80) == 0x00 ) {
00308         i += 1;
00309         len += 2;
00310       } else if( (inBuf[i] & 0xE0) == 0xC0 ) {
00311         i += 2;
00312         len += 2;
00313       } else if( (inBuf[i] & 0xF0) == 0xE0 ) {
00314         i += 3;
00315         len += 2;
00316       } else if( (inBuf[i] & 0xF8) == 0xF0 ) { 
00317         i += 4;
00318         len += 4;
00319       } else return PR_FALSE;
00320     }
00321 
00322     if( len > maxOutBufLen ) {
00323       *outBufLen = len;
00324       return PR_FALSE;
00325     }
00326 
00327     len = 0;
00328 
00329     for( i = 0; i < inBufLen; ) {
00330       PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
00331 
00332       if (ucs4 == BAD_UTF8) return PR_FALSE;
00333 
00334       if( ucs4 < 0x10000) {
00335         outBuf[len+H_0] = (unsigned char)(ucs4 >> 8);
00336         outBuf[len+H_1] = (unsigned char)ucs4;
00337         len += 2;
00338       } else {
00339        ucs4 -= 0x10000;
00340         outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3));
00341         outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10);
00342         outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3));
00343         outBuf[len+2+H_1] = (unsigned char)ucs4;
00344        len += 4;
00345       }
00346     }
00347 
00348     *outBufLen = len;
00349     return PR_TRUE;
00350   } else {
00351     unsigned int i, len = 0;
00352     PORT_Assert((inBufLen % 2) == 0);
00353     if ((inBufLen % 2) != 0) {
00354       *outBufLen = 0;
00355       return PR_FALSE;
00356     }
00357 
00358     for( i = 0; i < inBufLen; i += 2 ) {
00359       if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_0] & 0x80) == 0x00) ) len += 1;
00360       else if( inBuf[i+H_0] < 0x08 ) len += 2;
00361       else if( ((inBuf[i+0+H_0] & 0xDC) == 0xD8) ) {
00362         if( ((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2) ) {
00363           i += 2;
00364           len += 4;
00365         } else {
00366           return PR_FALSE;
00367         }
00368       }
00369       else len += 3;
00370     }
00371 
00372     if( len > maxOutBufLen ) {
00373       *outBufLen = len;
00374       return PR_FALSE;
00375     }
00376 
00377     len = 0;
00378 
00379     for( i = 0; i < inBufLen; i += 2 ) {
00380       if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) {
00381         /* 0000-007F -> 0xxxxxx */
00382         /* 00000000 0abcdefg -> 0abcdefg */
00383 
00384         outBuf[len] = inBuf[i+H_1] & 0x7F;
00385 
00386         len += 1;
00387       } else if( inBuf[i+H_0] < 0x08 ) {
00388         /* 0080-07FF -> 110xxxxx 10xxxxxx */
00389         /* 00000abc defghijk -> 110abcde 10fghijk */
00390 
00391         outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2) 
00392                              | ((inBuf[i+H_1] & 0xC0) >> 6);
00393         outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
00394 
00395         len += 2;
00396       } else if( (inBuf[i+H_0] & 0xDC) == 0xD8 ) {
00397         int abcde, BCDE;
00398 
00399         PORT_Assert(((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2));
00400 
00401         /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
00402         /* 110110BC DEfghijk 110111lm nopqrstu ->
00403            { Let abcde = BCDE + 1 }
00404            11110abc 10defghi 10jklmno 10pqrstu */
00405 
00406         BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6);
00407         abcde = BCDE + 1;
00408 
00409         outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2);
00410         outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4) 
00411                              | ((inBuf[i+0+H_1] & 0x3C) >> 2);
00412         outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4)
00413                              | ((inBuf[i+2+H_0] & 0x03) << 2)
00414                              | ((inBuf[i+2+H_1] & 0xC0) >> 6);
00415         outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0);
00416 
00417         i += 2;
00418         len += 4;
00419       } else {
00420         /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
00421         /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */
00422 
00423         outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4);
00424         outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2) 
00425                              | ((inBuf[i+H_1] & 0xC0) >> 6);
00426         outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
00427 
00428         len += 3;
00429       }
00430     }
00431 
00432     *outBufLen = len;
00433     return PR_TRUE;
00434   }
00435 }
00436 
00437 PRBool
00438 sec_port_iso88591_utf8_conversion_function
00439 (
00440   const unsigned char *inBuf,
00441   unsigned int inBufLen,
00442   unsigned char *outBuf,
00443   unsigned int maxOutBufLen,
00444   unsigned int *outBufLen
00445 )
00446 {
00447   unsigned int i, len = 0;
00448 
00449   PORT_Assert((unsigned int *)NULL != outBufLen);
00450 
00451   for( i = 0; i < inBufLen; i++) {
00452     if( (inBuf[i] & 0x80) == 0x00 ) len += 1;
00453     else len += 2;
00454   }
00455 
00456   if( len > maxOutBufLen ) {
00457     *outBufLen = len;
00458     return PR_FALSE;
00459   }
00460 
00461   len = 0;
00462 
00463   for( i = 0; i < inBufLen; i++) {
00464     if( (inBuf[i] & 0x80) == 0x00 ) {
00465       /* 00-7F -> 0xxxxxxx */
00466       /* 0abcdefg -> 0abcdefg */
00467 
00468       outBuf[len] = inBuf[i];
00469       len += 1;
00470     } else {
00471       /* 80-FF <- 110xxxxx 10xxxxxx */
00472       /* 00000000 abcdefgh -> 110000ab 10cdefgh */
00473 
00474       outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6);
00475       outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0);
00476 
00477       len += 2;
00478     }
00479   }
00480 
00481   *outBufLen = len;
00482   return PR_TRUE;
00483 }
00484 
00485 #ifdef TEST_UTF8
00486 
00487 #include <stdio.h>
00488 #include <string.h>
00489 #include <stdlib.h>
00490 #include <netinet/in.h> /* for htonl and htons */
00491 
00492 /*
00493  * UCS-4 vectors
00494  */
00495 
00496 struct ucs4 {
00497   PRUint32 c;
00498   char *utf8;
00499 };
00500 
00501 /*
00502  * UCS-2 vectors
00503  */
00504 
00505 struct ucs2 {
00506   PRUint16 c;
00507   char *utf8;
00508 };
00509 
00510 /*
00511  * UTF-16 vectors
00512  */
00513 
00514 struct utf16 {
00515   PRUint32 c;
00516   PRUint16 w[2];
00517 };
00518 
00519 
00520 /*
00521  * UCS-4 vectors
00522  */
00523 
00524 struct ucs4 ucs4[] = {
00525   { 0x00000001, "\x01" },
00526   { 0x00000002, "\x02" },
00527   { 0x00000003, "\x03" },
00528   { 0x00000004, "\x04" },
00529   { 0x00000007, "\x07" },
00530   { 0x00000008, "\x08" },
00531   { 0x0000000F, "\x0F" },
00532   { 0x00000010, "\x10" },
00533   { 0x0000001F, "\x1F" },
00534   { 0x00000020, "\x20" },
00535   { 0x0000003F, "\x3F" },
00536   { 0x00000040, "\x40" },
00537   { 0x0000007F, "\x7F" },
00538           
00539   { 0x00000080, "\xC2\x80" },
00540   { 0x00000081, "\xC2\x81" },
00541   { 0x00000082, "\xC2\x82" },
00542   { 0x00000084, "\xC2\x84" },
00543   { 0x00000088, "\xC2\x88" },
00544   { 0x00000090, "\xC2\x90" },
00545   { 0x000000A0, "\xC2\xA0" },
00546   { 0x000000C0, "\xC3\x80" },
00547   { 0x000000FF, "\xC3\xBF" },
00548   { 0x00000100, "\xC4\x80" },
00549   { 0x00000101, "\xC4\x81" },
00550   { 0x00000102, "\xC4\x82" },
00551   { 0x00000104, "\xC4\x84" },
00552   { 0x00000108, "\xC4\x88" },
00553   { 0x00000110, "\xC4\x90" },
00554   { 0x00000120, "\xC4\xA0" },
00555   { 0x00000140, "\xC5\x80" },
00556   { 0x00000180, "\xC6\x80" },
00557   { 0x000001FF, "\xC7\xBF" },
00558   { 0x00000200, "\xC8\x80" },
00559   { 0x00000201, "\xC8\x81" },
00560   { 0x00000202, "\xC8\x82" },
00561   { 0x00000204, "\xC8\x84" },
00562   { 0x00000208, "\xC8\x88" },
00563   { 0x00000210, "\xC8\x90" },
00564   { 0x00000220, "\xC8\xA0" },
00565   { 0x00000240, "\xC9\x80" },
00566   { 0x00000280, "\xCA\x80" },
00567   { 0x00000300, "\xCC\x80" },
00568   { 0x000003FF, "\xCF\xBF" },
00569   { 0x00000400, "\xD0\x80" },
00570   { 0x00000401, "\xD0\x81" },
00571   { 0x00000402, "\xD0\x82" },
00572   { 0x00000404, "\xD0\x84" },
00573   { 0x00000408, "\xD0\x88" },
00574   { 0x00000410, "\xD0\x90" },
00575   { 0x00000420, "\xD0\xA0" },
00576   { 0x00000440, "\xD1\x80" },
00577   { 0x00000480, "\xD2\x80" },
00578   { 0x00000500, "\xD4\x80" },
00579   { 0x00000600, "\xD8\x80" },
00580   { 0x000007FF, "\xDF\xBF" },
00581           
00582   { 0x00000800, "\xE0\xA0\x80" },
00583   { 0x00000801, "\xE0\xA0\x81" },
00584   { 0x00000802, "\xE0\xA0\x82" },
00585   { 0x00000804, "\xE0\xA0\x84" },
00586   { 0x00000808, "\xE0\xA0\x88" },
00587   { 0x00000810, "\xE0\xA0\x90" },
00588   { 0x00000820, "\xE0\xA0\xA0" },
00589   { 0x00000840, "\xE0\xA1\x80" },
00590   { 0x00000880, "\xE0\xA2\x80" },
00591   { 0x00000900, "\xE0\xA4\x80" },
00592   { 0x00000A00, "\xE0\xA8\x80" },
00593   { 0x00000C00, "\xE0\xB0\x80" },
00594   { 0x00000FFF, "\xE0\xBF\xBF" },
00595   { 0x00001000, "\xE1\x80\x80" },
00596   { 0x00001001, "\xE1\x80\x81" },
00597   { 0x00001002, "\xE1\x80\x82" },
00598   { 0x00001004, "\xE1\x80\x84" },
00599   { 0x00001008, "\xE1\x80\x88" },
00600   { 0x00001010, "\xE1\x80\x90" },
00601   { 0x00001020, "\xE1\x80\xA0" },
00602   { 0x00001040, "\xE1\x81\x80" },
00603   { 0x00001080, "\xE1\x82\x80" },
00604   { 0x00001100, "\xE1\x84\x80" },
00605   { 0x00001200, "\xE1\x88\x80" },
00606   { 0x00001400, "\xE1\x90\x80" },
00607   { 0x00001800, "\xE1\xA0\x80" },
00608   { 0x00001FFF, "\xE1\xBF\xBF" },
00609   { 0x00002000, "\xE2\x80\x80" },
00610   { 0x00002001, "\xE2\x80\x81" },
00611   { 0x00002002, "\xE2\x80\x82" },
00612   { 0x00002004, "\xE2\x80\x84" },
00613   { 0x00002008, "\xE2\x80\x88" },
00614   { 0x00002010, "\xE2\x80\x90" },
00615   { 0x00002020, "\xE2\x80\xA0" },
00616   { 0x00002040, "\xE2\x81\x80" },
00617   { 0x00002080, "\xE2\x82\x80" },
00618   { 0x00002100, "\xE2\x84\x80" },
00619   { 0x00002200, "\xE2\x88\x80" },
00620   { 0x00002400, "\xE2\x90\x80" },
00621   { 0x00002800, "\xE2\xA0\x80" },
00622   { 0x00003000, "\xE3\x80\x80" },
00623   { 0x00003FFF, "\xE3\xBF\xBF" },
00624   { 0x00004000, "\xE4\x80\x80" },
00625   { 0x00004001, "\xE4\x80\x81" },
00626   { 0x00004002, "\xE4\x80\x82" },
00627   { 0x00004004, "\xE4\x80\x84" },
00628   { 0x00004008, "\xE4\x80\x88" },
00629   { 0x00004010, "\xE4\x80\x90" },
00630   { 0x00004020, "\xE4\x80\xA0" },
00631   { 0x00004040, "\xE4\x81\x80" },
00632   { 0x00004080, "\xE4\x82\x80" },
00633   { 0x00004100, "\xE4\x84\x80" },
00634   { 0x00004200, "\xE4\x88\x80" },
00635   { 0x00004400, "\xE4\x90\x80" },
00636   { 0x00004800, "\xE4\xA0\x80" },
00637   { 0x00005000, "\xE5\x80\x80" },
00638   { 0x00006000, "\xE6\x80\x80" },
00639   { 0x00007FFF, "\xE7\xBF\xBF" },
00640   { 0x00008000, "\xE8\x80\x80" },
00641   { 0x00008001, "\xE8\x80\x81" },
00642   { 0x00008002, "\xE8\x80\x82" },
00643   { 0x00008004, "\xE8\x80\x84" },
00644   { 0x00008008, "\xE8\x80\x88" },
00645   { 0x00008010, "\xE8\x80\x90" },
00646   { 0x00008020, "\xE8\x80\xA0" },
00647   { 0x00008040, "\xE8\x81\x80" },
00648   { 0x00008080, "\xE8\x82\x80" },
00649   { 0x00008100, "\xE8\x84\x80" },
00650   { 0x00008200, "\xE8\x88\x80" },
00651   { 0x00008400, "\xE8\x90\x80" },
00652   { 0x00008800, "\xE8\xA0\x80" },
00653   { 0x00009000, "\xE9\x80\x80" },
00654   { 0x0000A000, "\xEA\x80\x80" },
00655   { 0x0000C000, "\xEC\x80\x80" },
00656   { 0x0000FFFF, "\xEF\xBF\xBF" },
00657           
00658   { 0x00010000, "\xF0\x90\x80\x80" },
00659   { 0x00010001, "\xF0\x90\x80\x81" },
00660   { 0x00010002, "\xF0\x90\x80\x82" },
00661   { 0x00010004, "\xF0\x90\x80\x84" },
00662   { 0x00010008, "\xF0\x90\x80\x88" },
00663   { 0x00010010, "\xF0\x90\x80\x90" },
00664   { 0x00010020, "\xF0\x90\x80\xA0" },
00665   { 0x00010040, "\xF0\x90\x81\x80" },
00666   { 0x00010080, "\xF0\x90\x82\x80" },
00667   { 0x00010100, "\xF0\x90\x84\x80" },
00668   { 0x00010200, "\xF0\x90\x88\x80" },
00669   { 0x00010400, "\xF0\x90\x90\x80" },
00670   { 0x00010800, "\xF0\x90\xA0\x80" },
00671   { 0x00011000, "\xF0\x91\x80\x80" },
00672   { 0x00012000, "\xF0\x92\x80\x80" },
00673   { 0x00014000, "\xF0\x94\x80\x80" },
00674   { 0x00018000, "\xF0\x98\x80\x80" },
00675   { 0x0001FFFF, "\xF0\x9F\xBF\xBF" },
00676   { 0x00020000, "\xF0\xA0\x80\x80" },
00677   { 0x00020001, "\xF0\xA0\x80\x81" },
00678   { 0x00020002, "\xF0\xA0\x80\x82" },
00679   { 0x00020004, "\xF0\xA0\x80\x84" },
00680   { 0x00020008, "\xF0\xA0\x80\x88" },
00681   { 0x00020010, "\xF0\xA0\x80\x90" },
00682   { 0x00020020, "\xF0\xA0\x80\xA0" },
00683   { 0x00020040, "\xF0\xA0\x81\x80" },
00684   { 0x00020080, "\xF0\xA0\x82\x80" },
00685   { 0x00020100, "\xF0\xA0\x84\x80" },
00686   { 0x00020200, "\xF0\xA0\x88\x80" },
00687   { 0x00020400, "\xF0\xA0\x90\x80" },
00688   { 0x00020800, "\xF0\xA0\xA0\x80" },
00689   { 0x00021000, "\xF0\xA1\x80\x80" },
00690   { 0x00022000, "\xF0\xA2\x80\x80" },
00691   { 0x00024000, "\xF0\xA4\x80\x80" },
00692   { 0x00028000, "\xF0\xA8\x80\x80" },
00693   { 0x00030000, "\xF0\xB0\x80\x80" },
00694   { 0x0003FFFF, "\xF0\xBF\xBF\xBF" },
00695   { 0x00040000, "\xF1\x80\x80\x80" },
00696   { 0x00040001, "\xF1\x80\x80\x81" },
00697   { 0x00040002, "\xF1\x80\x80\x82" },
00698   { 0x00040004, "\xF1\x80\x80\x84" },
00699   { 0x00040008, "\xF1\x80\x80\x88" },
00700   { 0x00040010, "\xF1\x80\x80\x90" },
00701   { 0x00040020, "\xF1\x80\x80\xA0" },
00702   { 0x00040040, "\xF1\x80\x81\x80" },
00703   { 0x00040080, "\xF1\x80\x82\x80" },
00704   { 0x00040100, "\xF1\x80\x84\x80" },
00705   { 0x00040200, "\xF1\x80\x88\x80" },
00706   { 0x00040400, "\xF1\x80\x90\x80" },
00707   { 0x00040800, "\xF1\x80\xA0\x80" },
00708   { 0x00041000, "\xF1\x81\x80\x80" },
00709   { 0x00042000, "\xF1\x82\x80\x80" },
00710   { 0x00044000, "\xF1\x84\x80\x80" },
00711   { 0x00048000, "\xF1\x88\x80\x80" },
00712   { 0x00050000, "\xF1\x90\x80\x80" },
00713   { 0x00060000, "\xF1\xA0\x80\x80" },
00714   { 0x0007FFFF, "\xF1\xBF\xBF\xBF" },
00715   { 0x00080000, "\xF2\x80\x80\x80" },
00716   { 0x00080001, "\xF2\x80\x80\x81" },
00717   { 0x00080002, "\xF2\x80\x80\x82" },
00718   { 0x00080004, "\xF2\x80\x80\x84" },
00719   { 0x00080008, "\xF2\x80\x80\x88" },
00720   { 0x00080010, "\xF2\x80\x80\x90" },
00721   { 0x00080020, "\xF2\x80\x80\xA0" },
00722   { 0x00080040, "\xF2\x80\x81\x80" },
00723   { 0x00080080, "\xF2\x80\x82\x80" },
00724   { 0x00080100, "\xF2\x80\x84\x80" },
00725   { 0x00080200, "\xF2\x80\x88\x80" },
00726   { 0x00080400, "\xF2\x80\x90\x80" },
00727   { 0x00080800, "\xF2\x80\xA0\x80" },
00728   { 0x00081000, "\xF2\x81\x80\x80" },
00729   { 0x00082000, "\xF2\x82\x80\x80" },
00730   { 0x00084000, "\xF2\x84\x80\x80" },
00731   { 0x00088000, "\xF2\x88\x80\x80" },
00732   { 0x00090000, "\xF2\x90\x80\x80" },
00733   { 0x000A0000, "\xF2\xA0\x80\x80" },
00734   { 0x000C0000, "\xF3\x80\x80\x80" },
00735   { 0x000FFFFF, "\xF3\xBF\xBF\xBF" },
00736   { 0x00100000, "\xF4\x80\x80\x80" },
00737   { 0x00100001, "\xF4\x80\x80\x81" },
00738   { 0x00100002, "\xF4\x80\x80\x82" },
00739   { 0x00100004, "\xF4\x80\x80\x84" },
00740   { 0x00100008, "\xF4\x80\x80\x88" },
00741   { 0x00100010, "\xF4\x80\x80\x90" },
00742   { 0x00100020, "\xF4\x80\x80\xA0" },
00743   { 0x00100040, "\xF4\x80\x81\x80" },
00744   { 0x00100080, "\xF4\x80\x82\x80" },
00745   { 0x00100100, "\xF4\x80\x84\x80" },
00746   { 0x00100200, "\xF4\x80\x88\x80" },
00747   { 0x00100400, "\xF4\x80\x90\x80" },
00748   { 0x00100800, "\xF4\x80\xA0\x80" },
00749   { 0x00101000, "\xF4\x81\x80\x80" },
00750   { 0x00102000, "\xF4\x82\x80\x80" },
00751   { 0x00104000, "\xF4\x84\x80\x80" },
00752   { 0x00108000, "\xF4\x88\x80\x80" },
00753   { 0x0010FFFF, "\xF4\x8F\xBF\xBF" },
00754 };
00755 
00756 /*
00757  * UCS-2 vectors
00758  */
00759 
00760 struct ucs2 ucs2[] = {
00761   { 0x0001, "\x01" },
00762   { 0x0002, "\x02" },
00763   { 0x0003, "\x03" },
00764   { 0x0004, "\x04" },
00765   { 0x0007, "\x07" },
00766   { 0x0008, "\x08" },
00767   { 0x000F, "\x0F" },
00768   { 0x0010, "\x10" },
00769   { 0x001F, "\x1F" },
00770   { 0x0020, "\x20" },
00771   { 0x003F, "\x3F" },
00772   { 0x0040, "\x40" },
00773   { 0x007F, "\x7F" },
00774           
00775   { 0x0080, "\xC2\x80" },
00776   { 0x0081, "\xC2\x81" },
00777   { 0x0082, "\xC2\x82" },
00778   { 0x0084, "\xC2\x84" },
00779   { 0x0088, "\xC2\x88" },
00780   { 0x0090, "\xC2\x90" },
00781   { 0x00A0, "\xC2\xA0" },
00782   { 0x00C0, "\xC3\x80" },
00783   { 0x00FF, "\xC3\xBF" },
00784   { 0x0100, "\xC4\x80" },
00785   { 0x0101, "\xC4\x81" },
00786   { 0x0102, "\xC4\x82" },
00787   { 0x0104, "\xC4\x84" },
00788   { 0x0108, "\xC4\x88" },
00789   { 0x0110, "\xC4\x90" },
00790   { 0x0120, "\xC4\xA0" },
00791   { 0x0140, "\xC5\x80" },
00792   { 0x0180, "\xC6\x80" },
00793   { 0x01FF, "\xC7\xBF" },
00794   { 0x0200, "\xC8\x80" },
00795   { 0x0201, "\xC8\x81" },
00796   { 0x0202, "\xC8\x82" },
00797   { 0x0204, "\xC8\x84" },
00798   { 0x0208, "\xC8\x88" },
00799   { 0x0210, "\xC8\x90" },
00800   { 0x0220, "\xC8\xA0" },
00801   { 0x0240, "\xC9\x80" },
00802   { 0x0280, "\xCA\x80" },
00803   { 0x0300, "\xCC\x80" },
00804   { 0x03FF, "\xCF\xBF" },
00805   { 0x0400, "\xD0\x80" },
00806   { 0x0401, "\xD0\x81" },
00807   { 0x0402, "\xD0\x82" },
00808   { 0x0404, "\xD0\x84" },
00809   { 0x0408, "\xD0\x88" },
00810   { 0x0410, "\xD0\x90" },
00811   { 0x0420, "\xD0\xA0" },
00812   { 0x0440, "\xD1\x80" },
00813   { 0x0480, "\xD2\x80" },
00814   { 0x0500, "\xD4\x80" },
00815   { 0x0600, "\xD8\x80" },
00816   { 0x07FF, "\xDF\xBF" },
00817           
00818   { 0x0800, "\xE0\xA0\x80" },
00819   { 0x0801, "\xE0\xA0\x81" },
00820   { 0x0802, "\xE0\xA0\x82" },
00821   { 0x0804, "\xE0\xA0\x84" },
00822   { 0x0808, "\xE0\xA0\x88" },
00823   { 0x0810, "\xE0\xA0\x90" },
00824   { 0x0820, "\xE0\xA0\xA0" },
00825   { 0x0840, "\xE0\xA1\x80" },
00826   { 0x0880, "\xE0\xA2\x80" },
00827   { 0x0900, "\xE0\xA4\x80" },
00828   { 0x0A00, "\xE0\xA8\x80" },
00829   { 0x0C00, "\xE0\xB0\x80" },
00830   { 0x0FFF, "\xE0\xBF\xBF" },
00831   { 0x1000, "\xE1\x80\x80" },
00832   { 0x1001, "\xE1\x80\x81" },
00833   { 0x1002, "\xE1\x80\x82" },
00834   { 0x1004, "\xE1\x80\x84" },
00835   { 0x1008, "\xE1\x80\x88" },
00836   { 0x1010, "\xE1\x80\x90" },
00837   { 0x1020, "\xE1\x80\xA0" },
00838   { 0x1040, "\xE1\x81\x80" },
00839   { 0x1080, "\xE1\x82\x80" },
00840   { 0x1100, "\xE1\x84\x80" },
00841   { 0x1200, "\xE1\x88\x80" },
00842   { 0x1400, "\xE1\x90\x80" },
00843   { 0x1800, "\xE1\xA0\x80" },
00844   { 0x1FFF, "\xE1\xBF\xBF" },
00845   { 0x2000, "\xE2\x80\x80" },
00846   { 0x2001, "\xE2\x80\x81" },
00847   { 0x2002, "\xE2\x80\x82" },
00848   { 0x2004, "\xE2\x80\x84" },
00849   { 0x2008, "\xE2\x80\x88" },
00850   { 0x2010, "\xE2\x80\x90" },
00851   { 0x2020, "\xE2\x80\xA0" },
00852   { 0x2040, "\xE2\x81\x80" },
00853   { 0x2080, "\xE2\x82\x80" },
00854   { 0x2100, "\xE2\x84\x80" },
00855   { 0x2200, "\xE2\x88\x80" },
00856   { 0x2400, "\xE2\x90\x80" },
00857   { 0x2800, "\xE2\xA0\x80" },
00858   { 0x3000, "\xE3\x80\x80" },
00859   { 0x3FFF, "\xE3\xBF\xBF" },
00860   { 0x4000, "\xE4\x80\x80" },
00861   { 0x4001, "\xE4\x80\x81" },
00862   { 0x4002, "\xE4\x80\x82" },
00863   { 0x4004, "\xE4\x80\x84" },
00864   { 0x4008, "\xE4\x80\x88" },
00865   { 0x4010, "\xE4\x80\x90" },
00866   { 0x4020, "\xE4\x80\xA0" },
00867   { 0x4040, "\xE4\x81\x80" },
00868   { 0x4080, "\xE4\x82\x80" },
00869   { 0x4100, "\xE4\x84\x80" },
00870   { 0x4200, "\xE4\x88\x80" },
00871   { 0x4400, "\xE4\x90\x80" },
00872   { 0x4800, "\xE4\xA0\x80" },
00873   { 0x5000, "\xE5\x80\x80" },
00874   { 0x6000, "\xE6\x80\x80" },
00875   { 0x7FFF, "\xE7\xBF\xBF" },
00876   { 0x8000, "\xE8\x80\x80" },
00877   { 0x8001, "\xE8\x80\x81" },
00878   { 0x8002, "\xE8\x80\x82" },
00879   { 0x8004, "\xE8\x80\x84" },
00880   { 0x8008, "\xE8\x80\x88" },
00881   { 0x8010, "\xE8\x80\x90" },
00882   { 0x8020, "\xE8\x80\xA0" },
00883   { 0x8040, "\xE8\x81\x80" },
00884   { 0x8080, "\xE8\x82\x80" },
00885   { 0x8100, "\xE8\x84\x80" },
00886   { 0x8200, "\xE8\x88\x80" },
00887   { 0x8400, "\xE8\x90\x80" },
00888   { 0x8800, "\xE8\xA0\x80" },
00889   { 0x9000, "\xE9\x80\x80" },
00890   { 0xA000, "\xEA\x80\x80" },
00891   { 0xC000, "\xEC\x80\x80" },
00892   { 0xFFFF, "\xEF\xBF\xBF" }
00893 
00894 };
00895 
00896 /*
00897  * UTF-16 vectors
 *
 * Each entry pairs a code point above 0xFFFF (first field, read as `c`
 * by the tests) with the two 16-bit units (second field, read as `w[0]`
 * and `w[1]`) that represent it in UTF-16.  In every entry the first
 * unit lies in 0xD800..0xDBFF and the second in 0xDC00..0xDFFF.
 *
 * The table covers 0x00010000 through 0x0010FFFF, concentrating on
 * values with a single bit set above the base of each range and on
 * all-ones boundaries (0x...FF, 0x...FFFF).
 *
 * test_utf16_chars() walks this table; byte_order() rewrites all three
 * numbers of every entry with htonl()/htons() before the tests run.
00898  */
00899 
00900 struct utf16 utf16[] = {
00901   { 0x00010000, { 0xD800, 0xDC00 } },
00902   { 0x00010001, { 0xD800, 0xDC01 } },
00903   { 0x00010002, { 0xD800, 0xDC02 } },
00904   { 0x00010003, { 0xD800, 0xDC03 } },
00905   { 0x00010004, { 0xD800, 0xDC04 } },
00906   { 0x00010007, { 0xD800, 0xDC07 } },
00907   { 0x00010008, { 0xD800, 0xDC08 } },
00908   { 0x0001000F, { 0xD800, 0xDC0F } },
00909   { 0x00010010, { 0xD800, 0xDC10 } },
00910   { 0x0001001F, { 0xD800, 0xDC1F } },
00911   { 0x00010020, { 0xD800, 0xDC20 } },
00912   { 0x0001003F, { 0xD800, 0xDC3F } },
00913   { 0x00010040, { 0xD800, 0xDC40 } },
00914   { 0x0001007F, { 0xD800, 0xDC7F } },
00915   { 0x00010080, { 0xD800, 0xDC80 } },
00916   { 0x00010081, { 0xD800, 0xDC81 } },
00917   { 0x00010082, { 0xD800, 0xDC82 } },
00918   { 0x00010084, { 0xD800, 0xDC84 } },
00919   { 0x00010088, { 0xD800, 0xDC88 } },
00920   { 0x00010090, { 0xD800, 0xDC90 } },
00921   { 0x000100A0, { 0xD800, 0xDCA0 } },
00922   { 0x000100C0, { 0xD800, 0xDCC0 } },
00923   { 0x000100FF, { 0xD800, 0xDCFF } },
00924   { 0x00010100, { 0xD800, 0xDD00 } },
00925   { 0x00010101, { 0xD800, 0xDD01 } },
00926   { 0x00010102, { 0xD800, 0xDD02 } },
00927   { 0x00010104, { 0xD800, 0xDD04 } },
00928   { 0x00010108, { 0xD800, 0xDD08 } },
00929   { 0x00010110, { 0xD800, 0xDD10 } },
00930   { 0x00010120, { 0xD800, 0xDD20 } },
00931   { 0x00010140, { 0xD800, 0xDD40 } },
00932   { 0x00010180, { 0xD800, 0xDD80 } },
00933   { 0x000101FF, { 0xD800, 0xDDFF } },
00934   { 0x00010200, { 0xD800, 0xDE00 } },
00935   { 0x00010201, { 0xD800, 0xDE01 } },
00936   { 0x00010202, { 0xD800, 0xDE02 } },
00937   { 0x00010204, { 0xD800, 0xDE04 } },
00938   { 0x00010208, { 0xD800, 0xDE08 } },
00939   { 0x00010210, { 0xD800, 0xDE10 } },
00940   { 0x00010220, { 0xD800, 0xDE20 } },
00941   { 0x00010240, { 0xD800, 0xDE40 } },
00942   { 0x00010280, { 0xD800, 0xDE80 } },
00943   { 0x00010300, { 0xD800, 0xDF00 } },
00944   { 0x000103FF, { 0xD800, 0xDFFF } },
00945   { 0x00010400, { 0xD801, 0xDC00 } },
00946   { 0x00010401, { 0xD801, 0xDC01 } },
00947   { 0x00010402, { 0xD801, 0xDC02 } },
00948   { 0x00010404, { 0xD801, 0xDC04 } },
00949   { 0x00010408, { 0xD801, 0xDC08 } },
00950   { 0x00010410, { 0xD801, 0xDC10 } },
00951   { 0x00010420, { 0xD801, 0xDC20 } },
00952   { 0x00010440, { 0xD801, 0xDC40 } },
00953   { 0x00010480, { 0xD801, 0xDC80 } },
00954   { 0x00010500, { 0xD801, 0xDD00 } },
00955   { 0x00010600, { 0xD801, 0xDE00 } },
00956   { 0x000107FF, { 0xD801, 0xDFFF } },
00957   { 0x00010800, { 0xD802, 0xDC00 } },
00958   { 0x00010801, { 0xD802, 0xDC01 } },
00959   { 0x00010802, { 0xD802, 0xDC02 } },
00960   { 0x00010804, { 0xD802, 0xDC04 } },
00961   { 0x00010808, { 0xD802, 0xDC08 } },
00962   { 0x00010810, { 0xD802, 0xDC10 } },
00963   { 0x00010820, { 0xD802, 0xDC20 } },
00964   { 0x00010840, { 0xD802, 0xDC40 } },
00965   { 0x00010880, { 0xD802, 0xDC80 } },
00966   { 0x00010900, { 0xD802, 0xDD00 } },
00967   { 0x00010A00, { 0xD802, 0xDE00 } },
00968   { 0x00010C00, { 0xD803, 0xDC00 } },
00969   { 0x00010FFF, { 0xD803, 0xDFFF } },
00970   { 0x00011000, { 0xD804, 0xDC00 } },
00971   { 0x00011001, { 0xD804, 0xDC01 } },
00972   { 0x00011002, { 0xD804, 0xDC02 } },
00973   { 0x00011004, { 0xD804, 0xDC04 } },
00974   { 0x00011008, { 0xD804, 0xDC08 } },
00975   { 0x00011010, { 0xD804, 0xDC10 } },
00976   { 0x00011020, { 0xD804, 0xDC20 } },
00977   { 0x00011040, { 0xD804, 0xDC40 } },
00978   { 0x00011080, { 0xD804, 0xDC80 } },
00979   { 0x00011100, { 0xD804, 0xDD00 } },
00980   { 0x00011200, { 0xD804, 0xDE00 } },
00981   { 0x00011400, { 0xD805, 0xDC00 } },
00982   { 0x00011800, { 0xD806, 0xDC00 } },
00983   { 0x00011FFF, { 0xD807, 0xDFFF } },
00984   { 0x00012000, { 0xD808, 0xDC00 } },
00985   { 0x00012001, { 0xD808, 0xDC01 } },
00986   { 0x00012002, { 0xD808, 0xDC02 } },
00987   { 0x00012004, { 0xD808, 0xDC04 } },
00988   { 0x00012008, { 0xD808, 0xDC08 } },
00989   { 0x00012010, { 0xD808, 0xDC10 } },
00990   { 0x00012020, { 0xD808, 0xDC20 } },
00991   { 0x00012040, { 0xD808, 0xDC40 } },
00992   { 0x00012080, { 0xD808, 0xDC80 } },
00993   { 0x00012100, { 0xD808, 0xDD00 } },
00994   { 0x00012200, { 0xD808, 0xDE00 } },
00995   { 0x00012400, { 0xD809, 0xDC00 } },
00996   { 0x00012800, { 0xD80A, 0xDC00 } },
00997   { 0x00013000, { 0xD80C, 0xDC00 } },
00998   { 0x00013FFF, { 0xD80F, 0xDFFF } },
00999   { 0x00014000, { 0xD810, 0xDC00 } },
01000   { 0x00014001, { 0xD810, 0xDC01 } },
01001   { 0x00014002, { 0xD810, 0xDC02 } },
01002   { 0x00014004, { 0xD810, 0xDC04 } },
01003   { 0x00014008, { 0xD810, 0xDC08 } },
01004   { 0x00014010, { 0xD810, 0xDC10 } },
01005   { 0x00014020, { 0xD810, 0xDC20 } },
01006   { 0x00014040, { 0xD810, 0xDC40 } },
01007   { 0x00014080, { 0xD810, 0xDC80 } },
01008   { 0x00014100, { 0xD810, 0xDD00 } },
01009   { 0x00014200, { 0xD810, 0xDE00 } },
01010   { 0x00014400, { 0xD811, 0xDC00 } },
01011   { 0x00014800, { 0xD812, 0xDC00 } },
01012   { 0x00015000, { 0xD814, 0xDC00 } },
01013   { 0x00016000, { 0xD818, 0xDC00 } },
01014   { 0x00017FFF, { 0xD81F, 0xDFFF } },
01015   { 0x00018000, { 0xD820, 0xDC00 } },
01016   { 0x00018001, { 0xD820, 0xDC01 } },
01017   { 0x00018002, { 0xD820, 0xDC02 } },
01018   { 0x00018004, { 0xD820, 0xDC04 } },
01019   { 0x00018008, { 0xD820, 0xDC08 } },
01020   { 0x00018010, { 0xD820, 0xDC10 } },
01021   { 0x00018020, { 0xD820, 0xDC20 } },
01022   { 0x00018040, { 0xD820, 0xDC40 } },
01023   { 0x00018080, { 0xD820, 0xDC80 } },
01024   { 0x00018100, { 0xD820, 0xDD00 } },
01025   { 0x00018200, { 0xD820, 0xDE00 } },
01026   { 0x00018400, { 0xD821, 0xDC00 } },
01027   { 0x00018800, { 0xD822, 0xDC00 } },
01028   { 0x00019000, { 0xD824, 0xDC00 } },
01029   { 0x0001A000, { 0xD828, 0xDC00 } },
01030   { 0x0001C000, { 0xD830, 0xDC00 } },
01031   { 0x0001FFFF, { 0xD83F, 0xDFFF } },
01032   { 0x00020000, { 0xD840, 0xDC00 } },
01033   { 0x00020001, { 0xD840, 0xDC01 } },
01034   { 0x00020002, { 0xD840, 0xDC02 } },
01035   { 0x00020004, { 0xD840, 0xDC04 } },
01036   { 0x00020008, { 0xD840, 0xDC08 } },
01037   { 0x00020010, { 0xD840, 0xDC10 } },
01038   { 0x00020020, { 0xD840, 0xDC20 } },
01039   { 0x00020040, { 0xD840, 0xDC40 } },
01040   { 0x00020080, { 0xD840, 0xDC80 } },
01041   { 0x00020100, { 0xD840, 0xDD00 } },
01042   { 0x00020200, { 0xD840, 0xDE00 } },
01043   { 0x00020400, { 0xD841, 0xDC00 } },
01044   { 0x00020800, { 0xD842, 0xDC00 } },
01045   { 0x00021000, { 0xD844, 0xDC00 } },
01046   { 0x00022000, { 0xD848, 0xDC00 } },
01047   { 0x00024000, { 0xD850, 0xDC00 } },
01048   { 0x00028000, { 0xD860, 0xDC00 } },
01049   { 0x0002FFFF, { 0xD87F, 0xDFFF } },
01050   { 0x00030000, { 0xD880, 0xDC00 } },
01051   { 0x00030001, { 0xD880, 0xDC01 } },
01052   { 0x00030002, { 0xD880, 0xDC02 } },
01053   { 0x00030004, { 0xD880, 0xDC04 } },
01054   { 0x00030008, { 0xD880, 0xDC08 } },
01055   { 0x00030010, { 0xD880, 0xDC10 } },
01056   { 0x00030020, { 0xD880, 0xDC20 } },
01057   { 0x00030040, { 0xD880, 0xDC40 } },
01058   { 0x00030080, { 0xD880, 0xDC80 } },
01059   { 0x00030100, { 0xD880, 0xDD00 } },
01060   { 0x00030200, { 0xD880, 0xDE00 } },
01061   { 0x00030400, { 0xD881, 0xDC00 } },
01062   { 0x00030800, { 0xD882, 0xDC00 } },
01063   { 0x00031000, { 0xD884, 0xDC00 } },
01064   { 0x00032000, { 0xD888, 0xDC00 } },
01065   { 0x00034000, { 0xD890, 0xDC00 } },
01066   { 0x00038000, { 0xD8A0, 0xDC00 } },
01067   { 0x0003FFFF, { 0xD8BF, 0xDFFF } },
01068   { 0x00040000, { 0xD8C0, 0xDC00 } },
01069   { 0x00040001, { 0xD8C0, 0xDC01 } },
01070   { 0x00040002, { 0xD8C0, 0xDC02 } },
01071   { 0x00040004, { 0xD8C0, 0xDC04 } },
01072   { 0x00040008, { 0xD8C0, 0xDC08 } },
01073   { 0x00040010, { 0xD8C0, 0xDC10 } },
01074   { 0x00040020, { 0xD8C0, 0xDC20 } },
01075   { 0x00040040, { 0xD8C0, 0xDC40 } },
01076   { 0x00040080, { 0xD8C0, 0xDC80 } },
01077   { 0x00040100, { 0xD8C0, 0xDD00 } },
01078   { 0x00040200, { 0xD8C0, 0xDE00 } },
01079   { 0x00040400, { 0xD8C1, 0xDC00 } },
01080   { 0x00040800, { 0xD8C2, 0xDC00 } },
01081   { 0x00041000, { 0xD8C4, 0xDC00 } },
01082   { 0x00042000, { 0xD8C8, 0xDC00 } },
01083   { 0x00044000, { 0xD8D0, 0xDC00 } },
01084   { 0x00048000, { 0xD8E0, 0xDC00 } },
01085   { 0x0004FFFF, { 0xD8FF, 0xDFFF } },
01086   { 0x00050000, { 0xD900, 0xDC00 } },
01087   { 0x00050001, { 0xD900, 0xDC01 } },
01088   { 0x00050002, { 0xD900, 0xDC02 } },
01089   { 0x00050004, { 0xD900, 0xDC04 } },
01090   { 0x00050008, { 0xD900, 0xDC08 } },
01091   { 0x00050010, { 0xD900, 0xDC10 } },
01092   { 0x00050020, { 0xD900, 0xDC20 } },
01093   { 0x00050040, { 0xD900, 0xDC40 } },
01094   { 0x00050080, { 0xD900, 0xDC80 } },
01095   { 0x00050100, { 0xD900, 0xDD00 } },
01096   { 0x00050200, { 0xD900, 0xDE00 } },
01097   { 0x00050400, { 0xD901, 0xDC00 } },
01098   { 0x00050800, { 0xD902, 0xDC00 } },
01099   { 0x00051000, { 0xD904, 0xDC00 } },
01100   { 0x00052000, { 0xD908, 0xDC00 } },
01101   { 0x00054000, { 0xD910, 0xDC00 } },
01102   { 0x00058000, { 0xD920, 0xDC00 } },
01103   { 0x00060000, { 0xD940, 0xDC00 } },
01104   { 0x00070000, { 0xD980, 0xDC00 } },
01105   { 0x0007FFFF, { 0xD9BF, 0xDFFF } },
01106   { 0x00080000, { 0xD9C0, 0xDC00 } },
01107   { 0x00080001, { 0xD9C0, 0xDC01 } },
01108   { 0x00080002, { 0xD9C0, 0xDC02 } },
01109   { 0x00080004, { 0xD9C0, 0xDC04 } },
01110   { 0x00080008, { 0xD9C0, 0xDC08 } },
01111   { 0x00080010, { 0xD9C0, 0xDC10 } },
01112   { 0x00080020, { 0xD9C0, 0xDC20 } },
01113   { 0x00080040, { 0xD9C0, 0xDC40 } },
01114   { 0x00080080, { 0xD9C0, 0xDC80 } },
01115   { 0x00080100, { 0xD9C0, 0xDD00 } },
01116   { 0x00080200, { 0xD9C0, 0xDE00 } },
01117   { 0x00080400, { 0xD9C1, 0xDC00 } },
01118   { 0x00080800, { 0xD9C2, 0xDC00 } },
01119   { 0x00081000, { 0xD9C4, 0xDC00 } },
01120   { 0x00082000, { 0xD9C8, 0xDC00 } },
01121   { 0x00084000, { 0xD9D0, 0xDC00 } },
01122   { 0x00088000, { 0xD9E0, 0xDC00 } },
01123   { 0x0008FFFF, { 0xD9FF, 0xDFFF } },
01124   { 0x00090000, { 0xDA00, 0xDC00 } },
01125   { 0x00090001, { 0xDA00, 0xDC01 } },
01126   { 0x00090002, { 0xDA00, 0xDC02 } },
01127   { 0x00090004, { 0xDA00, 0xDC04 } },
01128   { 0x00090008, { 0xDA00, 0xDC08 } },
01129   { 0x00090010, { 0xDA00, 0xDC10 } },
01130   { 0x00090020, { 0xDA00, 0xDC20 } },
01131   { 0x00090040, { 0xDA00, 0xDC40 } },
01132   { 0x00090080, { 0xDA00, 0xDC80 } },
01133   { 0x00090100, { 0xDA00, 0xDD00 } },
01134   { 0x00090200, { 0xDA00, 0xDE00 } },
01135   { 0x00090400, { 0xDA01, 0xDC00 } },
01136   { 0x00090800, { 0xDA02, 0xDC00 } },
01137   { 0x00091000, { 0xDA04, 0xDC00 } },
01138   { 0x00092000, { 0xDA08, 0xDC00 } },
01139   { 0x00094000, { 0xDA10, 0xDC00 } },
01140   { 0x00098000, { 0xDA20, 0xDC00 } },
01141   { 0x000A0000, { 0xDA40, 0xDC00 } },
01142   { 0x000B0000, { 0xDA80, 0xDC00 } },
01143   { 0x000C0000, { 0xDAC0, 0xDC00 } },
01144   { 0x000D0000, { 0xDB00, 0xDC00 } },
01145   { 0x000FFFFF, { 0xDBBF, 0xDFFF } },
01146   { 0x0010FFFF, { 0xDBFF, 0xDFFF } }
01147 
01148 };
01149 
01150 /*
 * Illegal UTF-8 sequences.
 *
 * test_utf8_bad_chars() hands each string to both the UCS-2 and the
 * UCS-4 decoder and records a failure if either one accepts it.
 *
 * In table order the entries are:
 *   - over-long 2-, 3- and 4-byte forms (C0/C1 lead bytes, E0 followed
 *     by 80..9F, F0 followed by 80..8F);
 *   - 4-byte forms that decode above 0x10FFFF (F4 90 ..., F7 ...);
 *   - 5-byte forms (lead bytes F8..FB) and 6-byte forms (FC, FD);
 *   - structurally broken input: a lone continuation byte, a lead byte
 *     with nothing after it, a lead byte followed by another lead byte;
 *   - 3-byte forms that decode into 0xD800..0xDFFF (ED A0 80, ED BF 80,
 *     ED BF BF), the last entry being such a form followed by a
 *     well-formed 3-byte sequence.
 */
01151 char *utf8_bad[] = {
01152   "\xC0\x80",
01153   "\xC1\xBF",
01154   "\xE0\x80\x80",
01155   "\xE0\x9F\xBF",
01156   "\xF0\x80\x80\x80",
01157   "\xF0\x8F\xBF\xBF",
01158   "\xF4\x90\x80\x80",
01159   "\xF7\xBF\xBF\xBF",
01160   "\xF8\x80\x80\x80\x80",
01161   "\xF8\x88\x80\x80\x80",
01162   "\xF8\x92\x80\x80\x80",
01163   "\xF8\x9F\xBF\xBF\xBF",
01164   "\xF8\xA0\x80\x80\x80",
01165   "\xF8\xA8\x80\x80\x80",
01166   "\xF8\xB0\x80\x80\x80",
01167   "\xF8\xBF\xBF\xBF\xBF",
01168   "\xF9\x80\x80\x80\x88",
01169   "\xF9\x84\x80\x80\x80",
01170   "\xF9\xBF\xBF\xBF\xBF",
01171   "\xFA\x80\x80\x80\x80",
01172   "\xFA\x90\x80\x80\x80",
01173   "\xFB\xBF\xBF\xBF\xBF",
01174   "\xFC\x84\x80\x80\x80\x81",
01175   "\xFC\x85\x80\x80\x80\x80",
01176   "\xFC\x86\x80\x80\x80\x80",
01177   "\xFC\x87\xBF\xBF\xBF\xBF",
01178   "\xFC\x88\xA0\x80\x80\x80",
01179   "\xFC\x89\x80\x80\x80\x80",
01180   "\xFC\x8A\x80\x80\x80\x80",
01181   "\xFC\x90\x80\x80\x80\x82",
01182   "\xFD\x80\x80\x80\x80\x80",
01183   "\xFD\xBF\xBF\xBF\xBF\xBF",
01184   "\x80",
01185   "\xC3",
01186   "\xC3\xC3\x80",
01187   "\xED\xA0\x80",
01188   "\xED\xBF\x80",
01189   "\xED\xBF\xBF",
01190   "\xED\xA0\x80\xE0\xBF\xBF",
01191 };
01192 
/*
 * dump_utf8
 *
 * Diagnostic helper.  Writes to stdout, in this order:
 *   - `word` followed by one space;
 *   - every byte of `utf8` up to (not including) its terminating zero
 *     byte, each as at least two lower-case hex digits plus a space;
 *   - `end`, verbatim.
 *
 * `utf8` must be zero-terminated; the walk stops at the first zero byte.
 */
static void
dump_utf8
(
  char *word,
  unsigned char *utf8,
  char *end
)
{
  unsigned char *cursor = utf8;

  fprintf(stdout, "%s ", word);

  while( '\0' != *cursor ) {
    fprintf(stdout, "%02.2x ", (unsigned int)*cursor);
    cursor++;
  }

  fprintf(stdout, "%s", end);
}
01207 
01208 static PRBool
01209 test_ucs4_chars
01210 (
01211   void
01212 )
01213 {
01214   PRBool rv = PR_TRUE;
01215   int i;
01216 
01217   for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
01218     struct ucs4 *e = &ucs4[i];
01219     PRBool result;
01220     unsigned char utf8[8];
01221     unsigned int len = 0;
01222     PRUint32 back = 0;
01223 
01224     (void)memset(utf8, 0, sizeof(utf8));
01225     
01226     result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, 
01227       (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
01228 
01229     if( !result ) {
01230       fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c);
01231       rv = PR_FALSE;
01232       continue;
01233     }
01234 
01235     if( (len >= sizeof(utf8)) ||
01236         (strlen(e->utf8) != len) ||
01237         (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
01238       fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c);
01239       dump_utf8("expected", e->utf8, ", ");
01240       dump_utf8("received", utf8, "\n");
01241       rv = PR_FALSE;
01242       continue;
01243     }
01244 
01245     result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
01246       utf8, len, (unsigned char *)&back, sizeof(back), &len);
01247 
01248     if( !result ) {
01249       dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n");
01250       rv = PR_FALSE;
01251       continue;
01252     }
01253 
01254     if( (sizeof(back) != len) || (e->c != back) ) {
01255       dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:");
01256       fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
01257       rv = PR_FALSE;
01258       continue;
01259     }
01260   }
01261 
01262   return rv;
01263 }
01264 
01265 static PRBool
01266 test_ucs2_chars
01267 (
01268   void
01269 )
01270 {
01271   PRBool rv = PR_TRUE;
01272   int i;
01273 
01274   for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
01275     struct ucs2 *e = &ucs2[i];
01276     PRBool result;
01277     unsigned char utf8[8];
01278     unsigned int len = 0;
01279     PRUint16 back = 0;
01280 
01281     (void)memset(utf8, 0, sizeof(utf8));
01282     
01283     result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
01284       (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
01285 
01286     if( !result ) {
01287       fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c);
01288       rv = PR_FALSE;
01289       continue;
01290     }
01291 
01292     if( (len >= sizeof(utf8)) ||
01293         (strlen(e->utf8) != len) ||
01294         (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
01295       fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c);
01296       dump_utf8("expected", e->utf8, ", ");
01297       dump_utf8("received", utf8, "\n");
01298       rv = PR_FALSE;
01299       continue;
01300     }
01301 
01302     result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
01303       utf8, len, (unsigned char *)&back, sizeof(back), &len);
01304 
01305     if( !result ) {
01306       dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n");
01307       rv = PR_FALSE;
01308       continue;
01309     }
01310 
01311     if( (sizeof(back) != len) || (e->c != back) ) {
01312       dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:");
01313       fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
01314       rv = PR_FALSE;
01315       continue;
01316     }
01317   }
01318 
01319   return rv;
01320 }
01321 
01322 static PRBool
01323 test_utf16_chars
01324 (
01325   void
01326 )
01327 {
01328   PRBool rv = PR_TRUE;
01329   int i;
01330 
01331   for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
01332     struct utf16 *e = &utf16[i];
01333     PRBool result;
01334     unsigned char utf8[8];
01335     unsigned int len = 0;
01336     PRUint32 back32 = 0;
01337     PRUint16 back[2];
01338 
01339     (void)memset(utf8, 0, sizeof(utf8));
01340     
01341     result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, 
01342       (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len);
01343 
01344     if( !result ) {
01345       fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n", 
01346               e->w[0], e->w[1]);
01347       rv = PR_FALSE;
01348       continue;
01349     }
01350 
01351     result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
01352       utf8, len, (unsigned char *)&back32, sizeof(back32), &len);
01353 
01354     if( 4 != len ) {
01355       fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: "
01356               "unexpected len %d\n", e->w[0], e->w[1], len);
01357       rv = PR_FALSE;
01358       continue;
01359     }
01360 
01361     utf8[len] = '\0'; /* null-terminate for printing */
01362 
01363     if( !result ) {
01364       dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n");
01365       rv = PR_FALSE;
01366       continue;
01367     }
01368 
01369     if( (sizeof(back32) != len) || (e->c != back32) ) {
01370       fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ", 
01371               e->w[0], e->w[1]);
01372       dump_utf8("to UTF-8", utf8, "and then to UCS-4: ");
01373       if( sizeof(back32) != len ) {
01374         fprintf(stdout, "len is %d\n", len);
01375       } else {
01376         fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32);
01377       }
01378       rv = PR_FALSE;
01379       continue;
01380     }
01381 
01382     (void)memset(utf8, 0, sizeof(utf8));
01383     back[0] = back[1] = 0;
01384 
01385     result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
01386       (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
01387 
01388     if( !result ) {
01389       fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n",
01390               e->c);
01391       rv = PR_FALSE;
01392       continue;
01393     }
01394 
01395     result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
01396       utf8, len, (unsigned char *)&back[0], sizeof(back), &len);
01397 
01398     if( 4 != len ) {
01399       fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: "
01400               "unexpected len %d\n", e->c, len);
01401       rv = PR_FALSE;
01402       continue;
01403     }
01404 
01405     utf8[len] = '\0'; /* null-terminate for printing */
01406 
01407     if( !result ) {
01408       dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n");
01409       rv = PR_FALSE;
01410       continue;
01411     }
01412 
01413     if( (sizeof(back) != len) || (e->w[0] != back[0]) || (e->w[1] != back[1]) ) {
01414       fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c);
01415       dump_utf8("", utf8, "and then to UTF-16:");
01416       if( sizeof(back) != len ) {
01417         fprintf(stdout, "len is %d\n", len);
01418       } else {
01419         fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4xx\n",
01420                 e->w[0], e->w[1], back[0], back[1]);
01421       }
01422       rv = PR_FALSE;
01423       continue;
01424     }
01425   }
01426 
01427   return rv;
01428 }
01429 
01430 static PRBool
01431 test_utf8_bad_chars
01432 (
01433   void
01434 )
01435 {
01436   PRBool rv = PR_TRUE;
01437   int i;
01438 
01439   for( i = 0; i < sizeof(utf8_bad)/sizeof(utf8_bad[0]); i++ ) {
01440     PRBool result;
01441     unsigned char destbuf[30];
01442     unsigned int len = 0;
01443 
01444     result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
01445       (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len);
01446 
01447     if( result ) {
01448       dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", utf8_bad[i], "\n");
01449       rv = PR_FALSE;
01450       continue;
01451     }
01452     result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
01453       (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len);
01454 
01455     if( result ) {
01456       dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", utf8_bad[i], "\n");
01457       rv = PR_FALSE;
01458       continue;
01459     }
01460 
01461   }
01462 
01463   return rv;
01464 }
01465 
01466 static PRBool
01467 test_iso88591_chars
01468 (
01469   void
01470 )
01471 {
01472   PRBool rv = PR_TRUE;
01473   int i;
01474 
01475   for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
01476     struct ucs2 *e = &ucs2[i];
01477     PRBool result;
01478     unsigned char iso88591;
01479     unsigned char utf8[3];
01480     unsigned int len = 0;
01481 
01482     if (ntohs(e->c) > 0xFF) continue;
01483 
01484     (void)memset(utf8, 0, sizeof(utf8));
01485     iso88591 = ntohs(e->c);
01486     
01487     result = sec_port_iso88591_utf8_conversion_function(&iso88591,
01488       1, utf8, sizeof(utf8), &len);
01489 
01490     if( !result ) {
01491       fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", iso88591);
01492       rv = PR_FALSE;
01493       continue;
01494     }
01495 
01496     if( (len >= sizeof(utf8)) ||
01497         (strlen(e->utf8) != len) ||
01498         (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
01499       fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", iso88591);
01500       dump_utf8("expected", e->utf8, ", ");
01501       dump_utf8("received", utf8, "\n");
01502       rv = PR_FALSE;
01503       continue;
01504     }
01505 
01506   }
01507 
01508   return rv;
01509 }
01510 
01511 static PRBool
01512 test_zeroes
01513 (
01514   void
01515 )
01516 {
01517   PRBool rv = PR_TRUE;
01518   PRBool result;
01519   PRUint32 lzero = 0;
01520   PRUint16 szero = 0;
01521   unsigned char utf8[8];
01522   unsigned int len = 0;
01523   PRUint32 lback = 1;
01524   PRUint16 sback = 1;
01525 
01526   (void)memset(utf8, 1, sizeof(utf8));
01527 
01528   result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, 
01529     (unsigned char *)&lzero, sizeof(lzero), utf8, sizeof(utf8), &len);
01530 
01531   if( !result ) {
01532     fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n");
01533     rv = PR_FALSE;
01534   } else if( 1 != len ) {
01535     fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", len);
01536     rv = PR_FALSE;
01537   } else if( '\0' != *utf8 ) {
01538     fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ,"
01539             "received %02.2x\n", (unsigned int)*utf8);
01540     rv = PR_FALSE;
01541   }
01542 
01543   result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
01544     "", 1, (unsigned char *)&lback, sizeof(lback), &len);
01545 
01546   if( !result ) {
01547     fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n");
01548     rv = PR_FALSE;
01549   } else if( 4 != len ) {
01550     fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", len);
01551     rv = PR_FALSE;
01552   } else if( 0 != lback ) {
01553     fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: "
01554             "expected 0x00000000, received 0x%08.8x\n", lback);
01555     rv = PR_FALSE;
01556   }
01557 
01558   (void)memset(utf8, 1, sizeof(utf8));
01559 
01560   result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, 
01561     (unsigned char *)&szero, sizeof(szero), utf8, sizeof(utf8), &len);
01562 
01563   if( !result ) {
01564     fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n");
01565     rv = PR_FALSE;
01566   } else if( 1 != len ) {
01567     fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", len);
01568     rv = PR_FALSE;
01569   } else if( '\0' != *utf8 ) {
01570     fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ,"
01571             "received %02.2x\n", (unsigned int)*utf8);
01572     rv = PR_FALSE;
01573   }
01574 
01575   result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
01576     "", 1, (unsigned char *)&sback, sizeof(sback), &len);
01577 
01578   if( !result ) {
01579     fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n");
01580     rv = PR_FALSE;
01581   } else if( 2 != len ) {
01582     fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", len);
01583     rv = PR_FALSE;
01584   } else if( 0 != sback ) {
01585     fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: "
01586             "expected 0x0000, received 0x%04.4x\n", sback);
01587     rv = PR_FALSE;
01588   }
01589 
01590   return rv;
01591 }
01592 
01593 static PRBool
01594 test_multichars
01595 (
01596   void
01597 )
01598 {
01599   int i;
01600   unsigned int len, lenout;
01601   PRUint32 *ucs4s;
01602   char *ucs4_utf8;
01603   PRUint16 *ucs2s;
01604   char *ucs2_utf8;
01605   void *tmp;
01606   PRBool result;
01607 
01608   ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32));
01609   ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16));
01610 
01611   if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) {
01612     fprintf(stderr, "out of memory\n");
01613     exit(1);
01614   }
01615 
01616   len = 0;
01617   for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
01618     ucs4s[i] = ucs4[i].c;
01619     len += strlen(ucs4[i].utf8);
01620   }
01621 
01622   ucs4_utf8 = (char *)malloc(len);
01623 
01624   len = 0;
01625   for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
01626     ucs2s[i] = ucs2[i].c;
01627     len += strlen(ucs2[i].utf8);
01628   }
01629 
01630   ucs2_utf8 = (char *)malloc(len);
01631 
01632   if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) {
01633     fprintf(stderr, "out of memory\n");
01634     exit(1);
01635   }
01636 
01637   *ucs4_utf8 = '\0';
01638   for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
01639     strcat(ucs4_utf8, ucs4[i].utf8);
01640   }
01641 
01642   *ucs2_utf8 = '\0';
01643   for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
01644     strcat(ucs2_utf8, ucs2[i].utf8);
01645   }
01646 
01647   /* UTF-8 -> UCS-4 */
01648   len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32);
01649   tmp = calloc(len, 1);
01650   if( (void *)NULL == tmp ) {
01651     fprintf(stderr, "out of memory\n");
01652     exit(1);
01653   }
01654 
01655   result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
01656     ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout);
01657   if( !result ) {
01658     fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n");
01659     goto done;
01660   }
01661 
01662   if( lenout != len ) {
01663     fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n");
01664     goto loser;
01665   }
01666 
01667   if( 0 != memcmp(ucs4s, tmp, len) ) {
01668     fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n");
01669     goto loser;
01670   }
01671 
01672   free(tmp); tmp = (void *)NULL;
01673 
01674   /* UCS-4 -> UTF-8 */
01675   len = strlen(ucs4_utf8);
01676   tmp = calloc(len, 1);
01677   if( (void *)NULL == tmp ) {
01678     fprintf(stderr, "out of memory\n");
01679     exit(1);
01680   }
01681 
01682   result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
01683     (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32), 
01684     tmp, len, &lenout);
01685   if( !result ) {
01686     fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n");
01687     goto done;
01688   }
01689 
01690   if( lenout != len ) {
01691     fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n");
01692     goto loser;
01693   }
01694 
01695   if( 0 != strncmp(ucs4_utf8, tmp, len) ) {
01696     fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n");
01697     goto loser;
01698   }
01699 
01700   free(tmp); tmp = (void *)NULL;
01701 
01702   /* UTF-8 -> UCS-2 */
01703   len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16);
01704   tmp = calloc(len, 1);
01705   if( (void *)NULL == tmp ) {
01706     fprintf(stderr, "out of memory\n");
01707     exit(1);
01708   }
01709 
01710   result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
01711     ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout);
01712   if( !result ) {
01713     fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n");
01714     goto done;
01715   }
01716 
01717   if( lenout != len ) {
01718     fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n");
01719     goto loser;
01720   }
01721 
01722   if( 0 != memcmp(ucs2s, tmp, len) ) {
01723     fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n");
01724     goto loser;
01725   }
01726 
01727   free(tmp); tmp = (void *)NULL;
01728 
01729   /* UCS-2 -> UTF-8 */
01730   len = strlen(ucs2_utf8);
01731   tmp = calloc(len, 1);
01732   if( (void *)NULL == tmp ) {
01733     fprintf(stderr, "out of memory\n");
01734     exit(1);
01735   }
01736 
01737   result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
01738     (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16), 
01739     tmp, len, &lenout);
01740   if( !result ) {
01741     fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n");
01742     goto done;
01743   }
01744 
01745   if( lenout != len ) {
01746     fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n");
01747     goto loser;
01748   }
01749 
01750   if( 0 != strncmp(ucs2_utf8, tmp, len) ) {
01751     fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n");
01752     goto loser;
01753   }
01754 
01755   /* implement UTF16 */
01756 
01757   result = PR_TRUE;
01758   goto done;
01759 
01760  loser:
01761   result = PR_FALSE;
01762  done:
01763   free(ucs4s);
01764   free(ucs4_utf8);
01765   free(ucs2s);
01766   free(ucs2_utf8);
01767   if( (void *)NULL != tmp ) free(tmp);
01768   return result;
01769 }
01770 
01771 void
01772 byte_order
01773 (
01774   void
01775 )
01776 {
01777   /*
01778    * The implementation (now) expects the 16- and 32-bit characters
01779    * to be in network byte order, not host byte order.  Therefore I
01780    * have to byteswap all those test vectors above.  hton[ls] may be
01781    * functions, so I have to do this dynamically.  If you want to 
01782    * use this code to do host byte order conversions, just remove
01783    * the call in main() to this function.
01784    */
01785 
01786   int i;
01787 
01788   for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
01789     struct ucs4 *e = &ucs4[i];
01790     e->c = htonl(e->c);
01791   }
01792 
01793   for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
01794     struct ucs2 *e = &ucs2[i];
01795     e->c = htons(e->c);
01796   }
01797 
01798   for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
01799     struct utf16 *e = &utf16[i];
01800     e->c = htonl(e->c);
01801     e->w[0] = htons(e->w[0]);
01802     e->w[1] = htons(e->w[1]);
01803   }
01804 
01805   return;
01806 }
01807 
01808 int
01809 main
01810 (
01811   int argc,
01812   char *argv[]
01813 )
01814 {
01815   byte_order();
01816 
01817   if( test_ucs4_chars() &&
01818       test_ucs2_chars() &&
01819       test_utf16_chars() &&
01820       test_utf8_bad_chars() &&
01821       test_iso88591_chars() &&
01822       test_zeroes() &&
01823       test_multichars() &&
01824       PR_TRUE ) {
01825     fprintf(stderr, "PASS\n");
01826     return 1;
01827   } else {
01828     fprintf(stderr, "FAIL\n");
01829     return 0;
01830   }
01831 }
01832 
01833 #endif /* TEST_UTF8 */