Back to index

lightning-sunbird  0.9+nobinonly
utf.c
Go to the documentation of this file.
00001 /*
00002 ** 2004 April 13
00003 **
00004 ** The author disclaims copyright to this source code.  In place of
00005 ** a legal notice, here is a blessing:
00006 **
00007 **    May you do good and not evil.
00008 **    May you find forgiveness for yourself and forgive others.
00009 **    May you share freely, never taking more than you give.
00010 **
00011 *************************************************************************
00012 ** This file contains routines used to translate between UTF-8, 
00013 ** UTF-16, UTF-16BE, and UTF-16LE.
00014 **
00015 ** $Id: utf.c,v 1.39 2006/04/16 12:05:03 drh Exp $
00016 **
00017 ** Notes on UTF-8:
00018 **
00019 **   Byte-0    Byte-1    Byte-2    Byte-3    Value
00020 **  0xxxxxxx                                 00000000 00000000 0xxxxxxx
00021 **  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
00022 **  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
00023 **  11110uuu  10uuzzzz  10yyyyyy  10xxxxxx   000uuuuu zzzzyyyy yyxxxxxx
00024 **
00025 **
00026 ** Notes on UTF-16:  (with wwww+1==uuuuu)
00027 **
00028 **      Word-0               Word-1          Value
00029 **  110110ww wwzzzzyy   110111yy yyxxxxxx    000uuuuu zzzzyyyy yyxxxxxx
00030 **  zzzzyyyy yyxxxxxx                        00000000 zzzzyyyy yyxxxxxx
00031 **
00032 **
00033 ** BOM or Byte Order Mark:
00034 **     0xff 0xfe   little-endian utf-16 follows
00035 **     0xfe 0xff   big-endian utf-16 follows
00036 **
00037 **
00038 ** Handling of malformed strings:
00039 **
00040 ** SQLite accepts and processes malformed strings without an error wherever
00041 ** possible. However this is not possible when converting between UTF-8 and
00042 ** UTF-16.
00043 **
00044 ** When converting malformed UTF-8 strings to UTF-16, one instance of the
00045 ** replacement character U+FFFD is emitted for each byte that cannot be
00046 ** interpreted as part of a valid unicode character.
00047 **
00048 ** When converting malformed UTF-16 strings to UTF-8, one instance of the
00049 ** replacement character U+FFFD is emitted for each pair of bytes that
00050 ** cannot be interpreted as part of a valid unicode character.
00051 **
00052 ** This file contains the following public routines:
00053 **
00054 ** sqlite3VdbeMemTranslate() - Translate the encoding used by a Mem* string.
00055 ** sqlite3VdbeMemHandleBom() - Handle byte-order-marks in UTF16 Mem* strings.
00056 ** sqlite3utf16ByteLen()     - Calculate byte-length of a void* UTF16 string.
00057 ** sqlite3utf8CharLen()      - Calculate char-length of a char* UTF8 string.
00058 ** sqlite3utf8LikeCompare()  - Do a LIKE match given two UTF8 char* strings.
00059 **
00060 */
00061 #include "sqliteInt.h"
00062 #include <assert.h>
00063 #include "vdbeInt.h"
00064 
00065 /*
00066 ** This table maps from the first byte of a UTF-8 character to the number
00067 ** of trailing bytes expected. A value '255' indicates that the table key
00068 ** is not a legal first byte for a UTF-8 character.
00069 */
00070 static const u8 xtra_utf8_bytes[256]  = {
00071 /* 0xxxxxxx */
00072 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
00073 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
00074 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
00075 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
00076 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
00077 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
00078 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
00079 0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
00080 
00081 /* 10wwwwww */
00082 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
00083 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
00084 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
00085 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
00086 
00087 /* 110yyyyy */
00088 1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
00089 1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
00090 
00091 /* 1110zzzz */
00092 2, 2, 2, 2, 2, 2, 2, 2,     2, 2, 2, 2, 2, 2, 2, 2,
00093 
00094 /* 11110yyy */
00095 3, 3, 3, 3, 3, 3, 3, 3,     255, 255, 255, 255, 255, 255, 255, 255,
00096 };
00097 
/*
** Indexed by the number of trailing bytes in a UTF-8 character. Each entry
** is the sum of the UTF-8 marker bits (the 110/1110/11110 prefix of the
** first byte and the 10 prefix of every trailing byte), shifted to the
** positions they occupy after READ_UTF8 has accumulated all the bytes with
** "c = (c<<6) + byte". READ_UTF8 subtracts the entry once to strip all the
** markers in a single step.
*/
static const int xtra_utf8_bits[4] =  {
  0,                                             /* no trailing bytes */
  (0xC0<<6) + 0x80,                              /* 12416 */
  (0xE0<<12) + (0x80<<6) + 0x80,                 /* 925824 */
  (0xF0<<18) + (0x80<<12) + (0x80<<6) + 0x80     /* 63447168 */
};
00110 
/*
** Decode the UTF-8 character at zIn into c and advance zIn past it.
**
** The first byte selects, via xtra_utf8_bytes[], how many trailing bytes
** are folded in with "c = (c<<6) + byte"; the marker bits are then removed
** by subtracting the matching xtra_utf8_bits[] entry. A byte that is not a
** legal first byte is consumed on its own and yields U+FFFD.
**
** The trailing bytes are neither validated nor bounds-checked.
*/
#define READ_UTF8(zIn, c) do {                         \
  int nTrail;                                          \
  c = *(zIn)++;                                        \
  nTrail = xtra_utf8_bytes[c];                         \
  if( nTrail==255 ){                                   \
    c = (int)0xFFFD;                                   \
  }else if( nTrail>0 ){                                \
    int iByte;                                         \
    for(iByte=0; iByte<nTrail; iByte++){               \
      c = (c<<6) + *(zIn)++;                           \
    }                                                  \
    c -= xtra_utf8_bits[nTrail];                       \
  }                                                    \
} while(0)
/*
** Function wrapper around the READ_UTF8 macro: decode and return the
** UTF-8 character that starts at z. A byte that is not a legal first byte
** yields 0xFFFD. The pointer is passed by value, so the caller's pointer
** is not advanced.
*/
int sqlite3ReadUtf8(const unsigned char *z){
  int iChar;
  READ_UTF8(z, iChar);
  return iChar;
}
00128 
/*
** Advance zIn past one UTF-8 character without decoding it.
**
** A byte that is not a legal first byte has the table value 255. That
** value is a marker, not a length, so such a byte is skipped on its own
** (one byte) instead of adding 255+1 to the pointer.
**
** The trailing bytes are not examined and there is no end-of-buffer
** check: the caller must make sure a truncated final character cannot
** carry zIn past the end of the string.
*/
#define SKIP_UTF8(zIn) {                                       \
  u8 nXtra_ = xtra_utf8_bytes[*(u8 *)zIn];                     \
  zIn += (nXtra_==255 ? 1 : nXtra_ + 1);                       \
}
00132 
/*
** Append code point c to the byte buffer at zOut as UTF-8 and advance zOut
** by the number of bytes written (1 to 4). Values below 0x80 (including
** negative values) are written as a single byte; values of 0x10000 and
** above are written in the 4-byte form using only their low 21 bits.
*/
#define WRITE_UTF8(zOut, c) do {                       \
  if( (c)>=0x10000 ){                                  \
    *(zOut)++ = 0xF0 + (((c)>>18) & 0x07);             \
    *(zOut)++ = 0x80 + (((c)>>12) & 0x3F);             \
    *(zOut)++ = 0x80 + (((c)>>6) & 0x3F);              \
    *(zOut)++ = 0x80 + ((c) & 0x3F);                   \
  }else if( (c)>=0x00800 ){                            \
    *(zOut)++ = 0xE0 + (((c)>>12) & 0x0F);             \
    *(zOut)++ = 0x80 + (((c)>>6) & 0x3F);              \
    *(zOut)++ = 0x80 + ((c) & 0x3F);                   \
  }else if( (c)>=0x00080 ){                            \
    *(zOut)++ = 0xC0 + (((c)>>6) & 0x1F);              \
    *(zOut)++ = 0x80 + ((c) & 0x3F);                   \
  }else{                                               \
    *(zOut)++ = ((c) & 0xFF);                          \
  }                                                    \
} while(0)
00152 
/*
** Append code point c to the byte buffer at zOut as little-endian UTF-16
** and advance zOut. Values up to 0xFFFF are written as one 16-bit unit
** (2 bytes); larger values are written as a surrogate pair (4 bytes),
** each unit low byte first.
*/
#define WRITE_UTF16LE(zOut, c) do {                             \
  if( (c)>0xFFFF ){                                             \
    int hi_ = 0xD800 + ((((c)-0x10000)>>10) & 0x03FF);          \
    int lo_ = 0xDC00 + ((c) & 0x03FF);                          \
    *(zOut)++ = (hi_ & 0x00FF);                                 \
    *(zOut)++ = (hi_ >> 8);                                     \
    *(zOut)++ = (lo_ & 0x00FF);                                 \
    *(zOut)++ = (lo_ >> 8);                                     \
  }else{                                                        \
    *(zOut)++ = ((c) & 0x00FF);                                 \
    *(zOut)++ = (((c)>>8) & 0x00FF);                            \
  }                                                             \
} while(0)
00164 
/*
** Append code point c to the byte buffer at zOut as big-endian UTF-16
** and advance zOut. Values up to 0xFFFF are written as one 16-bit unit
** (2 bytes); larger values are written as a surrogate pair (4 bytes),
** each unit high byte first.
*/
#define WRITE_UTF16BE(zOut, c) do {                             \
  if( (c)>0xFFFF ){                                             \
    int hi_ = 0xD800 + ((((c)-0x10000)>>10) & 0x03FF);          \
    int lo_ = 0xDC00 + ((c) & 0x03FF);                          \
    *(zOut)++ = (hi_ >> 8);                                     \
    *(zOut)++ = (hi_ & 0x00FF);                                 \
    *(zOut)++ = (lo_ >> 8);                                     \
    *(zOut)++ = (lo_ & 0x00FF);                                 \
  }else{                                                        \
    *(zOut)++ = (((c)>>8) & 0x00FF);                            \
    *(zOut)++ = ((c) & 0x00FF);                                 \
  }                                                             \
} while(0)
00176 
/*
** Decode one little-endian UTF-16 character at zIn into c and advance zIn
** past it (2 bytes, or 4 bytes for a surrogate pair). zIn must point to
** unsigned bytes.
**
** A first unit in the surrogate range 0xD800..0xDFFF is combined with the
** following unit. 0xE000 is an ordinary BMP character and is NOT part of
** that range, so it is decoded as a single unit.
**
** No end-of-buffer check is made before reading the second unit.
*/
#define READ_UTF16LE(zIn, c){                                         \
  c = (*(zIn)++);                                                     \
  c += ((*(zIn)++)<<8);                                               \
  if( c>=0xD800 && c<0xE000 ){                                        \
    int c2 = (*(zIn)++);                                              \
    c2 += ((*(zIn)++)<<8);                                            \
    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
  }                                                                   \
}
00186 
/*
** Decode one big-endian UTF-16 character at zIn into c and advance zIn
** past it (2 bytes, or 4 bytes for a surrogate pair). zIn must point to
** unsigned bytes.
**
** A first unit in the surrogate range 0xD800..0xDFFF is combined with the
** following unit. 0xE000 is an ordinary BMP character and is NOT part of
** that range, so it is decoded as a single unit.
**
** No end-of-buffer check is made before reading the second unit.
*/
#define READ_UTF16BE(zIn, c){                                         \
  c = ((*(zIn)++)<<8);                                                \
  c += (*(zIn)++);                                                    \
  if( c>=0xD800 && c<0xE000 ){                                        \
    int c2 = ((*(zIn)++)<<8);                                         \
    c2 += (*(zIn)++);                                                 \
    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
  }                                                                   \
}
00196 
/*
** Advance zIn past one big-endian UTF-16 character without decoding it.
** The high byte comes first: if it lies in 0xD8..0xDF the unit is a
** surrogate and the character occupies 4 bytes, otherwise 2 bytes.
** U+E000 (bytes 0xE0 0x00) is an ordinary 2-byte character.
** zIn must point to unsigned bytes.
*/
#define SKIP_UTF16BE(zIn){                                            \
  if( (zIn)[0]>=0xD8 && (zIn)[0]<0xE0 ){                              \
    zIn += 4;                                                         \
  }else{                                                              \
    zIn += 2;                                                         \
  }                                                                   \
}
/*
** Advance zIn past one little-endian UTF-16 character without decoding
** it. The high byte is the second byte of the unit: if it lies in
** 0xD8..0xDF the unit is a surrogate and the character occupies 4 bytes,
** otherwise 2 bytes. U+E000 (bytes 0x00 0xE0) is an ordinary 2-byte
** character. zIn must point to unsigned bytes.
*/
#define SKIP_UTF16LE(zIn){                                            \
  if( (zIn)[1]>=0xD8 && (zIn)[1]<0xE0 ){                              \
    zIn += 4;                                                         \
  }else{                                                              \
    zIn += 2;                                                         \
  }                                                                   \
}
00212 
/*
** Move zIn backwards over one little-endian UTF-16 character. On entry
** zIn points just past the character to skip, so the 16-bit unit that
** ends the character occupies zIn[-2] (low byte) and zIn[-1] (high byte).
** If that unit is a low surrogate (high byte 0xDC..0xDF) the character is
** a surrogate pair and zIn moves back 4 bytes, otherwise 2 bytes.
**
** The decision is made from the unit BEFORE zIn; the bytes at and after
** zIn belong to the following character (or the terminator) and say
** nothing about the one being skipped.
**
** zIn must point to unsigned bytes and at least 2 bytes must precede it.
** NOTE(review): there is no start-of-buffer check, so a malformed string
** that begins with a lone low surrogate can still move zIn before the
** start; callers need to guard against that.
*/
#define RSKIP_UTF16LE(zIn){                                           \
  if( (zIn)[-1]>=0xDC && (zIn)[-1]<0xE0 ){                            \
    zIn -= 4;                                                         \
  }else{                                                              \
    zIn -= 2;                                                         \
  }                                                                   \
}
/*
** Move zIn backwards over one big-endian UTF-16 character. On entry zIn
** points just past the character to skip, so the 16-bit unit that ends
** the character occupies zIn[-2] (high byte) and zIn[-1] (low byte).
** If that unit is a low surrogate (high byte 0xDC..0xDF) the character is
** a surrogate pair and zIn moves back 4 bytes, otherwise 2 bytes.
**
** zIn must point to unsigned bytes and at least 2 bytes must precede it.
** NOTE(review): there is no start-of-buffer check, so a malformed string
** that begins with a lone low surrogate can still move zIn before the
** start; callers need to guard against that.
*/
#define RSKIP_UTF16BE(zIn){                                           \
  if( (zIn)[-2]>=0xDC && (zIn)[-2]<0xE0 ){                            \
    zIn -= 4;                                                         \
  }else{                                                              \
    zIn -= 2;                                                         \
  }                                                                   \
}
00228 
00229 /*
00230 ** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
00231 ** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
00232 */ 
00233 /* #define TRANSLATE_TRACE 1 */
00234 
00235 #ifndef SQLITE_OMIT_UTF16
00236 /*
00237 ** This routine transforms the internal text encoding used by pMem to
00238 ** desiredEnc. It is an error if the string is already of the desired
00239 ** encoding, or if *pMem does not contain a string value.
00240 */
00241 int sqlite3VdbeMemTranslate(Mem *pMem, u8 desiredEnc){
00242   unsigned char zShort[NBFS]; /* Temporary short output buffer */
00243   int len;                    /* Maximum length of output string in bytes */
00244   unsigned char *zOut;                  /* Output buffer */
00245   unsigned char *zIn;                   /* Input iterator */
00246   unsigned char *zTerm;                 /* End of input */
00247   unsigned char *z;                     /* Output iterator */
00248   int c;
00249 
00250   assert( pMem->flags&MEM_Str );
00251   assert( pMem->enc!=desiredEnc );
00252   assert( pMem->enc!=0 );
00253   assert( pMem->n>=0 );
00254 
00255 #if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG)
00256   {
00257     char zBuf[100];
00258     sqlite3VdbeMemPrettyPrint(pMem, zBuf);
00259     fprintf(stderr, "INPUT:  %s\n", zBuf);
00260   }
00261 #endif
00262 
00263   /* If the translation is between UTF-16 little and big endian, then 
00264   ** all that is required is to swap the byte order. This case is handled
00265   ** differently from the others.
00266   */
00267   if( pMem->enc!=SQLITE_UTF8 && desiredEnc!=SQLITE_UTF8 ){
00268     u8 temp;
00269     int rc;
00270     rc = sqlite3VdbeMemMakeWriteable(pMem);
00271     if( rc!=SQLITE_OK ){
00272       assert( rc==SQLITE_NOMEM );
00273       return SQLITE_NOMEM;
00274     }
00275     zIn = (u8*)pMem->z;
00276     zTerm = &zIn[pMem->n];
00277     while( zIn<zTerm ){
00278       temp = *zIn;
00279       *zIn = *(zIn+1);
00280       zIn++;
00281       *zIn++ = temp;
00282     }
00283     pMem->enc = desiredEnc;
00284     goto translate_out;
00285   }
00286 
00287   /* Set len to the maximum number of bytes required in the output buffer. */
00288   if( desiredEnc==SQLITE_UTF8 ){
00289     /* When converting from UTF-16, the maximum growth results from
00290     ** translating a 2-byte character to a 4-byte UTF-8 character.
00291     ** A single byte is required for the output string
00292     ** nul-terminator.
00293     */
00294     len = pMem->n * 2 + 1;
00295   }else{
00296     /* When converting from UTF-8 to UTF-16 the maximum growth is caused
00297     ** when a 1-byte UTF-8 character is translated into a 2-byte UTF-16
00298     ** character. Two bytes are required in the output buffer for the
00299     ** nul-terminator.
00300     */
00301     len = pMem->n * 2 + 2;
00302   }
00303 
00304   /* Set zIn to point at the start of the input buffer and zTerm to point 1
00305   ** byte past the end.
00306   **
00307   ** Variable zOut is set to point at the output buffer. This may be space
00308   ** obtained from malloc(), or Mem.zShort, if it large enough and not in
00309   ** use, or the zShort array on the stack (see above).
00310   */
00311   zIn = (u8*)pMem->z;
00312   zTerm = &zIn[pMem->n];
00313   if( len>NBFS ){
00314     zOut = sqliteMallocRaw(len);
00315     if( !zOut ) return SQLITE_NOMEM;
00316   }else{
00317     zOut = zShort;
00318   }
00319   z = zOut;
00320 
00321   if( pMem->enc==SQLITE_UTF8 ){
00322     if( desiredEnc==SQLITE_UTF16LE ){
00323       /* UTF-8 -> UTF-16 Little-endian */
00324       while( zIn<zTerm ){
00325         READ_UTF8(zIn, c); 
00326         WRITE_UTF16LE(z, c);
00327       }
00328     }else{
00329       assert( desiredEnc==SQLITE_UTF16BE );
00330       /* UTF-8 -> UTF-16 Big-endian */
00331       while( zIn<zTerm ){
00332         READ_UTF8(zIn, c); 
00333         WRITE_UTF16BE(z, c);
00334       }
00335     }
00336     pMem->n = z - zOut;
00337     *z++ = 0;
00338   }else{
00339     assert( desiredEnc==SQLITE_UTF8 );
00340     if( pMem->enc==SQLITE_UTF16LE ){
00341       /* UTF-16 Little-endian -> UTF-8 */
00342       while( zIn<zTerm ){
00343         READ_UTF16LE(zIn, c); 
00344         WRITE_UTF8(z, c);
00345       }
00346     }else{
00347       /* UTF-16 Little-endian -> UTF-8 */
00348       while( zIn<zTerm ){
00349         READ_UTF16BE(zIn, c); 
00350         WRITE_UTF8(z, c);
00351       }
00352     }
00353     pMem->n = z - zOut;
00354   }
00355   *z = 0;
00356   assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );
00357 
00358   sqlite3VdbeMemRelease(pMem);
00359   pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short);
00360   pMem->enc = desiredEnc;
00361   if( zOut==zShort ){
00362     memcpy(pMem->zShort, zOut, len);
00363     zOut = (u8*)pMem->zShort;
00364     pMem->flags |= (MEM_Term|MEM_Short);
00365   }else{
00366     pMem->flags |= (MEM_Term|MEM_Dyn);
00367   }
00368   pMem->z = (char*)zOut;
00369 
00370 translate_out:
00371 #if defined(TRANSLATE_TRACE) && defined(SQLITE_DEBUG)
00372   {
00373     char zBuf[100];
00374     sqlite3VdbeMemPrettyPrint(pMem, zBuf);
00375     fprintf(stderr, "OUTPUT: %s\n", zBuf);
00376   }
00377 #endif
00378   return SQLITE_OK;
00379 }
00380 
00381 /*
00382 ** This routine checks for a byte-order mark at the beginning of the 
00383 ** UTF-16 string stored in *pMem. If one is present, it is removed and
00384 ** the encoding of the Mem adjusted. This routine does not do any
00385 ** byte-swapping, it just sets Mem.enc appropriately.
00386 **
00387 ** The allocation (static, dynamic etc.) and encoding of the Mem may be
00388 ** changed by this function.
00389 */
00390 int sqlite3VdbeMemHandleBom(Mem *pMem){
00391   int rc = SQLITE_OK;
00392   u8 bom = 0;
00393 
00394   if( pMem->n<0 || pMem->n>1 ){
00395     u8 b1 = *(u8 *)pMem->z;
00396     u8 b2 = *(((u8 *)pMem->z) + 1);
00397     if( b1==0xFE && b2==0xFF ){
00398       bom = SQLITE_UTF16BE;
00399     }
00400     if( b1==0xFF && b2==0xFE ){
00401       bom = SQLITE_UTF16LE;
00402     }
00403   }
00404   
00405   if( bom ){
00406     /* This function is called as soon as a string is stored in a Mem*,
00407     ** from within sqlite3VdbeMemSetStr(). At that point it is not possible
00408     ** for the string to be stored in Mem.zShort, or for it to be stored
00409     ** in dynamic memory with no destructor.
00410     */
00411     assert( !(pMem->flags&MEM_Short) );
00412     assert( !(pMem->flags&MEM_Dyn) || pMem->xDel );
00413     if( pMem->flags & MEM_Dyn ){
00414       void (*xDel)(void*) = pMem->xDel;
00415       char *z = pMem->z;
00416       pMem->z = 0;
00417       pMem->xDel = 0;
00418       rc = sqlite3VdbeMemSetStr(pMem, &z[2], pMem->n-2, bom, SQLITE_TRANSIENT);
00419       xDel(z);
00420     }else{
00421       rc = sqlite3VdbeMemSetStr(pMem, &pMem->z[2], pMem->n-2, bom, 
00422           SQLITE_TRANSIENT);
00423     }
00424   }
00425   return rc;
00426 }
00427 #endif /* SQLITE_OMIT_UTF16 */
00428 
/*
** z is a UTF-8 encoded unicode string. If nByte is less than zero,
** return the number of unicode characters in z up to (but not including)
** the first 0x00 byte. If nByte is not less than zero, return the
** number of unicode characters in the first nByte bytes of z (or up to
** the first 0x00, whichever comes first).
**
** A character that starts inside the range but is cut short by the nByte
** limit or by the 0x00 terminator still counts as one character; the scan
** never moves past either boundary. A byte that is not a legal first byte
** (0x80..0xBF or 0xF8..0xFF) counts as one character on its own.
*/
int sqlite3utf8CharLen(const char *z, int nByte){
  const unsigned char *zIn = (const unsigned char *)z;
  const unsigned char *zTerm = zIn;   /* Only meaningful when isBounded */
  int isBounded = (nByte>=0);
  int r = 0;

  if( isBounded ){
    zTerm = zIn + nByte;
  }

  /* The bound is tested before the byte is read so that nothing at or
  ** beyond z[nByte] is ever dereferenced.
  */
  while( (!isBounded || zIn<zTerm) && *zIn!=0 ){
    unsigned char lead = *zIn++;
    int nTrail = 0;                   /* Trailing bytes the lead announces */
    if( lead>=0xF8 ){
      nTrail = 0;                     /* 11111xxx: not a legal first byte */
    }else if( lead>=0xF0 ){
      nTrail = 3;                     /* 11110uuu */
    }else if( lead>=0xE0 ){
      nTrail = 2;                     /* 1110zzzz */
    }else if( lead>=0xC0 ){
      nTrail = 1;                     /* 110yyyyy */
    }
    while( nTrail>0 && (!isBounded || zIn<zTerm) && *zIn!=0 ){
      zIn++;
      nTrail--;
    }
    r++;
  }
  return r;
}
00451 
00452 #ifndef SQLITE_OMIT_UTF16
00453 /*
00454 ** Convert a UTF-16 string in the native encoding into a UTF-8 string.
00455 ** Memory to hold the UTF-8 string is obtained from malloc and must be
00456 ** freed by the calling function.
00457 **
00458 ** NULL is returned if there is an allocation error.
00459 */
00460 char *sqlite3utf16to8(const void *z, int nByte){
00461   Mem m;
00462   memset(&m, 0, sizeof(m));
00463   sqlite3VdbeMemSetStr(&m, z, nByte, SQLITE_UTF16NATIVE, SQLITE_STATIC);
00464   sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8);
00465   assert( m.flags & MEM_Term );
00466   assert( m.flags & MEM_Str );
00467   return (m.flags & MEM_Dyn)!=0 ? m.z : sqliteStrDup(m.z);
00468 }
00469 
00470 /*
00471 ** pZ is a UTF-16 encoded unicode string. If nChar is less than zero,
00472 ** return the number of bytes up to (but not including), the first pair
00473 ** of consecutive 0x00 bytes in pZ. If nChar is not less than zero,
00474 ** then return the number of bytes in the first nChar unicode characters
00475 ** in pZ (or up until the first pair of 0x00 bytes, whichever comes first).
00476 */
00477 int sqlite3utf16ByteLen(const void *zIn, int nChar){
00478   int c = 1;
00479   char const *z = zIn;
00480   int n = 0;
00481   if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){
00482     /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here
00483     ** and in other parts of this file means that at one branch will
00484     ** not be covered by coverage testing on any single host. But coverage
00485     ** will be complete if the tests are run on both a little-endian and 
00486     ** big-endian host. Because both the UTF16NATIVE and SQLITE_UTF16BE
00487     ** macros are constant at compile time the compiler can determine
00488     ** which branch will be followed. It is therefore assumed that no runtime
00489     ** penalty is paid for this "if" statement.
00490     */
00491     while( c && ((nChar<0) || n<nChar) ){
00492       READ_UTF16BE(z, c);
00493       n++;
00494     }
00495   }else{
00496     while( c && ((nChar<0) || n<nChar) ){
00497       READ_UTF16LE(z, c);
00498       n++;
00499     }
00500   }
00501   return (z-(char const *)zIn)-((c==0)?2:0);
00502 }
00503 
00504 /*
00505 ** UTF-16 implementation of the substr()
00506 */
00507 void sqlite3utf16Substr(
00508   sqlite3_context *context,
00509   int argc,
00510   sqlite3_value **argv
00511 ){
00512   int y, z;
00513   unsigned char const *zStr;
00514   unsigned char const *zStrEnd;
00515   unsigned char const *zStart;
00516   unsigned char const *zEnd;
00517   int i;
00518 
00519   zStr = (unsigned char const *)sqlite3_value_text16(argv[0]);
00520   zStrEnd = &zStr[sqlite3_value_bytes16(argv[0])];
00521   y = sqlite3_value_int(argv[1]);
00522   z = sqlite3_value_int(argv[2]);
00523 
00524   if( y>0 ){
00525     y = y-1;
00526     zStart = zStr;
00527     if( SQLITE_UTF16BE==SQLITE_UTF16NATIVE ){
00528       for(i=0; i<y && zStart<zStrEnd; i++) SKIP_UTF16BE(zStart);
00529     }else{
00530       for(i=0; i<y && zStart<zStrEnd; i++) SKIP_UTF16LE(zStart);
00531     }
00532   }else{
00533     zStart = zStrEnd;
00534     if( SQLITE_UTF16BE==SQLITE_UTF16NATIVE ){
00535       for(i=y; i<0 && zStart>zStr; i++) RSKIP_UTF16BE(zStart);
00536     }else{
00537       for(i=y; i<0 && zStart>zStr; i++) RSKIP_UTF16LE(zStart);
00538     }
00539     for(; i<0; i++) z -= 1;
00540   }
00541 
00542   zEnd = zStart;
00543   if( SQLITE_UTF16BE==SQLITE_UTF16NATIVE ){
00544     for(i=0; i<z && zEnd<zStrEnd; i++) SKIP_UTF16BE(zEnd);
00545   }else{
00546     for(i=0; i<z && zEnd<zStrEnd; i++) SKIP_UTF16LE(zEnd);
00547   }
00548 
00549   sqlite3_result_text16(context, zStart, zEnd-zStart, SQLITE_TRANSIENT);
00550 }
00551 
00552 #if defined(SQLITE_TEST)
/*
** This routine is called from the TCL test function "translate_selftest".
** It checks that the primitives for serializing and deserializing
** characters in each encoding are inverses of each other: every value
** below 0x110000 is written, read back, and compared, and the number of
** bytes read must equal the number written. The two UTF-16 passes leave
** out the values 0xD800 through 0xE000.
*/
void sqlite3utfSelfTest(){
  unsigned char aBuf[20];   /* Scratch buffer for one encoded character */
  unsigned char *pCur;      /* Read/write cursor into aBuf */
  int iCode;                /* Value being round-tripped */
  int iDecoded;             /* Value read back */
  int nWritten;             /* Bytes produced by the write macro */

  /* UTF-8 */
  for(iCode=0; iCode<0x00110000; iCode++){
    pCur = aBuf;
    WRITE_UTF8(pCur, iCode);
    nWritten = pCur-aBuf;
    pCur = aBuf;
    READ_UTF8(pCur, iDecoded);
    assert( iDecoded==iCode );
    assert( (pCur-aBuf)==nWritten );
  }

  /* UTF-16 little-endian */
  for(iCode=0; iCode<0x00110000; iCode++){
    if( iCode<0xD800 || iCode>0xE000 ){
      pCur = aBuf;
      WRITE_UTF16LE(pCur, iCode);
      nWritten = pCur-aBuf;
      pCur = aBuf;
      READ_UTF16LE(pCur, iDecoded);
      assert( iDecoded==iCode );
      assert( (pCur-aBuf)==nWritten );
    }
  }

  /* UTF-16 big-endian */
  for(iCode=0; iCode<0x00110000; iCode++){
    if( iCode<0xD800 || iCode>0xE000 ){
      pCur = aBuf;
      WRITE_UTF16BE(pCur, iCode);
      nWritten = pCur-aBuf;
      pCur = aBuf;
      READ_UTF16BE(pCur, iDecoded);
      assert( iDecoded==iCode );
      assert( (pCur-aBuf)==nWritten );
    }
  }
}
00595 #endif /* SQLITE_TEST */
00596 #endif /* SQLITE_OMIT_UTF16 */