Back to index

glibc  2.9
gconv_simple.c
Go to the documentation of this file.
00001 /* Simple transformations functions.
00002    Copyright (C) 1997-2005, 2007, 2008 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 #include <byteswap.h>
00022 #include <dlfcn.h>
00023 #include <endian.h>
00024 #include <errno.h>
00025 #include <gconv.h>
00026 #include <stdint.h>
00027 #include <stdlib.h>
00028 #include <string.h>
00029 #include <wchar.h>
00030 #include <sys/param.h>
00031 #include <gconv_int.h>
00032 
00033 #define BUILTIN_ALIAS(s1, s2) /* nothing */
00034 #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
00035                             MinF, MaxF, MinT, MaxT) \
00036   extern int Fct (struct __gconv_step *, struct __gconv_step_data *,        \
00037                 __const unsigned char **, __const unsigned char *,          \
00038                 unsigned char **, size_t *, int, int);
00039 #include "gconv_builtin.h"
00040 
00041 
/* Use EINVAL as a stand-in when <errno.h> does not define EILSEQ.  */
#if !defined EILSEQ
# define EILSEQ EINVAL
#endif
00045 
00046 
/* Single byte to INTERNAL conversion that accepts ASCII only: a byte
   below 0x80 is returned unchanged, any other byte yields WEOF.  STEP
   is not consulted.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  return c < 0x80 ? (wint_t) c : WEOF;
}
00057 
00058 
/* INTERNAL -> UCS4.  The internal format is UCS4-like; it differs from
   real UCS4, if at all, only in endianness.  Unicode/ISO 10646 says the
   byte order is big endian unless a higher protocol specifies
   otherwise.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4
#define FROM_LOOP               internal_ucs4_loop
#define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
#define FROM_DIRECTION          1
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define DEFINE_INIT             0
#define DEFINE_FINI             0
00072 
00073 
00074 static inline int
00075 __attribute ((always_inline))
00076 internal_ucs4_loop (struct __gconv_step *step,
00077                   struct __gconv_step_data *step_data,
00078                   const unsigned char **inptrp, const unsigned char *inend,
00079                   unsigned char **outptrp, unsigned char *outend,
00080                   size_t *irreversible)
00081 {
00082   const unsigned char *inptr = *inptrp;
00083   unsigned char *outptr = *outptrp;
00084   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
00085   int result;
00086 
00087 #if __BYTE_ORDER == __LITTLE_ENDIAN
00088   /* Sigh, we have to do some real work.  */
00089   size_t cnt;
00090   uint32_t *outptr32 = (uint32_t *) outptr;
00091 
00092   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
00093     *outptr32++ = bswap_32 (*(const uint32_t *) inptr);
00094 
00095   *inptrp = inptr;
00096   *outptrp = (unsigned char *) outptr32;
00097 #elif __BYTE_ORDER == __BIG_ENDIAN
00098   /* Simply copy the data.  */
00099   *inptrp = inptr + n_convert * 4;
00100   *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
00101 #else
00102 # error "This endianess is not supported."
00103 #endif
00104 
00105   /* Determine the status.  */
00106   if (*inptrp == inend)
00107     result = __GCONV_EMPTY_INPUT;
00108   else if (*outptrp + 4 > outend)
00109     result = __GCONV_FULL_OUTPUT;
00110   else
00111     result = __GCONV_INCOMPLETE_INPUT;
00112 
00113   return result;
00114 }
00115 
00116 #ifndef _STRING_ARCH_unaligned
00117 static inline int
00118 __attribute ((always_inline))
00119 internal_ucs4_loop_unaligned (struct __gconv_step *step,
00120                            struct __gconv_step_data *step_data,
00121                            const unsigned char **inptrp,
00122                            const unsigned char *inend,
00123                            unsigned char **outptrp, unsigned char *outend,
00124                            size_t *irreversible)
00125 {
00126   const unsigned char *inptr = *inptrp;
00127   unsigned char *outptr = *outptrp;
00128   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
00129   int result;
00130 
00131 # if __BYTE_ORDER == __LITTLE_ENDIAN
00132   /* Sigh, we have to do some real work.  */
00133   size_t cnt;
00134 
00135   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
00136     {
00137       outptr[0] = inptr[3];
00138       outptr[1] = inptr[2];
00139       outptr[2] = inptr[1];
00140       outptr[3] = inptr[0];
00141     }
00142 
00143   *inptrp = inptr;
00144   *outptrp = outptr;
00145 # elif __BYTE_ORDER == __BIG_ENDIAN
00146   /* Simply copy the data.  */
00147   *inptrp = inptr + n_convert * 4;
00148   *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
00149 # else
00150 #  error "This endianess is not supported."
00151 # endif
00152 
00153   /* Determine the status.  */
00154   if (*inptrp == inend)
00155     result = __GCONV_EMPTY_INPUT;
00156   else if (*outptrp + 4 > outend)
00157     result = __GCONV_FULL_OUTPUT;
00158   else
00159     result = __GCONV_INCOMPLETE_INPUT;
00160 
00161   return result;
00162 }
00163 #endif
00164 
00165 
00166 static inline int
00167 __attribute ((always_inline))
00168 internal_ucs4_loop_single (struct __gconv_step *step,
00169                         struct __gconv_step_data *step_data,
00170                         const unsigned char **inptrp,
00171                         const unsigned char *inend,
00172                         unsigned char **outptrp, unsigned char *outend,
00173                         size_t *irreversible)
00174 {
00175   mbstate_t *state = step_data->__statep;
00176   size_t cnt = state->__count & 7;
00177 
00178   while (*inptrp < inend && cnt < 4)
00179     state->__value.__wchb[cnt++] = *(*inptrp)++;
00180 
00181   if (__builtin_expect (cnt < 4, 0))
00182     {
00183       /* Still not enough bytes.  Store the ones in the input buffer.  */
00184       state->__count &= ~7;
00185       state->__count |= cnt;
00186 
00187       return __GCONV_INCOMPLETE_INPUT;
00188     }
00189 
00190 #if __BYTE_ORDER == __LITTLE_ENDIAN
00191   (*outptrp)[0] = state->__value.__wchb[3];
00192   (*outptrp)[1] = state->__value.__wchb[2];
00193   (*outptrp)[2] = state->__value.__wchb[1];
00194   (*outptrp)[3] = state->__value.__wchb[0];
00195 
00196 #elif __BYTE_ORDER == __BIG_ENDIAN
00197   /* XXX unaligned */
00198   (*outptrp)[0] = state->__value.__wchb[0];
00199   (*outptrp)[1] = state->__value.__wchb[1];
00200   (*outptrp)[2] = state->__value.__wchb[2];
00201   (*outptrp)[3] = state->__value.__wchb[3];
00202 #else
00203 # error "This endianess is not supported."
00204 #endif
00205   *outptrp += 4;
00206 
00207   /* Clear the state buffer.  */
00208   state->__count &= ~7;
00209 
00210   return __GCONV_OK;
00211 }
00212 
00213 #include <iconv/skeleton.c>
00214 
00215 
/* UCS4 -> INTERNAL (UCS4-like).  Unlike the opposite direction, the
   input values have to be validated here.  */
#define FUNCTION_NAME           __gconv_transform_ucs4_internal
#define FROM_LOOP               ucs4_internal_loop
#define TO_LOOP                 ucs4_internal_loop /* This is not used.  */
#define FROM_DIRECTION          1
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define DEFINE_INIT             0
#define DEFINE_FINI             0
00226 
00227 
00228 static inline int
00229 __attribute ((always_inline))
00230 ucs4_internal_loop (struct __gconv_step *step,
00231                   struct __gconv_step_data *step_data,
00232                   const unsigned char **inptrp, const unsigned char *inend,
00233                   unsigned char **outptrp, unsigned char *outend,
00234                   size_t *irreversible)
00235 {
00236   int flags = step_data->__flags;
00237   const unsigned char *inptr = *inptrp;
00238   unsigned char *outptr = *outptrp;
00239   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
00240   int result;
00241   size_t cnt;
00242 
00243   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
00244     {
00245       uint32_t inval;
00246 
00247 #if __BYTE_ORDER == __LITTLE_ENDIAN
00248       inval = bswap_32 (*(const uint32_t *) inptr);
00249 #else
00250       inval = *(const uint32_t *) inptr;
00251 #endif
00252 
00253       if (__builtin_expect (inval > 0x7fffffff, 0))
00254        {
00255          /* The value is too large.  We don't try transliteration here since
00256             this is not an error because of the lack of possibilities to
00257             represent the result.  This is a genuine bug in the input since
00258             UCS4 does not allow such values.  */
00259          if (irreversible == NULL)
00260            /* We are transliterating, don't try to correct anything.  */
00261            return __GCONV_ILLEGAL_INPUT;
00262 
00263          if (flags & __GCONV_IGNORE_ERRORS)
00264            {
00265              /* Just ignore this character.  */
00266              ++*irreversible;
00267              continue;
00268            }
00269 
00270          *inptrp = inptr;
00271          *outptrp = outptr;
00272          return __GCONV_ILLEGAL_INPUT;
00273        }
00274 
00275       *((uint32_t *) outptr) = inval;
00276       outptr += sizeof (uint32_t);
00277     }
00278 
00279   *inptrp = inptr;
00280   *outptrp = outptr;
00281 
00282   /* Determine the status.  */
00283   if (*inptrp == inend)
00284     result = __GCONV_EMPTY_INPUT;
00285   else if (*outptrp + 4 > outend)
00286     result = __GCONV_FULL_OUTPUT;
00287   else
00288     result = __GCONV_INCOMPLETE_INPUT;
00289 
00290   return result;
00291 }
00292 
00293 #ifndef _STRING_ARCH_unaligned
00294 static inline int
00295 __attribute ((always_inline))
00296 ucs4_internal_loop_unaligned (struct __gconv_step *step,
00297                            struct __gconv_step_data *step_data,
00298                            const unsigned char **inptrp,
00299                            const unsigned char *inend,
00300                            unsigned char **outptrp, unsigned char *outend,
00301                            size_t *irreversible)
00302 {
00303   int flags = step_data->__flags;
00304   const unsigned char *inptr = *inptrp;
00305   unsigned char *outptr = *outptrp;
00306   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
00307   int result;
00308   size_t cnt;
00309 
00310   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
00311     {
00312       if (__builtin_expect (inptr[0] > 0x80, 0))
00313        {
00314          /* The value is too large.  We don't try transliteration here since
00315             this is not an error because of the lack of possibilities to
00316             represent the result.  This is a genuine bug in the input since
00317             UCS4 does not allow such values.  */
00318          if (irreversible == NULL)
00319            /* We are transliterating, don't try to correct anything.  */
00320            return __GCONV_ILLEGAL_INPUT;
00321 
00322          if (flags & __GCONV_IGNORE_ERRORS)
00323            {
00324              /* Just ignore this character.  */
00325              ++*irreversible;
00326              continue;
00327            }
00328 
00329          *inptrp = inptr;
00330          *outptrp = outptr;
00331          return __GCONV_ILLEGAL_INPUT;
00332        }
00333 
00334 # if __BYTE_ORDER == __LITTLE_ENDIAN
00335       outptr[3] = inptr[0];
00336       outptr[2] = inptr[1];
00337       outptr[1] = inptr[2];
00338       outptr[0] = inptr[3];
00339 # else
00340       outptr[0] = inptr[0];
00341       outptr[1] = inptr[1];
00342       outptr[2] = inptr[2];
00343       outptr[3] = inptr[3];
00344 # endif
00345       outptr += 4;
00346     }
00347 
00348   *inptrp = inptr;
00349   *outptrp = outptr;
00350 
00351   /* Determine the status.  */
00352   if (*inptrp == inend)
00353     result = __GCONV_EMPTY_INPUT;
00354   else if (*outptrp + 4 > outend)
00355     result = __GCONV_FULL_OUTPUT;
00356   else
00357     result = __GCONV_INCOMPLETE_INPUT;
00358 
00359   return result;
00360 }
00361 #endif
00362 
00363 
00364 static inline int
00365 __attribute ((always_inline))
00366 ucs4_internal_loop_single (struct __gconv_step *step,
00367                         struct __gconv_step_data *step_data,
00368                         const unsigned char **inptrp,
00369                         const unsigned char *inend,
00370                         unsigned char **outptrp, unsigned char *outend,
00371                         size_t *irreversible)
00372 {
00373   mbstate_t *state = step_data->__statep;
00374   int flags = step_data->__flags;
00375   size_t cnt = state->__count & 7;
00376 
00377   while (*inptrp < inend && cnt < 4)
00378     state->__value.__wchb[cnt++] = *(*inptrp)++;
00379 
00380   if (__builtin_expect (cnt < 4, 0))
00381     {
00382       /* Still not enough bytes.  Store the ones in the input buffer.  */
00383       state->__count &= ~7;
00384       state->__count |= cnt;
00385 
00386       return __GCONV_INCOMPLETE_INPUT;
00387     }
00388 
00389   if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x80,
00390                      0))
00391     {
00392       /* The value is too large.  We don't try transliteration here since
00393         this is not an error because of the lack of possibilities to
00394         represent the result.  This is a genuine bug in the input since
00395         UCS4 does not allow such values.  */
00396       if (!(flags & __GCONV_IGNORE_ERRORS))
00397        {
00398          *inptrp -= cnt - (state->__count & 7);
00399          return __GCONV_ILLEGAL_INPUT;
00400        }
00401     }
00402   else
00403     {
00404 #if __BYTE_ORDER == __LITTLE_ENDIAN
00405       (*outptrp)[0] = state->__value.__wchb[3];
00406       (*outptrp)[1] = state->__value.__wchb[2];
00407       (*outptrp)[2] = state->__value.__wchb[1];
00408       (*outptrp)[3] = state->__value.__wchb[0];
00409 #elif __BYTE_ORDER == __BIG_ENDIAN
00410       (*outptrp)[0] = state->__value.__wchb[0];
00411       (*outptrp)[1] = state->__value.__wchb[1];
00412       (*outptrp)[2] = state->__value.__wchb[2];
00413       (*outptrp)[3] = state->__value.__wchb[3];
00414 #endif
00415 
00416       *outptrp += 4;
00417     }
00418 
00419   /* Clear the state buffer.  */
00420   state->__count &= ~7;
00421 
00422   return __GCONV_OK;
00423 }
00424 
00425 #include <iconv/skeleton.c>
00426 
00427 
/* INTERNAL -> UCS4 in its little endian form.  */
#define FUNCTION_NAME           __gconv_transform_internal_ucs4le
#define FROM_LOOP               internal_ucs4le_loop
#define TO_LOOP                 internal_ucs4le_loop /* This is not used.  */
#define FROM_DIRECTION          1
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define DEFINE_INIT             0
#define DEFINE_FINI             0
00437 
00438 
00439 static inline int
00440 __attribute ((always_inline))
00441 internal_ucs4le_loop (struct __gconv_step *step,
00442                     struct __gconv_step_data *step_data,
00443                     const unsigned char **inptrp, const unsigned char *inend,
00444                     unsigned char **outptrp, unsigned char *outend,
00445                     size_t *irreversible)
00446 {
00447   const unsigned char *inptr = *inptrp;
00448   unsigned char *outptr = *outptrp;
00449   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
00450   int result;
00451 
00452 #if __BYTE_ORDER == __BIG_ENDIAN
00453   /* Sigh, we have to do some real work.  */
00454   size_t cnt;
00455   uint32_t *outptr32 = (uint32_t *) outptr;
00456 
00457   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
00458     *outptr32++ = bswap_32 (*(const uint32_t *) inptr);
00459   outptr = (unsigned char *) outptr32;
00460 
00461   *inptrp = inptr;
00462   *outptrp = outptr;
00463 #elif __BYTE_ORDER == __LITTLE_ENDIAN
00464   /* Simply copy the data.  */
00465   *inptrp = inptr + n_convert * 4;
00466   *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
00467 #else
00468 # error "This endianess is not supported."
00469 #endif
00470 
00471   /* Determine the status.  */
00472   if (*inptrp == inend)
00473     result = __GCONV_EMPTY_INPUT;
00474   else if (*outptrp + 4 > outend)
00475     result = __GCONV_FULL_OUTPUT;
00476   else
00477     result = __GCONV_INCOMPLETE_INPUT;
00478 
00479   return result;
00480 }
00481 
00482 #ifndef _STRING_ARCH_unaligned
00483 static inline int
00484 __attribute ((always_inline))
00485 internal_ucs4le_loop_unaligned (struct __gconv_step *step,
00486                             struct __gconv_step_data *step_data,
00487                             const unsigned char **inptrp,
00488                             const unsigned char *inend,
00489                             unsigned char **outptrp, unsigned char *outend,
00490                             size_t *irreversible)
00491 {
00492   const unsigned char *inptr = *inptrp;
00493   unsigned char *outptr = *outptrp;
00494   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
00495   int result;
00496 
00497 # if __BYTE_ORDER == __BIG_ENDIAN
00498   /* Sigh, we have to do some real work.  */
00499   size_t cnt;
00500 
00501   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
00502     {
00503       outptr[0] = inptr[3];
00504       outptr[1] = inptr[2];
00505       outptr[2] = inptr[1];
00506       outptr[3] = inptr[0];
00507     }
00508 
00509   *inptrp = inptr;
00510   *outptrp = outptr;
00511 # elif __BYTE_ORDER == __LITTLE_ENDIAN
00512   /* Simply copy the data.  */
00513   *inptrp = inptr + n_convert * 4;
00514   *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
00515 # else
00516 #  error "This endianess is not supported."
00517 # endif
00518 
00519   /* Determine the status.  */
00520   if (*inptrp == inend)
00521     result = __GCONV_EMPTY_INPUT;
00522   else if (*inptrp + 4 > inend)
00523     result = __GCONV_INCOMPLETE_INPUT;
00524   else
00525     {
00526       assert (*outptrp + 4 > outend);
00527       result = __GCONV_FULL_OUTPUT;
00528     }
00529 
00530   return result;
00531 }
00532 #endif
00533 
00534 
00535 static inline int
00536 __attribute ((always_inline))
00537 internal_ucs4le_loop_single (struct __gconv_step *step,
00538                           struct __gconv_step_data *step_data,
00539                           const unsigned char **inptrp,
00540                           const unsigned char *inend,
00541                           unsigned char **outptrp, unsigned char *outend,
00542                           size_t *irreversible)
00543 {
00544   mbstate_t *state = step_data->__statep;
00545   size_t cnt = state->__count & 7;
00546 
00547   while (*inptrp < inend && cnt < 4)
00548     state->__value.__wchb[cnt++] = *(*inptrp)++;
00549 
00550   if (__builtin_expect (cnt < 4, 0))
00551     {
00552       /* Still not enough bytes.  Store the ones in the input buffer.  */
00553       state->__count &= ~7;
00554       state->__count |= cnt;
00555 
00556       return __GCONV_INCOMPLETE_INPUT;
00557     }
00558 
00559 #if __BYTE_ORDER == __BIG_ENDIAN
00560   (*outptrp)[0] = state->__value.__wchb[3];
00561   (*outptrp)[1] = state->__value.__wchb[2];
00562   (*outptrp)[2] = state->__value.__wchb[1];
00563   (*outptrp)[3] = state->__value.__wchb[0];
00564 
00565 #else
00566   /* XXX unaligned */
00567   (*outptrp)[0] = state->__value.__wchb[0];
00568   (*outptrp)[1] = state->__value.__wchb[1];
00569   (*outptrp)[2] = state->__value.__wchb[2];
00570   (*outptrp)[3] = state->__value.__wchb[3];
00571 
00572 #endif
00573 
00574   *outptrp += 4;
00575 
00576   /* Clear the state buffer.  */
00577   state->__count &= ~7;
00578 
00579   return __GCONV_OK;
00580 }
00581 
00582 #include <iconv/skeleton.c>
00583 
00584 
/* UCS4-LE -> INTERNAL, the last of the four UCS4 conversions.  */
#define FUNCTION_NAME           __gconv_transform_ucs4le_internal
#define FROM_LOOP               ucs4le_internal_loop
#define TO_LOOP                 ucs4le_internal_loop /* This is not used.  */
#define FROM_DIRECTION          1
#define MIN_NEEDED_FROM         4
#define MIN_NEEDED_TO           4
#define DEFINE_INIT             0
#define DEFINE_FINI             0
00594 
00595 
00596 static inline int
00597 __attribute ((always_inline))
00598 ucs4le_internal_loop (struct __gconv_step *step,
00599                     struct __gconv_step_data *step_data,
00600                     const unsigned char **inptrp, const unsigned char *inend,
00601                     unsigned char **outptrp, unsigned char *outend,
00602                     size_t *irreversible)
00603 {
00604   int flags = step_data->__flags;
00605   const unsigned char *inptr = *inptrp;
00606   unsigned char *outptr = *outptrp;
00607   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
00608   int result;
00609   size_t cnt;
00610 
00611   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
00612     {
00613       uint32_t inval;
00614 
00615 #if __BYTE_ORDER == __BIG_ENDIAN
00616       inval = bswap_32 (*(const uint32_t *) inptr);
00617 #else
00618       inval = *(const uint32_t *) inptr;
00619 #endif
00620 
00621       if (__builtin_expect (inval > 0x7fffffff, 0))
00622        {
00623          /* The value is too large.  We don't try transliteration here since
00624             this is not an error because of the lack of possibilities to
00625             represent the result.  This is a genuine bug in the input since
00626             UCS4 does not allow such values.  */
00627          if (irreversible == NULL)
00628            /* We are transliterating, don't try to correct anything.  */
00629            return __GCONV_ILLEGAL_INPUT;
00630 
00631          if (flags & __GCONV_IGNORE_ERRORS)
00632            {
00633              /* Just ignore this character.  */
00634              ++*irreversible;
00635              continue;
00636            }
00637 
00638          return __GCONV_ILLEGAL_INPUT;
00639        }
00640 
00641       *((uint32_t *) outptr) = inval;
00642       outptr += sizeof (uint32_t);
00643     }
00644 
00645   *inptrp = inptr;
00646   *outptrp = outptr;
00647 
00648   /* Determine the status.  */
00649   if (*inptrp == inend)
00650     result = __GCONV_EMPTY_INPUT;
00651   else if (*inptrp + 4 > inend)
00652     result = __GCONV_INCOMPLETE_INPUT;
00653   else
00654     {
00655       assert (*outptrp + 4 > outend);
00656       result = __GCONV_FULL_OUTPUT;
00657     }
00658 
00659   return result;
00660 }
00661 
00662 #ifndef _STRING_ARCH_unaligned
00663 static inline int
00664 __attribute ((always_inline))
00665 ucs4le_internal_loop_unaligned (struct __gconv_step *step,
00666                             struct __gconv_step_data *step_data,
00667                             const unsigned char **inptrp,
00668                             const unsigned char *inend,
00669                             unsigned char **outptrp, unsigned char *outend,
00670                             size_t *irreversible)
00671 {
00672   int flags = step_data->__flags;
00673   const unsigned char *inptr = *inptrp;
00674   unsigned char *outptr = *outptrp;
00675   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
00676   int result;
00677   size_t cnt;
00678 
00679   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
00680     {
00681       if (__builtin_expect (inptr[3] > 0x80, 0))
00682        {
00683          /* The value is too large.  We don't try transliteration here since
00684             this is not an error because of the lack of possibilities to
00685             represent the result.  This is a genuine bug in the input since
00686             UCS4 does not allow such values.  */
00687          if (irreversible == NULL)
00688            /* We are transliterating, don't try to correct anything.  */
00689            return __GCONV_ILLEGAL_INPUT;
00690 
00691          if (flags & __GCONV_IGNORE_ERRORS)
00692            {
00693              /* Just ignore this character.  */
00694              ++*irreversible;
00695              continue;
00696            }
00697 
00698          *inptrp = inptr;
00699          *outptrp = outptr;
00700          return __GCONV_ILLEGAL_INPUT;
00701        }
00702 
00703 # if __BYTE_ORDER == __BIG_ENDIAN
00704       outptr[3] = inptr[0];
00705       outptr[2] = inptr[1];
00706       outptr[1] = inptr[2];
00707       outptr[0] = inptr[3];
00708 # else
00709       outptr[0] = inptr[0];
00710       outptr[1] = inptr[1];
00711       outptr[2] = inptr[2];
00712       outptr[3] = inptr[3];
00713 # endif
00714 
00715       outptr += 4;
00716     }
00717 
00718   *inptrp = inptr;
00719   *outptrp = outptr;
00720 
00721   /* Determine the status.  */
00722   if (*inptrp == inend)
00723     result = __GCONV_EMPTY_INPUT;
00724   else if (*inptrp + 4 > inend)
00725     result = __GCONV_INCOMPLETE_INPUT;
00726   else
00727     {
00728       assert (*outptrp + 4 > outend);
00729       result = __GCONV_FULL_OUTPUT;
00730     }
00731 
00732   return result;
00733 }
00734 #endif
00735 
00736 
00737 static inline int
00738 __attribute ((always_inline))
00739 ucs4le_internal_loop_single (struct __gconv_step *step,
00740                           struct __gconv_step_data *step_data,
00741                           const unsigned char **inptrp,
00742                           const unsigned char *inend,
00743                           unsigned char **outptrp, unsigned char *outend,
00744                           size_t *irreversible)
00745 {
00746   mbstate_t *state = step_data->__statep;
00747   int flags = step_data->__flags;
00748   size_t cnt = state->__count & 7;
00749 
00750   while (*inptrp < inend && cnt < 4)
00751     state->__value.__wchb[cnt++] = *(*inptrp)++;
00752 
00753   if (__builtin_expect (cnt < 4, 0))
00754     {
00755       /* Still not enough bytes.  Store the ones in the input buffer.  */
00756       state->__count &= ~7;
00757       state->__count |= cnt;
00758 
00759       return __GCONV_INCOMPLETE_INPUT;
00760     }
00761 
00762   if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x80,
00763                      0))
00764     {
00765       /* The value is too large.  We don't try transliteration here since
00766         this is not an error because of the lack of possibilities to
00767         represent the result.  This is a genuine bug in the input since
00768         UCS4 does not allow such values.  */
00769       if (!(flags & __GCONV_IGNORE_ERRORS))
00770        return __GCONV_ILLEGAL_INPUT;
00771     }
00772   else
00773     {
00774 #if __BYTE_ORDER == __BIG_ENDIAN
00775       (*outptrp)[0] = state->__value.__wchb[3];
00776       (*outptrp)[1] = state->__value.__wchb[2];
00777       (*outptrp)[2] = state->__value.__wchb[1];
00778       (*outptrp)[3] = state->__value.__wchb[0];
00779 #else
00780       (*outptrp)[0] = state->__value.__wchb[0];
00781       (*outptrp)[1] = state->__value.__wchb[1];
00782       (*outptrp)[2] = state->__value.__wchb[2];
00783       (*outptrp)[3] = state->__value.__wchb[3];
00784 #endif
00785 
00786       *outptrp += 4;
00787     }
00788 
00789   /* Clear the state buffer.  */
00790   state->__count &= ~7;
00791 
00792   return __GCONV_OK;
00793 }
00794 
00795 #include <iconv/skeleton.c>
00796 
00797 
00798 /* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
00799 #define DEFINE_INIT         0
00800 #define DEFINE_FINI         0
00801 #define MIN_NEEDED_FROM            1
00802 #define MIN_NEEDED_TO              4
00803 #define FROM_DIRECTION             1
00804 #define FROM_LOOP           ascii_internal_loop
00805 #define TO_LOOP                    ascii_internal_loop /* This is not used.  */
00806 #define FUNCTION_NAME              __gconv_transform_ascii_internal
00807 #define ONE_DIRECTION              1
00808 
00809 #define MIN_NEEDED_INPUT    MIN_NEEDED_FROM
00810 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_TO
00811 #define LOOPFCT                    FROM_LOOP
00812 #define BODY \
00813   {                                                                  \
00814     if (__builtin_expect (*inptr > '\x7f', 0))                              \
00815       {                                                                     \
00816        /* The value is too large.  We don't try transliteration here since   \
00817           this is not an error because of the lack of possibilities to             \
00818           represent the result.  This is a genuine bug in the input since    \
00819           ASCII does not allow such values.  */                      \
00820        STANDARD_FROM_LOOP_ERR_HANDLER (1);                                  \
00821       }                                                                     \
00822     else                                                             \
00823       {                                                                     \
00824        /* It's an one byte sequence.  */                             \
00825        *((uint32_t *) outptr) = *inptr++;                            \
00826        outptr += sizeof (uint32_t);                                         \
00827       }                                                                     \
00828   }
00829 #define LOOP_NEED_FLAGS
00830 #include <iconv/loop.c>
00831 #include <iconv/skeleton.c>
00832 
00833 
00834 /* Convert from the internal (UCS4-like) format to ISO 646-IRV.
         One-directional converter (ONE_DIRECTION is set, TO_LOOP is only a
         placeholder).  Each step reads one uint32_t from inptr.  Values
         above 0x7f are passed to UNICODE_TAG_HANDLER and then to
         STANDARD_TO_LOOP_ERR_HANDLER; every other value is stored as a
         single output byte and inptr advances by sizeof (uint32_t).  */
00835 #define DEFINE_INIT         0
00836 #define DEFINE_FINI         0
00837 #define MIN_NEEDED_FROM            4
00838 #define MIN_NEEDED_TO              1
00839 #define FROM_DIRECTION             1
00840 #define FROM_LOOP           internal_ascii_loop
00841 #define TO_LOOP                    internal_ascii_loop /* This is not used.  */
00842 #define FUNCTION_NAME              __gconv_transform_internal_ascii
00843 #define ONE_DIRECTION              1
00844 
00845 #define MIN_NEEDED_INPUT    MIN_NEEDED_FROM
00846 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_TO
00847 #define LOOPFCT                    FROM_LOOP
00848 #define BODY \
00849   {                                                                  \
00850     if (__builtin_expect (*((const uint32_t *) inptr) > 0x7f, 0))           \
00851       {                                                                     \
00852        UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4);                \
00853        STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
00854       }                                                                     \
00855     else                                                             \
00856       {                                                                     \
00857        /* It's a one byte sequence: keep only the low byte.  */      \
00858        *outptr++ = *((const uint32_t *) inptr);                      \
00859        inptr += sizeof (uint32_t);                                   \
00860       }                                                                     \
00861   }
00862 #define LOOP_NEED_FLAGS
00863 #include <iconv/loop.c>
00864 #include <iconv/skeleton.c>
00865 
00866 
00867 /* Convert from the internal (UCS4-like) format to UTF-8.
         Each step reads one uint32_t.  Values below 0x80 become a single
         byte, values up to 0x7fffffff become a sequence of 2 to 6 bytes,
         and anything larger is passed to STANDARD_TO_LOOP_ERR_HANDLER.
         When the sequence does not fit in front of outend, result is set
         to __GCONV_FULL_OUTPUT and the BODY is left with `break' before
         inptr is advanced (the enclosing loop is not visible here;
         presumably it comes from iconv/loop.c).  */
00868 #define DEFINE_INIT         0
00869 #define DEFINE_FINI         0
00870 #define MIN_NEEDED_FROM            4
00871 #define MIN_NEEDED_TO              1
00872 #define MAX_NEEDED_TO              6
00873 #define FROM_DIRECTION             1
00874 #define FROM_LOOP           internal_utf8_loop
00875 #define TO_LOOP                    internal_utf8_loop /* This is not used.  */
00876 #define FUNCTION_NAME              __gconv_transform_internal_utf8
00877 #define ONE_DIRECTION              1
00878 
00879 #define MIN_NEEDED_INPUT    MIN_NEEDED_FROM
00880 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_TO
00881 #define MAX_NEEDED_OUTPUT   MAX_NEEDED_TO
00882 #define LOOPFCT                    FROM_LOOP
00883 #define BODY \
00884   {                                                                  \
00885     uint32_t wc = *((const uint32_t *) inptr);                              \
00886                                                                      \
00887     if (__builtin_expect (wc < 0x80, 1))                             \
00888       /* It's a one byte sequence.  */                                      \
00889       *outptr++ = (unsigned char) wc;                                       \
00890     else if (__builtin_expect (wc <= 0x7fffffff, 1))                        \
00891       {                                                                     \
00892        size_t step;                                                  \
00893        unsigned char *start;                                                \
00894        /* Find the sequence length: 2 to 5 if wc fits, otherwise 6.  */     \
00895        for (step = 2; step < 6; ++step)                              \
00896          if ((wc & (~(uint32_t)0 << (5 * step + 1))) == 0)                  \
00897            break;                                                    \
00898        /* The whole sequence must fit in front of outend.  */        \
00899        if (__builtin_expect (outptr + step > outend, 0))                    \
00900          {                                                           \
00901            /* Not enough room for step bytes.  */                    \
00902            result = __GCONV_FULL_OUTPUT;                             \
00903            break;                                                    \
00904          }                                                           \
00905        /* Store the length marker, then fill the trail bytes back to front.  */ \
00906        start = outptr;                                                      \
00907        *outptr = (unsigned char) (~0xff >> step);                           \
00908        outptr += step;                                                      \
00909        do                                                            \
00910          {                                                           \
00911            start[--step] = 0x80 | (wc & 0x3f);                              \
00912            wc >>= 6;                                                 \
00913          }                                                           \
00914        while (step > 1);                                             \
00915        start[0] |= wc;                                                      \
00916       }                                                                     \
00917     else                                                             \
00918       {                                                                     \
00919        STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
00920       }                                                                     \
00921                                                                      \
00922     inptr += 4;                                                             \
00923   }
00924 #define LOOP_NEED_FLAGS
00925 #include <iconv/loop.c>
00926 #include <iconv/skeleton.c>
00927 
00928 
00929 /* Convert from UTF-8 to the internal (UCS4-like) format.
         A byte below 0x80 is copied as is.  Otherwise the lead byte selects
         a sequence length of 2 to 6 bytes; lead bytes 0x80..0xc1, 0xfe and
         0xff match no length and are treated as ill-formed.  A sequence
         that is cut off by inend but well-formed so far yields
         __GCONV_INCOMPLETE_INPUT.  A wrong trail byte, or (for more than
         two bytes) a value that would have fit in a shorter sequence,
         jumps to the errout label, where i is the byte count handed to
         STANDARD_FROM_LOOP_ERR_HANDLER.  The decoded value is stored as
         one uint32_t.  */
00930 #define DEFINE_INIT         0
00931 #define DEFINE_FINI         0
00932 #define MIN_NEEDED_FROM            1
00933 #define MAX_NEEDED_FROM            6
00934 #define MIN_NEEDED_TO              4
00935 #define FROM_DIRECTION             1
00936 #define FROM_LOOP           utf8_internal_loop
00937 #define TO_LOOP                    utf8_internal_loop /* This is not used.  */
00938 #define FUNCTION_NAME              __gconv_transform_utf8_internal
00939 #define ONE_DIRECTION              1
00940 
00941 #define MIN_NEEDED_INPUT    MIN_NEEDED_FROM
00942 #define MAX_NEEDED_INPUT    MAX_NEEDED_FROM
00943 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_TO
00944 #define LOOPFCT                    FROM_LOOP
00945 #define BODY \
00946   {                                                                  \
00947     /* Next input byte.  */                                          \
00948     uint32_t ch = *inptr;                                            \
00949                                                                      \
00950     if (__builtin_expect (ch < 0x80, 1))                             \
00951       {                                                                     \
00952        /* One byte sequence.  */                                     \
00953        ++inptr;                                                      \
00954       }                                                                     \
00955     else                                                             \
00956       {                                                                     \
00957        uint_fast32_t cnt;                                            \
00958        uint_fast32_t i;                                              \
00959        /* Take the length from the lead byte and drop its marker bits.  */  \
00960        if (ch >= 0xc2 && ch < 0xe0)                                         \
00961          {                                                           \
00962            /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,   \
00963               otherwise the wide character could have been represented             \
00964               using a single byte.  */                                      \
00965            cnt = 2;                                                  \
00966            ch &= 0x1f;                                                      \
00967          }                                                           \
00968         else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1))                 \
00969          {                                                           \
00970            /* We expect three bytes.  */                             \
00971            cnt = 3;                                                  \
00972            ch &= 0x0f;                                                      \
00973          }                                                           \
00974        else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1))                  \
00975          {                                                           \
00976            /* We expect four bytes.  */                              \
00977            cnt = 4;                                                  \
00978            ch &= 0x07;                                                      \
00979          }                                                           \
00980        else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1))                  \
00981          {                                                           \
00982            /* We expect five bytes.  */                              \
00983            cnt = 5;                                                  \
00984            ch &= 0x03;                                                      \
00985          }                                                           \
00986        else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1))                  \
00987          {                                                           \
00988            /* We expect six bytes.  */                                      \
00989            cnt = 6;                                                  \
00990            ch &= 0x01;                                                      \
00991          }                                                           \
00992        else                                                          \
00993          {                                                           \
00994            /* Search the end of this ill-formed UTF-8 character.  This             \
00995               is the next byte with (x & 0xc0) != 0x80; i is at most 5.  */ \
00996            i = 0;                                                    \
00997            do                                                        \
00998              ++i;                                                    \
00999            while (inptr + i < inend                                         \
01000                  && (*(inptr + i) & 0xc0) == 0x80                           \
01001                  && i < 5);                                          \
01002            /* Also the target of the gotos below; i is set by then.  */     \
01003          errout:                                                     \
01004            STANDARD_FROM_LOOP_ERR_HANDLER (i);                              \
01005          }                                                           \
01006        /* Check that the whole sequence is available.  */            \
01007        if (__builtin_expect (inptr + cnt > inend, 0))                       \
01008          {                                                           \
01009            /* We don't have enough input.  But before we report that check   \
01010               that all the bytes are correct.  */                           \
01011            for (i = 1; inptr + i < inend; ++i)                              \
01012              if ((inptr[i] & 0xc0) != 0x80)                                 \
01013               break;                                                 \
01014                                                                      \
01015            if (__builtin_expect (inptr + i == inend, 1))                    \
01016              {                                                              \
01017               result = __GCONV_INCOMPLETE_INPUT;                     \
01018               break;                                                 \
01019              }                                                              \
01020                                                                      \
01021            goto errout;                                              \
01022          }                                                           \
01023                                                                      \
01024        /* Read the possible remaining bytes.  */                     \
01025        for (i = 1; i < cnt; ++i)                                     \
01026          {                                                           \
01027            uint32_t byte = inptr[i];                                        \
01028                                                                      \
01029            if ((byte & 0xc0) != 0x80)                                       \
01030              /* This is an illegal encoding.  */                     \
01031              break;                                                  \
01032                                                                      \
01033            ch <<= 6;                                                 \
01034            ch |= byte & 0x3f;                                               \
01035          }                                                           \
01036                                                                      \
01037        /* If i < cnt, some trail byte was not >= 0x80, < 0xc0.              \
01038           If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could             \
01039           have been represented with fewer than cnt bytes.  */              \
01040        if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))              \
01041          {                                                           \
01042            /* This is an illegal encoding.  */                              \
01043            goto errout;                                              \
01044          }                                                           \
01045                                                                      \
01046        inptr += cnt;                                                 \
01047       }                                                                     \
01048                                                                      \
01049     /* Now adjust the pointers and store the result.  */                    \
01050     *((uint32_t *) outptr) = ch;                                     \
01051     outptr += sizeof (uint32_t);                                     \
01052   }
01053 #define LOOP_NEED_FLAGS
01054 /* STORE_REST saves an incomplete multibyte sequence in *state.
         state->__count receives the number of bytes available
         (inend - *inptrp) combined with the expected total length shifted
         left by 8.  state->__value.__wch receives the partial character,
         shifted left by 6 bits for every trail byte still missing.
         *inptrp is advanced until it reaches inend.  */
01055 #define STORE_REST \
01056   {                                                                  \
01057     /* We store the remaining bytes while converting them into the UCS4            \
01058        format.  We can assume that the first byte in the buffer is          \
01059        correct and that it requires a larger number of bytes than there            \
01060        are in the input buffer.  */                                         \
01061     wint_t ch = **inptrp;                                            \
01062     size_t cnt, r;                                                   \
01063     /* Number of bytes of the sequence present in the buffer.  */    \
01064     state->__count = inend - *inptrp;                                       \
01065     /* Expected length and payload bits of the lead byte.  */        \
01066     if (ch >= 0xc2 && ch < 0xe0)                                     \
01067       {                                                                     \
01068        /* We expect two bytes.  The first byte cannot be 0xc0 or            \
01069           0xc1, otherwise the wide character could have been                \
01070           represented using a single byte.  */                              \
01071        cnt = 2;                                                      \
01072        ch &= 0x1f;                                                   \
01073       }                                                                     \
01074     else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1))                     \
01075       {                                                                     \
01076        /* We expect three bytes.  */                                        \
01077        cnt = 3;                                                      \
01078        ch &= 0x0f;                                                   \
01079       }                                                                     \
01080     else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1))                     \
01081       {                                                                     \
01082        /* We expect four bytes.  */                                         \
01083        cnt = 4;                                                      \
01084        ch &= 0x07;                                                   \
01085       }                                                                     \
01086     else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1))                     \
01087       {                                                                     \
01088        /* We expect five bytes.  */                                         \
01089        cnt = 5;                                                      \
01090        ch &= 0x03;                                                   \
01091       }                                                                     \
01092     else                                                             \
01093       {                                                                     \
01094        /* Anything else is taken as a six byte lead; no check here.  */     \
01095        cnt = 6;                                                      \
01096        ch &= 0x01;                                                   \
01097       }                                                                     \
01098                                                                      \
01099     /* The first byte is already consumed.  */                              \
01100     r = cnt - 1;                                                     \
01101     while (++(*inptrp) < inend)                                             \
01102       {                                                                     \
01103        ch <<= 6;                                                     \
01104        ch |= **inptrp & 0x3f;                                               \
01105        --r;                                                          \
01106       }                                                                     \
01107                                                                      \
01108     /* Shift for the so far missing bytes.  */                              \
01109     ch <<= r * 6;                                                    \
01110                                                                      \
01111     /* Store the number of bytes expected for the entire sequence.  */             \
01112     state->__count |= cnt << 8;                                             \
01113                                                                      \
01114     /* Store the value.  */                                          \
01115     state->__value.__wch = ch;                                              \
01116   }
01117 /* UNPACK_BYTES rebuilds in bytebuf the bytes of a saved incomplete
         sequence.  It reads state->__value.__wch and state->__count: the
         bits of __count above the low byte are used as the total sequence
         length, the low byte as the number of saved bytes, which is also
         stored in inlen.  inmask holds the lead byte markers for total
         lengths 2 to 6.  */
01118 #define UNPACK_BYTES \
01119   {                                                                  \
01120     static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };  \
01121     wint_t wch = state->__value.__wch;                                      \
01122     size_t ntotal = state->__count >> 8;                             \
01123     /* Number of bytes that were saved.  */                          \
01124     inlen = state->__count & 255;                                    \
01125     /* Length marker of the lead byte.  */                           \
01126     bytebuf[0] = inmask[ntotal - 2];                                        \
01127     /* Walk the trail positions back to front; only saved ones are written.  */ \
01128     do                                                               \
01129       {                                                                     \
01130        if (--ntotal < inlen)                                                \
01131          bytebuf[ntotal] = 0x80 | (wch & 0x3f);                      \
01132        wch >>= 6;                                                    \
01133       }                                                                     \
01134     while (ntotal > 1);                                                     \
01135     /* The bits left over belong to the lead byte.  */               \
01136     bytebuf[0] |= wch;                                                      \
01137   }
01138 /* CLEAR_STATE drops any saved incomplete sequence by zeroing state->__count.  */
01139 #define CLEAR_STATE \
01140   state->__count = 0
01141 
01142 
01143 #include <iconv/loop.c>
01144 #include <iconv/skeleton.c>
01145 
01146 
01147 /* Convert from UCS2 to the internal (UCS4-like) format.
         Each step fetches one 16-bit unit with get16.  Units in the
         surrogate range 0xd800..0xdfff are passed to
         STANDARD_FROM_LOOP_ERR_HANDLER; every other unit is stored as one
         uint32_t and two input bytes are consumed.  */
01148 #define DEFINE_INIT         0
01149 #define DEFINE_FINI         0
01150 #define MIN_NEEDED_FROM            2
01151 #define MIN_NEEDED_TO              4
01152 #define FROM_DIRECTION             1
01153 #define FROM_LOOP           ucs2_internal_loop
01154 #define TO_LOOP                    ucs2_internal_loop /* This is not used.  */
01155 #define FUNCTION_NAME              __gconv_transform_ucs2_internal
01156 #define ONE_DIRECTION              1
01157 
01158 #define MIN_NEEDED_INPUT    MIN_NEEDED_FROM
01159 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_TO
01160 #define LOOPFCT                    FROM_LOOP
01161 #define BODY \
01162   {                                                                  \
01163     uint16_t u1 = get16 (inptr);                                     \
01164                                                                      \
01165     if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                  \
01166       {                                                                     \
01167        /* Surrogate characters in UCS-2 input are not valid.  Reject        \
01168           them.  (Catching this here is not security relevant.)  */         \
01169        STANDARD_FROM_LOOP_ERR_HANDLER (2);                                  \
01170       }                                                                     \
01171     /* Widen the 16-bit unit to the 32-bit internal form.  */        \
01172     *((uint32_t *) outptr) = u1;                                     \
01173     outptr += sizeof (uint32_t);                                     \
01174     inptr += 2;                                                             \
01175   }
01176 #define LOOP_NEED_FLAGS
01177 #include <iconv/loop.c>
01178 #include <iconv/skeleton.c>
01179 
01180 
01181 /* Convert from the internal (UCS4-like) format to UCS2.
         Each step reads one uint32_t.  Values of 0x10000 and above are
         passed to UNICODE_TAG_HANDLER and then to
         STANDARD_TO_LOOP_ERR_HANDLER.  Surrogate values set result to
         __GCONV_ILLEGAL_INPUT; then the BODY is either left with `break'
         or, when ignore_errors_p () is true, the value is skipped and
         counted in *irreversible.  Everything else is stored with put16
         and four input bytes are consumed.  */
01182 #define DEFINE_INIT         0
01183 #define DEFINE_FINI         0
01184 #define MIN_NEEDED_FROM            4
01185 #define MIN_NEEDED_TO              2
01186 #define FROM_DIRECTION             1
01187 #define FROM_LOOP           internal_ucs2_loop
01188 #define TO_LOOP                    internal_ucs2_loop /* This is not used.  */
01189 #define FUNCTION_NAME              __gconv_transform_internal_ucs2
01190 #define ONE_DIRECTION              1
01191 
01192 #define MIN_NEEDED_INPUT    MIN_NEEDED_FROM
01193 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_TO
01194 #define LOOPFCT                    FROM_LOOP
01195 #define BODY \
01196   {                                                                  \
01197     uint32_t val = *((const uint32_t *) inptr);                             \
01198     /* Values that need more than 16 bits cannot be stored.  */      \
01199     if (__builtin_expect (val >= 0x10000, 0))                               \
01200       {                                                                     \
01201        UNICODE_TAG_HANDLER (val, 4);                                        \
01202        STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
01203       }                                                                     \
01204     else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))           \
01205       {                                                                     \
01206        /* Surrogate characters in UCS-4 input are not valid.                \
01207           We must catch this, because the UCS-2 output might be             \
01208           interpreted as UTF-16 by other programs.  If we let               \
01209           surrogates pass through, attackers could make a security          \
01210           hole exploit by synthesizing any desired plane 1-16               \
01211           character.  Note that result is set before the ignore check.  */  \
01212        result = __GCONV_ILLEGAL_INPUT;                                      \
01213        if (! ignore_errors_p ())                                     \
01214          break;                                                      \
01215        inptr += 4;                                                   \
01216        ++*irreversible;                                              \
01217        continue;                                                     \
01218       }                                                                     \
01219     else                                                             \
01220       {                                                                     \
01221        put16 (outptr, val);                                          \
01222         outptr += sizeof (uint16_t);                                        \
01223        inptr += 4;                                                   \
01224       }                                                                     \
01225   }
01226 #define LOOP_NEED_FLAGS
01227 #include <iconv/loop.c>
01228 #include <iconv/skeleton.c>
01229 
01230 
01231 /* Convert from UCS2 in other endianness to the internal (UCS4-like) format.
         Each step fetches one 16-bit unit with get16 and passes it through
         bswap_16.  Units in the surrogate range 0xd800..0xdfff either stop
         the conversion with __GCONV_ILLEGAL_INPUT or, when
         ignore_errors_p () is true, are skipped and counted in
         *irreversible.  Every other unit is stored as one uint32_t.
         NOTE(review): when a surrogate is ignored here, result is not set
         to __GCONV_ILLEGAL_INPUT, whereas the surrogate branch of the
         internal -> UCS2 converter in this file sets it before the
         ignore_errors_p () check -- confirm the difference is intended.  */
01232 #define DEFINE_INIT         0
01233 #define DEFINE_FINI         0
01234 #define MIN_NEEDED_FROM            2
01235 #define MIN_NEEDED_TO              4
01236 #define FROM_DIRECTION             1
01237 #define FROM_LOOP           ucs2reverse_internal_loop
01238 #define TO_LOOP                    ucs2reverse_internal_loop/* This is not used.*/
01239 #define FUNCTION_NAME              __gconv_transform_ucs2reverse_internal
01240 #define ONE_DIRECTION              1
01241 
01242 #define MIN_NEEDED_INPUT    MIN_NEEDED_FROM
01243 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_TO
01244 #define LOOPFCT                    FROM_LOOP
01245 #define BODY \
01246   {                                                                  \
01247     uint16_t u1 = bswap_16 (get16 (inptr));                                 \
01248                                                                      \
01249     if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))                  \
01250       {                                                                     \
01251        /* Surrogate characters in UCS-2 input are not valid.  Reject        \
01252           them.  (Catching this here is not security relevant.)  */         \
01253        if (! ignore_errors_p ())                                     \
01254          {                                                           \
01255            result = __GCONV_ILLEGAL_INPUT;                                  \
01256            break;                                                    \
01257          }                                                           \
01258        inptr += 2;                                                   \
01259        ++*irreversible;                                              \
01260        continue;                                                     \
01261       }                                                                     \
01262     /* Widen the swapped 16-bit unit to the 32-bit internal form.  */ \
01263     *((uint32_t *) outptr) = u1;                                     \
01264     outptr += sizeof (uint32_t);                                     \
01265     inptr += 2;                                                             \
01266   }
01267 #define LOOP_NEED_FLAGS
01268 #include <iconv/loop.c>
01269 #include <iconv/skeleton.c>
01270 
01271 
01272 /* Convert from the internal (UCS4-like) format to UCS2 in other endianness.
         Each step reads one uint32_t.  Values of 0x10000 and above are
         passed to UNICODE_TAG_HANDLER and then to
         STANDARD_TO_LOOP_ERR_HANDLER.  Surrogate values either stop the
         conversion with __GCONV_ILLEGAL_INPUT or, when ignore_errors_p ()
         is true, are skipped and counted in *irreversible.  Everything
         else is passed through bswap_16 and stored with put16.
         NOTE(review): when a surrogate is ignored here, result is not set
         to __GCONV_ILLEGAL_INPUT, whereas the surrogate branch of the
         internal -> UCS2 converter in this file sets it before the
         ignore_errors_p () check -- confirm the difference is intended.  */
01273 #define DEFINE_INIT         0
01274 #define DEFINE_FINI         0
01275 #define MIN_NEEDED_FROM            4
01276 #define MIN_NEEDED_TO              2
01277 #define FROM_DIRECTION             1
01278 #define FROM_LOOP           internal_ucs2reverse_loop
01279 #define TO_LOOP                    internal_ucs2reverse_loop/* This is not used.*/
01280 #define FUNCTION_NAME              __gconv_transform_internal_ucs2reverse
01281 #define ONE_DIRECTION              1
01282 
01283 #define MIN_NEEDED_INPUT    MIN_NEEDED_FROM
01284 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_TO
01285 #define LOOPFCT                    FROM_LOOP
01286 #define BODY \
01287   {                                                                  \
01288     uint32_t val = *((const uint32_t *) inptr);                             \
01289     if (__builtin_expect (val >= 0x10000, 0))                               \
01290       {                                                                     \
01291        UNICODE_TAG_HANDLER (val, 4);                                        \
01292        STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
01293       }                                                                     \
01294     else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))           \
01295       {                                                                     \
01296        /* Surrogate characters in UCS-4 input are not valid.                \
01297           We must catch this, because the UCS-2 output might be             \
01298           interpreted as UTF-16 by other programs.  If we let               \
01299           surrogates pass through, attackers could make a security          \
01300           hole exploit by synthesizing any desired plane 1-16               \
01301           character.  */                                             \
01302        if (! ignore_errors_p ())                                     \
01303          {                                                           \
01304            result = __GCONV_ILLEGAL_INPUT;                                  \
01305            break;                                                    \
01306          }                                                           \
01307        inptr += 4;                                                   \
01308        ++*irreversible;                                              \
01309        continue;                                                     \
01310       }                                                                     \
01311     else                                                             \
01312       {                                                                     \
01313        put16 (outptr, bswap_16 (val));                                      \
01314        outptr += sizeof (uint16_t);                                         \
01315        inptr += 4;                                                   \
01316       }                                                                     \
01317   }
01318 #define LOOP_NEED_FLAGS
01319 #include <iconv/loop.c>
01320 #include <iconv/skeleton.c>