Back to index

glibc  2.9
iso-2022-jp.c
Go to the documentation of this file.
00001 /* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
00002    Copyright (C) 1998, 1999, 2000-2002 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 #include <assert.h>
00022 #include <dlfcn.h>
00023 #include <gconv.h>
00024 #include <stdint.h>
00025 #include <stdlib.h>
00026 #include <string.h>
00027 #include "jis0201.h"
00028 #include "jis0208.h"
00029 #include "jis0212.h"
00030 #include "gb2312.h"
00031 #include "ksc5601.h"
00032 
00033 struct gap
00034 {
      /* START and END are 16-bit values and IDX is a signed 32-bit value.
         NOTE(review): this looks like a range record for the lookup tables
         in "iso8859-7jp.h", which is included right after this definition;
         the exact meaning of the fields is defined by that header -- confirm
         there.  */
00035   uint16_t start;
00036   uint16_t end;
00037   int32_t idx;
00038 };
00039 
00040 #include "iso8859-7jp.h"
00041 
00042 /* This makes obvious what everybody knows: 0x1b is the Esc character.  */
00043 #define ESC 0x1b
00044 
00045 /* We provide our own initialization and destructor function.  */
00046 #define DEFINE_INIT  0
00047 #define DEFINE_FINI  0
00048 
00049 /* Definitions used in the body of the `gconv' function.  */
00050 #define FROM_LOOP           from_iso2022jp_loop
00051 #define TO_LOOP                    to_iso2022jp_loop
      /* Byte counts for the two loops.  The FROM loop consumes between one
         byte (a plain character) and four bytes (the longest escape sequence
         it recognizes, e.g. `Esc $ ( C') and stores one 4-byte value per
         converted character.  The TO loop reads 4-byte values and emits
         between one and six bytes.  gconv_init copies these numbers into the
         step record.  */
00052 #define FROM_LOOP_MIN_NEEDED_FROM  1
00053 #define FROM_LOOP_MAX_NEEDED_FROM  4
00054 #define FROM_LOOP_MIN_NEEDED_TO           4
00055 #define FROM_LOOP_MAX_NEEDED_TO           4
00056 #define TO_LOOP_MIN_NEEDED_FROM           4
00057 #define TO_LOOP_MAX_NEEDED_FROM           4
00058 #define TO_LOOP_MIN_NEEDED_TO             1
00059 #define TO_LOOP_MAX_NEEDED_TO             6
00060 #define FROM_DIRECTION             (dir == from_iso2022jp)
      /* PREPARE_LOOP declares the locals the conversion code relies on: DIR
         and VAR are read from the iso2022jp_data record stored in
         step->__data, SETP points at the COUNT field of the conversion state,
         and SAVE_SET is the scratch variable used by SAVE_RESET_STATE.  */
00061 #define PREPARE_LOOP \
00062   enum direction dir = ((struct iso2022jp_data *) step->__data)->dir;       \
00063   enum variant var = ((struct iso2022jp_data *) step->__data)->var;         \
00064   int save_set;                                                             \
00065   int *setp = &data->__statep->__count;
      /* Extra arguments handed to the loop functions; they correspond to the
         EXTRA_LOOP_DECLS parameter list (`enum variant var, int *setp').  */
00066 #define EXTRA_LOOP_ARGS            , var, setp
00067 
00068 
00069 /* Direction of the transformation.  */
00070 enum direction
00071 {
00072   illegal_dir,       /* No supported charset name matched in gconv_init.  */
00073   to_iso2022jp,      /* The step's target is ISO-2022-JP or ISO-2022-JP-2.  */
00074   from_iso2022jp     /* The step's source is ISO-2022-JP or ISO-2022-JP-2.  */
00075 };
00076 
00077 /* We handle ISO-2022-JP and ISO-2022-JP-2 here.  */
00078 enum variant
00079 {
00080   illegal_var,       /* No supported charset name matched in gconv_init.  */
00081   iso2022jp,         /* Plain ISO-2022-JP.  */
00082   iso2022jp2         /* ISO-2022-JP-2; enables the additional sets and G2.  */
00083 };
00084 
00085 
00086 struct iso2022jp_data
00087 {
      /* Per-step settings chosen by gconv_init.  The record is allocated
         there, stored in step->__data, read back by PREPARE_LOOP and released
         by gconv_end.  */
00088   enum direction dir;
00089   enum variant var;
00090 };
00091 
00092 
00093 /* The COUNT element of the state keeps track of the currently selected
00094    character set.  The possible values are:  */
00095 enum
00096 {
      /* Each value is annotated with the escape sequence that selects the set
         in the decoding loop.  */
00097   ASCII_set = 0,                    /* `Esc ( B'; the initial state.  */
00098   JISX0208_1978_set = 1 << 3,       /* `Esc $ @'.  */
00099   JISX0208_1983_set = 2 << 3,       /* `Esc $ B'.  */
00100   JISX0201_Roman_set = 3 << 3,      /* `Esc ( J'.  */
00101   JISX0201_Kana_set = 4 << 3,       /* `Esc ( I', ISO-2022-JP-2 only.  */
00102   GB2312_set = 5 << 3,              /* `Esc $ A', ISO-2022-JP-2 only.  */
00103   KSC5601_set = 6 << 3,             /* `Esc $ ( C', ISO-2022-JP-2 only.  */
00104   JISX0212_set = 7 << 3,            /* `Esc $ ( D', ISO-2022-JP-2 only.  */
00105   CURRENT_SEL_MASK = 7 << 3         /* Bits 3..5 of COUNT.  */
00106 };
00107 
00108 /* The second value stored is the designation of the G2 set.  The following
00109    values are possible:  */
00110 enum
00111 {
00112   UNSPECIFIED_set = 0,              /* No G2 set has been designated.  */
00113   ISO88591_set = 1 << 6,            /* Designated by `Esc . A'.  */
00114   ISO88597_set = 2 << 6,            /* Designated by `Esc . F'.  */
00115   CURRENT_ASSIGN_MASK = 3 << 6      /* Bits 6..7 of COUNT.  */
00116 };
00117 
00118 /* The third value, only used during conversion from Unicode to ISO-2022-JP-2,
00119    describes the language tag parsing status.  The possible values are as
00120    follows.  Values >= TAG_language are temporary tag parsing states.  */
00121 enum
00122 {
      /* The stable states (none, ja, ko, zh) deliberately take the values
         0..3 << 8 so that every intermediate parsing state compares
         >= TAG_language; the encoder uses exactly that comparison to drop
         a half-parsed tag.  */
00123   TAG_none = 0,
00124   TAG_language = 4 << 8,            /* Seen the tag introducer (0x01).  */
00125   TAG_language_j = 5 << 8,          /* Seen `j', waiting for `a'.  */
00126   TAG_language_ja = 1 << 8,
00127   TAG_language_k = 6 << 8,          /* Seen `k', waiting for `o'.  */
00128   TAG_language_ko = 2 << 8,
00129   TAG_language_z = 7 << 8,          /* Seen `z', waiting for `h'.  */
00130   TAG_language_zh = 3 << 8,
00131   CURRENT_TAG_MASK = 7 << 8         /* Bits 8..10 of COUNT.  */
00132 };
00133 
00134 
00135 extern int gconv_init (struct __gconv_step *step);
      /* Initialize STEP for a conversion from or to ISO-2022-JP or
         ISO-2022-JP-2.

         The direction and variant are derived from STEP's charset names,
         compared case-insensitively against "ISO-2022-JP//" and
         "ISO-2022-JP-2//".  On success a freshly allocated iso2022jp_data
         record is stored in step->__data, the min/max byte counts for the
         chosen direction are filled in and the step is marked stateful.

         Returns __GCONV_OK on success, __GCONV_NOCONV if neither name of STEP
         is one of the supported charsets, and __GCONV_NOMEM if the private
         record cannot be allocated.  */
00136 int
00137 gconv_init (struct __gconv_step *step)
00138 {
00139   /* Determine which direction.  */
00140   struct iso2022jp_data *new_data;
00141   enum direction dir = illegal_dir;
00142   enum variant var = illegal_var;
00143   int result;
00144 
      /* For each variant the source name is tested before the target name,
         and ISO-2022-JP is tested before ISO-2022-JP-2; the first match
         wins.  */
00145   if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
00146     {
00147       dir = from_iso2022jp;
00148       var = iso2022jp;
00149     }
00150   else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
00151     {
00152       dir = to_iso2022jp;
00153       var = iso2022jp;
00154     }
00155   else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
00156     {
00157       dir = from_iso2022jp;
00158       var = iso2022jp2;
00159     }
00160   else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
00161     {
00162       dir = to_iso2022jp;
00163       var = iso2022jp2;
00164     }
00165 
      /* RESULT is pre-set to the failure code of the next stage and only
         overwritten once that stage has succeeded.  The __builtin_expect is
         merely a branch hint that DIR is normally from_iso2022jp.  */
00166   result = __GCONV_NOCONV;
00167   if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
00168     {
00169       new_data
00170        = (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data));
00171 
00172       result = __GCONV_NOMEM;
00173       if (new_data != NULL)
00174        {
00175          new_data->dir = dir;
00176          new_data->var = var;
      /* Ownership of the record passes to STEP; gconv_end frees it.  */
00177          step->__data = new_data;
00178 
00179          if (dir == from_iso2022jp)
00180            {
00181              step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
00182              step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
00183              step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
00184              step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
00185            }
00186          else
00187            {
00188              step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
00189              step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
00190              step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
00191              step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
00192            }
00193 
00194          /* Yes, this is a stateful encoding.  */
00195          step->__stateful = 1;
00196 
00197          result = __GCONV_OK;
00198        }
00199     }
00200 
00201   return result;
00202 }
00203 
00204 
00205 extern void gconv_end (struct __gconv_step *data);
      /* Release the private iso2022jp_data record that gconv_init stored in
         the step's __data member.  Note that the parameter is named DATA but
         is the step record itself.  */
00206 void
00207 gconv_end (struct __gconv_step *data)
00208 {
00209   free (data->__data);
00210 }
00211 
00212 
00213 /* Since this is a stateful encoding we have to provide code which resets
00214    the output state to the initial state.  This has to be done during the
00215    flushing.  */
      /* The macro relies on names from the scope it is expanded in: DATA, DIR
         and VAR, plus OUTBUF, OUTEND and STATUS when the shift sequence has
         to be written.  (NOTE(review): that scope is presumably the generic
         gconv function body, which is not part of this file -- confirm.)

         The lowest three bits of COUNT are not owned by this module: they
         are masked out of the "anything to reset?" test (`& ~7') and
         preserved by every reset (`&= 7').  If the three output bytes do not
         fit, STATUS is set to __GCONV_FULL_OUTPUT and the state is left
         unchanged.  */
00216 #define EMIT_SHIFT_TO_INIT \
00217   /* Avoid warning about unused variable 'var'.  */                         \
00218   (void) var;                                                        \
00219                                                                      \
00220   if ((data->__statep->__count & ~7) != ASCII_set)                          \
00221     {                                                                \
00222       if (dir == from_iso2022jp                                             \
00223          || (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set)             \
00224        {                                                             \
00225          /* It's easy, we don't have to emit anything, we just reset the     \
00226             state for the input.  Note that this also clears the G2         \
00227             designation.  */                                                \
00228          data->__statep->__count &= 7;                                      \
00229          data->__statep->__count |= ASCII_set;                              \
00230        }                                                             \
00231       else                                                           \
00232        {                                                             \
00233          /* We are not in the initial state.  To switch back we have        \
00234             to emit the sequence `Esc ( B'.  */                      \
00235          if (__builtin_expect (outbuf + 3 > outend, 0))              \
00236            /* We don't have enough room in the output buffer.  */           \
00237            status = __GCONV_FULL_OUTPUT;                             \
00238          else                                                        \
00239            {                                                         \
00240              /* Write out the shift sequence.  */                           \
00241              *outbuf++ = ESC;                                               \
00242              *outbuf++ = '(';                                               \
00243              *outbuf++ = 'B';                                               \
00244              /* Note that this also clears the G2 designation.  */          \
00245              data->__statep->__count &= 7;                                  \
00246              data->__statep->__count |= ASCII_set;                          \
00247            }                                                         \
00248        }                                                             \
00249     }
00250 
00251 
00252 /* Since we might have to reset input pointer we must be able to save
00253    and restore the state.  */
      /* With a nonzero SAVE the current value of *SETP is copied into
         SAVE_SET; with a zero SAVE that copy is written back to *SETP.  Both
         variables are the ones declared by PREPARE_LOOP.  */
00254 #define SAVE_RESET_STATE(Save) \
00255   if (Save)                                                          \
00256     save_set = *setp;                                                       \
00257   else                                                               \
00258     *setp = save_set
00259 
00260 
00261 /* First define the conversion function from ISO-2022-JP to UCS4.  */
      /* BODY converts one input unit per execution: either it consumes a
         complete escape sequence (updating SET or SET2 and restarting with
         `continue'), or it decodes one character according to the current
         sets and stores it as a 4-byte value at OUTPTR.
         NOTE(review): the surrounding loop and the variables INPTR, INEND,
         OUTPTR and RESULT as well as STANDARD_FROM_LOOP_ERR_HANDLER are
         presumably supplied by <iconv/loop.c>, included at the end of this
         section -- confirm there.  */
00262 #define MIN_NEEDED_INPUT    FROM_LOOP_MIN_NEEDED_FROM
00263 #define MAX_NEEDED_INPUT    FROM_LOOP_MAX_NEEDED_FROM
00264 #define MIN_NEEDED_OUTPUT   FROM_LOOP_MIN_NEEDED_TO
00265 #define MAX_NEEDED_OUTPUT   FROM_LOOP_MAX_NEEDED_TO
00266 #define LOOPFCT                    FROM_LOOP
00267 #define BODY \
00268   {                                                                  \
00269     uint32_t ch = *inptr;                                            \
00270                                                                      \
00271     /* Recognize escape sequences.  */                                      \
00272     if (__builtin_expect (ch, 0) == ESC)                             \
00273       {                                                                     \
00274        /* We now must be prepared to read two to three more                 \
00275           characters.  If we have a match in the first character but        \
00276           then the input buffer ends we terminate with an error since       \
00277           we must not risk missing an escape sequence just because it       \
00278           is not entirely in the current input buffer.  */                  \
00279        if (__builtin_expect (inptr + 2 >= inend, 0)                         \
00280            || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '('             \
00281               && __builtin_expect (inptr + 3 >= inend, 0)))                 \
00282          {                                                           \
00283            /* Not enough input available.  */                               \
00284            result = __GCONV_INCOMPLETE_INPUT;                               \
00285            break;                                                    \
00286          }                                                           \
00287                                                                      \
00288        if (inptr[1] == '(')                                          \
00289          {                                                           \
00290            if (inptr[2] == 'B')                                      \
00291              {                                                              \
00292               /* ASCII selected.  */                                        \
00293               set = ASCII_set;                                       \
00294               inptr += 3;                                            \
00295               continue;                                              \
00296              }                                                              \
00297            else if (inptr[2] == 'J')                                        \
00298              {                                                              \
00299               /* JIS X 0201 Roman selected.  */                      \
00300               set = JISX0201_Roman_set;                              \
00301               inptr += 3;                                            \
00302               continue;                                              \
00303              }                                                              \
00304            else if (var == iso2022jp2 && inptr[2] == 'I')                   \
00305              {                                                              \
00306               /* JIS X 0201 Katakana selected (ISO-2022-JP-2 only).  */     \
00307               set = JISX0201_Kana_set;                               \
00308               inptr += 3;                                            \
00309               continue;                                              \
00310              }                                                              \
00311          }                                                           \
00312        else if (inptr[1] == '$')                                     \
00313          {                                                           \
00314            if (inptr[2] == '@')                                      \
00315              {                                                              \
00316               /* JIS X 0208-1978 selected.  */                       \
00317               set = JISX0208_1978_set;                               \
00318               inptr += 3;                                            \
00319               continue;                                              \
00320              }                                                              \
00321            else if (inptr[2] == 'B')                                        \
00322              {                                                              \
00323               /* JIS X 0208-1983 selected.  */                       \
00324               set = JISX0208_1983_set;                               \
00325               inptr += 3;                                            \
00326               continue;                                              \
00327              }                                                              \
00328            else if (var == iso2022jp2)                                      \
00329              {                                                              \
00330               if (inptr[2] == 'A')                                   \
00331                 {                                                    \
00332                   /* GB 2312-1980 selected.  */                      \
00333                   set = GB2312_set;                                         \
00334                   inptr += 3;                                               \
00335                   continue;                                          \
00336                 }                                                    \
00337               else if (inptr[2] == '(')                              \
00338                 {                                                    \
00339                   if (inptr[3] == 'C')                               \
00340                     {                                                       \
00341                      /* KSC 5601-1987 selected.  */                         \
00342                      set = KSC5601_set;                              \
00343                      inptr += 4;                                     \
00344                      continue;                                       \
00345                     }                                                       \
00346                   else if (inptr[3] == 'D')                                 \
00347                     {                                                       \
00348                      /* JIS X 0212-1990 selected.  */                \
00349                      set = JISX0212_set;                             \
00350                      inptr += 4;                                     \
00351                      continue;                                       \
00352                     }                                                       \
00353                 }                                                    \
00354              }                                                              \
00355          }                                                           \
00356        else if (var == iso2022jp2 && inptr[1] == '.')                       \
00357          {                                                           \
00358            if (inptr[2] == 'A')                                      \
00359              {                                                              \
00360               /* ISO 8859-1-GR selected.  */                                \
00361               set2 = ISO88591_set;                                   \
00362               inptr += 3;                                            \
00363               continue;                                              \
00364              }                                                              \
00365            else if (inptr[2] == 'F')                                        \
00366              {                                                              \
00367               /* ISO 8859-7-GR selected.  */                                \
00368               set2 = ISO88597_set;                                   \
00369               inptr += 3;                                            \
00370               continue;                                              \
00371              }                                                              \
00372          }                                                           \
00373       }                                                                     \
00374                                                                      \
      /* An escape sequence that matched none of the designations above     \
         falls through to here with CH still equal to ESC.  `Esc N' followed \
         by one byte selects a single character from the designated G2      \
         set; the length check above guarantees inptr[2] is readable.  */   \
00375     if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N')                  \
00376       {                                                                     \
00377        if (set2 == ISO88591_set)                                     \
00378          {                                                           \
      /* NOTE(review): unlike the ISO 8859-7 branch below, inptr[2] is not   \
         range-checked here before the high bit is set -- confirm this is   \
         intended.  */                                                      \
00379            ch = inptr[2] | 0x80;                                     \
00380            inptr += 3;                                                      \
00381          }                                                           \
00382        else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set)             \
00383          {                                                           \
00384            /* We use the table from the ISO 8859-7 module.  */              \
00385            if (inptr[2] < 0x20 || inptr[2] >= 0x80)                         \
00386              STANDARD_FROM_LOOP_ERR_HANDLER (1);                     \
00387            ch = iso88597_to_ucs4[inptr[2] - 0x20];                          \
00388            if (ch == 0)                                              \
00389              STANDARD_FROM_LOOP_ERR_HANDLER (3);                     \
00390            inptr += 3;                                                      \
00391          }                                                           \
00392        else                                                          \
00393          {                                                           \
      /* No G2 set has been designated.  */                                 \
00394            STANDARD_FROM_LOOP_ERR_HANDLER (1);                              \
00395          }                                                           \
00396       }                                                                     \
00397     else if (ch >= 0x80)                                             \
00398       {                                                                     \
00399        STANDARD_FROM_LOOP_ERR_HANDLER (1);                                  \
00400       }                                                                     \
      /* Bytes below 0x21 and the byte 0x7f are passed through unchanged    \
         regardless of the currently selected set.  */                      \
00401     else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f))                 \
00402       /* Almost done, just advance the input pointer.  */                   \
00403       ++inptr;                                                              \
00404     else if (set == JISX0201_Roman_set)                                     \
00405       {                                                                     \
00406        /* Use the JIS X 0201 table.  */                              \
00407        ch = jisx0201_to_ucs4 (ch);                                   \
00408        if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0))                \
00409          STANDARD_FROM_LOOP_ERR_HANDLER (1);                                \
00410        ++inptr;                                                      \
00411       }                                                                     \
00412     else if (set == JISX0201_Kana_set)                                      \
00413       {                                                                     \
00414        /* Use the JIS X 0201 table.  */                              \
00415        ch = jisx0201_to_ucs4 (ch + 0x80);                            \
00416        if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0))                \
00417          STANDARD_FROM_LOOP_ERR_HANDLER (1);                                \
00418        ++inptr;                                                      \
00419       }                                                                     \
00420     else                                                             \
00421       {                                                                     \
00422        if (set == JISX0208_1978_set || set == JISX0208_1983_set)            \
00423          /* XXX I don't have the tables for these two old variants of       \
00424             JIS X 0208.  Therefore I'm using the tables for JIS X           \
00425             0208-1990.  If somebody has problems with this please           \
00426             provide the appropriate tables.  */                      \
00427          ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0);                  \
00428        else if (set == JISX0212_set)                                        \
00429          /* Use the JIS X 0212 table.  */                            \
00430          ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0);                  \
00431        else if (set == GB2312_set)                                   \
00432          /* Use the GB 2312 table.  */                                      \
00433          ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0);                    \
00434        else                                                          \
00435          {                                                           \
00436            assert (set == KSC5601_set);                              \
00437                                                                      \
00438            /* Use the KSC 5601 table.  */                            \
00439            ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0);                 \
00440          }                                                           \
00441                                                                      \
      /* A result of 0 from the table lookup is treated as "the remaining   \
         input is too short for this character".  */                        \
00442        if (__builtin_expect (ch == 0, 0))                            \
00443          {                                                           \
00444            result = __GCONV_INCOMPLETE_INPUT;                               \
00445            break;                                                    \
00446          }                                                           \
00447        else if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0))           \
00448          {                                                           \
00449            STANDARD_FROM_LOOP_ERR_HANDLER (1);                              \
00450          }                                                           \
00451       }                                                                     \
00452                                                                      \
00453     put32 (outptr, ch);                                                     \
00454     outptr += 4;                                                     \
00455   }
00456 #define LOOP_NEED_FLAGS
00457 #define EXTRA_LOOP_DECLS    , enum variant var, int *setp
      /* INIT_PARAMS splits the state word into the selected set (SET) and the
         G2 designation (SET2); UPDATE_PARAMS stores the two back.  Only those
         two bit fields are written back, so any other bits of *SETP are
         cleared by UPDATE_PARAMS.  */
00458 #define INIT_PARAMS         int set = *setp & CURRENT_SEL_MASK;             \
00459                             int set2 = *setp & CURRENT_ASSIGN_MASK
00460 #define UPDATE_PARAMS              *setp = set | set2
00461 #include <iconv/loop.c>
00462 
00463 
00464 /* Next, define the other direction.  */
00465 
      /* The candidate conversions.  NONE is 0, which is also what remains of
         a conversion list once all five of its entries have been shifted out
         with CVLIST_REST.  */
00466 enum conversion { none = 0, european, japanese, chinese, korean, other };
00467 
00468 /* A datatype for conversion lists.  */
00469 typedef unsigned int cvlist_t;
      /* CVLIST packs five `enum conversion' values, three bits each, with CV1
         in the lowest bits.  CVLIST_FIRST extracts the lowest entry and
         CVLIST_REST discards it, moving the next entry into first place.  */
00470 #define CVLIST(cv1, cv2, cv3, cv4, cv5) \
00471   ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))
00472 #define CVLIST_FIRST(cvl) ((cvl) & ((1 << 3) - 1))
00473 #define CVLIST_REST(cvl) ((cvl) >> 3)
      /* One preference list per stable language tag state; the comment in
         front of each entry names the state it belongs to.
         NOTE(review): presumably indexed by the tag value shifted right by 8
         (TAG_none..TAG_language_zh are 0..3 << 8) -- confirm at the use
         site.  */
00474 static const cvlist_t conversion_lists[4] =
00475   {
00476     /* TAG_none */        CVLIST (japanese, european, chinese, korean, other),
00477     /* TAG_language_ja */ CVLIST (japanese, european, chinese, korean, other),
00478     /* TAG_language_ko */ CVLIST (korean, european, japanese, chinese, other),
00479     /* TAG_language_zh */ CVLIST (chinese, european, japanese, korean, other)
00480   };
00481 
00482 #define MIN_NEEDED_INPUT    TO_LOOP_MIN_NEEDED_FROM
00483 #define MAX_NEEDED_INPUT    TO_LOOP_MAX_NEEDED_FROM
00484 #define MIN_NEEDED_OUTPUT   TO_LOOP_MIN_NEEDED_TO
00485 #define MAX_NEEDED_OUTPUT   TO_LOOP_MAX_NEEDED_TO
00486 #define LOOPFCT                    TO_LOOP
00487 #define BODY \
00488   {                                                                  \
00489     uint32_t ch;                                                     \
00490     size_t written;                                                  \
00491                                                                      \
00492     ch = get32 (inptr);                                                     \
00493                                                                      \
00494     if (var == iso2022jp2)                                           \
00495       {                                                                     \
00496        /* Handle Unicode tag characters (range U+E0000..U+E007F).  */       \
00497        if (__builtin_expect ((ch >> 7) == (0xe0000 >> 7), 0))               \
00498          {                                                           \
00499            ch &= 0x7f;                                                      \
00500            if (ch >= 'A' && ch <= 'Z')                                      \
00501              ch += 'a' - 'A';                                               \
00502            if (ch == 0x01)                                           \
00503              tag = TAG_language;                                     \
00504            else if (ch == 'j' && tag == TAG_language)                       \
00505              tag = TAG_language_j;                                   \
00506            else if (ch == 'a' && tag == TAG_language_j)              \
00507              tag = TAG_language_ja;                                         \
00508            else if (ch == 'k' && tag == TAG_language)                       \
00509              tag = TAG_language_k;                                   \
00510            else if (ch == 'o' && tag == TAG_language_k)              \
00511              tag = TAG_language_ko;                                         \
00512            else if (ch == 'z' && tag == TAG_language)                       \
00513              tag = TAG_language_z;                                   \
00514            else if (ch == 'h' && tag == TAG_language_z)              \
00515              tag = TAG_language_zh;                                         \
00516            else if (ch == 0x7f)                                      \
00517              tag = TAG_none;                                                \
00518            else                                                      \
00519              {                                                              \
00520               /* Other tag characters reset the tag parsing state (if the   \
00521                  current state is a temporary state) or are ignored (if     \
00522                  the current state is a stable one).  */                    \
00523               if (tag >= TAG_language)                               \
00524                 tag = TAG_none;                                      \
00525              }                                                              \
00526                                                                      \
00527            inptr += 4;                                                      \
00528            continue;                                                 \
00529          }                                                           \
00530                                                                      \
00531        /* Non-tag characters reset the tag parsing state, if the current     \
00532           state is a temporary state.  */                            \
00533        if (__builtin_expect (tag >= TAG_language, 0))                       \
00534          tag = TAG_none;                                             \
00535       }                                                                     \
00536                                                                      \
00537     /* First see whether we can write the character using the currently            \
00538        selected character set.  But ignore the selected character set if      \
00539        the current language tag shows different preferences.  */            \
00540     if (set == ASCII_set)                                            \
00541       {                                                                     \
00542        /* Please note that the NUL byte is *not* matched if we are not             \
00543           currently using the ASCII charset.  This is because we must       \
00544           switch to the initial state whenever a NUL byte is written.  */    \
00545        if (ch <= 0x7f)                                                      \
00546          {                                                           \
00547            *outptr++ = ch;                                           \
00548            written = 1;                                              \
00549                                                                      \
00550            /* At the beginning of a line, G2 designation is cleared.  */     \
00551            if (var == iso2022jp2 && ch == 0x0a)                      \
00552              set2 = UNSPECIFIED_set;                                        \
00553          }                                                           \
00554        else                                                          \
00555          written = __UNKNOWN_10646_CHAR;                             \
00556       }                                                                     \
00557     /* ISO-2022-JP recommends to encode the newline character always in            \
00558        ASCII since this allows a context-free interpretation of the         \
00559        characters at the beginning of the next line.  Otherwise it would      \
00560        have to be known whether the last line ended using ASCII or          \
00561        JIS X 0201.  */                                                      \
00562     else if (set == JISX0201_Roman_set                                      \
00563             && (__builtin_expect (tag == TAG_none, 1)                       \
00564                || tag == TAG_language_ja))                                  \
00565       {                                                                     \
00566        unsigned char buf[1];                                                \
00567        written = ucs4_to_jisx0201 (ch, buf);                                \
00568        if (written != __UNKNOWN_10646_CHAR)                                 \
00569          {                                                           \
00570            if (buf[0] > 0x20 && buf[0] < 0x80)                              \
00571              {                                                              \
00572               *outptr++ = buf[0];                                    \
00573               written = 1;                                           \
00574              }                                                              \
00575            else                                                      \
00576              written = __UNKNOWN_10646_CHAR;                                \
00577          }                                                           \
00578       }                                                                     \
00579     else if (set == JISX0201_Kana_set                                       \
00580             && (__builtin_expect (tag == TAG_none, 1)                       \
00581                || tag == TAG_language_ja))                                  \
00582       {                                                                     \
00583        unsigned char buf[1];                                                \
00584        written = ucs4_to_jisx0201 (ch, buf);                                \
00585        if (written != __UNKNOWN_10646_CHAR)                                 \
00586          {                                                           \
00587            if (buf[0] > 0xa0 && buf[0] < 0xe0)                              \
00588              {                                                              \
00589               *outptr++ = buf[0] - 0x80;                             \
00590               written = 1;                                           \
00591              }                                                              \
00592            else                                                      \
00593              written = __UNKNOWN_10646_CHAR;                                \
00594          }                                                           \
00595       }                                                                     \
00596     else                                                             \
00597       {                                                                     \
00598        if ((set == JISX0208_1978_set || set == JISX0208_1983_set)           \
00599            && (__builtin_expect (tag == TAG_none, 1)                        \
00600               || tag == TAG_language_ja))                            \
00601          written = ucs4_to_jisx0208 (ch, outptr, outend - outptr);          \
00602        else if (set == JISX0212_set                                         \
00603                && (__builtin_expect (tag == TAG_none, 1)                    \
00604                    || tag == TAG_language_ja))                       \
00605          written = ucs4_to_jisx0212 (ch, outptr, outend - outptr);          \
00606        else if (set == GB2312_set                                    \
00607                && (__builtin_expect (tag == TAG_none, 1)                    \
00608                    || tag == TAG_language_zh))                       \
00609          written = ucs4_to_gb2312 (ch, outptr, outend - outptr);            \
00610        else if (set == KSC5601_set                                   \
00611                && (__builtin_expect (tag == TAG_none, 1)                    \
00612                    || tag == TAG_language_ko))                       \
00613          written = ucs4_to_ksc5601 (ch, outptr, outend - outptr);           \
00614        else                                                          \
00615          written = __UNKNOWN_10646_CHAR;                             \
00616                                                                      \
00617        if (__builtin_expect (written == 0, 0))                              \
00618          {                                                           \
00619            result = __GCONV_FULL_OUTPUT;                             \
00620            break;                                                    \
00621          }                                                           \
00622        else if (written != __UNKNOWN_10646_CHAR)                     \
00623          outptr += written;                                          \
00624       }                                                                     \
00625                                                                      \
00626     if (written == __UNKNOWN_10646_CHAR                                     \
00627        && __builtin_expect (tag == TAG_none, 1))                     \
00628       {                                                                     \
00629        if (set2 == ISO88591_set)                                     \
00630          {                                                           \
00631            if (ch >= 0x80 && ch <= 0xff)                             \
00632              {                                                              \
00633               if (__builtin_expect (outptr + 3 > outend, 0))                \
00634                 {                                                    \
00635                   result = __GCONV_FULL_OUTPUT;                      \
00636                   break;                                             \
00637                 }                                                    \
00638                                                                      \
00639               *outptr++ = ESC;                                       \
00640               *outptr++ = 'N';                                       \
00641               *outptr++ = ch & 0x7f;                                        \
00642               written = 3;                                           \
00643              }                                                              \
00644          }                                                           \
00645        else if (set2 == ISO88597_set)                                       \
00646          {                                                           \
00647            if (__builtin_expect (ch < 0xffff, 1))                           \
00648              {                                                              \
00649               const struct gap *rp = from_idx;                       \
00650                                                                      \
00651               while (ch > rp->end)                                   \
00652                 ++rp;                                                       \
00653               if (ch >= rp->start)                                   \
00654                 {                                                    \
00655                   unsigned char res =                                       \
00656                     iso88597_from_ucs4[ch - 0xa0 + rp->idx];                \
00657                   if (res != '\0')                                   \
00658                     {                                                       \
00659                      if (__builtin_expect (outptr + 3 > outend, 0))         \
00660                        {                                             \
00661                          result = __GCONV_FULL_OUTPUT;               \
00662                          break;                                      \
00663                        }                                             \
00664                                                                      \
00665                      *outptr++ = ESC;                                \
00666                      *outptr++ = 'N';                                \
00667                      *outptr++ = res;                                \
00668                      written = 3;                                    \
00669                     }                                                       \
00670                 }                                                    \
00671              }                                                              \
00672          }                                                           \
00673       }                                                                     \
00674                                                                      \
00675     if (written == __UNKNOWN_10646_CHAR)                             \
00676       {                                                                     \
00677        /* The attempts to use the currently selected character set          \
00678           failed, either because the language tag changed, or because       \
00679           the character requires a different character set, or because             \
00680           the character is unknown.                                         \
00681           The CJK character sets partially overlap when seen as subsets      \
00682           of ISO 10646; therefore there is no single correct result.        \
00683           We use a preference order which depends on the language tag.  */   \
00684                                                                      \
00685        if (ch <= 0x7f)                                                      \
00686          {                                                           \
00687            /* We must encode using ASCII.  First write out the              \
00688               escape sequence.  */                                   \
00689            if (__builtin_expect (outptr + 3 > outend, 0))                   \
00690              {                                                              \
00691               result = __GCONV_FULL_OUTPUT;                                 \
00692               break;                                                 \
00693              }                                                              \
00694                                                                      \
00695            *outptr++ = ESC;                                          \
00696            *outptr++ = '(';                                          \
00697            *outptr++ = 'B';                                          \
00698            set = ASCII_set;                                          \
00699                                                                      \
00700            if (__builtin_expect (outptr + 1 > outend, 0))                   \
00701              {                                                              \
00702               result = __GCONV_FULL_OUTPUT;                                 \
00703               break;                                                 \
00704              }                                                              \
00705            *outptr++ = ch;                                           \
00706                                                                      \
00707            /* At the beginning of a line, G2 designation is cleared.  */     \
00708            if (var == iso2022jp2 && ch == 0x0a)                      \
00709              set2 = UNSPECIFIED_set;                                        \
00710          }                                                           \
00711        else                                                          \
00712          {                                                           \
00713            /* Now it becomes difficult.  We must search the other           \
00714               character sets one by one.  Use an ordered conversion         \
00715               list that depends on the current language tag.  */            \
00716            cvlist_t conversion_list;                                        \
00717            unsigned char buf[2];                                     \
00718            int res = __GCONV_ILLEGAL_INPUT;                                 \
00719                                                                      \
00720            if (var == iso2022jp2)                                    \
00721              conversion_list = conversion_lists[tag >> 8];                  \
00722            else                                                      \
00723              conversion_list = CVLIST (japanese, 0, 0, 0, 0);               \
00724                                                                      \
00725            do                                                        \
00726              switch (CVLIST_FIRST (conversion_list))                        \
00727               {                                                      \
00728               case european:                                                \
00729                                                                      \
00730                 /* Try ISO 8859-1 upper half.   */                          \
00731                 if (ch >= 0x80 && ch <= 0xff)                               \
00732                   {                                                  \
00733                     if (set2 != ISO88591_set)                               \
00734                      {                                               \
00735                        if (__builtin_expect (outptr + 3 > outend, 0))      \
00736                          {                                           \
00737                            res = __GCONV_FULL_OUTPUT;                \
00738                            break;                                    \
00739                          }                                           \
00740                        *outptr++ = ESC;                              \
00741                        *outptr++ = '.';                              \
00742                        *outptr++ = 'A';                              \
00743                        set2 = ISO88591_set;                                 \
00744                      }                                               \
00745                                                                      \
00746                     if (__builtin_expect (outptr + 3 > outend, 0))          \
00747                      {                                               \
00748                        res = __GCONV_FULL_OUTPUT;                           \
00749                        break;                                        \
00750                      }                                               \
00751                     *outptr++ = ESC;                                        \
00752                     *outptr++ = 'N';                                        \
00753                     *outptr++ = ch - 0x80;                                  \
00754                     res = __GCONV_OK;                                       \
00755                     break;                                           \
00756                   }                                                  \
00757                                                                      \
00758                 /* Try ISO 8859-7 upper half.  */                           \
00759                 if (__builtin_expect (ch < 0xffff, 1))               \
00760                   {                                                  \
00761                     const struct gap *rp = from_idx;                        \
00762                                                                      \
00763                     while (ch > rp->end)                             \
00764                      ++rp;                                           \
00765                     if (ch >= rp->start)                             \
00766                      {                                               \
00767                        unsigned char res =                                  \
00768                          iso88597_from_ucs4[ch - 0xa0 + rp->idx];           \
00769                        if (res != '\0')                              \
00770                          {                                           \
00771                            if (set2 != ISO88597_set)                        \
00772                             {                                        \
00773                               if (__builtin_expect (outptr + 3 > outend,  \
00774                                                  0))                 \
00775                                 {                                    \
00776                                   res = __GCONV_FULL_OUTPUT;         \
00777                                   break;                             \
00778                                 }                                    \
00779                               *outptr++ = ESC;                       \
00780                               *outptr++ = '.';                       \
00781                               *outptr++ = 'F';                       \
00782                               set2 = ISO88597_set;                          \
00783                             }                                        \
00784                                                                      \
00785                            if (__builtin_expect (outptr + 3 > outend, 0))  \
00786                             {                                        \
00787                               res = __GCONV_FULL_OUTPUT;                    \
00788                               break;                                 \
00789                             }                                        \
00790                            *outptr++ = ESC;                                 \
00791                            *outptr++ = 'N';                                 \
00792                            *outptr++ = res;                                 \
00793                            res = __GCONV_OK;                                \
00794                            break;                                    \
00795                          }                                           \
00796                      }                                               \
00797                   }                                                  \
00798                                                                      \
00799                 break;                                               \
00800                                                                      \
00801               case japanese:                                                \
00802                                                                      \
00803                 /* Try JIS X 0201 Roman.  */                                \
00804                 written = ucs4_to_jisx0201 (ch, buf);                       \
00805                 if (written != __UNKNOWN_10646_CHAR                         \
00806                     && buf[0] > 0x20 && buf[0] < 0x80)               \
00807                   {                                                  \
00808                     if (set != JISX0201_Roman_set)                          \
00809                      {                                               \
00810                        if (__builtin_expect (outptr + 3 > outend, 0))      \
00811                          {                                           \
00812                            res = __GCONV_FULL_OUTPUT;                \
00813                            break;                                    \
00814                          }                                           \
00815                        *outptr++ = ESC;                              \
00816                        *outptr++ = '(';                              \
00817                        *outptr++ = 'J';                              \
00818                        set = JISX0201_Roman_set;                     \
00819                      }                                               \
00820                                                                      \
00821                     if (__builtin_expect (outptr + 1 > outend, 0))          \
00822                      {                                               \
00823                        res = __GCONV_FULL_OUTPUT;                           \
00824                        break;                                        \
00825                      }                                               \
00826                     *outptr++ = buf[0];                              \
00827                     res = __GCONV_OK;                                       \
00828                     break;                                           \
00829                   }                                                  \
00830                                                                      \
00831                 /* Try JIS X 0208.  */                               \
00832                 written = ucs4_to_jisx0208 (ch, buf, 2);                    \
00833                 if (written != __UNKNOWN_10646_CHAR)                        \
00834                   {                                                  \
00835                     if (set != JISX0208_1983_set)                           \
00836                      {                                               \
00837                        if (__builtin_expect (outptr + 3 > outend, 0))      \
00838                          {                                           \
00839                            res = __GCONV_FULL_OUTPUT;                \
00840                            break;                                    \
00841                          }                                           \
00842                        *outptr++ = ESC;                              \
00843                        *outptr++ = '$';                              \
00844                        *outptr++ = 'B';                              \
00845                        set = JISX0208_1983_set;                      \
00846                      }                                               \
00847                                                                      \
00848                     if (__builtin_expect (outptr + 2 > outend, 0))          \
00849                      {                                               \
00850                        res = __GCONV_FULL_OUTPUT;                           \
00851                        break;                                        \
00852                      }                                               \
00853                     *outptr++ = buf[0];                              \
00854                     *outptr++ = buf[1];                              \
00855                     res = __GCONV_OK;                                       \
00856                     break;                                           \
00857                   }                                                  \
00858                                                                      \
00859                 if (__builtin_expect (var == iso2022jp, 0))                 \
00860                   /* Don't use the other Japanese character sets.  */       \
00861                   break;                                             \
00862                                                                      \
00863                 /* Try JIS X 0212.  */                               \
00864                 written = ucs4_to_jisx0212 (ch, buf, 2);                    \
00865                 if (written != __UNKNOWN_10646_CHAR)                        \
00866                   {                                                  \
00867                     if (set != JISX0212_set)                                \
00868                      {                                               \
00869                        if (__builtin_expect (outptr + 4 > outend, 0))      \
00870                          {                                           \
00871                            res = __GCONV_FULL_OUTPUT;                \
00872                            break;                                    \
00873                          }                                           \
00874                        *outptr++ = ESC;                              \
00875                        *outptr++ = '$';                              \
00876                        *outptr++ = '(';                              \
00877                        *outptr++ = 'D';                              \
00878                        set = JISX0212_set;                                  \
00879                      }                                               \
00880                                                                      \
00881                     if (__builtin_expect (outptr + 2 > outend, 0))          \
00882                      {                                               \
00883                        res = __GCONV_FULL_OUTPUT;                           \
00884                        break;                                        \
00885                      }                                               \
00886                     *outptr++ = buf[0];                              \
00887                     *outptr++ = buf[1];                              \
00888                     res = __GCONV_OK;                                       \
00889                     break;                                           \
00890                   }                                                  \
00891                                                                      \
00892                 break;                                               \
00893                                                                      \
00894               case chinese:                                          \
00895                 assert (var == iso2022jp2);                                 \
00896                                                                      \
00897                 /* Try GB 2312.  */                                         \
00898                 written = ucs4_to_gb2312 (ch, buf, 2);               \
00899                 if (written != __UNKNOWN_10646_CHAR)                        \
00900                   {                                                  \
00901                     if (set != GB2312_set)                                  \
00902                      {                                               \
00903                        if (__builtin_expect (outptr + 3 > outend, 0))      \
00904                          {                                           \
00905                            res = __GCONV_FULL_OUTPUT;                \
00906                            break;                                    \
00907                          }                                           \
00908                        *outptr++ = ESC;                              \
00909                        *outptr++ = '$';                              \
00910                        *outptr++ = 'A';                              \
00911                        set = GB2312_set;                             \
00912                      }                                               \
00913                                                                      \
00914                     if (__builtin_expect (outptr + 2 > outend, 0))          \
00915                      {                                               \
00916                        res = __GCONV_FULL_OUTPUT;                           \
00917                        break;                                        \
00918                      }                                               \
00919                     *outptr++ = buf[0];                              \
00920                     *outptr++ = buf[1];                              \
00921                     res = __GCONV_OK;                                       \
00922                     break;                                           \
00923                   }                                                  \
00924                                                                      \
00925                 break;                                               \
00926                                                                      \
00927               case korean:                                           \
00928                 assert (var == iso2022jp2);                                 \
00929                                                                      \
00930                 /* Try KSC 5601.  */                                        \
00931                 written = ucs4_to_ksc5601 (ch, buf, 2);              \
00932                 if (written != __UNKNOWN_10646_CHAR)                        \
00933                   {                                                  \
00934                     if (set != KSC5601_set)                                 \
00935                      {                                               \
00936                        if (__builtin_expect (outptr + 4 > outend, 0))      \
00937                          {                                           \
00938                            res = __GCONV_FULL_OUTPUT;                \
00939                            break;                                    \
00940                          }                                           \
00941                        *outptr++ = ESC;                              \
00942                        *outptr++ = '$';                              \
00943                        *outptr++ = '(';                              \
00944                        *outptr++ = 'C';                              \
00945                        set = KSC5601_set;                            \
00946                      }                                               \
00947                                                                      \
00948                     if (__builtin_expect (outptr + 2 > outend, 0))          \
00949                      {                                               \
00950                        res = __GCONV_FULL_OUTPUT;                           \
00951                        break;                                        \
00952                      }                                               \
00953                     *outptr++ = buf[0];                              \
00954                     *outptr++ = buf[1];                              \
00955                     res = __GCONV_OK;                                       \
00956                     break;                                           \
00957                   }                                                  \
00958                                                                      \
00959                 break;                                               \
00960                                                                      \
00961               case other:                                            \
00962                 assert (var == iso2022jp2);                                 \
00963                                                                      \
00964                 /* Try JIS X 0201 Kana.  This is not officially part        \
00965                    of ISO-2022-JP-2, according to RFC 1554.  Therefore      \
00966                    we try this only after all other attempts.  */           \
00967                 written = ucs4_to_jisx0201 (ch, buf);                       \
00968                 if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80)      \
00969                   {                                                  \
00970                     if (set != JISX0201_Kana_set)                           \
00971                      {                                               \
00972                        if (__builtin_expect (outptr + 3 > outend, 0))      \
00973                          {                                           \
00974                            res = __GCONV_FULL_OUTPUT;                \
00975                            break;                                    \
00976                          }                                           \
00977                        *outptr++ = ESC;                              \
00978                        *outptr++ = '(';                              \
00979                        *outptr++ = 'I';                              \
00980                        set = JISX0201_Kana_set;                      \
00981                      }                                               \
00982                                                                      \
00983                     if (__builtin_expect (outptr + 1 > outend, 0))          \
00984                      {                                               \
00985                        res = __GCONV_FULL_OUTPUT;                           \
00986                        break;                                        \
00987                      }                                               \
00988                     *outptr++ = buf[0] - 0x80;                       \
00989                     res = __GCONV_OK;                                       \
00990                     break;                                           \
00991                   }                                                  \
00992                                                                      \
00993                 break;                                               \
00994                                                                      \
00995               default:                                               \
00996                 abort ();                                            \
00997               }                                                      \
00998            while (res == __GCONV_ILLEGAL_INPUT                              \
00999                  && (conversion_list = CVLIST_REST (conversion_list)) != 0);\
01000                                                                      \
01001            if (res == __GCONV_FULL_OUTPUT)                                  \
01002              {                                                              \
01003               result = res;                                          \
01004               break;                                                 \
01005              }                                                              \
01006                                                                      \
01007            if (res == __GCONV_ILLEGAL_INPUT)                                \
01008              {                                                              \
01009               STANDARD_TO_LOOP_ERR_HANDLER (4);                      \
01010              }                                                              \
01011          }                                                           \
01012       }                                                                     \
01013                                                                      \
01014     /* Now that we wrote the output increment the input pointer.  */        \
01015     inptr += 4;                                                             \
01016   }
/* Parameter macros for the loop produced by the #include of
   <iconv/loop.c> at the end of this group.  The comments below describe
   only what each expansion does; how loop.c uses them is not visible
   here.  */

/* Defined with an empty expansion.  NOTE(review): presumably asks loop.c
   to make the conversion flags available inside the loop, which the
   error-handler macro used in BODY likely needs -- confirm against
   iconv/loop.c.  */
01017 #define LOOP_NEED_FLAGS
/* Extra declarations, written with a leading comma so that they can be
   appended to an existing parameter list: `var' is the variant being
   converted (BODY compares it against iso2022jp2) and `setp' points to
   the int holding the packed state read and written by the macros
   below.  */
01018 #define EXTRA_LOOP_DECLS    , enum variant var, int *setp
/* Unpack *setp into three locals by masking: `set' with CURRENT_SEL_MASK,
   `set2' with CURRENT_ASSIGN_MASK and `tag' with CURRENT_TAG_MASK.  BODY
   changes `set' whenever it emits an escape sequence that selects another
   character set.  */
01019 #define INIT_PARAMS         int set = *setp & CURRENT_SEL_MASK;             \
01020                             int set2 = *setp & CURRENT_ASSIGN_MASK;         \
01021                             int tag = *setp & CURRENT_TAG_MASK;
/* Reload the same three locals from *setp, overwriting whatever values
   they currently hold.  NOTE(review): presumably used by loop.c when a
   conversion pass has to be redone from the saved state -- confirm.  */
01022 #define REINIT_PARAMS              do                                       \
01023                               {                                      \
01024                                 set = *setp & CURRENT_SEL_MASK;             \
01025                                 set2 = *setp & CURRENT_ASSIGN_MASK;         \
01026                                 tag = *setp & CURRENT_TAG_MASK;             \
01027                               }                                      \
01028                             while (0)
/* Pack the three locals back into *setp by OR-ing them together.  The
   expansion carries no trailing semicolon.  Assumes the three masks
   cover disjoint bits so that nothing is lost -- TODO confirm against the
   mask definitions.  */
01029 #define UPDATE_PARAMS              *setp = set | set2 | tag
/* Pull in the loop template, which consumes the definitions above.
   NOTE(review): presumably this emits the function named by TO_LOOP --
   confirm against iconv/loop.c.  */
01030 #include <iconv/loop.c>
01031 
01032 
01033 /* Now define the toplevel functions.  */
01034 #include <iconv/skeleton.c>