Back to index

glibc  2.9
iso-2022-kr.c
Go to the documentation of this file.
00001 /* Conversion module for ISO-2022-KR.
00002    Copyright (C) 1998, 1999, 2000-2002, 2007, 2008
00003    Free Software Foundation, Inc.
00004    This file is part of the GNU C Library.
00005    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
00006 
00007    The GNU C Library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Lesser General Public
00009    License as published by the Free Software Foundation; either
00010    version 2.1 of the License, or (at your option) any later version.
00011 
00012    The GNU C Library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Lesser General Public License for more details.
00016 
00017    You should have received a copy of the GNU Lesser General Public
00018    License along with the GNU C Library; if not, write to the Free
00019    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00020    02111-1307 USA.  */
00021 
00022 #include <dlfcn.h>
00023 #include <gconv.h>
00024 #include <stdint.h>
00025 #include <string.h>
00026 #include "ksc5601.h"
00027 
00028 #include <assert.h>
00029 
00030 /* This makes obvious what everybody knows: 0x1b is the Esc character.
         In this file it is only used as the first byte of the four-byte
         designator `Esc $ ) C'.  */
00031 #define ESC   0x1b
00032 
00033 /* The shift bytes for this charset.  Switching between the two character
         sets is done with these single bytes, not with ESC sequences: in the
         decoding loop SO selects KS C 5601 and SI selects ASCII.  */
00034 #define SI    0x0f
00035 #define SO    0x0e
00036 
00037 /* Definitions used in the body of the `gconv' function.
         FROM_LOOP and TO_LOOP are the names given to LOOPFCT before each of
         the two inclusions of <iconv/loop.c> further down.  The FROM side
         (ISO-2022-KR) needs between 1 and 4 bytes per step; the TO side is
         handled in fixed 4-byte units (see the get32/put32 calls in the
         loop bodies).  */
00038 #define CHARSET_NAME        "ISO-2022-KR//"
00039 #define DEFINE_INIT         1
00040 #define DEFINE_FINI         1
00041 #define FROM_LOOP           from_iso2022kr_loop
00042 #define TO_LOOP                    to_iso2022kr_loop
00043 #define MIN_NEEDED_FROM            1
00044 #define MAX_NEEDED_FROM            4
00045 #define MIN_NEEDED_TO              4
00046 #define MAX_NEEDED_TO              4
      /* PREPARE_LOOP declares the locals shared with SAVE_RESET_STATE and
         EXTRA_LOOP_ARGS: `save_set' and `setp', the latter pointing at the
         COUNT field of the conversion state.  When converting towards
         ISO-2022-KR (!FROM_DIRECTION), outside of internal use and on the
         very first invocation, it also writes the four-byte designator
         `Esc $ ) C' to the output.  If fewer than four bytes are free it
         returns __GCONV_FULL_OUTPUT without writing anything.  */
00047 #define PREPARE_LOOP \
00048   int save_set;                                                             \
00049   int *setp = &data->__statep->__count;                                     \
00050   if (!FROM_DIRECTION && !data->__internal_use                              \
00051       && data->__invocation_counter == 0)                            \
00052     {                                                                \
00053       /* Emit the designator sequence.  */                                  \
00054       if (outbuf + 4 > outend)                                              \
00055        return __GCONV_FULL_OUTPUT;                                   \
00056                                                                      \
00057       *outbuf++ = ESC;                                                      \
00058       *outbuf++ = '$';                                                      \
00059       *outbuf++ = ')';                                                      \
00060       *outbuf++ = 'C';                                                      \
00061     }
      /* Extra actual argument handed to the loop functions; it matches the
         `int *setp' parameter added through EXTRA_LOOP_DECLS.  */
00062 #define EXTRA_LOOP_ARGS            , setp
00063 
00064 
00065 /* The COUNT element of the state keeps track of the currently selected
00066    character set.  The possible values are listed below.  KSC5601_set is
         8, so the three low bits of COUNT are not touched by the set marker;
         EMIT_SHIFT_TO_INIT keeps those bits (`&= 7') when it resets the
         input state.  NOTE(review): presumably the skeleton uses the low
         bits for buffered partial input -- confirm in <iconv/skeleton.c>.  */
00067 enum
00068 {
00069   ASCII_set = 0,
00070   KSC5601_set = 8
00071 };
00072 
00073 
00074 /* Since this is a stateful encoding we have to provide code which resets
00075    the output state to the initial state.  This has to be done during the
00076    flushing.
         Nothing is done when COUNT already equals ASCII_set.  When decoding
         (FROM_DIRECTION) only the set marker in COUNT is cleared and no
         output is produced.  When encoding, a single SI byte is written and
         COUNT becomes ASCII_set; if the output buffer has no room left,
         `status' is set to __GCONV_FULL_OUTPUT and the state is left
         untouched.  */
00077 #define EMIT_SHIFT_TO_INIT \
00078   if (data->__statep->__count != ASCII_set)                                 \
00079     {                                                                \
00080       if (FROM_DIRECTION)                                            \
00081        {                                                             \
00082          /* It's easy, we don't have to emit anything, we just reset the     \
00083             state for the input.  */                                        \
00084          data->__statep->__count &= 7;                                      \
00085          data->__statep->__count |= ASCII_set;                              \
00086        }                                                             \
00087       else                                                           \
00088        {                                                             \
00089          /* We are not in the initial state.  To switch back we have        \
00090             to emit `SI'.  */                                               \
00091          if (__builtin_expect (outbuf == outend, 0))                        \
00092            /* We don't have enough room in the output buffer.  */           \
00093            status = __GCONV_FULL_OUTPUT;                             \
00094          else                                                        \
00095            {                                                         \
00096              /* Write out the shift sequence.  */                           \
00097              *outbuf++ = SI;                                                \
00098              data->__statep->__count = ASCII_set;                           \
00099            }                                                         \
00100        }                                                             \
00101     }
00102 
00103 
00104 /* Since we might have to reset the input pointer we must be able to save
00105    and restore the state.  A nonzero SAVE copies the current set into
         `save_set'; a zero SAVE writes the saved value back through `setp'.
         Both variables are the ones declared by PREPARE_LOOP.  The expansion
         ends without a semicolon.  */
00106 #define SAVE_RESET_STATE(Save) \
00107   if (Save)                                                          \
00108     save_set = *setp;                                                       \
00109   else                                                               \
00110     *setp = save_set
00111 
00112 
00113 /* First define the conversion function from ISO-2022-KR to UCS4.
         Each pass of BODY looks at one input byte:
         - bytes above 0x7f are rejected through the standard error handler;
         - the complete sequence `Esc $ ) C' is skipped, a truncated one
           yields __GCONV_INCOMPLETE_INPUT;
         - SO and SI are consumed and only switch the current set;
         - anything else is converted according to the current set (ASCII
           bytes are passed through unchanged, KS C 5601 goes through
           ksc5601_to_ucs4) and stored with put32, advancing the output
           pointer by four bytes.  */
00114 #define MIN_NEEDED_INPUT    MIN_NEEDED_FROM
00115 #define MAX_NEEDED_INPUT    MAX_NEEDED_FROM
00116 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_TO
00117 #define LOOPFCT                    FROM_LOOP
00118 #define BODY \
00119   {                                                                  \
00120     uint32_t ch = *inptr;                                            \
00121                                                                      \
00122     /* This is a 7bit character set, disallow all 8bit characters.  */             \
00123     if (__builtin_expect (ch > 0x7f, 0))                             \
00124       STANDARD_FROM_LOOP_ERR_HANDLER (1);                            \
00125                                                                      \
00126     /* Recognize escape sequences.  */                                      \
00127     if (__builtin_expect (ch, 0) == ESC)                             \
00128       {                                                                     \
00129        /* We don't really have to handle escape sequences since all the      \
00130           switching is done using the SI and SO bytes.  But we have to             \
00131           recognize `Esc $ ) C' since this is a kind of flag for this       \
00132           encoding.  We simply ignore it.  */                               \
00133        if (__builtin_expect (inptr + 2 > inend, 0)                          \
00134            || (inptr[1] == '$'                                              \
00135               && (__builtin_expect (inptr + 3 > inend, 0)                   \
00136                   || (inptr[2] == ')'                                       \
00137                      && __builtin_expect (inptr + 4 > inend, 0)))))         \
00138          {                                                           \
00139            result = __GCONV_INCOMPLETE_INPUT;                               \
00140            break;                                                    \
00141          }                                                           \
00142        if (inptr[1] == '$' && inptr[2] == ')' && inptr[3] == 'C')           \
00143          {                                                           \
00144            /* Yeah, yeah, we know this is ISO 2022-KR.  */                  \
00145            inptr += 4;                                                      \
00146            continue;                                                 \
00147          }                                                           \
             /* Any other bytes following ESC are not consumed here; the ESC   \
                byte is then handled below like an ordinary character of the   \
                current set.  */                                               \
00148       }                                                                     \
00149     else if (__builtin_expect (ch, 0) == SO)                                \
00150       {                                                                     \
00151        /* Switch to use KSC.  */                                     \
00152        ++inptr;                                                      \
00153        set = KSC5601_set;                                            \
00154        continue;                                                     \
00155       }                                                                     \
00156     else if (__builtin_expect (ch, 0) == SI)                                \
00157       {                                                                     \
00158        /* Switch to use ASCII.  */                                   \
00159        ++inptr;                                                      \
00160        set = ASCII_set;                                              \
00161        continue;                                                     \
00162       }                                                                     \
00163                                                                      \
00164     if (set == ASCII_set)                                            \
00165       {                                                                     \
00166        /* Almost done, just advance the input pointer.  */                  \
00167        ++inptr;                                                      \
00168       }                                                                     \
00169     else                                                             \
00170       {                                                                     \
00171        assert (set == KSC5601_set);                                         \
00172                                                                      \
00173        /* Use the KSC 5601 table.  */                                       \
00174        ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0);              \
00175                                                                      \
             /* A zero result is reported as incomplete input, the value      \
                __UNKNOWN_10646_CHAR as an invalid one-byte input.  */         \
00176        if (__builtin_expect (ch == 0, 0))                            \
00177          {                                                           \
00178            result = __GCONV_INCOMPLETE_INPUT;                               \
00179            break;                                                    \
00180          }                                                           \
00181        else if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0))           \
00182          {                                                           \
00183            STANDARD_FROM_LOOP_ERR_HANDLER (1);                              \
00184          }                                                           \
00185       }                                                                     \
00186                                                                      \
00187     put32 (outptr, ch);                                                     \
00188     outptr += 4;                                                     \
00189   }
      /* INIT_PARAMS loads the current set from *setp into the local `set';
         UPDATE_PARAMS stores it back.  NOTE(review): the places where
         <iconv/loop.c> expands these macros are not visible here.  */
00190 #define LOOP_NEED_FLAGS
00191 #define EXTRA_LOOP_DECLS    , int *setp
00192 #define INIT_PARAMS         int set = *setp
00193 #define UPDATE_PARAMS              *setp = set
00194 #include <iconv/loop.c>
00195 
00196 
00197 /* Next, define the other direction.
         Each pass of BODY reads one 32-bit value with get32:
         - values below 0x80 are written as a single byte, preceded by SI
           when the current set is not ASCII;
         - all other values go through ucs4_to_ksc5601; a successful
           conversion yields two bytes, preceded by SO when the current set
           is not KS C 5601;
         - a value ucs4_to_ksc5601 cannot convert is passed to
           UNICODE_TAG_HANDLER and then to the standard error handler.
         The input pointer only advances by four once the character has
         been written.  Unlike the other direction this one also defines
         REINIT_PARAMS, which reloads `set' from *setp.  NOTE(review): where
         REINIT_PARAMS gets expanded is not visible here -- confirm in
         <iconv/loop.c>.  */
00198 #define MIN_NEEDED_INPUT    MIN_NEEDED_TO
00199 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_FROM
00200 #define MAX_NEEDED_OUTPUT   MAX_NEEDED_FROM
00201 #define LOOPFCT                    TO_LOOP
00202 #define BODY \
00203   {                                                                  \
00204     uint32_t ch = get32 (inptr);                                     \
00205                                                                      \
00206     /* First see whether we can write the character using the currently            \
00207        selected character set.  */                                   \
00208     if (ch < 0x80)                                                   \
00209       {                                                                     \
00210        if (set != ASCII_set)                                                \
00211          {                                                           \
                 /* The shift byte is written and `set' updated before the    \
                    room for the character itself is checked; on a full       \
                    buffer the input pointer is left where it is.  */         \
00212            *outptr++ = SI;                                           \
00213            set = ASCII_set;                                          \
00214            if (__builtin_expect (outptr == outend, 0))                      \
00215              {                                                              \
00216               result = __GCONV_FULL_OUTPUT;                                 \
00217               break;                                                 \
00218              }                                                              \
00219          }                                                           \
00220                                                                      \
00221        *outptr++ = ch;                                                      \
00222       }                                                                     \
00223     else                                                             \
00224       {                                                                     \
00225        unsigned char buf[2];                                                \
00226        /* Fake initialization to keep gcc quiet.  */                        \
00227        asm ("" : "=m" (buf));                                               \
00228                                                                      \
00229        size_t written = ucs4_to_ksc5601 (ch, buf, 2);                       \
00230        if (__builtin_expect (written, 0) == __UNKNOWN_10646_CHAR)           \
00231          {                                                           \
00232            UNICODE_TAG_HANDLER (ch, 4);                              \
00233                                                                      \
00234            /* Illegal character.  */                                        \
00235            STANDARD_TO_LOOP_ERR_HANDLER (4);                                \
00236          }                                                           \
00237        else                                                          \
00238          {                                                           \
00239            assert (written == 2);                                    \
00240                                                                      \
00241            /* We use KSC 5601.  */                                   \
00242            if (set != KSC5601_set)                                   \
00243              {                                                              \
00244               *outptr++ = SO;                                               \
00245               set = KSC5601_set;                                     \
00246              }                                                              \
00247                                                                      \
                 /* As above: a needed SO has already been written when the   \
                    room for the two character bytes is checked.  */          \
00248            if (__builtin_expect (outptr + 2 > outend, 0))                   \
00249              {                                                              \
00250               result = __GCONV_FULL_OUTPUT;                                 \
00251               break;                                                 \
00252              }                                                              \
00253                                                                      \
00254            *outptr++ = buf[0];                                              \
00255            *outptr++ = buf[1];                                              \
00256          }                                                           \
00257       }                                                                     \
00258                                                                      \
00259     /* Now that we wrote the output increment the input pointer.  */        \
00260     inptr += 4;                                                             \
00261   }
00262 #define LOOP_NEED_FLAGS
00263 #define EXTRA_LOOP_DECLS    , int *setp
00264 #define INIT_PARAMS         int set = *setp
00265 #define REINIT_PARAMS              set = *setp
00266 #define UPDATE_PARAMS              *setp = set
00267 #include <iconv/loop.c>
00268 
00269 
00270 /* Now define the toplevel functions.  */
00271 #include <iconv/skeleton.c>