Back to index

glibc  2.9
ibm930.c
Go to the documentation of this file.
00001 /* Conversion from and to IBM930.
00002    Copyright (C) 2000-2002, 2008 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Masahide Washizawa <washi@yamato.ibm.co.jp>, 2000.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 #include <dlfcn.h>
00022 #include <stdint.h>
00023 #include <wchar.h>
00024 #include <byteswap.h>
00025 #include "ibm930.h"
00026 
/* The shift sequences for this charset (it does not use ESC).  Both are
   single bytes that occur in the IBM930 byte stream itself.  */
#define SI              0x0F  /* Shift In, host code to turn DBCS off.  */
#define SO              0x0E  /* Shift Out, host code to turn DBCS on.  */
00030 
/* Definitions used in the body of the `gconv' function.

   FROM_LOOP converts IBM930 to UCS4: its body reads one or two bytes per
   character and always writes four.  TO_LOOP converts UCS4 to IBM930: its
   body always reads four bytes and writes between one and three (a shift
   byte followed by a two-byte code is the longest output).  */
#define CHARSET_NAME                    "IBM930//"
#define FROM_LOOP                       from_ibm930
#define TO_LOOP                         to_ibm930
#define FROM_LOOP_MIN_NEEDED_FROM       1
#define FROM_LOOP_MAX_NEEDED_FROM       2
#define FROM_LOOP_MIN_NEEDED_TO         4
#define FROM_LOOP_MAX_NEEDED_TO         4
#define TO_LOOP_MIN_NEEDED_FROM         4
#define TO_LOOP_MAX_NEEDED_FROM         4
#define TO_LOOP_MIN_NEEDED_TO           1
#define TO_LOOP_MAX_NEEDED_TO           3
/* Declarations supplied through PREPARE_LOOP: CURCSP points at the state
   word that holds the current codeset, SAVE_CURCS is the slot that
   SAVE_RESET_STATE copies that word to and from.  */
#define PREPARE_LOOP \
  int save_curcs;                                                       \
  int *curcsp = &data->__statep->__count;
/* Extra actual argument for the loop functions; it pairs with the
   `int *curcsp' parameter declared by EXTRA_LOOP_DECLS.  */
#define EXTRA_LOOP_ARGS                 , curcsp
00047 
/* Definitions of initialization and destructor function.  Both switches
   are set to 1; they are presumably read by <iconv/skeleton.c>, which is
   included at the end of this file -- confirm there.  */
#define DEFINE_INIT     1
#define DEFINE_FINI     1
00051 
00052 
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The codeset lives in the state word above its low three bits, so the
   test masks with ~7 and the reset keeps only `& 7'.  When reading IBM930
   (FROM_DIRECTION) the state is simply cleared.  When writing IBM930 an
   SI byte has to be stored first; if OUTBUF has no room for it, STATUS is
   set to __GCONV_FULL_OUTPUT and the state is left untouched.  */
#define EMIT_SHIFT_TO_INIT \
  if ((data->__statep->__count & ~7) != sb)                             \
    {                                                                   \
      if (FROM_DIRECTION)                                               \
        data->__statep->__count &= 7;                                   \
      else                                                              \
        {                                                               \
          /* We are not in the initial state.  To switch back we have   \
             to emit `SI'.  */                                          \
          if (__builtin_expect (outbuf >= outend, 0))                   \
            /* We don't have enough room in the output buffer.  */      \
            status = __GCONV_FULL_OUTPUT;                               \
          else                                                          \
            {                                                           \
              /* Write out the shift sequence.  */                      \
              *outbuf++ = SI;                                           \
              data->__statep->__count &= 7;                             \
            }                                                           \
        }                                                               \
    }
00076 
00077 
/* Since we might have to reset input pointer we must be able to save
   and restore the state.  A non-zero SAVE copies the state word at CURCSP
   into SAVE_CURCS; a zero SAVE writes the saved value back.  The
   expansion ends without a semicolon, so the user supplies it.  */
#define SAVE_RESET_STATE(Save) \
  if (Save)                                                             \
    save_curcs = *curcsp;                                               \
  else                                                                  \
    *curcsp = save_curcs
00085 
00086 
/* Current codeset type.  The value is kept in the conversion state word
   with the low three bits masked off (`& ~7'), so `db' must not use any
   of those bits.  */
enum
{
  sb = 0,       /* Single-byte mode; this is also the initial state.  */
  db = 64       /* Double-byte mode, entered with SO and left with SI.  */
};
00093 
00094 
/* First, define the conversion function from IBM-930 to UCS4.

   Each pass through BODY handles one of three things at INPTR: a shift
   byte (SO or SI) that switches CURCS between `sb' and `db' and produces
   no output, one byte looked up in the single-byte table, or a two-byte
   code looked up in the double-byte table.  Every converted character is
   written as one 32-bit value with put32.  RESULT is set before each
   `break'; the error-handler macros come from <iconv/loop.c>.  */
#define MIN_NEEDED_INPUT    FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT    FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT   FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT   FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT             FROM_LOOP
#define BODY \
  {                                                                     \
    uint32_t ch = *inptr;                                               \
    uint32_t res;                                                       \
                                                                        \
    if (__builtin_expect (ch, 0) == SO)                                 \
      {                                                                 \
        /* Shift OUT, change to DBCS converter.  SO while already in    \
           double-byte mode is rejected.  */                            \
        if (curcs == db)                                                \
          {                                                             \
            result = __GCONV_ILLEGAL_INPUT;                             \
            break;                                                      \
          }                                                             \
        curcs = db;                                                     \
        ++inptr;                                                        \
        continue;                                                       \
      }                                                                 \
    else if (__builtin_expect (ch, 0) == SI)                            \
      {                                                                 \
        /* Shift IN, change to SBCS converter.  SI while already in     \
           single-byte mode is rejected.  */                            \
        if (curcs == sb)                                                \
          {                                                             \
            result = __GCONV_ILLEGAL_INPUT;                             \
            break;                                                      \
          }                                                             \
        curcs = sb;                                                     \
        ++inptr;                                                        \
        continue;                                                       \
      }                                                                 \
                                                                        \
    if (curcs == sb)                                                    \
      {                                                                 \
        /* Use the IBM930 table for single byte.  A zero entry means    \
           "no mapping" for every byte except NUL itself.  */           \
        res = __ibm930sb_to_ucs4[ch];                                   \
        if (__builtin_expect (res, L'\1') == L'\0' && ch != '\0')       \
          {                                                             \
            /* This is an illegal character.  */                        \
            STANDARD_FROM_LOOP_ERR_HANDLER (1);                         \
          }                                                             \
        else                                                            \
          {                                                             \
            put32 (outptr, res);                                        \
            outptr += 4;                                                \
          }                                                             \
        ++inptr;                                                        \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* Use the IBM930 table for double byte. */                     \
        const struct gap *rp2 = __ibm930db_to_ucs4_idx;                 \
                                                                        \
        assert (curcs == db);                                           \
                                                                        \
        if (__builtin_expect (inptr + 1 >= inend, 0))                   \
          {                                                             \
            /* The second character is not available.  Store the        \
               intermediate result. */                                  \
            result = __GCONV_INCOMPLETE_INPUT;                          \
            break;                                                      \
          }                                                             \
                                                                        \
        /* Combine both bytes, then walk the range table to the first   \
           entry whose END is not below the code.  */                   \
        ch = (ch * 0x100) + inptr[1];                                   \
        while (ch > rp2->end)                                           \
          ++rp2;                                                        \
                                                                        \
        /* For a code above every mapped range the walk stops on the    \
           closing entry of the table.  NOTE(review): that entry is     \
           expected to have START == 0xffff (it is defined in           \
           ibm930.h -- confirm there).  The code 0xffff is not below    \
           that START, so without the first test it would be used to    \
           index the mapping table outside its bounds.  */              \
        if (__builtin_expect (rp2->start == 0xffff, 0)                  \
            || __builtin_expect (ch < rp2->start, 0)                    \
            || (res = __ibm930db_to_ucs4[ch + rp2->idx],                \
                __builtin_expect (res, L'\1') == L'\0' && ch != '\0'))  \
          {                                                             \
            /* This is an illegal character.  */                        \
            STANDARD_FROM_LOOP_ERR_HANDLER (2);                         \
          }                                                             \
        else                                                            \
          {                                                             \
            put32 (outptr, res);                                        \
            outptr += 4;                                                \
          }                                                             \
        inptr += 2;                                                     \
      }                                                                 \
  }
#define LOOP_NEED_FLAGS
/* The loop function receives the state word by pointer, works on a local
   copy of the codeset bits and writes the copy back via UPDATE_PARAMS.  */
#define EXTRA_LOOP_DECLS    , int *curcsp
#define INIT_PARAMS         int curcs = *curcsp & ~7
#define UPDATE_PARAMS       *curcsp = curcs
00185 #include <iconv/loop.c>
00186 
/* Next, define the other direction (UCS4 to IBM-930).

   Each pass through BODY reads one 32-bit character with get32.  It is
   looked up in the single-byte table first and, if that has no mapping,
   in the double-byte table.  SO is written in front of a double-byte
   code when CURCS is `sb', SI in front of a single byte when CURCS is
   `db'.  CURCS is changed in the same step that stores the shift byte,
   so a later `break' with __GCONV_FULL_OUTPUT leaves the state in
   agreement with the bytes already written.  */
#define MIN_NEEDED_INPUT    TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT    TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT   TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT   TO_LOOP_MAX_NEEDED_TO
#define LOOPFCT             TO_LOOP
#define BODY \
  {                                                                     \
    uint32_t ch = get32 (inptr);                                        \
    const struct gap *rp1 = __ucs4_to_ibm930sb_idx;                     \
    const struct gap *rp2 = __ucs4_to_ibm930db_idx;                     \
    const char *cp;                                                     \
                                                                        \
    /* Nothing at or above 0xffff is looked up in the tables.  */       \
    if (__builtin_expect (ch >= 0xffff, 0))                             \
      {                                                                 \
        UNICODE_TAG_HANDLER (ch, 4);                                    \
                                                                        \
        STANDARD_TO_LOOP_ERR_HANDLER (4);                               \
      }                                                                 \
                                                                        \
    while (ch > rp1->end)                                               \
      ++rp1;                                                            \
                                                                        \
    /* Use the UCS4 table for single byte.  A zero first byte means     \
       "no mapping" for every character except NUL itself.  */          \
    if (__builtin_expect (ch < rp1->start, 0)                           \
        || (cp = __ucs4_to_ibm930sb[ch + rp1->idx],                     \
            __builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0'))    \
      {                                                                 \
        /* Use the UCS4 table for double byte. */                       \
        while (ch > rp2->end)                                           \
          ++rp2;                                                        \
                                                                        \
        if (__builtin_expect (ch < rp2->start, 0)                       \
            || (cp = __ucs4_to_ibm930db[ch + rp2->idx],                 \
                __builtin_expect (cp[0], L'\1')== L'\0' && ch != '\0')) \
          {                                                             \
            /* This is an illegal character.  */                        \
            STANDARD_TO_LOOP_ERR_HANDLER (4);                           \
          }                                                             \
        else                                                            \
          {                                                             \
            if (curcs == sb)                                            \
              {                                                         \
                if (__builtin_expect (outptr + 1 > outend, 0))          \
                  {                                                     \
                    result = __GCONV_FULL_OUTPUT;                       \
                    break;                                              \
                  }                                                     \
                *outptr++ = SO;                                         \
                curcs = db;                                             \
              }                                                         \
                                                                        \
            if (__builtin_expect (outptr + 2 > outend, 0))              \
              {                                                         \
                result = __GCONV_FULL_OUTPUT;                           \
                break;                                                  \
              }                                                         \
            *outptr++ = cp[0];                                          \
            *outptr++ = cp[1];                                          \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (curcs == db)                                                \
          {                                                             \
            if (__builtin_expect (outptr + 1 > outend, 0))              \
              {                                                         \
                result = __GCONV_FULL_OUTPUT;                           \
                break;                                                  \
              }                                                         \
            *outptr++ = SI;                                             \
            /* Record the switch now.  If the character byte does not   \
               fit below, the loop stops with SI already written; a     \
               state still saying `db' would make the next call write   \
               SI a second time.  */                                    \
            curcs = sb;                                                 \
          }                                                             \
                                                                        \
        if (__builtin_expect (outptr + 1 > outend, 0))                  \
          {                                                             \
            result = __GCONV_FULL_OUTPUT;                               \
            break;                                                      \
          }                                                             \
        /* 0x7e and 0x5c are written as fixed bytes; the table entry    \
           only decides that they are single-byte characters.  */       \
        if (ch == 0x7e)                                                 \
          *outptr++ = 0xa1;                                             \
        else if (ch == 0x5c)                                            \
          *outptr++ = 0x5b;                                             \
        else                                                            \
          *outptr++ = cp[0];                                            \
      }                                                                 \
                                                                        \
    /* Now that we wrote the output increment the input pointer.  */    \
    inptr += 4;                                                         \
  }
#define LOOP_NEED_FLAGS
/* The loop function receives the state word by pointer, works on a local
   copy of the codeset bits, reloads it with REINIT_PARAMS and writes it
   back with UPDATE_PARAMS.  */
#define EXTRA_LOOP_DECLS    , int *curcsp
#define INIT_PARAMS         int curcs = *curcsp & ~7
#define REINIT_PARAMS       curcs = *curcsp & ~7
#define UPDATE_PARAMS       *curcsp = curcs
00282 #include <iconv/loop.c>
00283 
00284 /* Now define the toplevel functions.  */
00285 #include <iconv/skeleton.c>