Back to index

glibc  2.9
ibm935.c
Go to the documentation of this file.
00001 /* Conversion from and to IBM935
00002    Copyright (C) 2000-2002, 2008 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Masahide Washizawa <washi@yamato.ibm.co.jp>, 2000.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 #include <dlfcn.h>
00022 #include <stdint.h>
00023 #include <wchar.h>
00024 #include <byteswap.h>
00025 #include "ibm935.h"
00026 
00027 /* The shift sequences for this charset (it does not use ESC).  */
00028 #define SI           0x0F  /* Shift In, host code to turn DBCS off.  */
00029 #define SO           0x0E  /* Shift Out, host code to turn DBCS on.  */
00030 
00031 /* Definitions used in the body of the `gconv' function.  The byte counts describe one step of the loops defined below.  */
00032 #define CHARSET_NAME "IBM935//"
00033 #define FROM_LOOP    from_ibm935
00034 #define TO_LOOP             to_ibm935
00035 #define FROM_LOOP_MIN_NEEDED_FROM  1  /* A single-byte character or a shift code.  */
00036 #define FROM_LOOP_MAX_NEEDED_FROM  2  /* A double-byte character.  */
00037 #define FROM_LOOP_MIN_NEEDED_TO           4  /* One UCS4 value.  */
00038 #define FROM_LOOP_MAX_NEEDED_TO           4
00039 #define TO_LOOP_MIN_NEEDED_FROM           4  /* One UCS4 value.  */
00040 #define TO_LOOP_MAX_NEEDED_FROM           4
00041 #define TO_LOOP_MIN_NEEDED_TO             1  /* A single-byte character.  */
00042 #define TO_LOOP_MAX_NEEDED_TO             3  /* SO followed by a double-byte character.  */
00043 #define PREPARE_LOOP \
00044   int save_curcs;  /* Backup slot written and read by SAVE_RESET_STATE.  */ \
00045   int *curcsp = &data->__statep->__count;  /* The shift state is kept in __count.  */
00046 #define EXTRA_LOOP_ARGS            , curcsp  /* Hands the state pointer to both loop functions.  */
00047 
00048 /* Request the initialization and destructor functions.  NOTE(review): presumably honoured by <iconv/skeleton.c>, included last in this file -- confirm there.  */
00049 #define DEFINE_INIT  1
00050 #define DEFINE_FINI  1
00051 
00052 
00053 /* Since this is a stateful encoding we have to provide code which resets
00054    the output state to the initial state.  This has to be done during the
00055    flushing.  Decoding only clears the shift bits of __count; encoding must first write SI to the output.  */
00056 #define EMIT_SHIFT_TO_INIT \
00057   if ((data->__statep->__count & ~7) != sb)                                 \
00058     {                                                                \
00059       if (FROM_DIRECTION)                                            \
00060        data->__statep->__count &= 7;  /* Drop the shift state, keep the low three bits.  */ \
00061       else                                                           \
00062        {                                                             \
00063          /* We are not in the initial state.  To switch back we have        \
00064             to emit `SI'.  */                                               \
00065          if (__builtin_expect (outbuf >= outend, 0))                        \
00066            /* No room for SI: report it and leave the state untouched.  */  \
00067            status = __GCONV_FULL_OUTPUT;                             \
00068          else                                                        \
00069            {                                                         \
00070              /* Write out the shift sequence, then record the initial state.  */ \
00071              *outbuf++ = SI;                                                \
00072              data->__statep->__count &= 7;                                  \
00073            }                                                         \
00074        }                                                             \
00075     }
00076 
00077 
00078 /* Since we might have to reset input pointer we must be able to save
00079    and restore the state: a true Save copies *curcsp into save_curcs, a false one copies it back.  */
00080 #define SAVE_RESET_STATE(Save) \
00081   if (Save)                                                          \
00082     save_curcs = *curcsp;                                            \
00083   else                                                               \
00084     *curcsp = save_curcs
00085 
00086 
00087 /* Current codeset type.  The code reads it from __count with the low three bits masked off (& ~7).  */
00088 enum
00089 {
00090   sb = 0,  /* Single-byte mode; this is also the initial state.  */
00091   db = 64  /* Double-byte mode, entered with SO and left with SI.  */
00092 };
00093 
00094 /* First, define the conversion function from IBM-935 to UCS4.  */
00095 #define MIN_NEEDED_INPUT    FROM_LOOP_MIN_NEEDED_FROM
00096 #define MAX_NEEDED_INPUT    FROM_LOOP_MAX_NEEDED_FROM
00097 #define MIN_NEEDED_OUTPUT   FROM_LOOP_MIN_NEEDED_TO
00098 #define MAX_NEEDED_OUTPUT   FROM_LOOP_MAX_NEEDED_TO
00099 #define LOOPFCT             FROM_LOOP
00100 #define BODY \
00101   {                                                                  \
00102     uint32_t ch = *inptr;                                            \
00103     uint32_t res;                                                    \
00104                                                                      \
00105     if (__builtin_expect(ch, 0) == SO)                                      \
00106       {                                                                     \
00107        /* Shift OUT, change to DBCS converter.  */                          \
00108        if (curcs == db)                                              \
00109          {                                                           \
00110            result = __GCONV_ILLEGAL_INPUT;                                  \
00111            break;                                                    \
00112          }                                                           \
00113        curcs = db;                                                   \
00114        ++inptr;                                                      \
00115        continue;                                                     \
00116       }                                                                     \
00117     else if (__builtin_expect (ch, 0) == SI)                                \
00118       {                                                                     \
00119        /* Shift IN, change to SBCS converter.  */                           \
00120        if (curcs == sb)                                              \
00121          {                                                           \
00122            result = __GCONV_ILLEGAL_INPUT;                                  \
00123            break;                                                    \
00124          }                                                           \
00125        curcs = sb;                                                   \
00126        ++inptr;                                                      \
00127        continue;                                                     \
00128       }                                                                     \
00129                                                                      \
00130     if (curcs == sb)                                                 \
00131       {                                                                     \
00132        /* Use the IBM935 table for single byte.  */                         \
00133        res = __ibm935sb_to_ucs4[ch];                                        \
00134        if (__builtin_expect (res, L'\1') == L'\0' && ch != '\0')            \
00135          {                                                           \
00136            /* This is an illegal character.  */                      \
00137            STANDARD_FROM_LOOP_ERR_HANDLER (1);                              \
00138          }                                                           \
00139        else                                                          \
00140          {                                                           \
00141            put32 (outptr, res);                                      \
00142            outptr += 4;                                              \
00143          }                                                           \
00144        ++inptr;                                                      \
00145       }                                                                     \
00146     else                                                             \
00147       {                                                                     \
00148        const struct gap *rp2 = __ibm935db_to_ucs4_idx;                      \
00149                                                                      \
00150        assert (curcs == db);                                                \
00151                                                                      \
00152        /* Use the IBM935 table for double byte.  */                         \
00153        if (__builtin_expect (inptr + 1 >= inend, 0))                        \
00154          {                                                           \
00155            /* The second character is not available.                        \
00156               Store the intermediate result. */                      \
00157            result = __GCONV_INCOMPLETE_INPUT;                               \
00158            break;                                                    \
00159          }                                                           \
00160                                                                      \
00161        ch = (ch * 0x100) + inptr[1];                                        \
00162        while (ch > rp2->end)                                                \
00163          ++rp2;                                                      \
00164                                                                      \
00165        if (__builtin_expect (rp2 == NULL, 0)                                \
00166            || __builtin_expect (ch < rp2->start, 0)                         \
00167            || (res = __ibm935db_to_ucs4[ch + rp2->idx],              \
00168               __builtin_expect (res, L'\1') == L'\0' && ch != '\0'))        \
00169          {                                                           \
00170            /* This is an illegal character.  */                      \
00171            STANDARD_FROM_LOOP_ERR_HANDLER (2);                              \
00172          }                                                           \
00173        else                                                          \
00174          {                                                           \
00175            put32 (outptr, res);                                      \
00176            outptr += 4;                                              \
00177          }                                                           \
00178        inptr += 2;                                                   \
00179       }                                                                     \
00180   }
00181 #define LOOP_NEED_FLAGS
00182 #define EXTRA_LOOP_DECLS    , int *curcsp
00183 #define INIT_PARAMS         int curcs = *curcsp & ~7
00184 #define UPDATE_PARAMS              *curcsp = curcs
00185 #include <iconv/loop.c>
00186 
00187 /* Next, define the other direction (UCS4 to IBM-935).  curcs is updated as soon as a shift byte is written, so a stop on a full buffer leaves state and output in agreement.  */
00188 #define MIN_NEEDED_INPUT    TO_LOOP_MIN_NEEDED_FROM
00189 #define MAX_NEEDED_INPUT    TO_LOOP_MAX_NEEDED_FROM
00190 #define MIN_NEEDED_OUTPUT   TO_LOOP_MIN_NEEDED_TO
00191 #define MAX_NEEDED_OUTPUT   TO_LOOP_MAX_NEEDED_TO
00192 #define LOOPFCT                    TO_LOOP
00193 #define BODY \
00194   {                                                                  \
00195     uint32_t ch = get32 (inptr);                                     \
00196     const struct gap *rp1 = __ucs4_to_ibm935sb_idx;                         \
00197     const struct gap *rp2 = __ucs4_to_ibm935db_idx;                         \
00198     const char *cp;                                                  \
00199                                                                      \
00200     if (__builtin_expect (ch >= 0xffff, 0))                                 \
00201       {                                                                     \
00202        UNICODE_TAG_HANDLER (ch, 4);                                         \
00203                                                                      \
00204        STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
00205       }                                                                     \
00206                                                                      \
00207     while (ch > rp1->end)                                            \
00208       ++rp1;                                                         \
00209                                                                      \
00210     /* Use the UCS4 table for single byte.  */                              \
00211     if (__builtin_expect (ch < rp1->start, 0)                               \
00212        || (cp = __ucs4_to_ibm935sb[ch + rp1->idx],                          \
00213            __builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0'))         \
00214       {                                                                     \
00215        /* No single-byte mapping; use the UCS4 table for double byte.  */   \
00216        while (ch > rp2->end)                                                \
00217          ++rp2;                                                      \
00218                                                                      \
00219        if (__builtin_expect (ch < rp2->start, 0)                     \
00220            || (cp = __ucs4_to_ibm935db[ch + rp2->idx],                      \
00221               __builtin_expect (cp[0], L'\1')==L'\0' && ch != '\0'))        \
00222          {                                                           \
00223            /* Neither table maps it: this is an illegal character.  */      \
00224            STANDARD_TO_LOOP_ERR_HANDLER (4);                                \
00225          }                                                           \
00226        else                                                          \
00227          {                                                           \
00228            if (curcs == sb)                                          \
00229              {                                                              \
00230               if (__builtin_expect (outptr + 1 > outend, 0))                \
00231                 {                                                    \
00232                   result = __GCONV_FULL_OUTPUT;                      \
00233                   break;                                             \
00234                 }                                                    \
00235               *outptr++ = SO;                                               \
00236               curcs = db;                                            \
00237              }                                                              \
00238                                                                      \
00239            if (__builtin_expect (outptr + 2 > outend, 0))                   \
00240              {                                                              \
00241               result = __GCONV_FULL_OUTPUT;                                 \
00242               break;                                                 \
00243              }                                                              \
00244            *outptr++ = cp[0];                                               \
00245            *outptr++ = cp[1];                                               \
00246          }                                                           \
00247       }                                                                     \
00248     else                                                             \
00249       {                                                                     \
00250        if (curcs == db)                                              \
00251          {                                                           \
00252            if (__builtin_expect (outptr + 1 > outend, 0))                   \
00253              {                                                              \
00254               result = __GCONV_FULL_OUTPUT;                                 \
00255               break;                                                 \
00256              }                                                              \
00257            *outptr++ = SI;                                           \
00258            curcs = sb;  /* Commit now so a retry cannot emit SI twice.  */  \
00259          }                                                           \
00260                                                                      \
00261        if (__builtin_expect (outptr + 1 > outend, 0))                       \
00262          {                                                           \
00263            result = __GCONV_FULL_OUTPUT;                             \
00264            break;                                                    \
00265          }                                                           \
00266        *outptr++ = cp[0];                                            \
00267       }                                                                     \
00268                                                                      \
00269     /* Now that we wrote the output increment the input pointer.  */        \
00270     inptr += 4;                                                             \
00271   }
00272 #define LOOP_NEED_FLAGS
00273 #define EXTRA_LOOP_DECLS    , int *curcsp
00274 #define INIT_PARAMS         int curcs = *curcsp & ~7
00275 #define REINIT_PARAMS              curcs = *curcsp & ~7
00276 #define UPDATE_PARAMS              *curcsp = curcs
00277 #include <iconv/loop.c>
00278 
00279 /* Now define the toplevel functions.  */
00280 #include <iconv/skeleton.c>