Back to index

glibc  2.9
ibm1364.c
Go to the documentation of this file.
00001 /* Conversion from and to IBM1364.
00002    Copyright (C) 2005, 2008 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Masahide Washizawa <washi@jp.ibm.com>, 2005.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 #include <dlfcn.h>
00022 #include <stdint.h>
00023 #include <wchar.h>
00024 #include <byteswap.h>
00025 
00026 #ifndef CHARSET_NAME
00027 /* CHARSET_NAME is not predefined: this is really the IBM1364 converter,
00028    not another module sharing the code, so select the IBM1364 names.  */
00029 # define DATA_HEADER "ibm1364.h"  /* Included below for the tables.  */
00030 # define CHARSET_NAME       "IBM1364//"
00031 # define FROM_LOOP   from_ibm1364
00032 # define TO_LOOP     to_ibm1364
00033 # define SB_TO_UCS4  __ibm1364sb_to_ucs4  /* Indexed by the input byte.  */
00034 # define DB_TO_UCS4_IDX     __ibm1364db_to_ucs4_idx  /* Ranges for DB_TO_UCS4.  */
00035 # define DB_TO_UCS4  __ibm1364db_to_ucs4
00036 # define UCS4_TO_SB_IDX     __ucs4_to_ibm1364sb_idx  /* Ranges for UCS4_TO_SB.  */
00037 # define UCS4_TO_SB  __ucs4_to_ibm1364sb
00038 # define UCS4_TO_DB_IDX     __ucs4_to_ibm1364db_idx  /* Ranges for UCS4_TO_DB.  */
00039 # define UCS4_TO_DB  __ucs4_to_ibm1364db
00040 # define UCS_LIMIT   0xffff  /* UCS4 codes >= this are not converted.  */
00041 #endif
00042 
00043 
00044 #include DATA_HEADER
00045 
00046 /* The shift sequences for this charset (it does not use ESC).  */
00047 #define SI           0x0F  /* Shift In, host code to turn DBCS off.  */
00048 #define SO           0x0E  /* Shift Out, host code to turn DBCS on.  */
00049 
00050 /* Definitions used in the body of the `gconv' function (sizes in bytes).  */
00051 #define MIN_NEEDED_FROM     1  /* A single-byte character or SO/SI.  */
00052 #define MAX_NEEDED_FROM     2  /* A double-byte character.  */
00053 #define MIN_NEEDED_TO       4  /* One UCS4 code.  */
00054 #ifdef HAS_COMBINED
00055 # define MAX_NEEDED_TO      8  /* A combined character: two UCS4 codes.  */
00056 #else
00057 # define MAX_NEEDED_TO      4
00058 #endif
00059 #define PREPARE_LOOP \
00060   int save_curcs;  /* Scratch copy used by SAVE_RESET_STATE.  */     \
00061   int *curcsp = &data->__statep->__count;  /* Where the shift state lives.  */
00062 #define EXTRA_LOOP_ARGS            , curcsp
00063 
00064 /* Definitions of initialization and destructor function.  */
00065 #define DEFINE_INIT  1
00066 #define DEFINE_FINI  1
00067 
00068 
00069 /* Since this is a stateful encoding we have to provide code which resets
00070    the state to the initial state (sb) during the flushing.  Decoding only
00071    clears the state; encoding must first write SI, if there is room.  */
00072 #define EMIT_SHIFT_TO_INIT \
00073   if ((data->__statep->__count & ~7) != sb)                                 \
00074     {                                                                \
00075       if (FROM_DIRECTION)                                            \
00076        data->__statep->__count &= 7;  /* Keep only the low three bits.  */  \
00077       else                                                           \
00078        {                                                             \
00079          /* We are not in the initial state.  To switch back we have        \
00080             to emit `SI'.  */                                               \
00081          if (__builtin_expect (outbuf >= outend, 0))                        \
00082            /* We don't have enough room in the output buffer.  The state    \
                  is left unchanged in this case.  */                         \
00083            status = __GCONV_FULL_OUTPUT;                             \
00084          else                                                        \
00085            {                                                         \
00086              /* Write out the shift sequence.  */                           \
00087              *outbuf++ = SI;                                                \
00088              data->__statep->__count &= 7;                                  \
00089            }                                                         \
00090        }                                                             \
00091     }
00092 
00093 
00094 /* Since we might have to reset input pointer we must be able to save
00095    and restore the state: nonzero Save stores it, zero puts it back.  */
00096 #define SAVE_RESET_STATE(Save) \
00097   if (Save)                                                          \
00098     save_curcs = *curcsp;                                            \
00099   else                                                               \
00100     *curcsp = save_curcs
00101 
00102 
00103 /* Current codeset type, kept in the state above its low three bits.  */
00104 enum
00105 {
00106   sb = 0,   /* Single-byte mode; this is the initial state.  */
00107   db = 64   /* Double-byte mode, entered with SO and left with SI.  */
00108 };
00109 
00110 
00111 /* Subroutine to write out converted UCS4 (`res', found for `ch') from IBM-13XX.  */
00112 #ifdef HAS_COMBINED
00113 # define SUB_COMBINED_UCS_FROM_IBM13XX \
00114   { /* UCS_LIMIT as result marks a combined character, but only for a      \
           code inside the combined range; otherwise `res' is written.  */   \
00115     if (res != UCS_LIMIT || ch < __TO_UCS4_COMBINED_MIN                     \
00116        || ch > __TO_UCS4_COMBINED_MAX)                                      \
00117       {                                                                     \
00118        put32 (outptr, res);                                          \
00119        outptr += 4;                                                  \
00120       }                                                                     \
00121     else                                                             \
00122       {                                                                     \
00123        /* This is a combined character.  Make sure we have room for        \
                the two UCS4 codes it expands to.  */                        \
00124        if (__builtin_expect (outptr + 8 > outend, 0))                       \
00125          {                                                           \
00126            result = __GCONV_FULL_OUTPUT;                             \
00127            break;                                                    \
00128          }                                                           \
00129        /* Fetch the pair of UCS4 codes recorded for this code.  */   \
00130        const struct divide *cmbp                                     \
00131          = &DB_TO_UCS4_COMB[ch - __TO_UCS4_COMBINED_MIN];                   \
00132        assert (cmbp->res1 != 0 && cmbp->res2 != 0);                         \
00133                                                                      \
00134        put32 (outptr, cmbp->res1);                                   \
00135        outptr += 4;                                                  \
00136        put32 (outptr, cmbp->res2);                                   \
00137        outptr += 4;                                                  \
00138       }                                                                     \
00139   }
00140 #else
00141 # define SUB_COMBINED_UCS_FROM_IBM13XX \
00142   { /* No combined characters: always exactly one UCS4 code.  */     \
00143     put32 (outptr, res);                                             \
00144     outptr += 4;                                                     \
00145   }
00146 #endif /* HAS_COMBINED */
00147 
00148 
00149 /* First, define the conversion function from IBM-13XX to UCS4.  */
00150 #define MIN_NEEDED_INPUT    MIN_NEEDED_FROM
00151 #define MAX_NEEDED_INPUT    MAX_NEEDED_FROM
00152 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_TO
00153 #define LOOPFCT             FROM_LOOP
00154 #define BODY \
00155   {                                                                  \
00156     uint32_t ch = *inptr;                                            \
00157     /* SO and SI only switch the mode; they produce no output.  */   \
00158     if (__builtin_expect (ch, 0) == SO)                                     \
00159       {                                                                     \
00160        /* Shift OUT, change to DBCS converter.  Invalid if already there.  */ \
00161        if (curcs == db)                                              \
00162          {                                                           \
00163            result = __GCONV_ILLEGAL_INPUT;                                  \
00164            break;                                                    \
00165          }                                                           \
00166        curcs = db;                                                   \
00167        ++inptr;                                                      \
00168        continue;                                                     \
00169       }                                                                     \
00170     if (__builtin_expect (ch, 0) == SI)                                     \
00171       {                                                                     \
00172        /* Shift IN, change to SBCS converter.  Invalid if already there.  */ \
00173        if (curcs == sb)                                              \
00174          {                                                           \
00175            result = __GCONV_ILLEGAL_INPUT;                                  \
00176            break;                                                    \
00177          }                                                           \
00178        curcs = sb;                                                   \
00179        ++inptr;                                                      \
00180        continue;                                                     \
00181       }                                                                     \
00182     /* Anything else is a character in the current mode.  */         \
00183     if (curcs == sb)                                                 \
00184       {                                                                     \
00185        /* Use the IBM13XX table for single byte.  */                        \
00186        uint32_t res = SB_TO_UCS4[ch];                                \
00187        if (__builtin_expect (res, L'\1') == L'\0' && ch != '\0')            \
00188          {                                                           \
00189            /* This is an illegal character.  It is skipped and counted     \
                    only when errors are being ignored.  */                   \
00190            if (! ignore_errors_p ())                                        \
00191              {                                                              \
00192               result = __GCONV_ILLEGAL_INPUT;                               \
00193               break;                                                 \
00194              }                                                              \
00195            ++*irreversible;                                          \
00196          }                                                           \
00197        else                                                          \
00198          {                                                           \
00199            put32 (outptr, res);                                      \
00200            outptr += 4;                                              \
00201          }                                                           \
00202        ++inptr;                                                      \
00203       }                                                                     \
00204     else                                                             \
00205       {                                                                     \
00206        assert (curcs == db);                                                \
00207        /* A double-byte character needs two input bytes.  */         \
00208        if (__builtin_expect (inptr + 1 >= inend, 0))                        \
00209          {                                                           \
00210            /* The second byte is not available.  Report incomplete          \
00211               input without consuming the first byte.  */                   \
00212            result = __GCONV_INCOMPLETE_INPUT;                               \
00213            break;                                                    \
00214          }                                                           \
00215                                                                      \
00216        ch = (ch * 0x100) + inptr[1];                                        \
00217                                                                      \
00218        /* Use the IBM1364 table for double byte.  */                        \
00219        const struct gap *rp2 = DB_TO_UCS4_IDX;                              \
00220        while (ch > rp2->end)                                                \
00221          ++rp2;                                                      \
00222        /* A range starting at 0xffff is the end marker of the index (see   \
                glibc BZ #17325); it must not be used to index DB_TO_UCS4.  */ \
00223        uint32_t res;                                                 \
00224        if (__builtin_expect (rp2->start == 0xffff || ch < rp2->start, 0)    \
00225            || (res = DB_TO_UCS4[ch + rp2->idx],                      \
00226               __builtin_expect (res, L'\1') == L'\0' && ch != '\0'))        \
00227          {                                                           \
00228            /* This is an illegal character.  It is skipped and counted     \
                    only when errors are being ignored.  */                   \
00229            if (! ignore_errors_p ())                                        \
00230              {                                                              \
00231               result = __GCONV_ILLEGAL_INPUT;                               \
00232               break;                                                 \
00233              }                                                              \
00234            ++*irreversible;                                          \
00235          }                                                           \
00236        else                                                          \
00237          {                                                           \
00238            SUB_COMBINED_UCS_FROM_IBM13XX;                            \
00239          }                                                           \
00240        inptr += 2;                                                   \
00241       }                                                                     \
00242   }
00243 #define LOOP_NEED_FLAGS
00244 #define EXTRA_LOOP_DECLS    , int *curcsp
00245 #define INIT_PARAMS         int curcs = *curcsp & ~7
00246 #define UPDATE_PARAMS              *curcsp = curcs
00247 #include <iconv/loop.c>
00248 
00249 
00250 /* Subroutine to convert two UCS4 codes to IBM-13XX.  */
00251 #ifdef HAS_COMBINED
00252 # define SUB_COMBINED_UCS_TO_IBM13XX \
00253   { /* Find the first entry whose leading code is not below `ch'.  */ \
00254     const struct combine *cmbp = UCS4_COMB_TO_DB;                           \
00255     while (cmbp->res1 < ch)                                          \
00256       ++cmbp;                                                        \
00257     /* XXX if last char is beginning of combining store in state.  The following code is read below and must be completely available.  */ \
00258     if (cmbp->res1 == ch && inptr + 8 <= inend)                             \
00259       {                                                                     \
00260        /* See if input is part of a combined character.  */                 \
00261        uint32_t ch_next = get32 (inptr + 4);                                \
00262        while (cmbp->res2 != ch_next)                                        \
00263          {                                                           \
00264            ++cmbp;                                                   \
00265            if (cmbp->res1 != ch)                                     \
00266              goto not_combined;                                      \
00267          }                                                           \
00268                                                                      \
00269        /* It is a combined character.  First make sure we are in            \
00270           double byte mode.  */                                      \
00271        if (curcs == sb)                                              \
00272          {                                                           \
00273            /* We know there is room for at least one byte.  */              \
00274            *outptr++ = SO;                                           \
00275            curcs = db;                                                      \
00276          }                                                           \
00277        /* Both bytes of the character must fit.  */                  \
00278        if (__builtin_expect (outptr + 2 > outend, 0))                       \
00279          {                                                           \
00280            result = __GCONV_FULL_OUTPUT;                             \
00281            break;                                                    \
00282          }                                                           \
00283        *outptr++ = cmbp->ch[0];                                      \
00284        *outptr++ = cmbp->ch[1];                                      \
00285        inptr += 8;                                                   \
00286        continue;                                                     \
00287        /* No pair matched; the caller converts `ch' on its own.  */  \
00288       not_combined:;                                                 \
00289       }                                                                     \
00290   }
00291 #else
00292 # define SUB_COMBINED_UCS_TO_IBM13XX
00293 #endif /* HAS_COMBINED */
00294 
00295 
00296 /* Next, define the other direction.  */
00297 #define MIN_NEEDED_INPUT    MIN_NEEDED_TO
00298 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_FROM
00299 #define MAX_NEEDED_OUTPUT   MAX_NEEDED_FROM
00300 #define LOOPFCT                    TO_LOOP
00301 #define BODY \
00302   {                                                                  \
00303     uint32_t ch = get32 (inptr);                                     \
00304     /* Codes at or above UCS_LIMIT are not looked up in the tables.  */ \
00305     if (__builtin_expect (ch >= UCS_LIMIT, 0))                              \
00306       {                                                                     \
00307        UNICODE_TAG_HANDLER (ch, 4);                                         \
00308        /* Otherwise this is an error; skip it only if errors are ignored.  */ \
00309        if (! ignore_errors_p ())                                     \
00310          {                                                           \
00311            result = __GCONV_ILLEGAL_INPUT;                                  \
00312            break;                                                    \
00313          }                                                           \
00314        ++*irreversible;                                              \
00315        inptr += 4;                                                   \
00316        continue;                                                     \
00317       }                                                                     \
00318     /* Expands to nothing unless HAS_COMBINED is defined.  */        \
00319     SUB_COMBINED_UCS_TO_IBM13XX;                                     \
00320     /* Find the single-byte index range that could contain ch.  */   \
00321     const struct gap *rp1 = UCS4_TO_SB_IDX;                                 \
00322     while (ch > rp1->end)                                            \
00323       ++rp1;                                                         \
00324                                                                      \
00325     /* Use the UCS4 table for single byte.  */                              \
00326     const char *cp;                                                  \
00327     if (__builtin_expect (ch < rp1->start, 0)                               \
00328        || (cp = UCS4_TO_SB[ch + rp1->idx],                                  \
00329            __builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0'))         \
00330       {                                                                     \
00331        /* No single-byte form.  Use the UCS4 table for double byte.  */     \
00332        const struct gap *rp2 = UCS4_TO_DB_IDX;                              \
00333        while (ch > rp2->end)                                                \
00334          ++rp2;                                                      \
00335                                                                      \
00336        if (__builtin_expect (ch < rp2->start, 0)                     \
00337            || (cp = UCS4_TO_DB[ch + rp2->idx],                              \
00338               __builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0'))      \
00339          {                                                           \
00340            /* This is an illegal character.  It is skipped and counted     \
                    only when errors are being ignored.  */                   \
00341            if (! ignore_errors_p ())                                        \
00342              {                                                              \
00343               result = __GCONV_ILLEGAL_INPUT;                               \
00344               break;                                                 \
00345              }                                                              \
00346            ++*irreversible;                                          \
00347          }                                                           \
00348        else                                                          \
00349          {                                                           \
00350            if (curcs == sb)                                          \
00351              {                                                              \
00352               /* We know there is room for at least one byte.  */           \
00353               *outptr++ = SO;                                               \
00354               curcs = db;                                            \
00355              }                                                              \
00356            /* Both bytes of the character must fit.  */              \
00357            if (__builtin_expect (outptr + 2 > outend, 0))                   \
00358              {                                                              \
00359               result = __GCONV_FULL_OUTPUT;                                 \
00360               break;                                                 \
00361              }                                                              \
00362            *outptr++ = cp[0];                                               \
00363            *outptr++ = cp[1];                                               \
00364          }                                                           \
00365       }                                                                     \
00366     else                                                             \
00367       {                                                                     \
00368        if (__builtin_expect (curcs == db, 0))                               \
00369          {                                                           \
00370            /* We know there is room for at least one byte.  */              \
00371            *outptr++ = SI;                                           \
00372            curcs = sb;                                                      \
00373            /* The SI is written; the character itself still needs room.  */ \
00374            if (__builtin_expect (outptr >= outend, 0))                      \
00375              {                                                              \
00376               result = __GCONV_FULL_OUTPUT;                                 \
00377               break;                                                 \
00378              }                                                              \
00379          }                                                           \
00380                                                                      \
00381        *outptr++ = cp[0];                                            \
00382       }                                                                     \
00383                                                                      \
00384     /* Now that we wrote the output increment the input pointer.  */        \
00385     inptr += 4;                                                             \
00386   }
00387 #define LOOP_NEED_FLAGS
00388 #define EXTRA_LOOP_DECLS    , int *curcsp
00389 #define INIT_PARAMS         int curcs = *curcsp & ~7
00390 #define REINIT_PARAMS              curcs = *curcsp & ~7
00391 #define UPDATE_PARAMS              *curcsp = curcs
00392 #include <iconv/loop.c>
00393 
00394 /* Now define the toplevel functions.  */
00395 #include <iconv/skeleton.c>