Back to index

glibc  2.9
euc-jp.c
Go to the documentation of this file.
00001 /* Conversion module for EUC-JP (to and from UCS4).
00002    Copyright (C) 1998, 1999, 2000-2002 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 #include <dlfcn.h>
00022 #include <stdint.h>
00023 #include <gconv.h>
00024 #include <jis0201.h>
00025 #include <jis0208.h>
00026 #include <jis0212.h>
00027 
00028 /* Definitions used in the body of the `gconv' function.  NOTE(review): that function is not in this file; presumably it comes from <iconv/skeleton.c>, included at the end -- confirm.  */
00029 #define CHARSET_NAME        "EUC-JP//" /* Name string of the charset handled by this module.  */
00030 #define FROM_LOOP           from_euc_jp /* Used as LOOPFCT for the EUC-JP -> UCS4 direction below.  */
00031 #define TO_LOOP                    to_euc_jp /* Used as LOOPFCT for the UCS4 -> EUC-JP direction below.  */
00032 #define DEFINE_INIT         1 /* Presumably requests the default init code from skeleton.c -- confirm there.  */
00033 #define DEFINE_FINI         1 /* Presumably requests the default fini code from skeleton.c -- confirm there.  */
00034 #define MIN_NEEDED_FROM            1 /* Shortest EUC-JP sequence handled below: a single byte.  */
00035 #define MAX_NEEDED_FROM            3 /* Longest EUC-JP sequence handled below: 0x8f plus two bytes.  */
00036 #define MIN_NEEDED_TO              4 /* The UCS4 side is read and written four bytes at a time (get32/put32).  */
00037 
00038 
00039 /* First define the conversion function from EUC-JP to UCS4.  NOTE(review): these names are #defined again for the other direction further down without an #undef in this file; presumably <iconv/loop.c> undefines them -- confirm.  */
00040 #define MIN_NEEDED_INPUT    MIN_NEEDED_FROM /* Input is EUC-JP: at least 1 byte per character.  */
00041 #define MAX_NEEDED_INPUT    MAX_NEEDED_FROM /* Input is EUC-JP: at most 3 bytes per character.  */
00042 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_TO /* Output is UCS4: 4 bytes per character.  */
00043 #define LOOPFCT                    FROM_LOOP /* Expands to from_euc_jp.  */
00044 #define BODY \
00045   { /* Decode the EUC-JP character at inptr and emit it as UCS4.  */  \
00046     uint32_t ch = *inptr;                                            \
00047     /* Bytes below 0x8e and in 0x90..0x9f map to themselves.  */     \
00048     if (ch < 0x8e || (ch >= 0x90 && ch <= 0x9f))                     \
00049       ++inptr;                                                              \
00050     else if (ch == 0xff)                                             \
00051       {                                                                     \
00052        /* This is illegal.  */                                              \
00053        STANDARD_FROM_LOOP_ERR_HANDLER (1);                                  \
00054       }                                                                     \
00055     else                                                             \
00056       {                                                                     \
00057        /* Two or more byte character.  First test whether the next          \
00058           byte is also available.  */                                       \
00059        int ch2;                                                      \
00060                                                                      \
00061        if (__builtin_expect (inptr + 1 >= inend, 0))                        \
00062          {                                                           \
00063            /* The second byte is not available.  Store the                  \
00064               intermediate result.  */                                      \
00065            result = __GCONV_INCOMPLETE_INPUT;                               \
00066            break;                                                    \
00067          }                                                           \
00068                                                                      \
00069        ch2 = inptr[1];                                                      \
00070                                                                      \
00071        /* All second bytes of a multibyte character must be >= 0xa1. */      \
00072        if (__builtin_expect (ch2 < 0xa1, 0))                                \
00073          STANDARD_FROM_LOOP_ERR_HANDLER (1);                                \
00074                                                                      \
00075        if (ch == 0x8e)                                                      \
00076          {                                                           \
00077            /* This is code set 2: half-width katakana.  */                  \
00078            ch = jisx0201_to_ucs4 (ch2);                              \
00079            if (__builtin_expect (ch, 0) == __UNKNOWN_10646_CHAR)            \
00080              STANDARD_FROM_LOOP_ERR_HANDLER (1);                     \
00081            /* The 0x8e lead byte and the katakana byte are consumed.  */ \
00082            inptr += 2;                                                      \
00083          }                                                           \
00084        else                                                          \
00085          {                                                           \
00086            const unsigned char *endp;                                       \
00087            /* Passed by address to the lookup; inptr is set from it below.  */ \
00088            if (ch == 0x8f)                                           \
00089              {                                                              \
00090               /* This is code set 3: JIS X 0212-1990.  */                   \
00091               endp = inptr + 1;                                      \
00092               /* The lookup starts behind the 0x8f lead byte.  */    \
00093               ch = jisx0212_to_ucs4 (&endp, inend - endp, 0x80);            \
00094              }                                                              \
00095            else                                                      \
00096              {                                                              \
00097               /* This is code set 1: JIS X 0208.  */                        \
00098               endp = inptr;                                          \
00099               /* No prefix byte here: the lookup starts at inptr.  */ \
00100               ch = jisx0208_to_ucs4 (&endp, inend - inptr, 0x80);           \
00101              }                                                              \
00102            /* NOTE(review): __builtin_expect (ch, 1) hints ch itself, not ch == 0.  */ \
00103            if (__builtin_expect (ch, 1) == 0)                               \
00104              {                                                              \
00105               /* Not enough input available.  */                     \
00106               result = __GCONV_INCOMPLETE_INPUT;                     \
00107               break;                                                 \
00108              }                                                              \
00109            if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0))            \
00110              /* Illegal character.  */                                      \
00111              STANDARD_FROM_LOOP_ERR_HANDLER (1);                     \
00112                                                                      \
00113            inptr = endp;                                             \
00114          }                                                           \
00115       }                                                                     \
00116     /* Store ch with put32 and advance the output by four bytes.  */ \
00117     put32 (outptr, ch);                                                     \
00118     outptr += 4;                                                     \
00119   }
00120 #define ONEBYTE_BODY \
00121   {                                                                  \
00122     /* Only values below 0x8e or in 0x90..0x9f are complete alone.  */ \
00123     if (c >= 0x8e && (c < 0x90 || c > 0x9f))                          \
00124       return WEOF;                                                   \
00125     return c;                                                        \
00126   }
00127 #define LOOP_NEED_FLAGS /* Presumably tells loop.c that BODY needs the conversion flags -- confirm in loop.c.  */
00128 #include <iconv/loop.c> /* Included with the EUC-JP -> UCS4 BODY, ONEBYTE_BODY and LOOPFCT (from_euc_jp) in effect.  */
00129 
00130 
00131 /* Next, define the other direction (UCS4 to EUC-JP).  NOTE(review): the macros below were already defined for the first direction and are not #undef'ed in this file; presumably <iconv/loop.c> does that -- confirm.  */
00132 #define MIN_NEEDED_INPUT    MIN_NEEDED_TO /* Input is UCS4: 4 bytes per character.  */
00133 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_FROM /* Output is EUC-JP: at least 1 byte per character.  */
00134 #define MAX_NEEDED_OUTPUT   MAX_NEEDED_FROM /* Output is EUC-JP: at most 3 bytes per character.  */
00135 #define LOOPFCT                    TO_LOOP /* Expands to to_euc_jp.  */
00136 #define BODY \
00137   { /* Encode the UCS4 value at inptr as EUC-JP bytes at outptr.  */   \
00138     uint32_t ch = get32 (inptr);                                     \
00139     /* Values that are stored as a single output byte come first.  */ \
00140     if (ch < 0x8e || (ch >= 0x90 && ch <= 0x9f))                     \
00141       /* It's plain ASCII or C1.  */                                        \
00142       *outptr++ = ch;                                                       \
00143     else if (ch == 0xa5)                                             \
00144       /* YEN sign => backslash  */                                   \
00145       *outptr++ = 0x5c;                                                     \
00146     else if (ch == 0x203e)                                           \
00147       /* overscore => asciitilde */                                         \
00148       *outptr++ = 0x7e;                                                     \
00149     else                                                             \
00150       {                                                                     \
00151        /* Try the JIS character sets.  */                            \
00152        size_t found;                                                 \
00153                                                                      \
00154        /* See whether we have room for at least two bytes.  */              \
00155        if (__builtin_expect (outptr + 1 >= outend, 0))                      \
00156          {                                                           \
00157            result = __GCONV_FULL_OUTPUT;                             \
00158            break;                                                    \
00159          }                                                           \
00160        /* The lookup gets outptr + 1; outptr[0] is kept for the 0x8e byte.  */ \
00161        found = ucs4_to_jisx0201 (ch, outptr + 1);                           \
00162        if (found != __UNKNOWN_10646_CHAR)                            \
00163          {                                                           \
00164            /* Yes, it's a JIS 0201 character.  Store the shift byte.  */     \
00165            *outptr = 0x8e;                                           \
00166            outptr += 2;                                              \
00167          }                                                           \
00168        else                                                          \
00169          {                                                           \
00170            /* No JIS 0201 character.  */                             \
00171            found = ucs4_to_jisx0208 (ch, outptr, 2);                        \
00172            /* Please note that we always have enough room for the output. */ \
00173            if (found != __UNKNOWN_10646_CHAR)                               \
00174              {                                                              \
00175               /* It's a JIS 0208 character, adjust it for EUC-JP.  */       \
00176               *outptr++ += 0x80;                                     \
00177               *outptr++ += 0x80;                                     \
00178              }                                                              \
00179            else                                                      \
00180              {                                                              \
00181               /* No JIS 0208 character.  */                                 \
00182               found = ucs4_to_jisx0212 (ch, outptr + 1,              \
00183                                      outend - outptr - 1);                  \
00184               /* The room left behind the 0x8f slot was passed in.  */ \
00185               if (__builtin_expect (found, 1) == 0)                         \
00186                 {                                                    \
00187                   /* We ran out of space.  */                               \
00188                   result = __GCONV_FULL_OUTPUT;                      \
00189                   break;                                             \
00190                 }                                                    \
00191               else if (__builtin_expect (found, 0) != __UNKNOWN_10646_CHAR) \
00192                 {                                                    \
00193                   /* It's a JIS 0212 character, adjust it for EUC-JP.  */   \
00194                   *outptr++ = 0x8f;                                         \
00195                   *outptr++ += 0x80;                                        \
00196                   *outptr++ += 0x80;                                        \
00197                 }                                                    \
00198               else                                                   \
00199                 {                                                    \
00200                   UNICODE_TAG_HANDLER (ch, 4);                       \
00201                                                                      \
00202                   /* Illegal character.  */                                 \
00203                   STANDARD_TO_LOOP_ERR_HANDLER (4);                         \
00204                 }                                                    \
00205              }                                                              \
00206          }                                                           \
00207       }                                                                     \
00208     /* One 32-bit input unit has been consumed.  */                  \
00209     inptr += 4;                                                             \
00210   }
00211 #define LOOP_NEED_FLAGS /* Presumably tells loop.c that BODY needs the conversion flags -- confirm in loop.c.  */
00212 #include <iconv/loop.c> /* Included a second time, now with the UCS4 -> EUC-JP BODY and LOOPFCT (to_euc_jp) in effect.  */
00213 
00214 
00215 /* Now define the toplevel functions.  */
00216 #include <iconv/skeleton.c>