Back to index

glibc  2.9
euc-kr.c
Go to the documentation of this file.
00001 /* Mapping tables for EUC-KR handling.
00002    Copyright (C) 1998, 1999, 2000-2002, 2003, 2007
00003    Free Software Foundation, Inc.
00004    This file is part of the GNU C Library.
00005    Contributed by Jungshik Shin <jshin@pantheon.yale.edu>
00006    and Ulrich Drepper <drepper@cygnus.com>, 1998.
00007 
00008    The GNU C Library is free software; you can redistribute it and/or
00009    modify it under the terms of the GNU Lesser General Public
00010    License as published by the Free Software Foundation; either
00011    version 2.1 of the License, or (at your option) any later version.
00012 
00013    The GNU C Library is distributed in the hope that it will be useful,
00014    but WITHOUT ANY WARRANTY; without even the implied warranty of
00015    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016    Lesser General Public License for more details.
00017 
00018    You should have received a copy of the GNU Lesser General Public
00019    License along with the GNU C Library; if not, write to the Free
00020    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00021    02111-1307 USA.  */
00022 
00023 #include <dlfcn.h>
00024 #include <stdint.h>
00025 #include <ksc5601.h>
00026 
00027 
00028 static inline void
00029 __attribute ((always_inline))
00030 euckr_from_ucs4 (uint32_t ch, unsigned char *cp)
00031 {
00032   if (ch > 0x9f)
00033     {
00034       if (__builtin_expect (ch, 0) == 0x20a9)
00035        {
00036          /* Half-width Korean Currency WON sign.  There is no
00037              equivalent in EUC-KR.  Some mappings use \x5c because
00038              this is what some old Korean ASCII variants used but this
00039              is causing problems.  We map it to the FULL WIDTH WON SIGN.  */
00040          cp[0] = '\xa3';
00041          cp[1] = '\xdc';
00042        }
00043       else if (__builtin_expect (ucs4_to_ksc5601 (ch, cp, 2), 0)
00044          != __UNKNOWN_10646_CHAR)
00045        {
00046          cp[0] |= 0x80;
00047          cp[1] |= 0x80;
00048        }
00049       else
00050        cp[0] = cp[1] = '\0';
00051     }
00052   else
00053     {
00054       /* There is no mapping for U005c but we nevertheless map it to
00055         \x5c.  */
00056       cp[0] = (unsigned char) ch;
00057       cp[1] = '\0';
00058     }
00059 }
00060 
00061 
00062 /* Definitions used in the body of the `gconv' function.
         - CHARSET_NAME: the charset name string for this module.
         - FROM_LOOP / TO_LOOP: names of the two conversion loops; they
           are used as LOOPFCT for the EUC-KR -> UCS4 and the
           UCS4 -> EUC-KR loop body respectively.
         - MIN_NEEDED_FROM / MAX_NEEDED_FROM: an EUC-KR character occupies
           one or two bytes.
         - MIN_NEEDED_TO: the UCS4 side is read and written in 4-byte
           units (see get32/put32 in the loop bodies).
         - DEFINE_INIT / DEFINE_FINI: presumably ask <iconv/skeleton.c>
           to provide the init/fini functions -- TODO confirm there.  */
00063 #define CHARSET_NAME        "EUC-KR//"
00064 #define FROM_LOOP           from_euc_kr
00065 #define TO_LOOP                    to_euc_kr
00066 #define DEFINE_INIT         1
00067 #define DEFINE_FINI         1
00068 #define MIN_NEEDED_FROM            1
00069 #define MAX_NEEDED_FROM            2
00070 #define MIN_NEEDED_TO              4
00072 
00073 /* First define the conversion function from EUC-KR to UCS4.
         Each expansion of BODY converts one input character:
           - a byte <= 0x9f is taken as a one-byte character and its value
             is stored unchanged;
           - the bytes 0xa0 and 0xc9, and any value above 0xfe, go to
             STANDARD_FROM_LOOP_ERR_HANDLER (1) as illegal input;
           - every other byte is handed to ksc5601_to_ucs4 as the start of
             a two-byte sequence.  A result of 0 means the second byte is
             missing (__GCONV_INCOMPLETE_INPUT); __UNKNOWN_10646_CHAR goes
             to STANDARD_FROM_LOOP_ERR_HANDLER (2).
         The converted value is stored with put32 and outptr advances by
         four bytes.
         NOTE(review): inptr, inend, outptr and result are not declared in
         this file; presumably <iconv/loop.c> supplies them -- confirm.
         NOTE(review): the comment inside BODY names 0xfe as user-defined,
         but the test is `ch > 0xfe', so 0xfe itself reaches
         ksc5601_to_ucs4; presumably it is rejected there -- confirm in
         <ksc5601.h>.  Also confirm that ksc5601_to_ucs4 advances inptr,
         which it receives by address.  */
00074 #define MIN_NEEDED_INPUT    MIN_NEEDED_FROM
00075 #define MAX_NEEDED_INPUT    MAX_NEEDED_FROM
00076 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_TO
00077 #define LOOPFCT                    FROM_LOOP
00078 #define BODY \
00079   {                                                                  \
00080     uint32_t ch = *inptr;                                            \
00081     /* Bytes up to 0x9f stand for themselves: consume one byte.  */  \
00082     if (ch <= 0x9f)                                                  \
00083       ++inptr;                                                              \
00084     /* 0xfe(->0x7e : row 94) and 0xc9(->0x49 : row 41) are                  \
00085        user-defined areas.  */                                              \
00086     else if (__builtin_expect (ch == 0xa0, 0)                               \
00087             || __builtin_expect (ch > 0xfe, 0)                              \
00088             || __builtin_expect (ch == 0xc9, 0))                     \
00089       {                                                                     \
00090        /* This is illegal.  */                                              \
00091        STANDARD_FROM_LOOP_ERR_HANDLER (1);                                  \
00092       }                                                                     \
00093     else                                                             \
00094       {                                                                     \
00095        /* Two-byte character.  First test whether the next byte             \
00096           is also available.  */                                     \
00097        ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0x80);                  \
00098        if (__builtin_expect (ch == 0, 0))                            \
00099          {                                                           \
00100            /* The second byte is not available.  */                         \
00101            result = __GCONV_INCOMPLETE_INPUT;                               \
00102            break;                                                    \
00103          }                                                           \
00104        if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0))                \
00105          /* This is an illegal character.  */                               \
00106          STANDARD_FROM_LOOP_ERR_HANDLER (2);                                \
00107       }                                                                     \
00108     /* Store the converted value as one 4-byte unit.  */             \
00109     put32 (outptr, ch);                                                     \
00110     outptr += 4;                                                     \
00111   }
00112 #define LOOP_NEED_FLAGS
      /* Single-byte variant of the EUC-KR -> UCS4 conversion: a value C up
         to 0x9f is returned unchanged, matching the one-byte branch of
         BODY; any other value yields WEOF.
         NOTE(review): C and the enclosing function are supplied by the
         code that expands ONEBYTE_BODY, presumably in the included
         <iconv/loop.c> -- confirm there.  */
00113 #define ONEBYTE_BODY \
00114   {                                                                  \
00115     if (c <= 0x9f)                                                   \
00116       return c;                                                             \
00117     else                                                             \
00118       return WEOF;                                                   \
00119   }
00120 #include <iconv/loop.c>
00121 
00122 
00123 /* Next, define the other direction.
         Each expansion of BODY reads one 4-byte unit with get32, converts
         it with euckr_from_ucs4 and writes one or two output bytes:
           - if cp[0] is '\0' although ch is not 0, euckr_from_ucs4 found
             no mapping; UNICODE_TAG_HANDLER and then
             STANDARD_TO_LOOP_ERR_HANDLER (4) deal with the character;
           - otherwise cp[0] is written, followed by cp[1] when it is not
             '\0'.  If there is no room for the second byte the first
             one is taken back (--outptr), result is set to
             __GCONV_FULL_OUTPUT and the loop is left without advancing
             inptr.
         NOTE(review): inptr, outptr, outend and result are not declared
         in this file, and the first byte is written without a bounds
         check; presumably <iconv/loop.c> declares them and guarantees
         MIN_NEEDED_OUTPUT bytes of room -- confirm there.  */
00124 #define MIN_NEEDED_INPUT    MIN_NEEDED_TO
00125 #define MIN_NEEDED_OUTPUT   MIN_NEEDED_FROM
00126 #define MAX_NEEDED_OUTPUT   MAX_NEEDED_FROM
00127 #define LOOPFCT                    TO_LOOP
00128 #define BODY \
00129   {                                                                  \
00130     uint32_t ch = get32 (inptr);                                     \
00131     unsigned char cp[2];                                             \
00132                                                                      \
00133     /* Decomposing Hangul syllables not available in KS C 5601 into         \
00134        Jamos should be considered either here or in euckr_from_ucs4() */      \
00135     euckr_from_ucs4 (ch, cp);                                               \
00136     /* A zero first byte for a nonzero ch means: no mapping found.  */ \
00137     if (__builtin_expect (cp[0], '\1') == '\0' && ch != 0)                  \
00138       {                                                                     \
00139        UNICODE_TAG_HANDLER (ch, 4);                                         \
00140                                                                      \
00141        /* Illegal character.  */                                     \
00142        STANDARD_TO_LOOP_ERR_HANDLER (4);                             \
00143       }                                                                     \
00144     /* Write the first (possibly only) output byte.  */              \
00145     *outptr++ = cp[0];                                                      \
00146     /* Now test for a possible second byte and write this if possible.  */    \
00147     if (cp[1] != '\0')                                                      \
00148       {                                                                     \
00149        if (__builtin_expect (outptr >= outend, 0))                          \
00150          {                                                           \
00151            /* The result does not fit into the buffer.  */                  \
00152            --outptr;                                                 \
00153            result = __GCONV_FULL_OUTPUT;                             \
00154            break;                                                    \
00155          }                                                           \
00156        *outptr++ = cp[1];                                            \
00157       }                                                                     \
00158     /* The 4-byte input unit has been fully converted.  */           \
00159     inptr += 4;                                                             \
00160   }
00161 #define LOOP_NEED_FLAGS
00162 #include <iconv/loop.c>
00163 
00164 
00165 /* Now define the toplevel functions.  */
00166 #include <iconv/skeleton.c>