Back to index

glibc  2.9
ksc5601.h
Go to the documentation of this file.
00001 /* Access functions for KS C 5601-1992 based encoding conversion.
00002    Copyright (C) 1998, 1999, 2000, 2003, 2007 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004 
00005    The GNU C Library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Lesser General Public
00007    License as published by the Free Software Foundation; either
00008    version 2.1 of the License, or (at your option) any later version.
00009 
00010    The GNU C Library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Lesser General Public License for more details.
00014 
00015    You should have received a copy of the GNU Lesser General Public
00016    License along with the GNU C Library; if not, write to the Free
00017    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00018    02111-1307 USA.  */
00019 
00020 #ifndef _KSC5601_H
00021 #define _KSC5601_H   1
00022 
/* Number of entries in each KS C 5601 conversion table.  A row of the
   code table holds 94 cells.  */
#define KSC5601_HANGUL  2350   /* Hangul: rows 16-40, 25 * 94 cells.  */
#define KSC5601_HANJA   4888   /* Hanja: rows 42-93, 52 * 94 cells.  */
#define KSC5601_SYMBOL  989    /* Size of the UCS -> symbol table.  */
00026 
00027 #include <gconv.h>
00028 #include <stdint.h>
00029 
/* One entry of a UCS -> KS C 5601 lookup table: a UCS value paired with
   the two-byte KS C 5601 code it maps to.  This structure should be
   packed (laid out without padding) on all platforms.  */
struct map
{
  uint16_t ucs;   /* UCS value; the key the lookup functions search on.  */
  char val[2];    /* The two KS C 5601 bytes copied out for that key.  */
};
00037 
00038 /* Conversion table.  */
00039 extern const uint16_t __ksc5601_hangul_to_ucs[KSC5601_HANGUL];
00040 extern const uint16_t __ksc5601_sym_to_ucs[];
00041 extern const struct map __ksc5601_sym_from_ucs[KSC5601_SYMBOL];
00042 extern const uint16_t __ksc5601_hanja_to_ucs[KSC5601_HANJA];
00043 extern const struct map __ksc5601_hanja_from_ucs[KSC5601_HANJA];
00044 
00045 
00046 static inline uint32_t
00047 __attribute ((always_inline))
00048 ksc5601_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset)
00049 {
00050   unsigned char ch = **s;
00051   unsigned char ch2;
00052   int idx;
00053 
00054   /* row 94(0x7e) and row 41(0x49) are user-defined area in KS C 5601 */
00055 
00056   if (ch < offset || (ch - offset) <= 0x20 || (ch - offset) >= 0x7e
00057       || (ch - offset) == 0x49)
00058     return __UNKNOWN_10646_CHAR;
00059 
00060   if (avail < 2)
00061     return 0;
00062 
00063   ch2 = (*s)[1];
00064   if (ch2 < offset || (ch2 - offset) <= 0x20 || (ch2 - offset) >= 0x7f)
00065     return __UNKNOWN_10646_CHAR;
00066 
00067   idx = (ch - offset - 0x21) * 94 + (ch2 - offset - 0x21);
00068 
00069   /* 1410 = 15 * 94 , 3760 = 40 * 94
00070      Hangul in KS C 5601 : row 16 - row 40 */
00071 
00072   *s += 2;
00073 
00074   if (idx >= 1410 && idx < 1410 + KSC5601_HANGUL)
00075     return (__ksc5601_hangul_to_ucs[idx - 1410]
00076            ?: (*s -= 2, __UNKNOWN_10646_CHAR));
00077   else if (idx >= 3854)
00078     /* Hanja : row 42 - row 93 : 3854 = 94 * (42-1) */
00079    return (__ksc5601_hanja_to_ucs[idx - 3854]
00080           ?: (*s -= 2, __UNKNOWN_10646_CHAR));
00081   else if (idx <= 1114)
00082     return __ksc5601_sym_to_ucs[idx] ?: (*s -= 2, __UNKNOWN_10646_CHAR);
00083 
00084   *s -= 2;
00085   return __UNKNOWN_10646_CHAR;
00086 }
00087 
00088 static inline size_t
00089 __attribute ((always_inline))
00090 ucs4_to_ksc5601_hangul (uint32_t wch, unsigned char *s, size_t avail)
00091 {
00092   int l = 0;
00093   int u = KSC5601_HANGUL - 1;
00094   uint32_t try;
00095 
00096   while (l <= u)
00097     {
00098       int m = (l + u) / 2;
00099       try = (uint32_t) __ksc5601_hangul_to_ucs[m];
00100       if (try > wch)
00101        u = m - 1;
00102       else if (try < wch)
00103        l= m + 1;
00104       else
00105        {
00106          if (avail < 2)
00107            return 0;
00108 
00109          s[0] = (m / 94) + 0x30;
00110          s[1] = (m % 94) + 0x21;
00111 
00112          return 2;
00113        }
00114     }
00115 
00116   return __UNKNOWN_10646_CHAR;
00117 }
00118 
00119 
00120 static inline size_t
00121 __attribute ((always_inline))
00122 ucs4_to_ksc5601_hanja (uint32_t wch, unsigned char *s, size_t avail)
00123 {
00124   int l = 0;
00125   int u = KSC5601_HANJA - 1;
00126   uint32_t try;
00127 
00128   while (l <= u)
00129     {
00130       int m = (l + u) / 2;
00131       try = (uint32_t) __ksc5601_hanja_from_ucs[m].ucs;
00132       if (try > wch)
00133        u=m-1;
00134       else if (try < wch)
00135        l = m + 1;
00136       else
00137        {
00138          if (avail < 2)
00139            return 0;
00140 
00141          s[0] = __ksc5601_hanja_from_ucs[m].val[0];
00142          s[1] = __ksc5601_hanja_from_ucs[m].val[1];
00143 
00144          return 2;
00145        }
00146     }
00147 
00148   return __UNKNOWN_10646_CHAR;
00149 }
00150 
00151 static inline  size_t
00152 __attribute ((always_inline))
00153 ucs4_to_ksc5601_sym (uint32_t wch, unsigned char *s, size_t avail)
00154 {
00155   int l = 0;
00156   int u = KSC5601_SYMBOL - 1;
00157   uint32_t try;
00158 
00159   while (l <= u)
00160     {
00161       int m = (l + u) / 2;
00162       try = __ksc5601_sym_from_ucs[m].ucs;
00163       if (try > wch)
00164        u = m - 1;
00165       else if (try < wch)
00166        l = m + 1;
00167       else
00168        {
00169          if (avail < 2)
00170            return 0;
00171 
00172          s[0] = __ksc5601_sym_from_ucs[m].val[0];
00173          s[1] = __ksc5601_sym_from_ucs[m].val[1];
00174 
00175          return 2;
00176        }
00177     }
00178 
00179   return __UNKNOWN_10646_CHAR;
00180 }
00181 
00182 
/* Convert the UCS4 character WCH to KS C 5601, storing the result in S.

   The value of WCH selects which lookup is used:
     0xac00..0xd7a3                  -> ucs4_to_ksc5601_hangul
     0x4e00..0x9fff, 0xf900..0xfa0b  -> ucs4_to_ksc5601_hanja
     anything else                   -> ucs4_to_ksc5601_sym

   S      output buffer receiving the two bytes.
   AVAIL  room available in S, in bytes.

   The return value is that of the selected lookup: 2 on success, 0 if
   AVAIL is too small, __UNKNOWN_10646_CHAR if WCH has no mapping.  */
static inline size_t
__attribute ((always_inline))
ucs4_to_ksc5601 (uint32_t wch, unsigned char *s, size_t avail)
{
  const int in_hangul = wch >= 0xac00 && wch <= 0xd7a3;
  const int in_hanja = ((wch >= 0x4e00 && wch <= 0x9fff)
                        || (wch >= 0xf900 && wch <= 0xfa0b));

  if (in_hangul)
    return ucs4_to_ksc5601_hangul (wch, s, avail);

  if (in_hanja)
    return ucs4_to_ksc5601_hanja (wch, s, avail);

  return ucs4_to_ksc5601_sym (wch, s, avail);
}
00195 
00196 #endif /* ksc5601.h */