Back to index

glibc  2.9
gb2312.h
Go to the documentation of this file.
00001 /* Access functions for GB2312 conversion.
00002    Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc.
00003    This file is part of the GNU C Library.
00004    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
00005 
00006    The GNU C Library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Lesser General Public
00008    License as published by the Free Software Foundation; either
00009    version 2.1 of the License, or (at your option) any later version.
00010 
00011    The GNU C Library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Lesser General Public License for more details.
00015 
00016    You should have received a copy of the GNU Lesser General Public
00017    License along with the GNU C Library; if not, write to the Free
00018    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00019    02111-1307 USA.  */
00020 
00021 #ifndef _GB2312_H
00022 #define _GB2312_H    1
00023 
00024 #include <gconv.h>
00025 #include <stdint.h>
00026 #include <assert.h>
00027 
/* GB2312 -> UCS lookup table.  gb2312_to_ucs4 indexes it with
   (row - 0x21) * 94 + (column - 0x21) and treats a zero entry as
   "no mapping".  */
extern const uint16_t __gb2312_to_ucs[];


/* Decode the two-byte GB2312 sequence at **S.

   OFFSET is subtracted from both input bytes before they are range
   checked and turned into a table index.  AVAIL is the number of input
   bytes the caller has available.

   Returns:
     - the value found in __gb2312_to_ucs, after advancing *S by two
       bytes, when the sequence is valid and mapped;
     - 0 when the first byte is acceptable but AVAIL < 2 (*S untouched);
     - __UNKNOWN_10646_CHAR when either byte is out of range, the index
       lies beyond the table, or the table entry is zero (*S untouched).

   The first byte is read unconditionally, so the caller must make at
   least one byte readable.  */
static inline uint32_t
__attribute ((always_inline))
gb2312_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset)
{
  const unsigned char *in = *s;
  /* Computed in int: a byte smaller than OFFSET gives a negative value
     and is rejected by the lower-bound comparison.  */
  int row = in[0] - offset;
  int col;
  int slot;
  uint32_t ucs;

  /* The lead byte is validated before AVAIL is consulted, so a bad lead
     byte is reported even when only one byte is available.  */
  if (row <= 0x20 || row > 0x77)
    return __UNKNOWN_10646_CHAR;

  if (avail < 2)
    return 0;

  col = in[1] - offset;
  if (col <= 0x20 || col >= 0x7f)
    return __UNKNOWN_10646_CHAR;

  /* 94 columns per row; both coordinates start at 0x21.  */
  slot = (row - 0x21) * 94 + (col - 0x21);
  if (slot > 0x1ff1)
    return __UNKNOWN_10646_CHAR;

  ucs = __gb2312_to_ucs[slot];
  if (ucs == 0)
    return __UNKNOWN_10646_CHAR;

  /* Consume the input only once the conversion has succeeded.  */
  *s = in + 2;

  return ucs;
}
00058 
00059 
/* UCS4 -> GB2312 tables, one per contiguous code point range handled in
   ucs4_to_gb2312.  Each entry is a two-byte sequence; ucs4_to_gb2312
   treats an entry whose first byte is zero as "no mapping".  */
extern const char __gb2312_from_ucs4_tab1[][2];
extern const char __gb2312_from_ucs4_tab2[][2];
extern const char __gb2312_from_ucs4_tab3[][2];
extern const char __gb2312_from_ucs4_tab4[][2];
extern const char __gb2312_from_ucs4_tab5[][2];
extern const char __gb2312_from_ucs4_tab6[][2];
extern const char __gb2312_from_ucs4_tab7[][2];
extern const char __gb2312_from_ucs4_tab8[][2];
extern const char __gb2312_from_ucs4_tab9[][2];

/* Encode the UCS4 character WCH as a two-byte GB2312 sequence in S.

   AVAIL is the number of bytes that may be written to S.

   Returns:
     - 2 after storing both bytes in S;
     - 0 when WCH has a mapping but AVAIL < 2 (nothing is written);
     - __UNKNOWN_10646_CHAR when WCH is not covered by any case below or
       the selected table entry starts with a zero byte.  */
static inline size_t
__attribute ((always_inline))
ucs4_to_gb2312 (uint32_t wch, unsigned char *s, size_t avail)
{
  unsigned int code = (unsigned int) wch;
  /* Scratch space for the two ranges whose encoding is computed instead
     of looked up.  ENC points here until a case redirects it.  */
  char pair[2];
  const char *enc = pair;

  switch (code)
    {
    /* Table-driven ranges subtract the first code point of the range
       to obtain the table index.  */
    case 0xa4 ... 0x101:  enc = __gb2312_from_ucs4_tab1[code - 0xa4]; break;

    case 0x113:  enc = "\x28\x25"; break;
    case 0x11b:  enc = "\x28\x27"; break;
    case 0x12b:  enc = "\x28\x29"; break;
    case 0x14d:  enc = "\x28\x2d"; break;
    case 0x16b:  enc = "\x28\x31"; break;
    case 0x1ce:  enc = "\x28\x23"; break;
    case 0x1d0:  enc = "\x28\x2b"; break;
    case 0x1d2:  enc = "\x28\x2f"; break;
    case 0x1d4:  enc = "\x28\x33"; break;
    case 0x1d6:  enc = "\x28\x35"; break;
    case 0x1d8:  enc = "\x28\x36"; break;
    case 0x1da:  enc = "\x28\x37"; break;
    case 0x1dc:  enc = "\x28\x38"; break;
    case 0x2c7:  enc = "\x21\x26"; break;
    case 0x2c9:  enc = "\x21\x25"; break;

    case 0x391 ... 0x3c9:  enc = __gb2312_from_ucs4_tab2[code - 0x391]; break;
    case 0x401 ... 0x451:  enc = __gb2312_from_ucs4_tab3[code - 0x401]; break;
    case 0x2015 ... 0x203b:  enc = __gb2312_from_ucs4_tab4[code - 0x2015]; break;
    case 0x2103 ... 0x22a5:  enc = __gb2312_from_ucs4_tab5[code - 0x2103]; break;

    case 0x2312:  enc = "\x21\x50"; break;

    case 0x2460 ... 0x249b:  enc = __gb2312_from_ucs4_tab6[code - 0x2460]; break;

    case 0x2500 ... 0x254b:
      /* Consecutive code points map to consecutive second bytes
         starting at 0x24 in row 0x29.  */
      pair[0] = 0x29;
      pair[1] = 0x24 + (code - 0x2500);
      break;

    case 0x25a0:  enc = "\x21\x76"; break;
    case 0x25a1:  enc = "\x21\x75"; break;
    case 0x25b2:  enc = "\x21\x78"; break;
    case 0x25b3:  enc = "\x21\x77"; break;
    case 0x25c6:  enc = "\x21\x74"; break;
    case 0x25c7:  enc = "\x21\x73"; break;
    case 0x25cb:  enc = "\x21\x70"; break;
    case 0x25ce:  enc = "\x21\x72"; break;
    case 0x25cf:  enc = "\x21\x71"; break;
    case 0x2605:  enc = "\x21\x6f"; break;
    case 0x2606:  enc = "\x21\x6e"; break;
    case 0x2640:  enc = "\x21\x62"; break;
    case 0x2642:  enc = "\x21\x61"; break;

    case 0x3000 ... 0x3129:  enc = __gb2312_from_ucs4_tab7[code - 0x3000]; break;

    case 0x3220 ... 0x3229:
      /* Consecutive code points map to consecutive second bytes
         starting at 0x65 in row 0x22.  */
      pair[0] = 0x22;
      pair[1] = 0x65 + (code - 0x3220);
      break;

    case 0x4e00 ... 0x9fa0:  enc = __gb2312_from_ucs4_tab8[code - 0x4e00]; break;
    case 0xff01 ... 0xff5e:  enc = __gb2312_from_ucs4_tab9[code - 0xff01]; break;

    case 0xffe0:  enc = "\x21\x69"; break;
    case 0xffe1:  enc = "\x21\x6a"; break;
    case 0xffe3:  enc = "\x23\x7e"; break;
    case 0xffe5:  enc = "\x23\x24"; break;

    default:
      return __UNKNOWN_10646_CHAR;
    }

  /* A leading zero byte marks a hole in one of the lookup tables.  */
  if (enc[0] == '\0')
    return __UNKNOWN_10646_CHAR;

  /* A mapped entry is expected to have a non-zero second byte too.  */
  assert (enc[1] != '\0');

  /* The output room is checked only after the mapping is known to exist,
     so an unmappable character is reported even when AVAIL < 2.  */
  if (avail < 2)
    return 0;

  s[0] = enc[0];
  s[1] = enc[1];

  return 2;
}
00231 
00232 #endif /* gb2312.h */