Back to index

php5  5.3.10
mktable.c
Go to the documentation of this file.
00001 /**********************************************************************
00002   mktable.c
00003 **********************************************************************/
00004 /*-
00005  * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00006  * All rights reserved.
00007  *
00008  * Redistribution and use in source and binary forms, with or without
00009  * modification, are permitted provided that the following conditions
00010  * are met:
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in the
00015  *    documentation and/or other materials provided with the distribution.
00016  *
00017  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00018  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00021  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00022  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00023  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00024  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00025  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00026  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00027  * SUCH DAMAGE.
00028  */
00029 
00030 #include <stdlib.h>
00031 #include <stdio.h>
00032 
00033 #define NOT_RUBY
00034 #include "regenc.h"
00035 
/*
 * Numeric ids of the encodings this generator knows about.  They are used
 * as the `num` field of the Info table and as the selector in every Is*()
 * switch.  The values are consecutive; there is no ISO_8859_12 id, so
 * ISO_8859_13 directly follows ISO_8859_11.
 */
enum {
  UNICODE_ISO_8859_1 = 0,
  ISO_8859_1         = 1,
  ISO_8859_2         = 2,
  ISO_8859_3         = 3,
  ISO_8859_4         = 4,
  ISO_8859_5         = 5,
  ISO_8859_6         = 6,
  ISO_8859_7         = 7,
  ISO_8859_8         = 8,
  ISO_8859_9         = 9,
  ISO_8859_10        = 10,
  ISO_8859_11        = 11,
  ISO_8859_13        = 12,
  ISO_8859_14        = 13,
  ISO_8859_15        = 14,
  ISO_8859_16        = 15,
  KOI8               = 16,
  KOI8_R             = 17
};
00054 
/*
 * Describes one encoding to generate a table for.
 *   num  - encoding id (one of the ISO_8859_x / KOI8 constants above)
 *   name - identifier text inserted into the emitted table name; it always
 *          points at a string literal, hence const-qualified so that an
 *          accidental write through it is rejected at compile time.
 */
typedef struct {
  int         num;
  const char* name;
} ENC_INFO;
00059 
00060 static ENC_INFO Info[] = {
00061   { UNICODE_ISO_8859_1,  "UNICODE_ISO_8859_1"  },
00062   { ISO_8859_1,  "ISO_8859_1"  },
00063   { ISO_8859_2,  "ISO_8859_2"  },
00064   { ISO_8859_3,  "ISO_8859_3"  },
00065   { ISO_8859_4,  "ISO_8859_4"  },
00066   { ISO_8859_5,  "ISO_8859_5"  },
00067   { ISO_8859_6,  "ISO_8859_6"  },
00068   { ISO_8859_7,  "ISO_8859_7"  },
00069   { ISO_8859_8,  "ISO_8859_8"  },
00070   { ISO_8859_9,  "ISO_8859_9"  },
00071   { ISO_8859_10, "ISO_8859_10" },
00072   { ISO_8859_11, "ISO_8859_11" },
00073   { ISO_8859_13, "ISO_8859_13" },
00074   { ISO_8859_14, "ISO_8859_14" },
00075   { ISO_8859_15, "ISO_8859_15" },
00076   { ISO_8859_16, "ISO_8859_16" },
00077   { KOI8,        "KOI8" },
00078   { KOI8_R,      "KOI8_R" }
00079 };
00080 
00081 
00082 static int IsAlpha(int enc, int c)
00083 {
00084   if (c >= 0x41 && c <= 0x5a) return 1;
00085   if (c >= 0x61 && c <= 0x7a) return 1;
00086 
00087   switch (enc) {
00088   case UNICODE_ISO_8859_1:
00089   case ISO_8859_1:
00090   case ISO_8859_9:
00091     if (c == 0xaa) return 1;
00092     if (c == 0xb5) return 1;
00093     if (c == 0xba) return 1;
00094     if (c >= 0xc0 && c <= 0xd6) return 1;
00095     if (c >= 0xd8 && c <= 0xf6) return 1;
00096     if (c >= 0xf8 && c <= 0xff) return 1;
00097     break;
00098 
00099   case ISO_8859_2:
00100     if (c == 0xa1 || c == 0xa3) return 1;
00101     if (c == 0xa5 || c == 0xa6) return 1;
00102     if (c >= 0xa9 && c <= 0xac) return 1;
00103     if (c >= 0xae && c <= 0xaf) return 1;
00104     if (c == 0xb1 || c == 0xb3) return 1;
00105     if (c == 0xb5 || c == 0xb6) return 1;
00106     if (c >= 0xb9 && c <= 0xbc) return 1;
00107     if (c >= 0xbe && c <= 0xbf) return 1;
00108     if (c >= 0xc0 && c <= 0xd6) return 1;
00109     if (c >= 0xd8 && c <= 0xf6) return 1;
00110     if (c >= 0xf8 && c <= 0xfe) return 1;
00111     break;
00112 
00113   case ISO_8859_3:
00114     if (c == 0xa1) return 1;
00115     if (c == 0xa6) return 1;
00116     if (c >= 0xa9 && c <= 0xac) return 1;
00117     if (c == 0xaf) return 1;
00118     if (c == 0xb1) return 1;
00119     if (c == 0xb5 || c == 0xb6) return 1;
00120     if (c >= 0xb9 && c <= 0xbc) return 1;
00121     if (c == 0xbf) return 1;
00122     if (c >= 0xc0 && c <= 0xc2) return 1;
00123     if (c >= 0xc4 && c <= 0xcf) return 1;
00124     if (c >= 0xd1 && c <= 0xd6) return 1;
00125     if (c >= 0xd8 && c <= 0xe2) return 1;
00126     if (c >= 0xe4 && c <= 0xef) return 1;
00127     if (c >= 0xf1 && c <= 0xf6) return 1;
00128     if (c >= 0xf8 && c <= 0xfe) return 1;
00129     break;
00130 
00131   case ISO_8859_4:
00132     if (c >= 0xa1 && c <= 0xa3) return 1;
00133     if (c == 0xa5 || c == 0xa6) return 1;
00134     if (c >= 0xa9 && c <= 0xac) return 1;
00135     if (c == 0xae) return 1;
00136     if (c == 0xb1 || c == 0xb3) return 1;
00137     if (c == 0xb5 || c == 0xb6) return 1;
00138     if (c >= 0xb9 && c <= 0xbf) return 1;
00139     if (c >= 0xc0 && c <= 0xd6) return 1;
00140     if (c >= 0xd8 && c <= 0xf6) return 1;
00141     if (c >= 0xf8 && c <= 0xfe) return 1;
00142     break;
00143 
00144   case ISO_8859_5:
00145     if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
00146     if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
00147     break;
00148 
00149   case ISO_8859_6:
00150     if (c >= 0xc1 && c <= 0xda) return 1;
00151     if (c >= 0xe0 && c <= 0xf2) return 1;
00152     break;
00153 
00154   case ISO_8859_7:
00155     if (c == 0xb6) return 1;
00156     if (c >= 0xb8 && c <= 0xba) return 1;
00157     if (c == 0xbc) return 1;
00158     if (c >= 0xbe && c <= 0xbf) return 1;
00159     if (c == 0xc0) return 1;
00160     if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
00161     if (c >= 0xdc && c <= 0xfe) return 1;
00162     break;
00163 
00164   case ISO_8859_8:
00165     if (c == 0xb5) return 1;
00166     if (c >= 0xe0 && c <= 0xfa) return 1;
00167     break;
00168 
00169   case ISO_8859_10:
00170     if (c >= 0xa1 && c <= 0xa6) return 1;
00171     if (c >= 0xa8 && c <= 0xac) return 1;
00172     if (c == 0xae || c == 0xaf) return 1;
00173     if (c >= 0xb1 && c <= 0xb6) return 1;
00174     if (c >= 0xb8 && c <= 0xbc) return 1;
00175     if (c >= 0xbe && c <= 0xff) return 1;
00176     break;
00177 
00178   case ISO_8859_11:
00179     if (c >= 0xa1 && c <= 0xda) return 1;
00180     if (c >= 0xdf && c <= 0xfb) return 1;
00181     break;
00182 
00183   case ISO_8859_13:
00184     if (c == 0xa8) return 1;
00185     if (c == 0xaa) return 1;
00186     if (c == 0xaf) return 1;
00187     if (c == 0xb5) return 1;
00188     if (c == 0xb8) return 1;
00189     if (c == 0xba) return 1;
00190     if (c >= 0xbf && c <= 0xd6) return 1;
00191     if (c >= 0xd8 && c <= 0xf6) return 1;
00192     if (c >= 0xf8 && c <= 0xfe) return 1;
00193     break;
00194 
00195   case ISO_8859_14:
00196     if (c == 0xa1 || c == 0xa2) return 1;
00197     if (c == 0xa4 || c == 0xa5) return 1;
00198     if (c == 0xa6 || c == 0xa8) return 1;
00199     if (c >= 0xaa && c <= 0xac) return 1;
00200     if (c >= 0xaf && c <= 0xb5) return 1;
00201     if (c >= 0xb7 && c <= 0xff) return 1;
00202     break;
00203 
00204   case ISO_8859_15:
00205     if (c == 0xaa) return 1;
00206     if (c == 0xb5) return 1;
00207     if (c == 0xba) return 1;
00208     if (c >= 0xc0 && c <= 0xd6) return 1;
00209     if (c >= 0xd8 && c <= 0xf6) return 1;
00210     if (c >= 0xf8 && c <= 0xff) return 1;
00211     if (c == 0xa6) return 1;
00212     if (c == 0xa8) return 1;
00213     if (c == 0xb4) return 1;
00214     if (c == 0xb8) return 1;
00215     if (c == 0xbc) return 1;
00216     if (c == 0xbd) return 1;
00217     if (c == 0xbe) return 1;
00218     break;
00219 
00220   case ISO_8859_16:
00221     if (c == 0xa1) return 1;
00222     if (c == 0xa2) return 1;
00223     if (c == 0xa3) return 1;
00224     if (c == 0xa6) return 1;
00225     if (c == 0xa8) return 1;
00226     if (c == 0xaa) return 1;
00227     if (c == 0xac) return 1;
00228     if (c == 0xae) return 1;
00229     if (c == 0xaf) return 1;
00230     if (c == 0xb2) return 1;
00231     if (c == 0xb3) return 1;
00232     if (c == 0xb4) return 1;
00233     if (c >= 0xb8 && c <= 0xba) return 1;
00234     if (c == 0xbc) return 1;
00235     if (c == 0xbd) return 1;
00236     if (c == 0xbe) return 1;
00237     if (c == 0xbf) return 1;
00238     if (c >= 0xc0 && c <= 0xde) return 1;
00239     if (c >= 0xdf && c <= 0xff) return 1;
00240     break;
00241 
00242   case KOI8_R:
00243     if (c == 0xa3 || c == 0xb3) return 1;
00244     /* fall */
00245   case KOI8:
00246     if (c >= 0xc0 && c <= 0xff) return 1;
00247     break;
00248 
00249   default:
00250     exit(-1);
00251   }
00252 
00253   return 0;
00254 }
00255 
00256 static int IsBlank(int enc, int c)
00257 {
00258   if (c == 0x09      || c == 0x20) return 1;
00259 
00260   switch (enc) {
00261   case UNICODE_ISO_8859_1:
00262   case ISO_8859_1:
00263   case ISO_8859_2:
00264   case ISO_8859_3:
00265   case ISO_8859_4:
00266   case ISO_8859_5:
00267   case ISO_8859_6:
00268   case ISO_8859_7:
00269   case ISO_8859_8:
00270   case ISO_8859_9:
00271   case ISO_8859_10:
00272   case ISO_8859_11:
00273   case ISO_8859_13:
00274   case ISO_8859_14:
00275   case ISO_8859_15:
00276   case ISO_8859_16:
00277   case KOI8:
00278     if (c == 0xa0) return 1;
00279     break;
00280 
00281   case KOI8_R:
00282     if (c == 0x9a) return 1;
00283     break;
00284 
00285   default:
00286     exit(-1);
00287   }
00288 
00289   return 0;
00290 }
00291 
00292 static int IsCntrl(int enc, int c)
00293 {
00294   if (c >= 0x00      && c <= 0x1F) return 1;
00295 
00296   switch (enc) {
00297   case UNICODE_ISO_8859_1:
00298     if (c == 0xad) return 1;
00299     /* fall */
00300   case ISO_8859_1:
00301   case ISO_8859_2:
00302   case ISO_8859_3:
00303   case ISO_8859_4:
00304   case ISO_8859_5:
00305   case ISO_8859_6:
00306   case ISO_8859_7:
00307   case ISO_8859_8:
00308   case ISO_8859_9:
00309   case ISO_8859_10:
00310   case ISO_8859_11:
00311   case ISO_8859_13:
00312   case ISO_8859_14:
00313   case ISO_8859_15:
00314   case ISO_8859_16:
00315   case KOI8:
00316     if (c >= 0x7f && c <= 0x9F) return 1;
00317     break;
00318 
00319 
00320   case KOI8_R:
00321     if (c == 0x7f) return 1;
00322     break;
00323 
00324   default:
00325     exit(-1);
00326   }
00327 
00328   return 0;
00329 }
00330 
/*
 * Returns 1 if c lies in 0x30-0x39, 0 otherwise.  The result does not
 * depend on enc.
 */
static int IsDigit(int enc, int c)
{
  (void)enc;
  return (0x30 <= c && c <= 0x39) ? 1 : 0;
}
00336 
00337 static int IsGraph(int enc, int c)
00338 {
00339   if (c >= 0x21 && c <= 0x7e) return 1;
00340 
00341   switch (enc) {
00342   case UNICODE_ISO_8859_1:
00343   case ISO_8859_1:
00344   case ISO_8859_2:
00345   case ISO_8859_4:
00346   case ISO_8859_5:
00347   case ISO_8859_9:
00348   case ISO_8859_10:
00349   case ISO_8859_13:
00350   case ISO_8859_14:
00351   case ISO_8859_15:
00352   case ISO_8859_16:
00353     if (c >= 0xa1 && c <= 0xff) return 1;
00354     break;
00355 
00356   case ISO_8859_3:
00357     if (c >= 0xa1) {
00358       if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
00359          c == 0xe3 || c == 0xf0)
00360        return 0;
00361       else
00362        return 1;
00363     }
00364     break;
00365 
00366   case ISO_8859_6:
00367     if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
00368       return 1;
00369     if (c >= 0xc1 && c <= 0xda) return 1;
00370     if (c >= 0xe0 && c <= 0xf2) return 1;
00371     break;
00372 
00373   case ISO_8859_7:
00374     if (c >= 0xa1 && c <= 0xfe &&
00375        c != 0xa4 && c != 0xa5 && c != 0xaa &&
00376        c != 0xae && c != 0xd2) return 1;
00377     break;
00378 
00379   case ISO_8859_8:
00380     if (c >= 0xa2 && c <= 0xfa) {
00381       if (c >= 0xbf && c <= 0xde) return 0;
00382       return 1;
00383     }
00384     break;
00385 
00386   case ISO_8859_11:
00387     if (c >= 0xa1 && c <= 0xda) return 1;
00388     if (c >= 0xdf && c <= 0xfb) return 1;
00389     break;
00390 
00391   case KOI8:
00392     if (c >= 0xc0 && c <= 0xff) return 1;
00393     break;
00394 
00395   case KOI8_R:
00396     if (c >= 0x80 && c <= 0xff && c != 0x9a) return 1;
00397     break;
00398 
00399   default:
00400     exit(-1);
00401   }
00402 
00403   return 0;
00404 }
00405 
00406 static int IsLower(int enc, int c)
00407 {
00408   if (c >= 0x61 && c <= 0x7a) return 1;
00409 
00410   switch (enc) {
00411   case UNICODE_ISO_8859_1:
00412   case ISO_8859_1:
00413   case ISO_8859_9:
00414     if (c == 0xaa) return 1;
00415     if (c == 0xb5) return 1;
00416     if (c == 0xba) return 1;
00417     if (c >= 0xdf && c <= 0xf6) return 1;
00418     if (c >= 0xf8 && c <= 0xff) return 1;
00419     break;
00420 
00421   case ISO_8859_2:
00422     if (c == 0xb1 || c == 0xb3) return 1;
00423     if (c == 0xb5 || c == 0xb6) return 1;
00424     if (c >= 0xb9 && c <= 0xbc) return 1;
00425     if (c >= 0xbe && c <= 0xbf) return 1;
00426     if (c >= 0xdf && c <= 0xf6) return 1;
00427     if (c >= 0xf8 && c <= 0xfe) return 1;
00428     break;
00429 
00430   case ISO_8859_3:
00431     if (c == 0xb1) return 1;
00432     if (c == 0xb5 || c == 0xb6) return 1;
00433     if (c >= 0xb9 && c <= 0xbc) return 1;
00434     if (c == 0xbf) return 1;
00435     if (c == 0xdf) return 1;
00436     if (c >= 0xe0 && c <= 0xe2) return 1;
00437     if (c >= 0xe4 && c <= 0xef) return 1;
00438     if (c >= 0xf1 && c <= 0xf6) return 1;
00439     if (c >= 0xf8 && c <= 0xfe) return 1;
00440     break;
00441 
00442   case ISO_8859_4:
00443     if (c == 0xa2) return 1;
00444     if (c == 0xb1 || c == 0xb3) return 1;
00445     if (c == 0xb5 || c == 0xb6) return 1;
00446     if (c >= 0xb9 && c <= 0xbc) return 1;
00447     if (c >= 0xbe && c <= 0xbf) return 1;
00448     if (c == 0xdf) return 1;
00449     if (c >= 0xe0 && c <= 0xf6) return 1;
00450     if (c >= 0xf8 && c <= 0xfe) return 1;
00451     break;
00452 
00453   case ISO_8859_5:
00454     if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
00455     break;
00456 
00457   case ISO_8859_6:
00458     break;
00459 
00460   case ISO_8859_7:
00461     if (c == 0xc0) return 1;
00462     if (c >= 0xdc && c <= 0xfe) return 1;
00463     break;
00464 
00465   case ISO_8859_8:
00466     if (c == 0xb5) return 1;
00467     break;
00468 
00469   case ISO_8859_10:
00470     if (c >= 0xb1 && c <= 0xb6) return 1;
00471     if (c >= 0xb8 && c <= 0xbc) return 1;
00472     if (c == 0xbe || c == 0xbf) return 1;
00473     if (c >= 0xdf && c <= 0xff) return 1;
00474     break;
00475 
00476   case ISO_8859_11:
00477     break;
00478 
00479   case ISO_8859_13:
00480     if (c == 0xb5) return 1;
00481     if (c == 0xb8) return 1;
00482     if (c == 0xba) return 1;
00483     if (c == 0xbf) return 1;
00484     if (c >= 0xdf && c <= 0xf6) return 1;
00485     if (c >= 0xf8 && c <= 0xfe) return 1;
00486     break;
00487 
00488   case ISO_8859_14:
00489     if (c == 0xa2) return 1;
00490     if (c == 0xa5) return 1;
00491     if (c == 0xab) return 1;
00492     if (c == 0xb1 || c == 0xb3 || c == 0xb5) return 1;
00493     if (c >= 0xb8 && c <= 0xba) return 1;
00494     if (c == 0xbc) return 1;
00495     if (c == 0xbe || c == 0xbf) return 1;
00496     if (c >= 0xdf && c <= 0xff) return 1;
00497     break;
00498 
00499   case ISO_8859_15:
00500     if (c == 0xaa) return 1;
00501     if (c == 0xb5) return 1;
00502     if (c == 0xba) return 1;
00503     if (c >= 0xdf && c <= 0xf6) return 1;
00504     if (c >= 0xf8 && c <= 0xff) return 1;
00505     if (c == 0xa8) return 1;
00506     if (c == 0xb8) return 1;
00507     if (c == 0xbd) return 1;
00508     break;
00509 
00510   case ISO_8859_16:
00511     if (c == 0xa2) return 1;
00512     if (c == 0xa8) return 1;
00513     if (c == 0xae) return 1;
00514     if (c == 0xb3) return 1;
00515     if (c >= 0xb8 && c <= 0xba) return 1;
00516     if (c == 0xbd) return 1;
00517     if (c == 0xbf) return 1;
00518     if (c >= 0xdf && c <= 0xff) return 1;
00519     break;
00520 
00521   case KOI8_R:
00522     if (c == 0xa3) return 1;
00523     /* fall */
00524   case KOI8:
00525     if (c >= 0xc0 && c <= 0xdf) return 1;
00526     break;
00527 
00528   default:
00529     exit(-1);
00530   }
00531 
00532   return 0;
00533 }
00534 
00535 static int IsPrint(int enc, int c)
00536 {
00537   if (c >= 0x20 && c <= 0x7e) return 1;
00538 
00539   switch (enc) {
00540   case UNICODE_ISO_8859_1:
00541     if (c >= 0x09 && c <= 0x0d) return 1;
00542     if (c == 0x85) return 1;
00543     /* fall */
00544   case ISO_8859_1:
00545   case ISO_8859_2:
00546   case ISO_8859_4:
00547   case ISO_8859_5:
00548   case ISO_8859_9:
00549   case ISO_8859_10:
00550   case ISO_8859_13:
00551   case ISO_8859_14:
00552   case ISO_8859_15:
00553   case ISO_8859_16:
00554     if (c >= 0xa0 && c <= 0xff) return 1;
00555     break;
00556 
00557   case ISO_8859_3:
00558     if (c >= 0xa0) {
00559       if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
00560          c == 0xe3 || c == 0xf0)
00561        return 0;
00562       else
00563        return 1;
00564     }
00565     break;
00566 
00567   case ISO_8859_6:
00568     if (c == 0xa0) return 1;
00569     if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
00570       return 1;
00571     if (c >= 0xc1 && c <= 0xda) return 1;
00572     if (c >= 0xe0 && c <= 0xf2) return 1;
00573     break;
00574 
00575   case ISO_8859_7:
00576     if (c >= 0xa0 && c <= 0xfe &&
00577        c != 0xa4 && c != 0xa5 && c != 0xaa &&
00578        c != 0xae && c != 0xd2) return 1;
00579     break;
00580 
00581   case ISO_8859_8:
00582     if (c >= 0xa0 && c <= 0xfa) {
00583       if (c >= 0xbf && c <= 0xde) return 0;
00584       if (c == 0xa1) return 0;
00585       return 1;
00586     }
00587     break;
00588 
00589   case ISO_8859_11:
00590     if (c >= 0xa0 && c <= 0xda) return 1;
00591     if (c >= 0xdf && c <= 0xfb) return 1;
00592     break;
00593 
00594   case KOI8:
00595     if (c == 0xa0) return 1;
00596     if (c >= 0xc0 && c <= 0xff) return 1;
00597     break;
00598 
00599   case KOI8_R:
00600     if (c >= 0x80 && c <= 0xff) return 1;
00601     break;
00602 
00603   default:
00604     exit(-1);
00605   }
00606 
00607   return 0;
00608 }
00609 
00610 static int IsPunct(int enc, int c)
00611 {
00612   if (enc == UNICODE_ISO_8859_1) {
00613     if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 ||
00614         c == 0x7c || c == 0x7e) return 1;
00615     if (c >= 0x3c && c <= 0x3e) return 1;
00616   }
00617 
00618   if (c >= 0x21 && c <= 0x2f) return 1;
00619   if (c >= 0x3a && c <= 0x40) return 1;
00620   if (c >= 0x5b && c <= 0x60) return 1;
00621   if (c >= 0x7b && c <= 0x7e) return 1;
00622 
00623   switch (enc) {
00624   case ISO_8859_1:
00625   case ISO_8859_9:
00626   case ISO_8859_15:
00627     if (c == 0xad) return 1;
00628     /* fall */
00629   case UNICODE_ISO_8859_1:
00630     if (c == 0xa1) return 1;
00631     if (c == 0xab) return 1;
00632     if (c == 0xb7) return 1;
00633     if (c == 0xbb) return 1;
00634     if (c == 0xbf) return 1;
00635     break;
00636 
00637   case ISO_8859_2:
00638   case ISO_8859_4:
00639   case ISO_8859_5:
00640   case ISO_8859_14:
00641     if (c == 0xad) return 1;
00642     break;
00643 
00644   case ISO_8859_3:
00645   case ISO_8859_10:
00646     if (c == 0xad) return 1;
00647     if (c == 0xb7) return 1;
00648     if (c == 0xbd) return 1;
00649     break;
00650 
00651   case ISO_8859_6:
00652     if (c == 0xac) return 1;
00653     if (c == 0xad) return 1;
00654     if (c == 0xbb) return 1;
00655     if (c == 0xbf) return 1;
00656     break;
00657 
00658   case ISO_8859_7:
00659     if (c == 0xa1 || c == 0xa2) return 1;
00660     if (c == 0xab) return 1;
00661     if (c == 0xaf) return 1;
00662     if (c == 0xad) return 1;
00663     if (c == 0xb7 || c == 0xbb) return 1;
00664     break;
00665 
00666   case ISO_8859_8:
00667     if (c == 0xab) return 1;
00668     if (c == 0xad) return 1;
00669     if (c == 0xb7) return 1;
00670     if (c == 0xbb) return 1;
00671     if (c == 0xdf) return 1;
00672     break;
00673 
00674   case ISO_8859_13:
00675     if (c == 0xa1 || c == 0xa5) return 1;
00676     if (c == 0xab || c == 0xad) return 1;
00677     if (c == 0xb4 || c == 0xb7) return 1;
00678     if (c == 0xbb) return 1;
00679     if (c == 0xff) return 1;
00680     break;
00681 
00682   case ISO_8859_16:
00683     if (c == 0xa5) return 1;
00684     if (c == 0xab) return 1;
00685     if (c == 0xad) return 1;
00686     if (c == 0xb5) return 1;
00687     if (c == 0xb7) return 1;
00688     if (c == 0xbb) return 1;
00689     break;
00690 
00691   case KOI8_R:
00692     if (c == 0x9e) return 1;
00693     break;
00694 
00695   case ISO_8859_11:
00696   case KOI8:
00697     break;
00698 
00699   default:
00700     exit(-1);
00701   }
00702 
00703   return 0;
00704 }
00705 
00706 static int IsSpace(int enc, int c)
00707 {
00708   if (c >= 0x09 && c <= 0x0d) return 1;
00709   if (c == 0x20) return 1;
00710 
00711   switch (enc) {
00712   case UNICODE_ISO_8859_1:
00713     if (c == 0x85) return 1;
00714     /* fall */
00715   case ISO_8859_1:
00716   case ISO_8859_2:
00717   case ISO_8859_3:
00718   case ISO_8859_4:
00719   case ISO_8859_5:
00720   case ISO_8859_6:
00721   case ISO_8859_7:
00722   case ISO_8859_8:
00723   case ISO_8859_9:
00724   case ISO_8859_10:
00725   case ISO_8859_11:
00726   case ISO_8859_13:
00727   case ISO_8859_14:
00728   case ISO_8859_15:
00729   case ISO_8859_16:
00730   case KOI8:
00731     if (c == 0xa0) return 1;
00732     break;
00733 
00734   case KOI8_R:
00735     if (c == 0x9a) return 1;
00736     break;
00737 
00738   default:
00739     exit(-1);
00740   }
00741 
00742   return 0;
00743 }
00744 
00745 static int IsUpper(int enc, int c)
00746 {
00747   if (c >= 0x41 && c <= 0x5a) return 1;
00748 
00749   switch (enc) {
00750   case UNICODE_ISO_8859_1:
00751   case ISO_8859_1:
00752   case ISO_8859_9:
00753     if (c >= 0xc0 && c <= 0xd6) return 1;
00754     if (c >= 0xd8 && c <= 0xde) return 1;
00755     break;
00756 
00757   case ISO_8859_2:
00758     if (c == 0xa1 || c == 0xa3) return 1;
00759     if (c == 0xa5 || c == 0xa6) return 1;
00760     if (c >= 0xa9 && c <= 0xac) return 1;
00761     if (c >= 0xae && c <= 0xaf) return 1;
00762     if (c >= 0xc0 && c <= 0xd6) return 1;
00763     if (c >= 0xd8 && c <= 0xde) return 1;
00764     break;
00765 
00766   case ISO_8859_3:
00767     if (c == 0xa1) return 1;
00768     if (c == 0xa6) return 1;
00769     if (c >= 0xa9 && c <= 0xac) return 1;
00770     if (c == 0xaf) return 1;
00771     if (c >= 0xc0 && c <= 0xc2) return 1;
00772     if (c >= 0xc4 && c <= 0xcf) return 1;
00773     if (c >= 0xd1 && c <= 0xd6) return 1;
00774     if (c >= 0xd8 && c <= 0xde) return 1;
00775     break;
00776 
00777   case ISO_8859_4:
00778     if (c == 0xa1 || c == 0xa3) return 1;
00779     if (c == 0xa5 || c == 0xa6) return 1;
00780     if (c >= 0xa9 && c <= 0xac) return 1;
00781     if (c == 0xae) return 1;
00782     if (c == 0xbd) return 1;
00783     if (c >= 0xc0 && c <= 0xd6) return 1;
00784     if (c >= 0xd8 && c <= 0xde) return 1;
00785     break;
00786 
00787   case ISO_8859_5:
00788     if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
00789     break;
00790 
00791   case ISO_8859_6:
00792     break;
00793 
00794   case ISO_8859_7:
00795     if (c == 0xb6) return 1;
00796     if (c >= 0xb8 && c <= 0xba) return 1;
00797     if (c == 0xbc) return 1;
00798     if (c >= 0xbe && c <= 0xbf) return 1;
00799     if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
00800     break;
00801 
00802   case ISO_8859_8:
00803   case ISO_8859_11:
00804     break;
00805 
00806   case ISO_8859_10:
00807     if (c >= 0xa1 && c <= 0xa6) return 1;
00808     if (c >= 0xa8 && c <= 0xac) return 1;
00809     if (c == 0xae || c == 0xaf) return 1;
00810     if (c >= 0xc0 && c <= 0xde) return 1;
00811     break;
00812 
00813   case ISO_8859_13:
00814     if (c == 0xa8) return 1;
00815     if (c == 0xaa) return 1;
00816     if (c == 0xaf) return 1;
00817     if (c >= 0xc0 && c <= 0xd6) return 1;
00818     if (c >= 0xd8 && c <= 0xde) return 1;
00819     break;
00820 
00821   case ISO_8859_14:
00822     if (c == 0xa1) return 1;
00823     if (c == 0xa4 || c == 0xa6) return 1;
00824     if (c == 0xa8) return 1;
00825     if (c == 0xaa || c == 0xac) return 1;
00826     if (c == 0xaf || c == 0xb0) return 1;
00827     if (c == 0xb2 || c == 0xb4 || c == 0xb7) return 1;
00828     if (c == 0xbb || c == 0xbd) return 1;
00829     if (c >= 0xc0 && c <= 0xde) return 1;
00830     break;
00831 
00832   case ISO_8859_15:
00833     if (c >= 0xc0 && c <= 0xd6) return 1;
00834     if (c >= 0xd8 && c <= 0xde) return 1;
00835     if (c == 0xa6) return 1;
00836     if (c == 0xb4) return 1;
00837     if (c == 0xbc) return 1;
00838     if (c == 0xbe) return 1;
00839     break;
00840 
00841   case ISO_8859_16:
00842     if (c == 0xa1) return 1;
00843     if (c == 0xa3) return 1;
00844     if (c == 0xa6) return 1;
00845     if (c == 0xaa) return 1;
00846     if (c == 0xac) return 1;
00847     if (c == 0xaf) return 1;
00848     if (c == 0xb2) return 1;
00849     if (c == 0xb4) return 1;
00850     if (c == 0xbc) return 1;
00851     if (c == 0xbe) return 1;
00852     if (c >= 0xc0 && c <= 0xde) return 1;
00853     break;
00854 
00855   case KOI8_R:
00856     if (c == 0xb3) return 1;
00857     /* fall */
00858   case KOI8:
00859     if (c >= 0xe0 && c <= 0xff) return 1;
00860     break;
00861 
00862   default:
00863     exit(-1);
00864   }
00865 
00866   return 0;
00867 }
00868 
/*
 * Returns 1 if c lies in 0x30-0x39, 0x41-0x46 or 0x61-0x66, 0 otherwise.
 * The result does not depend on enc.
 */
static int IsXDigit(int enc, int c)
{
  (void)enc;
  return ((0x30 <= c && c <= 0x39) ||
          (0x41 <= c && c <= 0x46) ||
          (0x61 <= c && c <= 0x66)) ? 1 : 0;
}
00876 
00877 static int IsWord(int enc, int c)
00878 {
00879   if (c >= 0x30 && c <= 0x39) return 1;
00880   if (c >= 0x41 && c <= 0x5a) return 1;
00881   if (c == 0x5f) return 1;
00882   if (c >= 0x61 && c <= 0x7a) return 1;
00883 
00884   switch (enc) {
00885   case UNICODE_ISO_8859_1:
00886   case ISO_8859_1:
00887   case ISO_8859_9:
00888     if (c == 0xaa) return 1;
00889     if (c >= 0xb2 && c <= 0xb3) return 1;
00890     if (c == 0xb5) return 1;
00891     if (c >= 0xb9 && c <= 0xba) return 1;
00892     if (c >= 0xbc && c <= 0xbe) return 1;
00893     if (c >= 0xc0 && c <= 0xd6) return 1;
00894     if (c >= 0xd8 && c <= 0xf6) return 1;
00895     if (c >= 0xf8 && c <= 0xff) return 1;
00896     break;
00897 
00898   case ISO_8859_2:
00899     if (c == 0xa1 || c == 0xa3) return 1;
00900     if (c == 0xa5 || c == 0xa6) return 1;
00901     if (c >= 0xa9 && c <= 0xac) return 1;
00902     if (c >= 0xae && c <= 0xaf) return 1;
00903     if (c == 0xb1 || c == 0xb3) return 1;
00904     if (c == 0xb5 || c == 0xb6) return 1;
00905     if (c >= 0xb9 && c <= 0xbc) return 1;
00906     if (c >= 0xbe && c <= 0xbf) return 1;
00907     if (c >= 0xc0 && c <= 0xd6) return 1;
00908     if (c >= 0xd8 && c <= 0xf6) return 1;
00909     if (c >= 0xf8 && c <= 0xfe) return 1;
00910     break;
00911 
00912   case ISO_8859_3:
00913     if (c == 0xa1) return 1;
00914     if (c == 0xa6) return 1;
00915     if (c >= 0xa9 && c <= 0xac) return 1;
00916     if (c == 0xaf) return 1;
00917     if (c >= 0xb1 && c <= 0xb3) return 1;
00918     if (c == 0xb5 || c == 0xb6) return 1;
00919     if (c >= 0xb9 && c <= 0xbd) return 1;
00920     if (c == 0xbf) return 1;
00921     if (c >= 0xc0 && c <= 0xc2) return 1;
00922     if (c >= 0xc4 && c <= 0xcf) return 1;
00923     if (c >= 0xd1 && c <= 0xd6) return 1;
00924     if (c >= 0xd8 && c <= 0xe2) return 1;
00925     if (c >= 0xe4 && c <= 0xef) return 1;
00926     if (c >= 0xf1 && c <= 0xf6) return 1;
00927     if (c >= 0xf8 && c <= 0xfe) return 1;
00928     break;
00929 
00930   case ISO_8859_4:
00931     if (c >= 0xa1 && c <= 0xa3) return 1;
00932     if (c == 0xa5 || c == 0xa6) return 1;
00933     if (c >= 0xa9 && c <= 0xac) return 1;
00934     if (c == 0xae) return 1;
00935     if (c == 0xb1 || c == 0xb3) return 1;
00936     if (c == 0xb5 || c == 0xb6) return 1;
00937     if (c >= 0xb9 && c <= 0xbf) return 1;
00938     if (c >= 0xc0 && c <= 0xd6) return 1;
00939     if (c >= 0xd8 && c <= 0xf6) return 1;
00940     if (c >= 0xf8 && c <= 0xfe) return 1;
00941     break;
00942 
00943   case ISO_8859_5:
00944     if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
00945     if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
00946     break;
00947 
00948   case ISO_8859_6:
00949     if (c >= 0xc1 && c <= 0xda) return 1;
00950     if (c >= 0xe0 && c <= 0xea) return 1;
00951     if (c >= 0xeb && c <= 0xf2) return 1;
00952     break;
00953 
00954   case ISO_8859_7:
00955     if (c == 0xb2 || c == 0xb3) return 1;
00956     if (c == 0xb6) return 1;
00957     if (c >= 0xb8 && c <= 0xba) return 1;
00958     if (c >= 0xbc && c <= 0xbf) return 1;
00959     if (c == 0xc0) return 1;
00960     if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
00961     if (c >= 0xdc && c <= 0xfe) return 1;
00962     break;
00963 
00964   case ISO_8859_8:
00965     if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
00966     if (c >= 0xbc && c <= 0xbe) return 1;
00967     if (c >= 0xe0 && c <= 0xfa) return 1;
00968     break;
00969 
00970   case ISO_8859_10:
00971     if (c >= 0xa1 && c <= 0xff) {
00972       if (c != 0xa7 && c != 0xad && c != 0xb0 && c != 0xb7 && c != 0xbd)
00973        return 1;
00974     }
00975     break;
00976 
00977   case ISO_8859_11:
00978     if (c >= 0xa1 && c <= 0xda) return 1;
00979     if (c >= 0xdf && c <= 0xfb) return 1;
00980     break;
00981 
00982   case ISO_8859_13:
00983     if (c == 0xa8) return 1;
00984     if (c == 0xaa) return 1;
00985     if (c == 0xaf) return 1;
00986     if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
00987     if (c >= 0xbc && c <= 0xbe) return 1;
00988     if (c == 0xb8) return 1;
00989     if (c == 0xba) return 1;
00990     if (c >= 0xbf && c <= 0xd6) return 1;
00991     if (c >= 0xd8 && c <= 0xf6) return 1;
00992     if (c >= 0xf8 && c <= 0xfe) return 1;
00993     break;
00994 
00995   case ISO_8859_14:
00996     if (c >= 0xa1 && c <= 0xff) {
00997       if (c == 0xa3 || c == 0xa7 || c == 0xa9 || c == 0xad || c == 0xae ||
00998          c == 0xb6) return 0;
00999       return 1;
01000     }
01001     break;
01002 
01003   case ISO_8859_15:
01004     if (c == 0xaa) return 1;
01005     if (c >= 0xb2 && c <= 0xb3) return 1;
01006     if (c == 0xb5) return 1;
01007     if (c >= 0xb9 && c <= 0xba) return 1;
01008     if (c >= 0xbc && c <= 0xbe) return 1;
01009     if (c >= 0xc0 && c <= 0xd6) return 1;
01010     if (c >= 0xd8 && c <= 0xf6) return 1;
01011     if (c >= 0xf8 && c <= 0xff) return 1;
01012     if (c == 0xa6) return 1;
01013     if (c == 0xa8) return 1;
01014     if (c == 0xb4) return 1;
01015     if (c == 0xb8) return 1;
01016     break;
01017 
01018   case ISO_8859_16:
01019     if (c == 0xa1) return 1;
01020     if (c == 0xa2) return 1;
01021     if (c == 0xa3) return 1;
01022     if (c == 0xa6) return 1;
01023     if (c == 0xa8) return 1;
01024     if (c == 0xaa) return 1;
01025     if (c == 0xac) return 1;
01026     if (c == 0xae) return 1;
01027     if (c == 0xaf) return 1;
01028     if (c == 0xb2) return 1;
01029     if (c == 0xb3) return 1;
01030     if (c == 0xb4) return 1;
01031     if (c >= 0xb8 && c <= 0xba) return 1;
01032     if (c == 0xbc) return 1;
01033     if (c == 0xbd) return 1;
01034     if (c == 0xbe) return 1;
01035     if (c == 0xbf) return 1;
01036     if (c >= 0xc0 && c <= 0xde) return 1;
01037     if (c >= 0xdf && c <= 0xff) return 1;
01038     break;
01039 
01040   case KOI8_R:
01041     if (c == 0x9d) return 1;
01042     if (c == 0xa3 || c == 0xb3) return 1;
01043     /* fall */
01044   case KOI8:
01045     if (c >= 0xc0 && c <= 0xff) return 1;
01046     break;
01047 
01048   default:
01049     exit(-1);
01050   }
01051 
01052   return 0;
01053 }
01054 
/*
 * Returns 1 if c lies in 0x00-0x7f, 0 otherwise.  The result does not
 * depend on enc.
 */
static int IsAscii(int enc, int c)
{
  (void)enc;
  return (0x00 <= c && c <= 0x7f) ? 1 : 0;
}
01060 
/*
 * Returns 1 only for c == 0x0a, 0 otherwise.  The result does not depend
 * on enc.
 */
static int IsNewline(int enc, int c)
{
  (void)enc;
  return (c == 0x0a) ? 1 : 0;
}
01066 
01067 static int exec(FILE* fp, ENC_INFO* einfo)
01068 {
01069 #define NCOL  8
01070 
01071   int c, val, enc;
01072 
01073   enc = einfo->num;
01074 
01075   fprintf(fp, "static unsigned short Enc%s_CtypeTable[256] = {\n",
01076          einfo->name);
01077 
01078   for (c = 0; c < 256; c++) {
01079     val = 0;
01080     if (IsNewline(enc, c))  val |= ONIGENC_CTYPE_NEWLINE;
01081     if (IsAlpha (enc, c))   val |= ONIGENC_CTYPE_ALPHA;
01082     if (IsBlank (enc, c))   val |= ONIGENC_CTYPE_BLANK;
01083     if (IsCntrl (enc, c))   val |= ONIGENC_CTYPE_CNTRL;
01084     if (IsDigit (enc, c))   val |= ONIGENC_CTYPE_DIGIT;
01085     if (IsGraph (enc, c))   val |= ONIGENC_CTYPE_GRAPH;
01086     if (IsLower (enc, c))   val |= ONIGENC_CTYPE_LOWER;
01087     if (IsPrint (enc, c))   val |= ONIGENC_CTYPE_PRINT;
01088     if (IsPunct (enc, c))   val |= ONIGENC_CTYPE_PUNCT;
01089     if (IsSpace (enc, c))   val |= ONIGENC_CTYPE_SPACE;
01090     if (IsUpper (enc, c))   val |= ONIGENC_CTYPE_UPPER;
01091     if (IsXDigit(enc, c))   val |= ONIGENC_CTYPE_XDIGIT;
01092     if (IsWord  (enc, c))   val |= ONIGENC_CTYPE_WORD;
01093     if (IsAscii (enc, c))   val |= ONIGENC_CTYPE_ASCII;
01094 
01095     if (c % NCOL == 0) fputs("  ", fp);
01096     fprintf(fp, "0x%04x", val);
01097     if (c != 255) fputs(",", fp);
01098     if (c != 0 && c % NCOL == (NCOL-1))
01099       fputs("\n", fp);
01100     else
01101       fputs(" ", fp);
01102   }
01103   fprintf(fp, "};\n");
01104   return 0;
01105 }
01106 
01107 extern int main(int argc, char* argv[])
01108 {
01109   int i;
01110   FILE* fp = stdout;
01111 
01112   for (i = 0; i < sizeof(Info)/sizeof(ENC_INFO); i++) {
01113     exec(fp, &Info[i]);
01114   }
01115 }