Back to index

php5  5.3.10
gen_exp.c
Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include <stddef.h>
00003 #include <string.h>
00004 #include <stdlib.h>
00005 #include <errno.h>
00006 
/*
 * One row of the mapping table: a Unicode code point plus the list of
 * CP932 codes that were recorded for it.
 */
struct mappings_entry {
	int cp_uni;      /* Unicode code point; the table is kept ordered by it */
	int n;           /* number of cp_932[] slots currently in use */
	int cp_932[16];  /* CP932 codes for cp_uni, in the order they were added */
};
00012 
/*
 * Growable array of mappings_entry records.
 */
struct mappings {
	size_t n;                        /* entries in use */
	size_t nalloc;                   /* entries the allocation can hold */
	struct mappings_entry *entries;  /* heap storage; null until first growth */
};
00018 
00019 static void mappings_init(struct mappings *map)
00020 {
00021        map->n = 0;
00022        map->nalloc = 0;
00023        map->entries = 0;
00024 }
00025 
00026 static void mappings_destroy(struct mappings *map)
00027 {
00028        if (map->entries)
00029               free(map->entries);
00030 }
00031 
00032 static int mappings_grow(struct mappings *map)
00033 {
00034        if (map->n >= map->nalloc) {
00035               struct mappings_entry *new_entries;
00036               size_t n = map->nalloc << 1, a;
00037               if (n == 0)
00038                      n = 1;
00039               else if (n <= map->n)
00040                      return 2;
00041               a = sizeof(*map->entries) * n;
00042               if (a / n != sizeof(*map->entries))
00043                      return 2;
00044               new_entries = realloc(map->entries, a);
00045               if (!new_entries)
00046                      return 2;
00047               map->entries = new_entries;
00048               map->nalloc = n;
00049        }
00050        return 0;
00051 }
00052 
00053 static int mappings_add(struct mappings *map, int cp_uni, int cp_932)
00054 {
00055        size_t i;
00056        size_t s = 0, e = map->n;
00057        struct mappings_entry *entry;
00058 
00059        for (;;) {
00060               i = (s + e) / 2;
00061               entry = &map->entries[i];
00062               if (e == i || entry->cp_uni > cp_uni) {
00063                      if (e == i) {
00064                             int r = mappings_grow(map);
00065                             if (r)
00066                                    return r;
00067                             if (map->n > i) {
00068                                    size_t n = map->n - i, a = sizeof(*map->entries) * n;
00069                                    if (a / n != sizeof(*map->entries))
00070                                           return 2;
00071                                    memmove(&map->entries[i + 1], &map->entries[i], a);
00072                             }
00073                             ++map->n;
00074                             entry = &map->entries[i];
00075                             entry->cp_uni = cp_uni;
00076                             entry->n = 0;
00077                             break;
00078                      }
00079                      e = i;
00080               } else if (entry->cp_uni < cp_uni) {
00081                      if (s == i) {
00082                             int r = mappings_grow(map);
00083                             if (r)
00084                                    return r;
00085                             if (map->n > i + 1) {
00086                                    size_t n = (map->n - i - 1), a = sizeof(*map->entries) * n;
00087                                    if (a / n != sizeof(*map->entries))
00088                                           return 2;
00089                                    memmove(&map->entries[i + 2], &map->entries[i + 1], a);
00090                             }
00091                             ++map->n;
00092                             entry = &map->entries[i + 1];
00093                             entry->cp_uni = cp_uni;
00094                             entry->n = 0;
00095                             break;
00096                      }
00097                      s = i;
00098               } else {
00099                      break;
00100               }
00101        }
00102        if (entry->n >= sizeof(entry->cp_932) / sizeof(*entry->cp_932))
00103               return 1;
00104        entry->cp_932[entry->n++] = cp_932;
00105        return 0;
00106 }
00107 
/*
 * Describes one selectable script generator.
 */
struct generator_entry {
	const char *name;      /* identifier matched against the command line */
	const char *prologue;  /* text printed before any entry is visited */
	const char *epilogue;  /* text printed after the last entry */
	void (*visitor)(const struct mappings_entry *);  /* called once per entry */
};
00114 
/*
 * Encodes code point `k` into `buf` using the original (up to 6 byte)
 * UTF-8 scheme and appends a terminating NUL.
 *
 * Returns the number of bytes written, not counting the terminator, so
 * `buf` must have room for up to 7 bytes.  Values below 0x80 (including
 * negative ones) are stored as a single byte.
 */
static int utf32_utf8(char *buf, int k)
{
	int len, lead, pos;

	if (k < 0x80) {
		len = 1;
		lead = 0x00;
	} else if (k < 0x800) {
		len = 2;
		lead = 0xc0;
	} else if (k < 0x10000) {
		len = 3;
		lead = 0xe0;
	} else if (k < 0x200000) {
		len = 4;
		lead = 0xf0;
	} else if (k < 0x4000000) {
		len = 5;
		lead = 0xf8;
	} else {
		len = 6;
		lead = 0xfc;
	}

	if (len == 1) {
		buf[0] = k;
	} else {
		/* Fill continuation bytes from the end, six payload bits each. */
		for (pos = len - 1; pos > 0; --pos) {
			buf[pos] = 0x80 | (k & 0x3f);
			k >>= 6;
		}
		/* Whatever is left goes into the lead byte. */
		buf[0] = lead | k;
	}
	buf[len] = '\0';

	return len;
}
00157 
/* Trailer appended to every generated expect script. */
static const char epilogue[] =
"close\n";

/*
 * Header shared by every generated expect script.  The only part that
 * differs between generators is the pair of encoding names handed to
 * tests/conv_encoding after the language name; they are pasted into the
 * spawn line in the order given.
 */
#define EXPECT_PROLOGUE(enc_a, enc_b) \
	"#!/usr/bin/expect -f\n" \
	"spawn tests/conv_encoding Japanese " enc_a " " enc_b "\n" \
	"set timeout 1\n" \
	"\n" \
	"expect_after {\n" \
	"    \"\\[^\\r\\n\\]*\\r\\n\" { fail $test }\n" \
	"}\n"

static const char prologue_to_cp932[] = EXPECT_PROLOGUE("CP932", "UTF-8");

static const char prologue_to_cp50220[] = EXPECT_PROLOGUE("CP50220", "UTF-8");

static const char prologue_to_cp50222[] = EXPECT_PROLOGUE("CP50222", "UTF-8");

static const char prologue_from_cp932[] = EXPECT_PROLOGUE("UTF-8", "CP932");

#undef EXPECT_PROLOGUE
00196 
00197 static void to_cp932_visitor(const struct mappings_entry *entry)
00198 {
00199        char buf_uni[32], buf_cp932[8];
00200        int i;
00201 
00202        if (entry->cp_uni < 32 || entry->cp_uni == 127)
00203               return;
00204 
00205        i = utf32_utf8(buf_uni, entry->cp_uni);
00206        buf_uni[i * 4] = '\0';
00207        while (--i >= 0) {
00208               unsigned char c = ((unsigned char *)buf_uni)[i];
00209               buf_uni[i * 4] = '\\';
00210               buf_uni[i * 4 + 1] = 'x';
00211               buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4];
00212               buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15];
00213        }
00214 
00215        printf("set test \"U+%06X\"\n"
00216               "send -- \"%s\r\"\n"
00217                  "sleep 0.001\n"
00218               "expect {\n", entry->cp_uni, buf_uni);
00219 
00220        for (i = 0; i < entry->n; ++i) {
00221               int len = 0;
00222               const int c = entry->cp_932[i];
00223               if (c >= 0x100) {
00224                      len = 2;
00225                      sprintf(buf_cp932, "%%%02x%%%02x", (c >> 8) & 0xff, c & 0xff);
00226               } else {
00227                      len = 1;
00228                      sprintf(buf_cp932, "%%%02x", c);
00229               }
00230               printf("    \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp932, len);
00231        }
00232 
00233        printf("}\n");
00234 }
00235 
00236 static void from_cp932_visitor(const struct mappings_entry *entry)
00237 {
00238        char buf_uni[32], buf_cp932[8];
00239        int i, len;
00240 
00241        if (entry->cp_uni < 32 || entry->cp_uni == 127)
00242               return;
00243 
00244        len = utf32_utf8(buf_uni, entry->cp_uni);
00245        buf_uni[len * 3] = '\0';
00246        i = len;
00247        while (--i >= 0) {
00248               unsigned char c = ((unsigned char *)buf_uni)[i];
00249               buf_uni[i * 3] = '%';
00250               buf_uni[i * 3 + 1] = "0123456789abcdef"[c >> 4];
00251               buf_uni[i * 3 + 2] = "0123456789abcdef"[c & 15];
00252        }
00253 
00254        for (i = 0; i < entry->n; ++i) {
00255               const int c = entry->cp_932[i];
00256               if (c >= 0x100)
00257                      sprintf(buf_cp932, "\\x%02x\\x%02x", (c >> 8) & 0xff, c & 0xff);
00258               else
00259                      sprintf(buf_cp932, "\\x%02x", c);
00260               printf("set test \"U+%06X\"\n"
00261                         "send -- \"%s\r\"\n"
00262                         "sleep 0.001\n"
00263                         "expect {\n"
00264                      "    \"%s (%d)\\r\\n\" { pass $test }\n"
00265                      "}\n",
00266                         entry->cp_uni, buf_cp932, buf_uni, len);
00267        }
00268 }
00269 
00270 static void to_cp50220_visitor(const struct mappings_entry *entry)
00271 {
00272        char buf_uni[32], buf_cp50220[32];
00273        int i;
00274 
00275        if (entry->cp_uni < 32 || entry->cp_uni == 127)
00276               return;
00277 
00278        i = utf32_utf8(buf_uni, entry->cp_uni);
00279        buf_uni[i * 4] = '\0';
00280        while (--i >= 0) {
00281               unsigned char c = ((unsigned char *)buf_uni)[i];
00282               buf_uni[i * 4] = '\\';
00283               buf_uni[i * 4 + 1] = 'x';
00284               buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4];
00285               buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15];
00286        }
00287 
00288        printf("set test \"U+%06X\"\n"
00289               "send -- \"%s\r\"\n"
00290                  "sleep 0.001\n"
00291               "expect {\n", entry->cp_uni, buf_uni);
00292 
00293        for (i = 0; i < entry->n; ++i) {
00294               int len = 0;
00295               const int c = entry->cp_932[i];
00296               if (c >= 0xa1 && c < 0xe0) {
00297                      static const int jisx0208_tl_map[] = {
00298                             0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
00299                             0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
00300                             0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
00301                             0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
00302                             0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
00303                             0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
00304                             0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
00305                             0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
00306                      };
00307                      const int j = jisx0208_tl_map[c - 0xa0];
00308                      len = 8;
00309                      sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff);
00310               } else if (c >= 0x100) {
00311                      const int j = ((((c & 0xff00) - (c >= 0xe000 ? 0xb000: 0x7000)) << 1) | ((c & 0xff) - (c & 0x80 ? 32: 31))) - ((c & 0xff) >= 159 ? 94: 0x100);
00312                      len = 8;
00313                      sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff);
00314               } else {
00315                      len = 1;
00316                      sprintf(buf_cp50220, "%%%02x", c);
00317               }
00318               printf("    \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp50220, len);
00319        }
00320 
00321        printf("}\n");
00322 }
00323 
00324 static void to_cp50222_visitor(const struct mappings_entry *entry)
00325 {
00326        char buf_uni[32], buf_cp50220[32];
00327        int i;
00328 
00329        if (entry->cp_uni < 32 || entry->cp_uni == 127)
00330               return;
00331 
00332        i = utf32_utf8(buf_uni, entry->cp_uni);
00333        buf_uni[i * 4] = '\0';
00334        while (--i >= 0) {
00335               unsigned char c = ((unsigned char *)buf_uni)[i];
00336               buf_uni[i * 4] = '\\';
00337               buf_uni[i * 4 + 1] = 'x';
00338               buf_uni[i * 4 + 2] = "0123456789abcdef"[c >> 4];
00339               buf_uni[i * 4 + 3] = "0123456789abcdef"[c & 15];
00340        }
00341 
00342        printf("set test \"U+%06X\"\n"
00343               "send -- \"%s\r\"\n"
00344                  "sleep 0.001\n"
00345               "expect {\n", entry->cp_uni, buf_uni);
00346 
00347        for (i = 0; i < entry->n; ++i) {
00348               int len = 0;
00349               const int c = entry->cp_932[i];
00350               if (c >= 0xa1 && c < 0xe0) {
00351                      len = 3;
00352                      sprintf(buf_cp50220, "%%0e%%%02x%%0f", c - 0x80);
00353               } else if (c >= 0x100) {
00354                      const int j = ((((c & 0xff00) - (c >= 0xe000 ? 0xb000: 0x7000)) << 1) | ((c & 0xff) - (c & 0x80 ? 32: 31))) - ((c & 0xff) >= 159 ? 94: 0x100);
00355                      len = 8;
00356                      sprintf(buf_cp50220, "%%1b%%24%%42%%%02x%%%02x%%1b%%28%%42", j >> 8, j & 0xff);
00357               } else {
00358                      len = 1;
00359                      sprintf(buf_cp50220, "%%%02x", c);
00360               }
00361               printf("    \"%s (%d)\\r\\n\" { pass $test }\n", buf_cp50220, len);
00362        }
00363 
00364        printf("}\n");
00365 }
00366 
00367 
00368 static struct generator_entry entries[] = {
00369        { "to_cp932", prologue_to_cp932, epilogue, to_cp932_visitor },
00370        { "to_cp50220", prologue_to_cp50220, epilogue, to_cp50220_visitor },
00371        { "to_cp50222", prologue_to_cp50222, epilogue, to_cp50222_visitor },
00372        { "from_cp932", prologue_from_cp932, epilogue, from_cp932_visitor },
00373        { NULL }
00374 };
00375 
00376 static const char cp932_txt[] = "CP932.TXT";
00377 
00378 int main(int argc, char **argv)
00379 {
00380        int retval = 0;
00381        FILE *fp;
00382        char buf[1024];
00383        struct generator_entry* gen;
00384        struct mappings map;
00385 
00386        if (argc <= 1) {
00387               fprintf(stderr, "usage: %s generator\n", argv[0]);
00388               return 255;
00389        }
00390 
00391        for (gen = entries;; ++gen) {
00392               if (!gen->name) {
00393                      fprintf(stderr, "Unknown generator: %s\n", argv[1]);
00394                      return 1;
00395               }
00396               if (strcmp(gen->name, argv[1]) == 0)
00397                      break;
00398        }
00399 
00400     fp = fopen(cp932_txt, "r");
00401        if (!fp) {
00402               fprintf(stderr, "Failed to open %s\n", cp932_txt);
00403               return 2;
00404        }
00405 
00406        mappings_init(&map);
00407 
00408        while (fgets(buf, sizeof(buf), fp)) {
00409               const char *fields[16];
00410               char *p = buf;
00411               int field = 0;
00412               int cp_932, cp_uni;
00413               for (;;) {
00414                      char *q = 0;
00415                      int eol = 0;
00416 
00417                      if (field >= sizeof(fields) / sizeof(*fields)) {
00418                             fprintf(stderr, "Too many fields (incorrect file?)\n");
00419                             retval = 3;
00420                             goto out;
00421                      }
00422 
00423                      for (;;) {
00424                             if (*p == '\0' || *p == '#' || *p == 0x0a) {
00425                                    eol = 1;
00426                                    break;
00427                             } else if (*p != ' ' && *p != '\t') {
00428                                    break;
00429                             }
00430                             ++p;
00431                      }
00432 
00433                      if (eol)
00434                             break;
00435 
00436                      q = p;
00437 
00438                      for (;;) {
00439                             if (*p == '\0' || *p == '#' || *p == 0x0a) {
00440                                    eol = 1;
00441                                    break;
00442                             } else if (*p == ' ' || *p == '\t') {
00443                                    break;
00444                             }
00445                             ++p;
00446                      }
00447 
00448                      *p = '\0';
00449                      fields[field++] = q;
00450 
00451                      if (eol)
00452                             break;
00453                      ++p;
00454               }
00455               if (field == 0 || field == 1) {
00456                      continue;
00457               } else if (field != 2) {
00458                      fprintf(stderr, "Unexpected field count (expected 2, got %d)\n", field);
00459                      retval = 3;
00460                      goto out;
00461               }
00462               cp_932 = strtol(fields[0], NULL, 0);
00463               if (errno == ERANGE || errno == EINVAL) {
00464                      fprintf(stderr, "Malformed field value: %s\n", fields[0]);
00465                      retval = 4;
00466                      goto out;
00467               }
00468               cp_uni = strtol(fields[1], NULL, 0);
00469               if (errno == ERANGE || errno == EINVAL) {
00470                      fprintf(stderr, "Malformed field value: %s\n", fields[1]);
00471                      retval = 4;
00472                      goto out;
00473               }
00474 
00475               if (mappings_add(&map, cp_uni, cp_932)) {
00476                      fprintf(stderr, "Too many mappings to the same Unicode codepoint (U+%06X)\n", cp_uni);
00477                      retval = 4;
00478                      goto out;
00479               }
00480        }
00481 
00482        {
00483               size_t i;
00484               printf("%s", gen->prologue);
00485               for (i = 0; i < map.n; ++i)
00486                      gen->visitor(&map.entries[i]);
00487               printf("%s", gen->epilogue);
00488        }
00489 
00490 out:
00491        mappings_destroy(&map);
00492        return retval;
00493 }
00494 
00495 /*
00496  * vim: sts=4 sw=4 ts=4 noet
00497  */