Back to index

lightning-sunbird  0.9+nobinonly
jskwgen.c
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
00002  * vim: set sw=4 ts=8 et tw=80:
00003  *
00004  * ***** BEGIN LICENSE BLOCK *****
00005  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00006  *
00007  * The contents of this file are subject to the Mozilla Public License Version
00008  * 1.1 (the "License"); you may not use this file except in compliance with
00009  * the License. You may obtain a copy of the License at
00010  * http://www.mozilla.org/MPL/
00011  *
00012  * Software distributed under the License is distributed on an "AS IS" basis,
00013  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00014  * for the specific language governing rights and limitations under the
00015  * License.
00016  *
00017  * The Original Code is String Switch Generator for JavaScript Keywords,
00018  * released 2005-12-09.
00019  *
00020  * The Initial Developer of the Original Code is
00021  * Igor Bukanov.
00022  * Portions created by the Initial Developer are Copyright (C) 2005-2006
00023  * the Initial Developer. All Rights Reserved.
00024  *
00025  * Contributor(s):
00026  *
00027  * Alternatively, the contents of this file may be used under the terms of
00028  * either of the GNU General Public License Version 2 or later (the "GPL"),
00029  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00030  * in which case the provisions of the GPL or the LGPL are applicable instead
00031  * of those above. If you wish to allow use of your version of this file only
00032  * under the terms of either the GPL or the LGPL, and not to allow others to
00033  * use your version of this file under the terms of the MPL, indicate your
00034  * decision by deleting the provisions above and replace them with the notice
00035  * and other provisions required by the GPL or the LGPL. If you do not delete
00036  * the provisions above, a recipient may use your version of this file under
00037  * the terms of any one of the MPL, the GPL or the LGPL.
00038  *
00039  * ***** END LICENSE BLOCK ***** */
00040 
00041 #include "jsstddef.h"
00042 #include <assert.h>
00043 #include <stdio.h>
00044 #include <stdlib.h>
00045 #include <string.h>
00046 #include <stdarg.h>
00047 #include <ctype.h>
00048 
00049 #include "jsconfig.h"
00050 
00051 const char * const keyword_list[] = {
00052 #define JS_KEYWORD(keyword, type, op, version) #keyword,
00053 #include "jskeyword.tbl"
00054 #undef JS_KEYWORD
00055 };
00056 
00057 struct gen_opt {
00058     FILE *output;                       /* output file for generated source */
00059     unsigned use_if_threshold;          /* max number of choices to generate
00060                                            "if" selector instead of "switch" */
00061     unsigned char_tail_test_threshold;  /* max number of unprocessed columns
00062                                            to use inlined char compare
00063                                            for remaining chars and not generic
00064                                            string compare code */
00065     unsigned indent_level;              /* current source identation level */
00066 };
00067 
00068 static unsigned column_to_compare;
00069 
00070 static int
00071 length_comparator(const void *a, const void *b)
00072 {
00073     const char *str1 = keyword_list[*(unsigned *)a];
00074     const char *str2 = keyword_list[*(unsigned *)b];
00075     return (int)strlen(str1) - (int)strlen(str2);
00076 }
00077 
00078 static int
00079 column_comparator(const void *a, const void *b)
00080 {
00081     const char *str1 = keyword_list[*(unsigned *)a];
00082     const char *str2 = keyword_list[*(unsigned *)b];
00083     return (int)str1[column_to_compare] - (int)str2[column_to_compare];
00084 }
00085 
00086 static unsigned
00087 count_different_lengths(unsigned indexes[], unsigned nelem)
00088 {
00089     unsigned nlength, current_length, i, l;
00090 
00091     current_length = 0;
00092     nlength = 0;
00093     for (i = 0; i != nelem; ++i) {
00094         l = (unsigned)strlen(keyword_list[indexes[i]]);
00095         assert(l != 0);
00096         if (current_length != l) {
00097             ++nlength;
00098             current_length = l;
00099         }
00100     }
00101     return nlength;
00102 }
00103 
00104 static void
00105 find_char_span_and_count(unsigned indexes[], unsigned nelem, unsigned column,
00106                          unsigned *span_result, unsigned *count_result)
00107 {
00108     unsigned i, count;
00109     unsigned char c, prev, minc, maxc;
00110 
00111     assert(nelem != 0);
00112     minc = maxc = prev = (unsigned char)keyword_list[indexes[0]][column];
00113     count = 1;
00114     for (i = 1; i != nelem; ++i) {
00115         c = (unsigned char)keyword_list[indexes[i]][column];
00116         if (prev != c) {
00117             prev = c;
00118             ++count;
00119             if (minc > c) {
00120                 minc = c;
00121             } else if (maxc < c) {
00122                 maxc = c;
00123             }
00124         }
00125     }
00126 
00127     *span_result = maxc - minc + 1;
00128     *count_result = count;
00129 }
00130 
00131 static unsigned
00132 find_optimal_switch_column(struct gen_opt *opt,
00133                            unsigned indexes[], unsigned nelem,
00134                            unsigned columns[], unsigned unprocessed_columns,
00135                            int *use_if_result)
00136 {
00137     unsigned i;
00138     unsigned span, min_span, min_span_index;
00139     unsigned nchar, min_nchar, min_nchar_index;
00140 
00141     assert(unprocessed_columns != 0);
00142     i = 0;
00143     min_nchar = min_span = (unsigned)-1;
00144     min_nchar_index = min_span_index = 0;
00145     do {
00146         column_to_compare = columns[i];
00147         qsort(indexes, nelem, sizeof(indexes[0]), column_comparator);
00148         find_char_span_and_count(indexes, nelem, column_to_compare,
00149                                  &span, &nchar);
00150         assert(span != 0);
00151         if (span == 1) {
00152             assert(nchar == 1);
00153             *use_if_result = 1;
00154             return 1;
00155         }
00156         assert(nchar != 1);
00157         if (min_span > span) {
00158             min_span = span;
00159             min_span_index = i;
00160         }
00161         if (min_nchar > nchar) {
00162             min_nchar = nchar;
00163             min_nchar_index = i;
00164         }
00165     } while (++i != unprocessed_columns);
00166 
00167     if (min_nchar <= opt->use_if_threshold) {
00168         *use_if_result = 1;
00169         i = min_nchar_index;
00170     } else {
00171         *use_if_result = 0;
00172         i = min_span_index;
00173     }
00174 
00175     /*
00176      * Restore order corresponding to i if it was destroyed by
00177      * subsequent sort.
00178      */
00179     if (i != unprocessed_columns - 1) {
00180         column_to_compare = columns[i];
00181         qsort(indexes, nelem, sizeof(indexes[0]), column_comparator);
00182     }
00183 
00184     return i;
00185 }
00186 
00187 
00188 static void
00189 p(struct gen_opt *opt, const char *format, ...)
00190 {
00191     va_list ap;
00192 
00193     va_start(ap, format);
00194     vfprintf(opt->output, format, ap);
00195     va_end(ap);
00196 }
00197 
00198 /* Size for '\xxx' where xxx is octal escape */
00199 #define MIN_QUOTED_CHAR_BUFFER 7
00200 
00201 static char *
00202 qchar(char c, char *quoted_buffer)
00203 {
00204     char *s;
00205 
00206     s = quoted_buffer;
00207     *s++ = '\'';
00208     switch (c) {
00209       case '\n': c = 'n'; goto one_char_escape;
00210       case '\r': c = 'r'; goto one_char_escape;
00211       case '\t': c = 't'; goto one_char_escape;
00212       case '\f': c = 't'; goto one_char_escape;
00213       case '\0': c = '0'; goto one_char_escape;
00214       case '\'': goto one_char_escape;
00215       one_char_escape:
00216         *s++ = '\\';
00217         break;
00218       default:
00219         if (!isprint(c)) {
00220             *s++ = '\\';
00221             *s++ = (char)('0' + (0x3 & (((unsigned char)c) >> 6)));
00222             *s++ = (char)('0' + (0x7 & (((unsigned char)c) >> 3)));
00223             c = (char)('0' + (0x7 & ((unsigned char)c)));
00224         }
00225     }
00226     *s++ = c;
00227     *s++ = '\'';
00228     *s = '\0';
00229     assert(s + 1 <= quoted_buffer + MIN_QUOTED_CHAR_BUFFER);
00230     return quoted_buffer;
00231 }
00232 
00233 static void
00234 nl(struct gen_opt *opt)
00235 {
00236     putc('\n', opt->output);
00237 }
00238 
00239 static void
00240 indent(struct gen_opt *opt)
00241 {
00242     unsigned n = opt->indent_level;
00243     while (n != 0) {
00244         --n;
00245         fputs("    ", opt->output);
00246     }
00247 }
00248 
00249 static void
00250 line(struct gen_opt *opt, const char *format, ...)
00251 {
00252     va_list ap;
00253 
00254     indent(opt);
00255     va_start(ap, format);
00256     vfprintf(opt->output, format, ap);
00257     va_end(ap);
00258     nl(opt);
00259 }
00260 
00261 static void
00262 generate_letter_switch_r(struct gen_opt *opt,
00263                          unsigned indexes[], unsigned nelem,
00264                          unsigned columns[], unsigned unprocessed_columns)
00265 {
00266     char qbuf[MIN_QUOTED_CHAR_BUFFER];
00267 
00268     assert(nelem != 0);
00269     if (nelem == 1) {
00270         unsigned kw_index = indexes[0];
00271         const char *keyword = keyword_list[kw_index];
00272 
00273         if (unprocessed_columns == 0) {
00274             line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword);
00275         } else if (unprocessed_columns > opt->char_tail_test_threshold) {
00276             line(opt, "JSKW_TEST_GUESS(%u) /* %s */", kw_index, keyword);
00277         } else {
00278             unsigned i, column;
00279 
00280             indent(opt); p(opt, "if (");
00281             for (i = 0; i != unprocessed_columns; ++i) {
00282                 column = columns[i];
00283                 qchar(keyword[column], qbuf);
00284                 p(opt, "%sJSKW_AT(%u)==%s", (i == 0) ? "" : " && ",
00285                   column, qbuf);
00286             }
00287             p(opt, ") {"); nl(opt);
00288             ++opt->indent_level;
00289             line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword);
00290             --opt->indent_level;
00291             line(opt, "}");
00292             line(opt, "JSKW_NO_MATCH()");
00293         }
00294     } else {
00295         unsigned optimal_column_index, optimal_column;
00296         unsigned i;
00297         int use_if;
00298         char current;
00299 
00300         assert(unprocessed_columns != 0);
00301         optimal_column_index = find_optimal_switch_column(opt, indexes, nelem,
00302                                                           columns,
00303                                                           unprocessed_columns,
00304                                                           &use_if);
00305         optimal_column = columns[optimal_column_index];
00306         columns[optimal_column_index] = columns[unprocessed_columns - 1];
00307 
00308         if (!use_if)
00309             line(opt, "switch (JSKW_AT(%u)) {", optimal_column);
00310 
00311         current = keyword_list[indexes[0]][optimal_column];
00312         for (i = 0; i != nelem;) {
00313             unsigned same_char_begin = i;
00314             char next = current;
00315 
00316             for (++i; i != nelem; ++i) {
00317                 next = keyword_list[indexes[i]][optimal_column];
00318                 if (next != current)
00319                     break;
00320             }
00321             qchar(current, qbuf);
00322             if (use_if) {
00323                 line(opt, "if (JSKW_AT(%u) == %s) {", optimal_column, qbuf);
00324             } else {
00325                 line(opt, "  case %s:", qbuf);
00326             }
00327             ++opt->indent_level;
00328             generate_letter_switch_r(opt, indexes + same_char_begin,
00329                                      i - same_char_begin,
00330                                      columns, unprocessed_columns - 1);
00331             --opt->indent_level;
00332             if (use_if) {
00333                 line(opt, "}");
00334             }
00335             current = next;
00336         }
00337 
00338         if (!use_if) {
00339             line(opt, "}");
00340         }
00341 
00342         columns[optimal_column_index] = optimal_column;
00343 
00344         line(opt, "JSKW_NO_MATCH()");
00345     }
00346 }
00347 
00348 static void
00349 generate_letter_switch(struct gen_opt *opt,
00350                        unsigned indexes[], unsigned nelem,
00351                        unsigned current_length)
00352 {
00353     unsigned *columns;
00354     unsigned i;
00355 
00356     columns = malloc(sizeof(columns[0]) * current_length);
00357     if (!columns) {
00358         perror("malloc");
00359         exit(EXIT_FAILURE);
00360     }
00361     for (i = 0; i != current_length; ++i) {
00362         columns[i] = i;
00363     }
00364     generate_letter_switch_r(opt, indexes, nelem, columns, current_length);
00365     free(columns);
00366 }
00367 
00368 
00369 static void
00370 generate_switch(struct gen_opt *opt)
00371 {
00372     unsigned *indexes;
00373     unsigned nlength;
00374     unsigned i, current;
00375     int use_if;
00376     unsigned nelem;
00377 
00378     nelem = sizeof(keyword_list)/sizeof(keyword_list[0]);
00379 
00380     line(opt, "/*");
00381     line(opt, " * Generating switch for the list of %u entries:", nelem);
00382     for (i = 0; i != nelem; ++i) {
00383         line(opt, " * %s", keyword_list[i]);
00384     }
00385     line(opt, " */");
00386 
00387     indexes = malloc(sizeof(indexes[0]) * nelem);
00388     if (!indexes) {
00389         perror("malloc");
00390         exit(EXIT_FAILURE);
00391     }
00392     for (i = 0; i != nelem; ++i)
00393         indexes[i] = i;
00394     qsort(indexes, nelem, sizeof(indexes[i]), length_comparator);
00395     nlength = count_different_lengths(indexes, nelem);
00396 
00397     use_if = (nlength <= opt->use_if_threshold);
00398 
00399     if (!use_if)
00400         line(opt, "switch (JSKW_LENGTH()) {");
00401 
00402     current = (unsigned)strlen(keyword_list[indexes[0]]);
00403     for (i = 0; i != nelem;) {
00404         unsigned same_length_begin = i;
00405         unsigned next = current;
00406 
00407         for (++i; i != nelem; ++i) {
00408             next = (unsigned)strlen(keyword_list[indexes[i]]);
00409             if (next != current)
00410                 break;
00411         }
00412         if (use_if) {
00413             line(opt, "if (JSKW_LENGTH() == %u) {", current);
00414         } else {
00415             line(opt, "  case %u:", current);
00416         }
00417         ++opt->indent_level;
00418         generate_letter_switch(opt, indexes + same_length_begin,
00419                                i - same_length_begin,
00420                                current);
00421         --opt->indent_level;
00422         if (use_if) {
00423             line(opt, "}");
00424         }
00425         current = next;
00426     }
00427     if (!use_if)
00428         line(opt, "}");
00429     line(opt, "JSKW_NO_MATCH()");
00430     free(indexes);
00431 }
00432 
00433 int main(int argc, char **argv)
00434 {
00435     struct gen_opt opt;
00436 
00437     if (argc < 2) {
00438         opt.output = stdout;
00439     } else {
00440         opt.output = fopen(argv[1], "w");
00441         if (!opt.output) {
00442             perror("fopen");
00443             exit(EXIT_FAILURE);
00444         }
00445     }
00446     opt.indent_level = 1;
00447     opt.use_if_threshold = 3;
00448     opt.char_tail_test_threshold = 4;
00449 
00450     generate_switch(&opt);
00451 
00452     if (opt.output != stdout) {
00453         if (fclose(opt.output)) {
00454             perror("fclose");
00455             exit(EXIT_FAILURE);
00456         }
00457     }
00458 
00459     return EXIT_SUCCESS;
00460 }