Back to index

lightning-sunbird  0.9+nobinonly
prefread.cpp
Go to the documentation of this file.
00001 /* ***** BEGIN LICENSE BLOCK *****
00002  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00003  *
00004  * The contents of this file are subject to the Mozilla Public License Version
00005  * 1.1 (the "License"); you may not use this file except in compliance with
00006  * the License. You may obtain a copy of the License at
00007  * http://www.mozilla.org/MPL/
00008  *
00009  * Software distributed under the License is distributed on an "AS IS" basis,
00010  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00011  * for the specific language governing rights and limitations under the
00012  * License.
00013  *
00014  * The Original Code is Mozilla.
00015  *
00016  * The Initial Developer of the Original Code is Darin Fisher.
00017  * Portions created by the Initial Developer are Copyright (C) 2003
00018  * the Initial Developer. All Rights Reserved.
00019  *
00020  * Contributor(s):
00021  *   Darin Fisher <darin@meer.net>
00022  *
00023  * Alternatively, the contents of this file may be used under the terms of
00024  * either the GNU General Public License Version 2 or later (the "GPL"), or
00025  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00026  * in which case the provisions of the GPL or the LGPL are applicable instead
00027  * of those above. If you wish to allow use of your version of this file only
00028  * under the terms of either the GPL or the LGPL, and not to allow others to
00029  * use your version of this file under the terms of the MPL, indicate your
00030  * decision by deleting the provisions above and replace them with the notice
00031  * and other provisions required by the GPL or the LGPL. If you do not delete
00032  * the provisions above, a recipient may use your version of this file under
00033  * the terms of any one of the MPL, the GPL or the LGPL.
00034  *
00035  * ***** END LICENSE BLOCK ***** */
00036 
00037 #include <stdlib.h>
00038 #include <string.h>
00039 #include <ctype.h>
00040 #include "prefread.h"
00041 #include "nsString.h"
00042 #include "nsUTF8Utils.h"
00043 
00044 #ifdef TEST_PREFREAD
00045 #include <stdio.h>
00046 #define NS_WARNING(_s) printf(">>> " _s "!\n")
00047 #define NS_NOTREACHED(_s) NS_WARNING(_s)
00048 #else
00049 #include "nsDebug.h" // for NS_WARNING
00050 #endif
00051 
00052 /* pref parser states; PREF_ParseBuf keeps the current one in ps->state and a "return to" state in ps->nextstate */
00053 enum {
00054     PREF_PARSE_INIT,                    /* between statements: skip chars until '/', '#', 'u' or 'p' */
00055     PREF_PARSE_MATCH_STRING,            /* matching the remaining characters of ps->smatch */
00056     PREF_PARSE_UNTIL_NAME,              /* after '(': waiting for the quote that opens the pref name */
00057     PREF_PARSE_QUOTED_STRING,           /* copying string characters to the line buffer until ps->quotechar */
00058     PREF_PARSE_UNTIL_COMMA,             /* after the name: waiting for ',' */
00059     PREF_PARSE_UNTIL_VALUE,             /* after ',': first value character decides the value type */
00060     PREF_PARSE_INT_VALUE,               /* collecting the digits of an integer value */
00061     PREF_PARSE_COMMENT_MAYBE_START,     /* saw '/': next must be '*' (block) or '/' (line) */
00062     PREF_PARSE_COMMENT_BLOCK,           /* inside a block comment, looking for '*' */
00063     PREF_PARSE_COMMENT_BLOCK_MAYBE_END, /* saw '*' inside a block comment: '/' ends the comment */
00064     PREF_PARSE_ESC_SEQUENCE,            /* saw a backslash inside a quoted string */
00065     PREF_PARSE_HEX_ESCAPE,              /* reading the hex digits of a \x or \u escape */
00066     PREF_PARSE_UTF16_LOW_SURROGATE,     /* have a high surrogate: expecting a backslash then 'u' */
00067     PREF_PARSE_UNTIL_OPEN_PAREN,        /* after the keyword: waiting for '(' */
00068     PREF_PARSE_UNTIL_CLOSE_PAREN,       /* after the value: waiting for ')' */
00069     PREF_PARSE_UNTIL_SEMICOLON,         /* after ')': ';' triggers the reader callback */
00070     PREF_PARSE_UNTIL_EOL                /* skipping a line comment until '\r', '\n' or 0x1A */
00071 };
00072 
00073 #define UTF16_ESC_NUM_DIGITS    4 /* hex digits expected after a 'u' escape */
00074 #define HEX_ESC_NUM_DIGITS      2 /* hex digits expected after an 'x' escape */
00075 #define BITS_PER_HEX_DIGIT      4 /* shift applied to the accumulator per hex digit */
00076 
00077 static const char kUserPref[] = "user_pref"; /* keyword matched when a statement starts with 'u' */
00078 static const char kPref[] = "pref";          /* keyword matched when a statement starts with 'p'; ps->smatch is compared to this array by address to set ps->fdefault */
00079 static const char kTrue[] = "true";          /* boolean literal; ps->vb is compared to this array by address in pref_DoCallback */
00080 static const char kFalse[] = "false";        /* boolean literal matched when a value starts with 'f' */
00081 
00102 static PRBool
00103 pref_GrowBuf(PrefParseState *ps)
00104 {
00105     int bufLen, curPos, valPos;
00106 
00107     bufLen = ps->lbend - ps->lb;
00108     curPos = ps->lbcur - ps->lb;
00109     valPos = ps->vb    - ps->lb;
00110 
00111     if (bufLen == 0)
00112         bufLen = 128;  /* default buffer size */
00113     else
00114         bufLen <<= 1;  /* double buffer size */
00115 
00116 #ifdef TEST_PREFREAD
00117     fprintf(stderr, ">>> realloc(%d)\n", bufLen);
00118 #endif
00119 
00120     ps->lb = (char*) realloc(ps->lb, bufLen);
00121     if (!ps->lb)
00122         return PR_FALSE;
00123 
00124     ps->lbcur = ps->lb + curPos;
00125     ps->lbend = ps->lb + bufLen;
00126     ps->vb    = ps->lb + valPos;
00127 
00128     return PR_TRUE;
00129 }
00130 
00142 static PRBool
00143 pref_DoCallback(PrefParseState *ps)
00144 {
00145     PrefValue  value;
00146 
00147     switch (ps->vtype) {
00148     case PREF_STRING:
00149         value.stringVal = ps->vb;
00150         break;
00151     case PREF_INT:
00152         if ((ps->vb[0] == '-' || ps->vb[0] == '+') && ps->vb[1] == '\0') {
00153             NS_WARNING("malformed integer value");
00154             return PR_FALSE;
00155         }
00156         value.intVal = atoi(ps->vb);
00157         break;
00158     case PREF_BOOL:
00159         value.boolVal = (ps->vb == kTrue);
00160         break;
00161     default:
00162         break;
00163     }
00164     (*ps->reader)(ps->closure, ps->lb, value, ps->vtype, ps->fdefault);
00165     return PR_TRUE;
00166 }
00167 
00168 void
00169 PREF_InitParseState(PrefParseState *ps, PrefReader reader, void *closure)
00170 {
00171     memset(ps, 0, sizeof(*ps));
00172     ps->reader = reader;
00173     ps->closure = closure;
00174 }
00175 
00176 void
00177 PREF_FinalizeParseState(PrefParseState *ps)
00178 {
00179     if (ps->lb)
00180         free(ps->lb);
00181 }
00182 
00204 PRBool
00205 PREF_ParseBuf(PrefParseState *ps, const char *buf, int bufLen)
00206 {
00207     const char *end;
00208     char c;
00209     char udigit;
00210     int state;
00211 
00212     state = ps->state;
00213     for (end = buf + bufLen; buf != end; ++buf) {
00214         c = *buf;
00215         switch (state) {
00216         /* initial state */
00217         case PREF_PARSE_INIT:
00218             if (ps->lbcur != ps->lb) { /* reset state */
00219                 ps->lbcur = ps->lb;
00220                 ps->vb    = NULL;
00221                 ps->vtype = PREF_INVALID;
00222                 ps->fdefault = PR_FALSE;
00223             }
00224             switch (c) {
00225             case '/':       /* begin comment block or line? */
00226                 state = PREF_PARSE_COMMENT_MAYBE_START;
00227                 break;
00228             case '#':       /* accept shell style comments */
00229                 state = PREF_PARSE_UNTIL_EOL;
00230                 break;
00231             case 'u':       /* indicating user_pref */
00232             case 'p':       /* indicating pref */
00233                 ps->smatch = (c == 'u' ? kUserPref : kPref);
00234                 ps->sindex = 1;
00235                 ps->nextstate = PREF_PARSE_UNTIL_OPEN_PAREN;
00236                 state = PREF_PARSE_MATCH_STRING;
00237                 break;
00238             /* else skip char */
00239             }
00240             break;
00241 
00242         /* string matching */
00243         case PREF_PARSE_MATCH_STRING:
00244             if (c == ps->smatch[ps->sindex++]) {
00245                 /* if we've matched all characters, then move to next state. */
00246                 if (ps->smatch[ps->sindex] == '\0') {
00247                     state = ps->nextstate;
00248                     ps->nextstate = PREF_PARSE_INIT; /* reset next state */
00249                 }
00250                 /* else wait for next char */
00251             }
00252             else {
00253                 NS_WARNING("malformed pref file");
00254                 return PR_FALSE;
00255             }
00256             break;
00257 
00258         /* quoted string parsing */
00259         case PREF_PARSE_QUOTED_STRING:
00260             /* we assume that the initial quote has already been consumed */
00261             if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps))
00262                 return PR_FALSE; /* out of memory */
00263             if (c == '\\')
00264                 state = PREF_PARSE_ESC_SEQUENCE;
00265             else if (c == ps->quotechar) {
00266                 *ps->lbcur++ = '\0';
00267                 state = ps->nextstate;
00268                 ps->nextstate = PREF_PARSE_INIT; /* reset next state */
00269             }
00270             else
00271                 *ps->lbcur++ = c;
00272             break;
00273 
00274         /* name parsing */
00275         case PREF_PARSE_UNTIL_NAME:
00276             if (c == '\"' || c == '\'') {
00277                 ps->fdefault = (ps->smatch == kPref);
00278                 ps->quotechar = c;
00279                 ps->nextstate = PREF_PARSE_UNTIL_COMMA; /* return here when done */
00280                 state = PREF_PARSE_QUOTED_STRING;
00281             }
00282             else if (c == '/') {       /* allow embedded comment */
00283                 ps->nextstate = state; /* return here when done with comment */
00284                 state = PREF_PARSE_COMMENT_MAYBE_START;
00285             }
00286             else if (!isspace(c)) {
00287                 NS_WARNING("malformed pref file");
00288                 return PR_FALSE;
00289             }
00290             break;
00291 
00292         /* parse until we find a comma separating name and value */
00293         case PREF_PARSE_UNTIL_COMMA:
00294             if (c == ',') {
00295                 ps->vb = ps->lbcur;
00296                 state = PREF_PARSE_UNTIL_VALUE;
00297             }
00298             else if (c == '/') {       /* allow embedded comment */
00299                 ps->nextstate = state; /* return here when done with comment */
00300                 state = PREF_PARSE_COMMENT_MAYBE_START;
00301             }
00302             else if (!isspace(c)) {
00303                 NS_WARNING("malformed pref file");
00304                 return PR_FALSE;
00305             }
00306             break;
00307 
00308         /* value parsing */
00309         case PREF_PARSE_UNTIL_VALUE:
00310             /* the pref value type is unknown.  so, we scan for the first
00311              * character of the value, and determine the type from that. */
00312             if (c == '\"' || c == '\'') {
00313                 ps->vtype = PREF_STRING;
00314                 ps->quotechar = c;
00315                 ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN;
00316                 state = PREF_PARSE_QUOTED_STRING;
00317             }
00318             else if (c == 't' || c == 'f') {
00319                 ps->vb = (char *) (c == 't' ? kTrue : kFalse);
00320                 ps->vtype = PREF_BOOL;
00321                 ps->smatch = ps->vb;
00322                 ps->sindex = 1;
00323                 ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN;
00324                 state = PREF_PARSE_MATCH_STRING;
00325             }
00326             else if (isdigit(c) || (c == '-') || (c == '+')) {
00327                 ps->vtype = PREF_INT;
00328                 /* write c to line buffer... */
00329                 if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps))
00330                     return PR_FALSE; /* out of memory */
00331                 *ps->lbcur++ = c;
00332                 state = PREF_PARSE_INT_VALUE;
00333             }
00334             else if (c == '/') {       /* allow embedded comment */
00335                 ps->nextstate = state; /* return here when done with comment */
00336                 state = PREF_PARSE_COMMENT_MAYBE_START;
00337             }
00338             else if (!isspace(c)) {
00339                 NS_WARNING("malformed pref file");
00340                 return PR_FALSE;
00341             }
00342             break;
00343         case PREF_PARSE_INT_VALUE:
00344             /* grow line buffer if necessary... */
00345             if (ps->lbcur == ps->lbend && !pref_GrowBuf(ps))
00346                 return PR_FALSE; /* out of memory */
00347             if (isdigit(c))
00348                 *ps->lbcur++ = c;
00349             else {
00350                 *ps->lbcur++ = '\0'; /* stomp null terminator; we are done. */
00351                 if (c == ')')
00352                     state = PREF_PARSE_UNTIL_SEMICOLON;
00353                 else if (c == '/') { /* allow embedded comment */
00354                     ps->nextstate = PREF_PARSE_UNTIL_CLOSE_PAREN;
00355                     state = PREF_PARSE_COMMENT_MAYBE_START;
00356                 }
00357                 else if (isspace(c))
00358                     state = PREF_PARSE_UNTIL_CLOSE_PAREN;
00359                 else {
00360                     NS_WARNING("malformed pref file");
00361                     return PR_FALSE;
00362                 }
00363             }
00364             break;
00365 
00366         /* comment parsing */
00367         case PREF_PARSE_COMMENT_MAYBE_START:
00368             switch (c) {
00369             case '*': /* comment block */
00370                 state = PREF_PARSE_COMMENT_BLOCK;
00371                 break;
00372             case '/': /* comment line */
00373                 state = PREF_PARSE_UNTIL_EOL;
00374                 break;
00375             default:
00376                 /* pref file is malformed */
00377                 NS_WARNING("malformed pref file");
00378                 return PR_FALSE;
00379             }
00380             break;
00381         case PREF_PARSE_COMMENT_BLOCK:
00382             if (c == '*') 
00383                 state = PREF_PARSE_COMMENT_BLOCK_MAYBE_END;
00384             break;
00385         case PREF_PARSE_COMMENT_BLOCK_MAYBE_END:
00386             switch (c) {
00387             case '/':
00388                 state = ps->nextstate;
00389                 ps->nextstate = PREF_PARSE_INIT;
00390                 break;
00391             case '*':       /* stay in this state */
00392                 break;
00393             default:
00394                 state = PREF_PARSE_COMMENT_BLOCK;
00395             }
00396             break;
00397 
00398         /* string escape sequence parsing */
00399         case PREF_PARSE_ESC_SEQUENCE:
00400             /* not necessary to resize buffer here since we should be writing
00401              * only one character and the resize check would have been done
00402              * for us in the previous state */
00403             switch (c) {
00404             case '\"':
00405             case '\'':
00406             case '\\':
00407                 break;
00408             case 'r':
00409                 c = '\r';
00410                 break;
00411             case 'n':
00412                 c = '\n';
00413                 break;
00414             case 'x': /* hex escape -- always interpreted as Latin-1 */
00415             case 'u': /* UTF16 escape */
00416                 ps->esctmp[0] = c;
00417                 ps->esclen = 1;
00418                 ps->utf16[0] = ps->utf16[1] = 0;
00419                 ps->sindex = (c == 'x' ) ?
00420                                 HEX_ESC_NUM_DIGITS :
00421                                 UTF16_ESC_NUM_DIGITS;
00422                 state = PREF_PARSE_HEX_ESCAPE;
00423                 continue;
00424             default:
00425                 NS_WARNING("preserving unexpected JS escape sequence");
00426                 /* Invalid escape sequence so we do have to write more than
00427                  * one character. Grow line buffer if necessary... */
00428                 if ((ps->lbcur+1) == ps->lbend && !pref_GrowBuf(ps))
00429                     return PR_FALSE; /* out of memory */
00430                 *ps->lbcur++ = '\\'; /* preserve the escape sequence */
00431                 break;
00432             }
00433             *ps->lbcur++ = c;
00434             state = PREF_PARSE_QUOTED_STRING;
00435             break;
00436 
00437         /* parsing a hex (\xHH) or utf16 escape (\uHHHH) */
00438         case PREF_PARSE_HEX_ESCAPE:
00439             if ( c >= '0' && c <= '9' )
00440                 udigit = (c - '0');
00441             else if ( c >= 'A' && c <= 'F' )
00442                 udigit = (c - 'A') + 10;
00443             else if ( c >= 'a' && c <= 'f' )
00444                 udigit = (c - 'a') + 10;
00445             else {
00446                 /* bad escape sequence found, write out broken escape as-is */
00447                 NS_WARNING("preserving invalid or incomplete hex escape");
00448                 *ps->lbcur++ = '\\';  /* original escape slash */
00449                 if ((ps->lbcur + ps->esclen) >= ps->lbend && !pref_GrowBuf(ps))
00450                     return PR_FALSE;
00451                 for (int i = 0; i < ps->esclen; ++i)
00452                     *ps->lbcur++ = ps->esctmp[i];
00453 
00454                 /* push the non-hex character back for re-parsing. */
00455                 /* (++buf at the top of the loop keeps this safe)  */
00456                 --buf;
00457                 state = PREF_PARSE_QUOTED_STRING;
00458                 continue;
00459             }
00460 
00461             /* have a digit */
00462             ps->esctmp[ps->esclen++] = c; /* preserve it */
00463             ps->utf16[1] <<= BITS_PER_HEX_DIGIT;
00464             ps->utf16[1] |= udigit;
00465             ps->sindex--;
00466             if (ps->sindex == 0) {
00467                 /* have the full escape. Convert to UTF8 */
00468                 int utf16len = 0;
00469                 if (ps->utf16[0]) {
00470                     /* already have a high surrogate, this is a two char seq */
00471                     utf16len = 2;
00472                 }
00473                 else if (0xD800 == (0xFC00 & ps->utf16[1])) {
00474                     /* a high surrogate, can't convert until we have the low */
00475                     ps->utf16[0] = ps->utf16[1];
00476                     ps->utf16[1] = 0;
00477                     state = PREF_PARSE_UTF16_LOW_SURROGATE;
00478                     break;
00479                 }
00480                 else {
00481                     /* a single utf16 character */
00482                     ps->utf16[0] = ps->utf16[1];
00483                     utf16len = 1;
00484                 }
00485 
00486                 /* actual conversion */
00487                 /* make sure there's room, 6 bytes is max utf8 len (in */
00488                 /* theory; 4 bytes covers the actual utf16 range) */
00489                 if (ps->lbcur+6 >= ps->lbend && !pref_GrowBuf(ps))
00490                     return PR_FALSE;
00491 
00492                 ConvertUTF16toUTF8 converter(ps->lbcur);
00493                 converter.write(ps->utf16, utf16len);
00494                 ps->lbcur += converter.Size();
00495                 state = PREF_PARSE_QUOTED_STRING;
00496             }
00497             break;
00498 
00499         /* looking for beginning of utf16 low surrogate */
00500         case PREF_PARSE_UTF16_LOW_SURROGATE:
00501             if (ps->sindex == 0 && c == '\\') {
00502                 ++ps->sindex;
00503             }
00504             else if (ps->sindex == 1 && c == 'u') {
00505                 /* escape sequence is correct, now parse hex */
00506                 ps->sindex = UTF16_ESC_NUM_DIGITS;
00507                 ps->esctmp[0] = 'u';
00508                 ps->esclen = 1;
00509                 state = PREF_PARSE_HEX_ESCAPE;
00510             }
00511             else {
00512                 /* didn't find expected low surrogate. Ignore high surrogate
00513                  * (it would just get converted to nothing anyway) and start
00514                  * over with this character */
00515                  --buf;
00516                  if (ps->sindex == 1)
00517                      state = PREF_PARSE_ESC_SEQUENCE;
00518                  else
00519                      state = PREF_PARSE_QUOTED_STRING;
00520                  continue;
00521             }
00522             break;
00523 
00524         /* function open and close parsing */
00525         case PREF_PARSE_UNTIL_OPEN_PAREN:
00526             /* tolerate only whitespace and embedded comments */
00527             if (c == '(')
00528                 state = PREF_PARSE_UNTIL_NAME;
00529             else if (c == '/') {
00530                 ps->nextstate = state; /* return here when done with comment */
00531                 state = PREF_PARSE_COMMENT_MAYBE_START;
00532             }
00533             else if (!isspace(c)) {
00534                 NS_WARNING("malformed pref file");
00535                 return PR_FALSE;
00536             }
00537             break;
00538         case PREF_PARSE_UNTIL_CLOSE_PAREN:
00539             /* tolerate only whitespace and embedded comments  */
00540             if (c == ')')
00541                 state = PREF_PARSE_UNTIL_SEMICOLON;
00542             else if (c == '/') {
00543                 ps->nextstate = state; /* return here when done with comment */
00544                 state = PREF_PARSE_COMMENT_MAYBE_START;
00545             }
00546             else if (!isspace(c)) {
00547                 NS_WARNING("malformed pref file");
00548                 return PR_FALSE;
00549             }
00550             break;
00551 
00552         /* function terminator ';' parsing */
00553         case PREF_PARSE_UNTIL_SEMICOLON:
00554             /* tolerate only whitespace and embedded comments */
00555             if (c == ';') {
00556                 if (!pref_DoCallback(ps))
00557                     return PR_FALSE;
00558                 state = PREF_PARSE_INIT;
00559             }
00560             else if (c == '/') {
00561                 ps->nextstate = state; /* return here when done with comment */
00562                 state = PREF_PARSE_COMMENT_MAYBE_START;
00563             }
00564             else if (!isspace(c)) {
00565                 NS_WARNING("malformed pref file");
00566                 return PR_FALSE;
00567             }
00568             break;
00569 
00570         /* eol parsing */
00571         case PREF_PARSE_UNTIL_EOL:
00572             /* need to handle mac, unix, or dos line endings.
00573              * PREF_PARSE_INIT will eat the next \n in case
00574              * we have \r\n. */
00575             if (c == '\r' || c == '\n' || c == 0x1A) {
00576                 state = ps->nextstate;
00577                 ps->nextstate = PREF_PARSE_INIT; /* reset next state */
00578             }
00579             break;
00580         }
00581     }
00582     ps->state = state;
00583     return PR_TRUE;
00584 }
00585 
00586 #ifdef TEST_PREFREAD
00587 
00588 static void
00589 pref_reader(void       *closure, 
00590             const char *pref,
00591             PrefValue   val,
00592             PrefType    type,
00593             PRBool      defPref)
00594 {
00595     printf("%spref(\"%s\", ", defPref ? "" : "user_", pref);
00596     switch (type) {
00597     case PREF_STRING:
00598         printf("\"%s\");\n", val.stringVal);
00599         break;
00600     case PREF_INT:
00601         printf("%i);\n", val.intVal);
00602         break;
00603     case PREF_BOOL:
00604         printf("%s);\n", val.boolVal == PR_FALSE ? "false" : "true");
00605         break;
00606     }
00607 }
00608 
00609 int
00610 main(int argc, char **argv)
00611 {
00612     PrefParseState ps;
00613     char buf[4096];     /* i/o buffer */
00614     FILE *fp;
00615     int n;
00616 
00617     if (argc == 1) {
00618         printf("usage: prefread file.js\n");
00619         return -1;
00620     }
00621 
00622     fp = fopen(argv[1], "r");
00623     if (!fp) {
00624         printf("failed to open file\n");
00625         return -1;
00626     }
00627 
00628     PREF_InitParseState(&ps, pref_reader, NULL);
00629 
00630     while ((n = fread(buf, 1, sizeof(buf), fp)) > 0)
00631         PREF_ParseBuf(&ps, buf, n);
00632 
00633     PREF_FinalizeParseState(&ps);
00634 
00635     fclose(fp);
00636     return 0;
00637 }
00638 
00639 #endif /* TEST_PREFREAD */