Back to index

lightning-sunbird  0.9+nobinonly
javascript.c
Go to the documentation of this file.
00001 /* ***** BEGIN LICENSE BLOCK *****
00002  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00003  *
00004  * The contents of this file are subject to the Mozilla Public License Version
00005  * 1.1 (the "License"); you may not use this file except in compliance with
00006  * the License. You may obtain a copy of the License at
00007  * http://www.mozilla.org/MPL/
00008  *
00009  * Software distributed under the License is distributed on an "AS IS" basis,
00010  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00011  * for the specific language governing rights and limitations under the
00012  * License.
00013  *
00014  * The Original Code is the Netscape security libraries.
00015  *
00016  * The Initial Developer of the Original Code is
00017  * Netscape Communications Corporation.
00018  * Portions created by the Initial Developer are Copyright (C) 1994-2000
00019  * the Initial Developer. All Rights Reserved.
00020  *
00021  * Contributor(s):
00022  *
00023  * Alternatively, the contents of this file may be used under the terms of
00024  * either the GNU General Public License Version 2 or later (the "GPL"), or
00025  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00026  * in which case the provisions of the GPL or the LGPL are applicable instead
00027  * of those above. If you wish to allow use of your version of this file only
00028  * under the terms of either the GPL or the LGPL, and not to allow others to
00029  * use your version of this file under the terms of the MPL, indicate your
00030  * decision by deleting the provisions above and replace them with the notice
00031  * and other provisions required by the GPL or the LGPL. If you do not delete
00032  * the provisions above, a recipient may use your version of this file under
00033  * the terms of any one of the MPL, the GPL or the LGPL.
00034  *
00035  * ***** END LICENSE BLOCK ***** */
00036 
00037 #include "signtool.h"
00038 #include <prmem.h>
00039 #include <prio.h>
00040 #include <prenv.h>
00041 
00042 static int    javascript_fn(char *relpath, char *basedir, char *reldir,
00043 char *filename, void *arg);
00044 static int    extract_js (char *filename);
00045 static int    copyinto (char *from, char *to);
00046 static PRStatus ensureExists (char *base, char *path);
00047 static int    make_dirs(char *path, PRInt32 file_perms);
00048 
00049 static char   *jartree = NULL;
00050 static int    idOrdinal;
00051 static PRBool dumpParse = PR_FALSE;
00052 
/*
 * Names of the HTML event-handler attributes known to this file.
 */
static char   *event_handlers[] = {
    "onAbort",
    "onBlur",
    "onChange",
    "onClick",
    "onDblClick",
    "onDragDrop",
    "onError",
    "onFocus",
    "onKeyDown",
    "onKeyPress",
    "onKeyUp",
    "onLoad",
    "onMouseDown",
    "onMouseMove",
    "onMouseOut",
    "onMouseOver",
    "onMouseUp",
    "onMove",
    "onReset",
    "onResize",
    "onSelect",
    "onSubmit",
    "onUnload"
};

/*
 * Number of entries in event_handlers.  Derived from the table itself so
 * the count cannot drift out of sync when a name is added or removed.
 */
static int    num_handlers =
    (int)(sizeof(event_handlers) / sizeof(event_handlers[0]));
00081 
00082 /*
00083  *  I n l i n e J a v a S c r i p t
00084  *
00085  *  Javascript signing. Instead of passing an archive to signtool,
00086  *  a directory containing html files is given. Archives are created
00087  *  from the archive= and src= tag attributes inside the html,
00088  *  as appropriate. Then the archives are signed.
00089  *
00090  */
00091 int
00092 InlineJavaScript(char *dir, PRBool recurse)
00093 {
00094     jartree = dir;
00095     if (verbosity >= 0) {
00096        PR_fprintf(outputFD, "\nGenerating inline signatures from HTML files in: %s\n",
00097             dir);
00098     }
00099     if (PR_GetEnv("SIGNTOOL_DUMP_PARSE")) {
00100        dumpParse = PR_TRUE;
00101     }
00102 
00103     return foreach(dir, "", javascript_fn, recurse, PR_FALSE /*include dirs*/,
00104                      (void * )NULL);
00105 
00106 }
00107 
00108 
00109 /************************************************************************
00110  *
00111  * j a v a s c r i p t _ f n
00112  */
00113 static int    javascript_fn
00114 (char *relpath, char *basedir, char *reldir, char *filename, void *arg)
00115 {
00116     char      fullname [FNSIZE];
00117 
00118     /* only process inline scripts from .htm, .html, and .shtml*/
00119 
00120     if (!(PL_strcaserstr(filename, ".htm") == filename + strlen(filename) -
00121         4) && 
00122         !(PL_strcaserstr(filename, ".html") == filename + strlen(filename) -
00123         5) && 
00124         !(PL_strcaserstr(filename, ".shtml") == filename + strlen(filename)
00125         -6)) {
00126        return 0;
00127     }
00128 
00129     /* don't process scripts that signtool has already
00130      extracted (those that are inside .arc directories) */
00131 
00132     if (PL_strcaserstr(filename, ".arc") == filename + strlen(filename) - 4)
00133        return 0;
00134 
00135     if (verbosity >= 0) {
00136        PR_fprintf(outputFD, "Processing HTML file: %s\n", relpath);
00137     }
00138 
00139     /* reset firstArchive at top of each HTML file */
00140 
00141     /* skip directories that contain extracted scripts */
00142 
00143     if (PL_strcaserstr(reldir, ".arc") == reldir + strlen(reldir) - 4)
00144        return 0;
00145 
00146     sprintf (fullname, "%s/%s", basedir, relpath);
00147     return extract_js (fullname);
00148 }
00149 
00150 
00151 /*===========================================================================
00152  =
00153  = D A T A   S T R U C T U R E S
00154  =
00155 */
/* Top-level scanner state (its users are outside this excerpt). */
typedef enum {
    TEXT_HTML_STATE = 0,
    SCRIPT_HTML_STATE
} HTML_STATE;
00163 
/* States of the tag parser implemented by ProcessTag(). */
typedef enum {
    START_STATE,            /* initial state */
    GET_ATT_STATE,          /* looking for, or reading, an attribute */
    PRE_ATT_WS_STATE,       /* skipping whitespace before an attribute */
    POST_ATT_WS_STATE,      /* skipping whitespace after an attribute,
                             * looking for an '=' */
    PRE_VAL_WS_STATE,       /* skipping whitespace after an '=', waiting
                             * for a value */
    GET_VALUE_STATE,        /* reading an unquoted value */
    GET_QUOTED_VAL_STATE,   /* reading a value that is inside quotes */
    DONE_STATE,             /* the closing '>' has been seen */
    ERR_STATE               /* parse error */
} TAG_STATE;
00195 
/* One attribute/value pair of a tag; pairs form a singly linked list. */
typedef struct AVPair_Str {
    char              *attribute;   /* attribute name */
    char              *value;       /* attribute value, NULL when absent */
    unsigned int       valueLine;   /* the line that the value ends on */
    struct AVPair_Str *next;        /* following pair, NULL at the end */
} AVPair;
00202 
/* Kinds of tag distinguished by the parser; see GetTagType(). */
typedef enum {
    APPLET_TAG,
    SCRIPT_TAG,
    LINK_TAG,
    STYLE_TAG,
    COMMENT_TAG,    /* assigned by ProcessTag() for "<!" tags */
    OTHER_TAG       /* anything not listed above */
} TAG_TYPE;
00214 
00215 typedef struct {
00216     TAG_TYPE type;
00217     AVPair * attList;
00218     AVPair * attListTail;
00219     char      *text;
00220 } TagItem;
00221 
/* Discriminator for the union inside HTMLItem. */
typedef enum {
    TAG_ITEM,
    TEXT_ITEM
} ITEM_TYPE;
00229 
00230 typedef struct HTMLItem_Str {
00231     unsigned int     startLine;
00232     unsigned int     endLine;
00233     ITEM_TYPE type;
00234     union {
00235        TagItem *tag;
00236        char   *text;
00237     } item;
00238     struct HTMLItem_Str *next;
00239 } HTMLItem;
00240 
00241 typedef struct {
00242     PRFileDesc *fd;
00243     PRInt32 curIndex;
00244     PRBool IsEOF;
00245 #define FILE_BUFFER_BUFSIZE 512
00246     char      buf[FILE_BUFFER_BUFSIZE];
00247     PRInt32 startOffset;
00248     PRInt32 maxIndex;
00249     unsigned int     lineNum;
00250 } FileBuffer;
00251 
00252 /*===========================================================================
00253  =
00254  = F U N C T I O N S
00255  =
00256 */
00257 static HTMLItem*CreateTextItem(char *text, unsigned int startline,
00258 unsigned int endline);
00259 static HTMLItem*CreateTagItem(TagItem*ti, unsigned int startline,
00260 unsigned int endline);
00261 static TagItem*ProcessTag(FileBuffer*fb, char **errStr);
00262 static void   DestroyHTMLItem(HTMLItem *item);
00263 static void   DestroyTagItem(TagItem*ti);
00264 static TAG_TYPE GetTagType(char *att);
00265 static FileBuffer*FB_Create(PRFileDesc*fd);
00266 static int    FB_GetChar(FileBuffer *fb);
00267 static PRInt32 FB_GetPointer(FileBuffer *fb);
00268 static PRInt32 FB_GetRange(FileBuffer *fb, PRInt32 start, PRInt32 end,
00269 char **buf);
00270 static unsigned int  FB_GetLineNum(FileBuffer *fb);
00271 static void   FB_Destroy(FileBuffer *fb);
00272 static void   PrintTagItem(PRFileDesc *fd, TagItem *ti);
00273 static void   PrintHTMLStream(PRFileDesc *fd, HTMLItem *head);
00274 
00275 /************************************************************************
00276  *
00277  * C r e a t e T e x t I t e m
00278  */
00279 static HTMLItem*
00280 CreateTextItem(char *text, unsigned int startline, unsigned int endline)
00281 {
00282     HTMLItem * item;
00283 
00284     item = PR_Malloc(sizeof(HTMLItem));
00285     if (!item) {
00286        return NULL;
00287     }
00288 
00289     item->type = TEXT_ITEM;
00290     item->item.text = text;
00291     item->next = NULL;
00292     item->startLine = startline;
00293     item->endLine = endline;
00294 
00295     return item;
00296 }
00297 
00298 
00299 /************************************************************************
00300  *
00301  * C r e a t e T a g I t e m
00302  */
00303 static HTMLItem*
00304 CreateTagItem(TagItem*ti, unsigned int startline, unsigned int endline)
00305 {
00306     HTMLItem * item;
00307 
00308     item = PR_Malloc(sizeof(HTMLItem));
00309     if (!item) {
00310        return NULL;
00311     }
00312 
00313     item->type = TAG_ITEM;
00314     item->item.tag = ti;
00315     item->next = NULL;
00316     item->startLine = startline;
00317     item->endLine = endline;
00318 
00319     return item;
00320 }
00321 
00322 
00323 static PRBool
00324 isAttChar(int c)
00325 {
00326     return (isalnum(c) || c == '/' || c == '-');
00327 }
00328 
00329 
00330 /************************************************************************
00331  *
00332  * P r o c e s s T a g
00333  */
00334 static TagItem*
00335 ProcessTag(FileBuffer*fb, char **errStr)
00336 {
00337     TAG_STATE state;
00338     PRInt32 startText, startID, curPos;
00339     PRBool firstAtt;
00340     int       curchar;
00341     TagItem * ti = NULL;
00342     AVPair * curPair = NULL;
00343     char      quotechar = '\0';
00344     unsigned int     linenum;
00345     unsigned int     startline;
00346 
00347     state = START_STATE;
00348 
00349     startID = FB_GetPointer(fb);
00350     startText = startID;
00351     firstAtt = PR_TRUE;
00352 
00353     ti = (TagItem * ) PR_Malloc(sizeof(TagItem));
00354     if (!ti) 
00355        out_of_memory();
00356     ti->type = OTHER_TAG;
00357     ti->attList = NULL;
00358     ti->attListTail = NULL;
00359     ti->text = NULL;
00360 
00361     startline = FB_GetLineNum(fb);
00362 
00363     while (state != DONE_STATE && state != ERR_STATE) {
00364        linenum = FB_GetLineNum(fb);
00365        curchar = FB_GetChar(fb);
00366        if (curchar == EOF) {
00367            *errStr = PR_smprintf(
00368                "line %d: Unexpected end-of-file while parsing tag starting at line %d.\n",
00369                 linenum, startline);
00370            state = ERR_STATE;
00371            continue;
00372        }
00373 
00374        switch (state) {
00375        case START_STATE:
00376            if (curchar == '!') {
00377               /*
00378                * SGML tag or comment
00379                * Here's the general rule for SGML tags.  Everything from
00380                * <! to > is the tag.  Inside the tag, comments are
00381                * delimited with --.  So we are looking for the first '>'
00382                * that is not commented out, that is, not inside a pair
00383                * of --: <!DOCTYPE --this is a comment >(psyche!)   -->
00384                */
00385 
00386               PRBool inComment = PR_FALSE;
00387               short  hyphenCount = 0; /* number of consecutive hyphens */
00388 
00389               while (1) {
00390                   linenum = FB_GetLineNum(fb);
00391                   curchar = FB_GetChar(fb);
00392                   if (curchar == EOF) {
00393                      /* Uh oh, EOF inside comment */
00394                      *errStr = PR_smprintf(
00395     "line %d: Unexpected end-of-file inside comment starting at line %d.\n",
00396                                           linenum, startline);
00397                      state = ERR_STATE;
00398                      break;
00399                   }
00400                   if (curchar == '-') {
00401                      if (hyphenCount == 1) {
00402                          /* This is a comment delimiter */
00403                          inComment = !inComment;
00404                          hyphenCount = 0;
00405                      } else {
00406                          /* beginning of a comment delimiter? */
00407                          hyphenCount = 1;
00408                      }
00409                   } else if (curchar == '>') {
00410                      if (!inComment) {
00411                          /* This is the end of the tag */
00412                          state = DONE_STATE;
00413                          break;
00414                      } else {
00415                          /* The > is inside a comment, so it's not
00416                                                   * really the end of the tag */
00417                          hyphenCount = 0;
00418                      }
00419                   } else {
00420                      hyphenCount = 0;
00421                   }
00422               }
00423               ti->type = COMMENT_TAG;
00424               break;
00425            }
00426            /* fall through */
00427        case GET_ATT_STATE:
00428            if (isspace(curchar) || curchar == '=' || curchar
00429                == '>') {
00430               /* end of the current attribute */
00431               curPos = FB_GetPointer(fb) - 2;
00432               if (curPos >= startID) {
00433                   /* We have an attribute */
00434                   curPair = (AVPair * )PR_Malloc(sizeof(AVPair));
00435                   if (!curPair) 
00436                      out_of_memory();
00437                   curPair->value = NULL;
00438                   curPair->next = NULL;
00439                   FB_GetRange(fb, startID, curPos,
00440                       &curPair->attribute);
00441 
00442                   /* Stick this attribute on the list */
00443                   if (ti->attListTail) {
00444                      ti->attListTail->next = curPair;
00445                      ti->attListTail = curPair;
00446                   } else {
00447                      ti->attList = ti->attListTail =
00448                          curPair;
00449                   }
00450 
00451                   /* If this is the first attribute, find the type of tag
00452                    * based on it. Also, start saving the text of the tag. */
00453                   if (firstAtt) {
00454                      ti->type = GetTagType(curPair->attribute);
00455                      startText = FB_GetPointer(fb)
00456                          -1;
00457                      firstAtt = PR_FALSE;
00458                   }
00459               } else {
00460                   if (curchar == '=') {
00461                      /* If we don't have any attribute but we do have an
00462                       * equal sign, that's an error */
00463                      *errStr = PR_smprintf("line %d: Malformed tag starting at line %d.\n",
00464                           linenum, startline);
00465                      state = ERR_STATE;
00466                      break;
00467                   }
00468               }
00469 
00470               /* Compute next state */
00471               if (curchar == '=') {
00472                   startID = FB_GetPointer(fb);
00473                   state = PRE_VAL_WS_STATE;
00474               } else if (curchar == '>') {
00475                   state = DONE_STATE;
00476               } else if (curPair) {
00477                   state = POST_ATT_WS_STATE;
00478               } else {
00479                   state = PRE_ATT_WS_STATE;
00480               }
00481            } else if (isAttChar(curchar)) {
00482               /* Just another char in the attribute. Do nothing */
00483               state = GET_ATT_STATE;
00484            } else {
00485               /* bogus char */
00486               *errStr = PR_smprintf("line %d: Bogus chararacter '%c' in tag.\n",
00487                                    linenum, curchar);
00488               state = ERR_STATE;
00489               break;
00490            }
00491            break;
00492        case PRE_ATT_WS_STATE:
00493            if (curchar == '>') {
00494               state = DONE_STATE;
00495            } else if (isspace(curchar)) {
00496               /* more whitespace, do nothing */
00497            } else if (isAttChar(curchar)) {
00498               /* starting another attribute */
00499               startID = FB_GetPointer(fb) - 1;
00500               state = GET_ATT_STATE;
00501            } else {
00502               /* bogus char */
00503               *errStr = PR_smprintf("line %d: Bogus character '%c' in tag.\n",
00504                                    linenum, curchar);
00505               state = ERR_STATE;
00506               break;
00507            }
00508            break;
00509        case POST_ATT_WS_STATE:
00510            if (curchar == '>') {
00511               state = DONE_STATE;
00512            } else if (isspace(curchar)) {
00513               /* more whitespace, do nothing */
00514            } else if (isAttChar(curchar)) {
00515               /* starting another attribute */
00516               startID = FB_GetPointer(fb) - 1;
00517               state = GET_ATT_STATE;
00518            } else if (curchar == '=') {
00519               /* there was whitespace between the attribute and its equal
00520                * sign, which means there's a value coming up */
00521               state = PRE_VAL_WS_STATE;
00522            } else {
00523               /* bogus char */
00524               *errStr = PR_smprintf("line %d: Bogus character '%c' in tag.\n",
00525                                                  linenum, curchar);
00526               state = ERR_STATE;
00527               break;
00528            }
00529            break;
00530        case PRE_VAL_WS_STATE:
00531            if (curchar == '>') {
00532               /* premature end-of-tag (sounds like a personal problem). */
00533               *errStr = PR_smprintf(
00534                   "line %d: End of tag while waiting for value.\n",
00535                    linenum);
00536               state = ERR_STATE;
00537               break;
00538            } else if (isspace(curchar)) {
00539               /* more whitespace, do nothing */
00540               break;
00541            } else {
00542               /* this must be some sort of value. Fall through
00543                              * to GET_VALUE_STATE */
00544               startID = FB_GetPointer(fb) - 1;
00545               state = GET_VALUE_STATE;
00546            }
00547            /* Fall through if we didn't break on '>' or whitespace */
00548        case GET_VALUE_STATE:
00549            if (isspace(curchar) || curchar == '>') {
00550               /* end of value */
00551               curPos = FB_GetPointer(fb) - 2;
00552               if (curPos >= startID) {
00553                   /* Grab the value */
00554                   FB_GetRange(fb, startID, curPos,
00555                       &curPair->value);
00556                   curPair->valueLine = linenum;
00557               } else {
00558                   /* empty value, leave as NULL */
00559               }
00560               if (isspace(curchar)) {
00561                   state = PRE_ATT_WS_STATE;
00562               } else {
00563                   state = DONE_STATE;
00564               }
00565            } else if (curchar == '\"' || curchar == '\'') {
00566               /* quoted value.  Start recording the value inside the quote*/
00567               startID = FB_GetPointer(fb);
00568               state = GET_QUOTED_VAL_STATE;
00569               PORT_Assert(quotechar == '\0');
00570               quotechar = curchar; /* look for matching quote type */
00571            } else {
00572               /* just more value */
00573            }
00574            break;
00575        case GET_QUOTED_VAL_STATE:
00576            PORT_Assert(quotechar != '\0');
00577            if (curchar == quotechar) {
00578               /* end of quoted value */
00579               curPos = FB_GetPointer(fb) - 2;
00580               if (curPos >= startID) {
00581                   /* Grab the value */
00582                   FB_GetRange(fb, startID, curPos,
00583                       &curPair->value);
00584                   curPair->valueLine = linenum;
00585               } else {
00586                   /* empty value, leave it as NULL */
00587               }
00588               state = GET_ATT_STATE;
00589               quotechar = '\0';
00590               startID = FB_GetPointer(fb);
00591            } else {
00592               /* more quoted value, continue */
00593            }
00594            break;
00595        case DONE_STATE:
00596        case ERR_STATE:
00597        default:
00598            ; /* should never get here */
00599        }
00600     }
00601 
00602     if (state == DONE_STATE) {
00603        /* Get the text of the tag */
00604        curPos = FB_GetPointer(fb) - 1;
00605        FB_GetRange(fb, startText, curPos, &ti->text);
00606 
00607        /* Return the tag */
00608        return ti;
00609     }
00610 
00611     /* Uh oh, an error.  Kill the tag item*/
00612     DestroyTagItem(ti);
00613     return NULL;
00614 }
00615 
00616 
00617 /************************************************************************
00618  *
00619  * D e s t r o y H T M L I t e m
00620  */
00621 static void   
00622 DestroyHTMLItem(HTMLItem *item)
00623 {
00624     if (item->type == TAG_ITEM) {
00625        DestroyTagItem(item->item.tag);
00626     } else {
00627        if (item->item.text) {
00628            PR_Free(item->item.text);
00629        }
00630     }
00631 }
00632 
00633 
00634 /************************************************************************
00635  *
00636  * D e s t r o y T a g I t e m
00637  */
00638 static void   
00639 DestroyTagItem(TagItem*ti)
00640 {
00641     AVPair * temp;
00642 
00643     if (ti->text) {
00644        PR_Free(ti->text); 
00645        ti->text = NULL;
00646     }
00647 
00648     while (ti->attList) {
00649        temp = ti->attList;
00650        ti->attList = ti->attList->next;
00651 
00652        if (temp->attribute) {
00653            PR_Free(temp->attribute); 
00654            temp->attribute = NULL;
00655        }
00656        if (temp->value) {
00657            PR_Free(temp->value); 
00658            temp->value = NULL;
00659        }
00660        PR_Free(temp);
00661     }
00662 
00663     PR_Free(ti);
00664 }
00665 
00666 
00667 /************************************************************************
00668  *
00669  * G e t T a g T y p e
00670  */
00671 static TAG_TYPE
00672 GetTagType(char *att)
00673 {
00674     if (!PORT_Strcasecmp(att, "APPLET")) {
00675        return APPLET_TAG;
00676     }
00677     if (!PORT_Strcasecmp(att, "SCRIPT")) {
00678        return SCRIPT_TAG;
00679     }
00680     if (!PORT_Strcasecmp(att, "LINK")) {
00681        return LINK_TAG;
00682     }
00683     if (!PORT_Strcasecmp(att, "STYLE")) {
00684        return STYLE_TAG;
00685     }
00686     return OTHER_TAG;
00687 }
00688 
00689 
00690 /************************************************************************
00691  *
00692  * F B _ C r e a t e
00693  */
00694 static FileBuffer*
00695 FB_Create(PRFileDesc*fd)
00696 {
00697     FileBuffer * fb;
00698     PRInt32 amountRead;
00699     PRInt32 storedOffset;
00700 
00701     fb = (FileBuffer * ) PR_Malloc(sizeof(FileBuffer));
00702     fb->fd = fd;
00703     storedOffset = PR_Seek(fd, 0, PR_SEEK_CUR);
00704     PR_Seek(fd, 0, PR_SEEK_SET);
00705     fb->startOffset = 0;
00706     amountRead = PR_Read(fd, fb->buf, FILE_BUFFER_BUFSIZE);
00707     if (amountRead == -1) 
00708        goto loser;
00709     fb->maxIndex = amountRead - 1;
00710     fb->curIndex = 0;
00711     fb->IsEOF = (fb->curIndex > fb->maxIndex) ? PR_TRUE : PR_FALSE;
00712     fb->lineNum = 1;
00713 
00714     PR_Seek(fd, storedOffset, PR_SEEK_SET);
00715     return fb;
00716 loser:
00717     PR_Seek(fd, storedOffset, PR_SEEK_SET);
00718     PR_Free(fb);
00719     return NULL;
00720 }
00721 
00722 
00723 /************************************************************************
00724  *
00725  * F B _ G e t C h a r
00726  */
00727 static int    
00728 FB_GetChar(FileBuffer *fb)
00729 {
00730     PRInt32 storedOffset;
00731     PRInt32 amountRead;
00732     int       retval = -1;
00733 
00734     if (fb->IsEOF) {
00735        return EOF;
00736     }
00737 
00738     storedOffset = PR_Seek(fb->fd, 0, PR_SEEK_CUR);
00739 
00740     retval = (unsigned char) fb->buf[fb->curIndex++];
00741     if (retval == '\n') 
00742        fb->lineNum++;
00743 
00744     if (fb->curIndex > fb->maxIndex) {
00745        /* We're at the end of the buffer. Try to get some new data from the
00746                * file */
00747        fb->startOffset += fb->maxIndex + 1;
00748        PR_Seek(fb->fd, fb->startOffset, PR_SEEK_SET);
00749        amountRead = PR_Read(fb->fd, fb->buf, FILE_BUFFER_BUFSIZE);
00750        if (amountRead == -1)  
00751            goto loser;
00752        fb->maxIndex = amountRead - 1;
00753        fb->curIndex = 0;
00754     }
00755 
00756     fb->IsEOF = (fb->curIndex > fb->maxIndex) ? PR_TRUE : PR_FALSE;
00757 
00758 loser:
00759     PR_Seek(fb->fd, storedOffset, PR_SEEK_SET);
00760     return retval;
00761 }
00762 
00763 
00764 /************************************************************************
00765  *
00766  * F B _ G e t L i n e N u m
00767  *
00768  */
00769 static unsigned int  
00770 FB_GetLineNum(FileBuffer *fb)
00771 {
00772     return fb->lineNum;
00773 }
00774 
00775 
00776 /************************************************************************
00777  *
00778  * F B _ G e t P o i n t e r
00779  *
00780  */
00781 static PRInt32
00782 FB_GetPointer(FileBuffer *fb)
00783 {
00784     return fb->startOffset + fb->curIndex;
00785 }
00786 
00787 
00788 /************************************************************************
00789  *
00790  * F B _ G e t R a n g e
00791  *
00792  */
00793 static PRInt32
00794 FB_GetRange(FileBuffer *fb, PRInt32 start, PRInt32 end, char **buf)
00795 {
00796     PRInt32 amountRead;
00797     PRInt32 storedOffset;
00798 
00799     *buf = PR_Malloc(end - start + 2);
00800     if (*buf == NULL) {
00801        return 0;
00802     }
00803 
00804     storedOffset = PR_Seek(fb->fd, 0, PR_SEEK_CUR);
00805     PR_Seek(fb->fd, start, PR_SEEK_SET);
00806     amountRead = PR_Read(fb->fd, *buf, end - start + 1);
00807     PR_Seek(fb->fd, storedOffset, PR_SEEK_SET);
00808     if (amountRead == -1) {
00809        PR_Free(*buf);
00810        *buf = NULL;
00811        return 0;
00812     }
00813 
00814     (*buf)[end-start+1] = '\0';
00815     return amountRead;
00816 }
00817 
00818 
00819 /************************************************************************
00820  *
00821  * F B _ D e s t r o y
00822  *
00823  */
00824 static void   
00825 FB_Destroy(FileBuffer *fb)
00826 {
00827     if (fb) {
00828        PR_Free(fb);
00829     }
00830 }
00831 
00832 
00833 /************************************************************************
00834  *
00835  * P r i n t T a g I t e m
00836  *
00837  */
00838 static void   
00839 PrintTagItem(PRFileDesc *fd, TagItem *ti)
00840 {
00841     AVPair * pair;
00842 
00843     PR_fprintf(fd, "TAG:\n----\nType: ");
00844     switch (ti->type) {
00845     case APPLET_TAG:
00846        PR_fprintf(fd, "applet\n");
00847        break;
00848     case SCRIPT_TAG:
00849        PR_fprintf(fd, "script\n");
00850        break;
00851     case LINK_TAG:
00852        PR_fprintf(fd, "link\n");
00853        break;
00854     case STYLE_TAG:
00855        PR_fprintf(fd, "style\n");
00856        break;
00857     case COMMENT_TAG:
00858        PR_fprintf(fd, "comment\n");
00859        break;
00860     case OTHER_TAG:
00861     default:
00862        PR_fprintf(fd, "other\n");
00863        break;
00864     }
00865 
00866     PR_fprintf(fd, "Attributes:\n");
00867     for (pair = ti->attList; pair; pair = pair->next) {
00868        PR_fprintf(fd, "\t%s=%s\n", pair->attribute,
00869            pair->value ? pair->value : "");
00870     }
00871     PR_fprintf(fd, "Text:%s\n", ti->text ? ti->text : "");
00872 
00873     PR_fprintf(fd, "---End of tag---\n");
00874 }
00875 
00876 
00877 /************************************************************************
00878  *
00879  * P r i n t H T M L S t r e a m
00880  *
00881  */
00882 static void   
00883 PrintHTMLStream(PRFileDesc *fd, HTMLItem *head)
00884 {
00885     while (head) {
00886        if (head->type == TAG_ITEM) {
00887            PrintTagItem(fd, head->item.tag);
00888        } else {
00889            PR_fprintf(fd, "\nTEXT:\n-----\n%s\n-----\n\n", head->item.text);
00890        }
00891        head = head->next;
00892     }
00893 }
00894 
00895 
00896 /************************************************************************
00897  *
00898  * S a v e I n l i n e S c r i p t
00899  *
00900  */
00901 static int    
00902 SaveInlineScript(char *text, char *id, char *basedir, char *archiveDir)
00903 {
00904     char      *filename = NULL;
00905     PRFileDesc * fd = NULL;
00906     int       retval = -1;
00907     PRInt32 writeLen;
00908     char      *ilDir = NULL;
00909 
00910     if (!text || !id || !archiveDir) {
00911        return - 1;
00912     }
00913 
00914     if (dumpParse) {
00915        PR_fprintf(outputFD, "SaveInlineScript: text=%s, id=%s, \n"
00916            "basedir=%s, archiveDir=%s\n",
00917            text, id, basedir, archiveDir);
00918     }
00919 
00920     /* Make sure the archive directory is around */
00921     if (ensureExists(basedir, archiveDir) != PR_SUCCESS) {
00922        PR_fprintf(errorFD,
00923            "ERROR: Unable to create archive directory %s.\n", archiveDir);
00924        errorCount++;
00925        return - 1;
00926     }
00927 
00928     /* Make sure the inline script directory is around */
00929     ilDir = PR_smprintf("%s/inlineScripts", archiveDir);
00930     scriptdir = "inlineScripts";
00931     if (ensureExists(basedir, ilDir) != PR_SUCCESS) {
00932        PR_fprintf(errorFD,
00933            "ERROR: Unable to create directory %s.\n", ilDir);
00934        errorCount++;
00935        return - 1;
00936     }
00937 
00938     filename = PR_smprintf("%s/%s/%s", basedir, ilDir, id);
00939 
00940     /* If the file already exists, give a warning, then blow it away */
00941     if (PR_Access(filename, PR_ACCESS_EXISTS) == PR_SUCCESS) {
00942        PR_fprintf(errorFD,
00943            "warning: file \"%s\" already exists--will overwrite.\n",
00944                             filename);
00945        warningCount++;
00946        if (rm_dash_r(filename)) {
00947            PR_fprintf(errorFD, "ERROR: Unable to delete %s.\n", filename);
00948            errorCount++;
00949            goto finish;
00950        }
00951     }
00952 
00953     /* Write text into file with name id */
00954     fd = PR_Open(filename, PR_WRONLY | PR_CREATE_FILE | PR_TRUNCATE, 0777);
00955     if (!fd) {
00956        PR_fprintf(errorFD, "ERROR: Unable to create file \"%s\".\n",
00957                             filename);
00958        errorCount++;
00959        goto finish;
00960     }
00961     writeLen = strlen(text);
00962     if ( PR_Write(fd, text, writeLen) != writeLen) {
00963        PR_fprintf(errorFD, "ERROR: Unable to write to file \"%s\".\n",
00964                             filename);
00965        errorCount++;
00966        goto finish;
00967     }
00968 
00969     retval = 0;
00970 finish:
00971     if (filename) {
00972        PR_smprintf_free(filename);
00973     }
00974     if (ilDir) {
00975        PR_smprintf_free(ilDir);
00976     }
00977     if (fd) {
00978        PR_Close(fd);
00979     }
00980     return retval;
00981 }
00982 
00983 
00984 /************************************************************************
00985  *
00986  * S a v e U n n a m a b l e S c r i p t
00987  *
00988  */
00989 static int    
00990 SaveUnnamableScript(char *text, char *basedir, char *archiveDir,
00991 char *HTMLfilename)
00992 {
00993     char      *id = NULL;
00994     char      *ext = NULL;
00995     char      *start = NULL;
00996     int       retval = -1;
00997 
00998     if (!text || !archiveDir || !HTMLfilename) {
00999        return - 1;
01000     }
01001 
01002     if (dumpParse) {
01003        PR_fprintf(outputFD, "SaveUnnamableScript: text=%s, basedir=%s,\n"
01004            "archiveDir=%s, filename=%s\n", text, basedir, archiveDir,
01005                             HTMLfilename);
01006     }
01007 
01008     /* Construct the filename */
01009     ext = PL_strrchr(HTMLfilename, '.');
01010     if (ext) {
01011        *ext = '\0';
01012     }
01013     for (start = HTMLfilename; strpbrk(start, "/\\"); 
01014          start = strpbrk(start, "/\\") + 1)
01015        /* do nothing */;
01016     if (*start == '\0') 
01017        start = HTMLfilename;
01018     id = PR_smprintf("_%s%d", start, idOrdinal++);
01019     if (ext) {
01020        *ext = '.';
01021     }
01022 
01023     /* Now call SaveInlineScript to do the work */
01024     retval = SaveInlineScript(text, id, basedir, archiveDir);
01025 
01026     PR_Free(id);
01027 
01028     return retval;
01029 }
01030 
01031 
01032 /************************************************************************
01033  *
01034  * S a v e S o u r c e
01035  *
01036  */
01037 static int    
01038 SaveSource(char *src, char *codebase, char *basedir, char *archiveDir)
01039 {
01040     char      *from = NULL, *to = NULL;
01041     int       retval = -1;
01042     char      *arcDir = NULL;
01043 
01044     if (!src || !archiveDir) {
01045        return - 1;
01046     }
01047 
01048     if (dumpParse) {
01049        PR_fprintf(outputFD, "SaveSource: src=%s, codebase=%s, basedir=%s,\n"
01050            "archiveDir=%s\n", src, codebase, basedir, archiveDir);
01051     }
01052 
01053     if (codebase) {
01054        arcDir = PR_smprintf("%s/%s/%s/", basedir, codebase, archiveDir);
01055     } else {
01056        arcDir = PR_smprintf("%s/%s/", basedir, archiveDir);
01057     }
01058 
01059     if (codebase) {
01060        from = PR_smprintf("%s/%s/%s", basedir, codebase, src);
01061        to = PR_smprintf("%s%s", arcDir, src);
01062     } else {
01063        from = PR_smprintf("%s/%s", basedir, src);
01064        to = PR_smprintf("%s%s", arcDir, src);
01065     }
01066 
01067     if (make_dirs(to, 0777)) {
01068        PR_fprintf(errorFD,
01069            "ERROR: Unable to create archive directory %s.\n", archiveDir);
01070        errorCount++;
01071        goto finish;
01072     }
01073 
01074     retval = copyinto(from, to);
01075 finish:
01076     if (from) 
01077        PR_Free(from);
01078     if (to) 
01079        PR_Free(to);
01080     if (arcDir) 
01081        PR_Free(arcDir);
01082     return retval;
01083 }
01084 
01085 
01086 /************************************************************************
01087  *
01088  * T a g T y p e T o S t r i n g
01089  *
01090  */
01091 char   *
01092 TagTypeToString(TAG_TYPE type)
01093 {
01094     switch (type) {
01095     case APPLET_TAG:
01096        return "APPLET";
01097     case SCRIPT_TAG:
01098        return "SCRIPT";
01099     case LINK_TAG:
01100        return "LINK";
01101     case STYLE_TAG:
01102        return "STYLE";
01103     default:
01104        break;
01105     }
01106     return "unknown";
01107 }
01108 
01109 
01110 /************************************************************************
01111  *
01112  * e x t r a c t _ j s
01113  *
01114  */
01115 static int    
01116 extract_js(char *filename)
01117 {
01118     PRFileDesc * fd = NULL;
01119     FileBuffer * fb = NULL;
01120     HTMLItem * head = NULL;
01121     HTMLItem * tail = NULL;
01122     HTMLItem * curitem = NULL;
01123     HTMLItem * styleList    = NULL;
01124     HTMLItem * styleListTail       = NULL;
01125     HTMLItem * entityList   = NULL;
01126     HTMLItem * entityListTail      = NULL;
01127     TagItem * tagp = NULL;
01128     char      *text = NULL;
01129     char      *tagerr = NULL;
01130     char      *archiveDir = NULL;
01131     char      *firstArchiveDir = NULL;
01132     char      *basedir = NULL;
01133     PRInt32    textStart;
01134     PRInt32    curOffset;
01135     HTML_STATE state;
01136     int              curchar;
01137     int              retval = -1;
01138     unsigned int linenum, startLine;
01139 
01140     /* Initialize the implicit ID counter for each file */
01141     idOrdinal = 0;
01142 
01143     /*
01144      * First, parse the HTML into a stream of tags and text.
01145      */
01146 
01147     fd = PR_Open(filename, PR_RDONLY, 0);
01148     if (!fd) {
01149        PR_fprintf(errorFD, "Unable to open %s for reading.\n", filename);
01150        errorCount++;
01151        return - 1;
01152     }
01153 
01154     /* Construct base directory of filename. */
01155      {
01156        char   *cp;
01157 
01158        basedir = PL_strdup(filename);
01159 
01160        /* Remove trailing slashes */
01161        while ( (cp = PL_strprbrk(basedir, "/\\")) == 
01162            (basedir + strlen(basedir) - 1)) {
01163            *cp = '\0';
01164        }
01165 
01166        /* Now remove everything from the last slash (which will be followed
01167         * by a filename) to the end */
01168        cp = PL_strprbrk(basedir, "/\\");
01169        if (cp) {
01170            *cp = '\0';
01171        }
01172     }
01173 
01174     state = TEXT_HTML_STATE;
01175 
01176     fb = FB_Create(fd);
01177 
01178     textStart = 0;
01179     startLine = 0;
01180     while (linenum = FB_GetLineNum(fb), (curchar = FB_GetChar(fb)) !=
01181         EOF) {
01182        switch (state) {
01183        case TEXT_HTML_STATE:
01184            if (curchar == '<') {
01185               /*
01186                * Found a tag
01187                */
01188               /* Save the text so far to a new text item */
01189               curOffset = FB_GetPointer(fb) - 2;
01190               if (curOffset >= textStart) {
01191                   if (FB_GetRange(fb, textStart, curOffset,
01192                        &text) != 
01193                       curOffset - textStart + 1)  {
01194                      PR_fprintf(errorFD,
01195                          "Unable to read from %s.\n",
01196                           filename);
01197                      errorCount++;
01198                      goto loser;
01199                   }
01200                   /* little fudge here.  If the first character on a line
01201                    * is '<', meaning a new tag, the preceding text item
01202                    * actually ends on the previous line.  In this case
01203                    * we will be saying that the text segment ends on the
01204                    * next line. I don't think this matters for text items. */
01205                   curitem = CreateTextItem(text, startLine,
01206                        linenum);
01207                   text = NULL;
01208                   if (tail == NULL) {
01209                      head = tail = curitem;
01210                   } else {
01211                      tail->next = curitem;
01212                      tail = curitem;
01213                   }
01214               }
01215 
01216               /* Process the tag */
01217               tagp = ProcessTag(fb, &tagerr);
01218               if (!tagp) {
01219                   if (tagerr) {
01220                      PR_fprintf(errorFD, "Error in file %s: %s\n",
01221                                             filename, tagerr);
01222                      errorCount++;
01223                   } else {
01224                      PR_fprintf(errorFD,
01225                          "Error in file %s, in tag starting at line %d\n",
01226                                             filename, linenum);
01227                      errorCount++;
01228                   }
01229                   goto loser;
01230               }
01231               /* Add the tag to the list */
01232               curitem = CreateTagItem(tagp, linenum, FB_GetLineNum(fb));
01233               if (tail == NULL) {
01234                   head = tail = curitem;
01235               } else {
01236                   tail->next = curitem;
01237                   tail = curitem;
01238               }
01239 
01240               /* What's the next state */
01241               if (tagp->type == SCRIPT_TAG) {
01242                   state = SCRIPT_HTML_STATE;
01243               }
01244 
01245               /* Start recording text from the new offset */
01246               textStart = FB_GetPointer(fb);
01247               startLine = FB_GetLineNum(fb);
01248            } else {
01249               /* regular character.  Next! */
01250            }
01251            break;
01252        case SCRIPT_HTML_STATE:
01253            if (curchar == '<') {
01254               char   *cp;
01255               /*
01256                * If this is a </script> tag, then we're at the end of the
01257                * script.  Otherwise, ignore
01258                */
01259               curOffset = FB_GetPointer(fb) - 1;
01260               cp = NULL;
01261               if (FB_GetRange(fb, curOffset, curOffset + 8, &cp) != 9) {
01262                   if (cp) { 
01263                      PR_Free(cp); 
01264                      cp = NULL; 
01265                   }
01266               } else {
01267                   /* compare the strings */
01268                   if ( !PORT_Strncasecmp(cp, "</script>", 9) ) {
01269                      /* This is the end of the script. Record the text. */
01270                      curOffset--;
01271                      if (curOffset >= textStart) {
01272                          if (FB_GetRange(fb, textStart, curOffset, &text) != 
01273                              curOffset - textStart + 1) {
01274                             PR_fprintf(errorFD, "Unable to read from %s.\n",
01275                                  filename);
01276                             errorCount++;
01277                             goto loser;
01278                          }
01279                          curitem = CreateTextItem(text, startLine, linenum);
01280                          text = NULL;
01281                          if (tail == NULL) {
01282                             head = tail = curitem;
01283                          } else {
01284                             tail->next = curitem;
01285                             tail = curitem;
01286                          }
01287                      }
01288 
01289                      /* Now parse the /script tag and put it on the list */
01290                      tagp = ProcessTag(fb, &tagerr);
01291                      if (!tagp) {
01292                          if (tagerr) {
01293                             PR_fprintf(errorFD, "Error in file %s: %s\n",
01294                                  filename, tagerr);
01295                          } else {
01296                             PR_fprintf(errorFD, 
01297                                 "Error in file %s, in tag starting at"
01298                                 " line %d\n", filename, linenum);
01299                          }
01300                          errorCount++;
01301                          goto loser;
01302                      }
01303                      curitem = CreateTagItem(tagp, linenum,
01304                                           FB_GetLineNum(fb));
01305                      if (tail == NULL) {
01306                          head = tail = curitem;
01307                      } else {
01308                          tail->next = curitem;
01309                          tail = curitem;
01310                      }
01311 
01312                      /* go back to text state */
01313                      state = TEXT_HTML_STATE;
01314 
01315                      textStart = FB_GetPointer(fb);
01316                      startLine = FB_GetLineNum(fb);
01317                   }
01318               }
01319            }
01320            break;
01321        }
01322     }
01323 
01324     /* End of the file.  Wrap up any remaining text */
01325     if (state == SCRIPT_HTML_STATE) {
01326        if (tail && tail->type == TAG_ITEM) {
01327            PR_fprintf(errorFD, "ERROR: <SCRIPT> tag at %s:%d is not followed "
01328                "by a </SCRIPT> tag.\n", filename, tail->startLine);
01329        } else {
01330            PR_fprintf(errorFD, "ERROR: <SCRIPT> tag in file %s is not followed"
01331                " by a </SCRIPT tag.\n", filename);
01332        }
01333        errorCount++;
01334        goto loser;
01335     }
01336     curOffset = FB_GetPointer(fb) - 1;
01337     if (curOffset >= textStart) {
01338        text = NULL;
01339        if ( FB_GetRange(fb, textStart, curOffset, &text) != 
01340            curOffset - textStart + 1) {
01341            PR_fprintf(errorFD, "Unable to read from %s.\n", filename);
01342            errorCount++;
01343            goto loser;
01344        }
01345        curitem = CreateTextItem(text, startLine, linenum);
01346        text = NULL;
01347        if (tail == NULL) {
01348            head = tail = curitem;
01349        } else {
01350            tail->next = curitem;
01351            tail = curitem;
01352        }
01353     }
01354 
01355     if (dumpParse) {
01356        PrintHTMLStream(outputFD, head);
01357     }
01358 
01359     /*
01360      * Now we have a stream of tags and text.  Go through and deal with each.
01361      */
01362     for (curitem = head; curitem; curitem = curitem->next) {
01363        TagItem * tagp = NULL;
01364        AVPair * pairp = NULL;
01365        char   *src = NULL, *id = NULL, *codebase = NULL;
01366        PRBool hasEventHandler = PR_FALSE;
01367        int    i;
01368 
01369        /* Reset archive directory for each tag */
01370        if (archiveDir) {
01371            PR_Free(archiveDir); 
01372            archiveDir = NULL;
01373        }
01374 
01375        /* We only analyze tags */
01376        if (curitem->type != TAG_ITEM) {
01377            continue;
01378        }
01379 
01380        tagp = curitem->item.tag;
01381 
01382        /* go through the attributes to get information */
01383        for (pairp = tagp->attList; pairp; pairp = pairp->next) {
01384 
01385            /* ARCHIVE= */
01386            if ( !PL_strcasecmp(pairp->attribute, "archive")) {
01387               if (archiveDir) {
01388                   /* Duplicate attribute.  Print warning */
01389                   PR_fprintf(errorFD,
01390                       "warning: \"%s\" attribute overwrites previous attribute"
01391                       " in tag starting at %s:%d.\n",
01392                       pairp->attribute, filename, curitem->startLine);
01393                   warningCount++;
01394                   PR_Free(archiveDir);
01395               }
01396               archiveDir = PL_strdup(pairp->value);
01397 
01398               /* Substiture ".arc" for ".jar" */
01399               if ( (PL_strlen(archiveDir) < 4) || 
01400                   PL_strcasecmp((archiveDir + strlen(archiveDir) -4), 
01401                      ".jar")) {
01402                   PR_fprintf(errorFD,
01403                       "warning: ARCHIVE attribute should end in \".jar\" in tag"
01404                       " starting on %s:%d.\n", filename, curitem->startLine);
01405                   warningCount++;
01406                   PR_Free(archiveDir);
01407                   archiveDir = PR_smprintf("%s.arc", archiveDir);
01408               } else {
01409                   PL_strcpy(archiveDir + strlen(archiveDir) -4, ".arc");
01410               }
01411 
01412               /* Record the first archive.  This will be used later if
01413                * the archive is not specified */
01414               if (firstArchiveDir == NULL) {
01415                   firstArchiveDir = PL_strdup(archiveDir);
01416               }
01417            } 
01418            /* CODEBASE= */
01419            else if ( !PL_strcasecmp(pairp->attribute, "codebase")) {
01420               if (codebase) {
01421                   /* Duplicate attribute.  Print warning */
01422                   PR_fprintf(errorFD,
01423                       "warning: \"%s\" attribute overwrites previous attribute"
01424                       " in tag staring at %s:%d.\n",
01425                       pairp->attribute, filename, curitem->startLine);
01426                   warningCount++;
01427               }
01428               codebase = pairp->value;
01429            } 
01430            /* SRC= and HREF= */
01431            else if ( !PORT_Strcasecmp(pairp->attribute, "src") ||
01432                !PORT_Strcasecmp(pairp->attribute, "href") ) {
01433               if (src) {
01434                   /* Duplicate attribute.  Print warning */
01435                   PR_fprintf(errorFD,
01436                       "warning: \"%s\" attribute overwrites previous attribute"
01437                       " in tag staring at %s:%d.\n",
01438                       pairp->attribute, filename, curitem->startLine);
01439                   warningCount++;
01440               }
01441               src = pairp->value;
01442            } 
01443            /* CODE= */
01444            else if (!PORT_Strcasecmp(pairp->attribute, "code") ) {
01446               if (src) {
01447                   /* Duplicate attribute.  Print warning */
01448                   PR_fprintf(errorFD,
01449                       "warning: \"%s\" attribute overwrites previous attribute"
01450                       " ,in tag staring at %s:%d.\n",
01451                       pairp->attribute, filename, curitem->startLine);
01452                   warningCount++;
01453               }
01454               src = pairp->value;
01455 
01456               /* Append a .class if one is not already present */
01457               if ( (PL_strlen(src) < 6) || 
01458                   PL_strcasecmp( (src + PL_strlen(src) - 6), ".class") ) {
01459                   src = PR_smprintf("%s.class", src);
01460                   /* Put this string back into the data structure so it
01461                    * will be deallocated properly */
01462                   PR_Free(pairp->value);
01463                   pairp->value = src;
01464               }
01465            } 
01466            /* ID= */
01467            else if (!PL_strcasecmp(pairp->attribute, "id") ) {
01468               if (id) {
01469                   /* Duplicate attribute.  Print warning */
01470                   PR_fprintf(errorFD,
01471                       "warning: \"%s\" attribute overwrites previous attribute"
01472                       " in tag staring at %s:%d.\n",
01473                       pairp->attribute, filename, curitem->startLine);
01474                   warningCount++;
01475               }
01476               id = pairp->value;
01477            }
01478 
01479            /* STYLE= */
01480            /* style= attributes, along with JS entities, are stored into
01481             * files with dynamically generated names. The filenames are
01482             * based on the order in which the text is found in the file.
01483             * All JS entities on all lines up to and including the line
01484             * containing the end of the tag that has this style= attribute
01485             * will be processed before this style=attribute.  So we need
01486             * to record the line that this _tag_ (not the attribute) ends on.
01487             */
01488            else if (!PL_strcasecmp(pairp->attribute, "style") && pairp->value) 
01489            {
01490               HTMLItem * styleItem;
01491               /* Put this item on the style list */
01492               styleItem = CreateTextItem(PL_strdup(pairp->value),
01493                   curitem->startLine, curitem->endLine);
01494               if (styleListTail == NULL) {
01495                   styleList = styleListTail = styleItem;
01496               } else {
01497                   styleListTail->next = styleItem;
01498                   styleListTail = styleItem;
01499               }
01500            } 
01501            /* Event handlers */
01502            else {
01503               for (i = 0; i < num_handlers; i++) {
01504                   if (!PL_strcasecmp(event_handlers[i], pairp->attribute)) {
01505                      hasEventHandler = PR_TRUE;
01506                      break;
01507                   }
01508               }
01509            }
01510 
01511 
01512            /* JS Entity */
01513            {
01514               char   *entityStart, *entityEnd;
01515               HTMLItem * entityItem;
01516 
01517               /* go through each JavaScript entity ( &{...}; ) and store it
01518                * in the entityList.  The important thing is to record what
01519                * line number it's on, so we can get it in the right order
01520                * in relation to style= attributes.
01521                * Apparently, these can't flow across lines, so the start and
01522                * end line will be the same.  That helps matters.
01523                */
01524               entityEnd = pairp->value;
01525               while ( entityEnd && 
01526                   (entityStart = PL_strstr(entityEnd, "&{")) /*}*/ != NULL) {
01527                   entityStart += 2; /* point at beginning of actual entity */
01528                   entityEnd = PL_strstr(entityStart, /*{*/ "}");
01529                   if (entityEnd) {
01530                      /* Put this item on the entity list */
01531                      *entityEnd = '\0';
01532                      entityItem = CreateTextItem(PL_strdup(entityStart),
01533                                        pairp->valueLine, pairp->valueLine);
01534                      *entityEnd = /* { */ '}';
01535                      if (entityListTail) {
01536                          entityListTail->next = entityItem;
01537                          entityListTail = entityItem;
01538                      } else {
01539                          entityList = entityListTail = entityItem;
01540                      }
01541                   }
01542               }
01543            }
01544        }
01545 
01546        /* If no archive was supplied, we use the first one of the file */
01547        if (!archiveDir && firstArchiveDir) {
01548            archiveDir = PL_strdup(firstArchiveDir);
01549        }
01550 
01551        /* If we have an event handler, we need to archive this tag */
01552        if (hasEventHandler) {
01553            if (!id) {
01554               PR_fprintf(errorFD,
01555                   "warning: tag starting at %s:%d has event handler but"
01556                   " no ID attribute.  The tag will not be signed.\n",
01557                                    filename, curitem->startLine);
01558               warningCount++;
01559            } else if (!archiveDir) {
01560               PR_fprintf(errorFD,
01561                   "warning: tag starting at %s:%d has event handler but"
01562                   " no ARCHIVE attribute.  The tag will not be signed.\n",
01563                                        filename, curitem->startLine);
01564               warningCount++;
01565            } else {
01566               if (SaveInlineScript(tagp->text, id, basedir, archiveDir)) {
01567                   goto loser;
01568               }
01569            }
01570        }
01571 
01572        switch (tagp->type) {
01573        case APPLET_TAG:
01574            if (!src) {
01575               PR_fprintf(errorFD,
01576                   "error: APPLET tag starting on %s:%d has no CODE "
01577                   "attribute.\n", filename, curitem->startLine);
01578               errorCount++;
01579               goto loser;
01580            } else if (!archiveDir) {
01581               PR_fprintf(errorFD,
01582                   "error: APPLET tag starting on %s:%d has no ARCHIVE "
01583                   "attribute.\n", filename, curitem->startLine);
01584               errorCount++;
01585               goto loser;
01586            } else {
01587               if (SaveSource(src, codebase, basedir, archiveDir)) {
01588                   goto loser;
01589               }
01590            }
01591            break;
01592        case SCRIPT_TAG:
01593        case LINK_TAG:
01594        case STYLE_TAG:
01595            if (!archiveDir) {
01596               PR_fprintf(errorFD,
01597                   "error: %s tag starting on %s:%d has no ARCHIVE "
01598                   "attribute.\n", TagTypeToString(tagp->type),
01599                                        filename, curitem->startLine);
01600               errorCount++;
01601               goto loser;
01602            } else if (src) {
01603               if (SaveSource(src, codebase, basedir, archiveDir)) {
01604                   goto loser;
01605               }
01606            } else if (id) {
01607               /* Save the next text item */
01608               if (!curitem->next || (curitem->next->type !=
01609                   TEXT_ITEM)) {
01610                   PR_fprintf(errorFD,
01611                       "warning: %s tag starting on %s:%d is not followed"
01612                       " by script text.\n", TagTypeToString(tagp->type),
01613                                        filename, curitem->startLine);
01614                   warningCount++;
01615                   /* just create empty file */
01616                   if (SaveInlineScript("", id, basedir, archiveDir)) {
01617                      goto loser;
01618                   }
01619               } else {
01620                   curitem = curitem->next;
01621                   if (SaveInlineScript(curitem->item.text,
01622                        id, basedir,
01623                       archiveDir)) {
01624                      goto loser;
01625                   }
01626               }
01627            } else {
01628               /* No src or id tag--warning */
01629               PR_fprintf(errorFD,
01630                   "warning: %s tag starting on %s:%d has no SRC or"
01631                   " ID attributes.  Will not sign.\n",
01632                   TagTypeToString(tagp->type), filename, curitem->startLine);
01633               warningCount++;
01634            }
01635            break;
01636        default:
01637            /* do nothing for other tags */
01638            break;
01639        }
01640 
01641     }
01642 
01643     /* Now deal with all the unnamable scripts */
01644     if (firstArchiveDir) {
01645        HTMLItem * style, *entity;
01646 
01647        /* Go through the lists of JS entities and style attributes.  Do them
01648         * in chronological order within a list.  Pick the list with the lower
01649         * endLine. In case of a tie, entities come first.
01650         */
01651        style = styleList; 
01652        entity = entityList;
01653        while (style || entity) {
01654            if (!entity || (style && (style->endLine < entity->endLine))) {
01655               /* Process style */
01656               SaveUnnamableScript(style->item.text, basedir, firstArchiveDir,
01657                                 filename);
01658               style = style->next;
01659            } else {
01660               /* Process entity */
01661               SaveUnnamableScript(entity->item.text, basedir, firstArchiveDir,
01662                                 filename);
01663               entity = entity->next;
01664            }
01665        }
01666     }
01667 
01668 
01669     retval = 0;
01670 loser:
01671     /* Blow away the stream */
01672     while (head) {
01673        curitem = head;
01674        head = head->next;
01675        DestroyHTMLItem(curitem);
01676     }
01677     while (styleList) {
01678        curitem = styleList;
01679        styleList = styleList->next;
01680        DestroyHTMLItem(curitem);
01681     }
01682     while (entityList) {
01683        curitem = entityList;
01684        entityList = entityList->next;
01685        DestroyHTMLItem(curitem);
01686     }
01687     if (text) {
01688        PR_Free(text); 
01689        text = NULL;
01690     }
01691     if (fb) {
01692        FB_Destroy(fb); 
01693        fb = NULL;
01694     }
01695     if (fd) {
01696        PR_Close(fd);
01697     }
01698     if (tagerr) {
01699        PR_smprintf_free(tagerr); 
01700        tagerr = NULL;
01701     }
01702     if (archiveDir) {
01703        PR_Free(archiveDir); 
01704        archiveDir = NULL;
01705     }
01706     if (firstArchiveDir) {
01707        PR_Free(firstArchiveDir); 
01708        firstArchiveDir = NULL;
01709     }
01710     return retval;
01711 }
01712 
01713 
01714 /**********************************************************************
01715  *
01716  * e n s u r e E x i s t s
01717  *
01718  * Check for existence of indicated directory.  If it doesn't exist,
01719  * it will be created.
01720  * Returns PR_SUCCESS if the directory is present, PR_FAILURE otherwise.
01721  */
01722 static PRStatus
01723 ensureExists (char *base, char *path)
01724 {
01725     char      fn [FNSIZE];
01726     PRDir * dir;
01727     sprintf (fn, "%s/%s", base, path);
01728 
01729     /*PR_fprintf(outputFD, "Trying to open directory %s.\n", fn);*/
01730 
01731     if ( (dir = PR_OpenDir(fn)) ) {
01732        PR_CloseDir(dir);
01733        return PR_SUCCESS;
01734     }
01735     return PR_MkDir(fn, 0777);
01736 }
01737 
01738 
01739 /***************************************************************************
01740  *
01741  * m a k e _ d i r s
01742  *
01743  * Ensure that the directory portion of the path exists.  This may require
01744  * making the directory, and its parent, and its parent's parent, etc.
01745  */
01746 static int    
01747 make_dirs(char *path, int file_perms)
01748 {
01749     char      *Path;
01750     char      *start;
01751     char      *sep;
01752     int       ret = 0;
01753     PRFileInfo info;
01754 
01755     if (!path) {
01756        return 0;
01757     }
01758 
01759     Path = PL_strdup(path);
01760     start = strpbrk(Path, "/\\");
01761     if (!start) {
01762        return 0;
01763     }
01764     start++; /* start right after first slash */
01765 
01766     /* Each time through the loop add one more directory. */
01767     while ( (sep = strpbrk(start, "/\\")) ) {
01768        *sep = '\0';
01769 
01770        if ( PR_GetFileInfo(Path, &info) != PR_SUCCESS) {
01771            /* No such dir, we have to create it */
01772            if ( PR_MkDir(Path, file_perms) != PR_SUCCESS) {
01773               PR_fprintf(errorFD, "ERROR: Unable to create directory %s.\n",
01774                                                  Path);
01775               errorCount++;
01776               ret = -1;
01777               goto loser;
01778            }
01779        } else {
01780            /* something exists by this name, make sure it's a directory */
01781            if ( info.type != PR_FILE_DIRECTORY ) {
01782               PR_fprintf(errorFD, "ERROR: Unable to create directory %s.\n",
01783                                                  Path);
01784               errorCount++;
01785               ret = -1;
01786               goto loser;
01787            }
01788        }
01789 
01790        start = sep + 1; /* start after the next slash */
01791        *sep = '/';
01792     }
01793 
01794 loser:
01795     PR_Free(Path);
01796     return ret;
01797 }
01798 
01799 
01800 /*
01801  *  c o p y i n t o
01802  *
01803  *  Function to copy file "from" to path "to".
01804  *
01805  */
01806 static int    
01807 copyinto (char *from, char *to)
01808 {
01809     PRInt32 num;
01810     char      buf [BUFSIZ];
01811     PRFileDesc * infp = NULL, *outfp = NULL;
01812     int       retval = -1;
01813 
01814     if ((infp = PR_Open(from, PR_RDONLY, 0777)) == NULL) {
01815        PR_fprintf(errorFD, "ERROR: Unable to open \"%s\" for reading.\n",
01816                             from);
01817        errorCount++;
01818        goto finish;
01819     }
01820 
01821     /* If to already exists, print a warning before deleting it */
01822     if (PR_Access(to, PR_ACCESS_EXISTS) == PR_SUCCESS) {
01823        PR_fprintf(errorFD, "warning: %s already exists--will overwrite\n", to);
01824        warningCount++;
01825        if (rm_dash_r(to)) {
01826            PR_fprintf(errorFD,
01827                "ERROR: Unable to remove %s.\n", to);
01828            errorCount++;
01829            goto finish;
01830        }
01831     }
01832 
01833     if ((outfp = PR_Open(to, PR_WRONLY | PR_CREATE_FILE | PR_TRUNCATE, 0777))
01834          == NULL) {
01835        char   *errBuf = NULL;
01836 
01837        errBuf = PR_Malloc(PR_GetErrorTextLength());
01838        PR_fprintf(errorFD, "ERROR: Unable to open \"%s\" for writing.\n", to);
01839        if (PR_GetErrorText(errBuf)) {
01840            PR_fprintf(errorFD, "Cause: %s\n", errBuf);
01841        }
01842        if (errBuf) {
01843            PR_Free(errBuf);
01844        }
01845        errorCount++;
01846        goto finish;
01847     }
01848 
01849     while ( (num = PR_Read(infp, buf, BUFSIZ)) > 0) {
01850        if (PR_Write(outfp, buf, num) != num) {
01851            PR_fprintf(errorFD, "ERROR: Error writing to %s.\n", to);
01852            errorCount++;
01853            goto finish;
01854        }
01855     }
01856 
01857     retval = 0;
01858 finish:
01859     if (infp) 
01860        PR_Close(infp);
01861     if (outfp) 
01862        PR_Close(outfp);
01863 
01864     return retval;
01865 }
01866 
01867