Back to index

lightning-sunbird  0.9+nobinonly
Classes | Defines | Functions | Variables
nsParser.cpp File Reference
#include "nsIAtom.h"
#include "nsParser.h"
#include "nsString.h"
#include "nsCRT.h"
#include "nsScanner.h"
#include "plstr.h"
#include "nsIStringStream.h"
#include "nsIChannel.h"
#include "nsICachingChannel.h"
#include "nsICacheEntryDescriptor.h"
#include "nsICharsetAlias.h"
#include "nsIInputStream.h"
#include "CNavDTD.h"
#include "COtherDTD.h"
#include "prenv.h"
#include "nsParserCIID.h"
#include "nsReadableUtils.h"
#include "nsCOMPtr.h"
#include "nsIEventQueue.h"
#include "nsIEventQueueService.h"
#include "nsExpatDriver.h"
#include "nsIServiceManager.h"
#include "nsICategoryManager.h"
#include "nsISupportsPrimitives.h"
#include "nsIFragmentContentSink.h"
#include "nsStreamUtils.h"

Go to the source code of this file.

Classes

class  CDTDDeallocator
class  CDTDFinder
class  CSharedParserObjects
struct  nsParserContinueEvent
class  CWordTokenizer< CharT >
struct  PubIDInfo
struct  ParserWriteStruct

Defines

#define NS_PARSER_FLAG_DTD_VERIFICATION   0x00000001
#define NS_PARSER_FLAG_PARSER_ENABLED   0x00000002
#define NS_PARSER_FLAG_OBSERVERS_ENABLED   0x00000004
#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT   0x00000008
#define NS_PARSER_FLAG_CAN_INTERRUPT   0x00000010
#define NS_PARSER_FLAG_FLUSH_TOKENS   0x00000020
#define NS_PARSER_FLAG_CAN_TOKENIZE   0x00000040
#define PARSE_DTD_HAVE_DOCTYPE   (1<<0)
#define PARSE_DTD_HAVE_PUBLIC_ID   (1<<1)
#define PARSE_DTD_HAVE_SYSTEM_ID   (1<<2)
#define PARSE_DTD_HAVE_INTERNAL_SUBSET   (1<<3)
#define ELEMENTS_OF(array_)   (sizeof(array_)/sizeof(array_[0]))
#define UTF16_BE   "UTF-16BE"
#define UTF16_LE   "UTF-16LE"
#define UCS4_BE   "UTF-32BE"
#define UCS4_LE   "UTF-32LE"
#define UCS4_2143   "X-ISO-10646-UCS-4-2143"
#define UCS4_3412   "X-ISO-10646-UCS-4-3412"
#define UTF8   "UTF-8"

Functions

static NS_DEFINE_IID (kISupportsIID, NS_ISUPPORTS_IID)
static NS_DEFINE_CID (kCParserCID, NS_PARSER_CID)
static NS_DEFINE_IID (kIParserIID, NS_IPARSER_IID)
static NS_DEFINE_CID (kEventQueueServiceCID, NS_EVENTQUEUESERVICE_CID)
static nsresult GetSharedObjects (CSharedParserObjects **aSharedParserObjects)
static void FreeSharedObjects (void)
 NS_IMETHODIMP_ (void) nsParser
 gess 01/04/99
 NS_IMETHODIMP_ (nsIContentSink *) nsParser
 Retrieve the sink set into the parser. gess 5/11/98
 NS_IMETHODIMP_ (nsDTDMode) nsParser
 Retrieve parsemode from topmost parser context.
static PRInt32 ParsePS (const nsString &aBuffer, PRInt32 aIndex)
 Determine what DTD mode (and thus what layout nsCompatibility mode) to use for this document based on the first chunk of data received from the network (each parsercontext can have its own mode).
static PRBool ParseDocTypeDecl (const nsString &aBuffer, PRInt32 *aResultFlags, nsString &aPublicID, nsString &aSystemID)
static void DetermineHTMLParseMode (const nsString &aBuffer, nsDTDMode &aParseMode, eParserDocType &aDocType)
static void DetermineParseMode (const nsString &aBuffer, nsDTDMode &aParseMode, eParserDocType &aDocType, const nsACString &aMimeType)
static nsresult FindSuitableDTD (CParserContext &aParserContext, PRBool *aReturn)
 gess 5/13/98
 NS_IMETHODIMP_ (PRBool) nsParser
 Call this to query whether the parser is enabled or not.
static PRBool IsSecondMarker (unsigned char aChar)
static PRBool DetectByteOrderMark (const unsigned char *aBytes, PRInt32 aLen, nsCString &oCharset, PRInt32 &oCharsetSource)
const char GetNextChar (nsACString::const_iterator &aStart, nsACString::const_iterator &aEnd)
static NS_METHOD ParserWriteFunc (nsIInputStream *in, void *closure, const char *fromRawSegment, PRUint32 toOffset, PRUint32 count, PRUint32 *writeCount)

Variables

static CSharedParserObjects * gSharedParserObjects = 0
static const PubIDInfo kPublicIDs []

Class Documentation

struct ParserWriteStruct

Definition at line 2566 of file nsParser.cpp.

Collaboration diagram for ParserWriteStruct:
Class Members
PRBool mNeedCharsetCheck
nsParser * mParser
nsIParserFilter * mParserFilter
nsIRequest * mRequest
nsScanner * mScanner

Define Documentation

#define ELEMENTS_OF (   array_)    (sizeof(array_)/sizeof(array_[0]))

Definition at line 934 of file nsParser.cpp.

Definition at line 75 of file nsParser.cpp.

Definition at line 77 of file nsParser.cpp.

Definition at line 71 of file nsParser.cpp.

Definition at line 76 of file nsParser.cpp.

Definition at line 73 of file nsParser.cpp.

Definition at line 72 of file nsParser.cpp.

Definition at line 74 of file nsParser.cpp.

Definition at line 786 of file nsParser.cpp.

Definition at line 789 of file nsParser.cpp.

Definition at line 787 of file nsParser.cpp.

Definition at line 788 of file nsParser.cpp.

#define UCS4_2143   "X-ISO-10646-UCS-4-2143"

Definition at line 2256 of file nsParser.cpp.

#define UCS4_3412   "X-ISO-10646-UCS-4-3412"

Definition at line 2257 of file nsParser.cpp.

#define UCS4_BE   "UTF-32BE"

Definition at line 2254 of file nsParser.cpp.

#define UCS4_LE   "UTF-32LE"

Definition at line 2255 of file nsParser.cpp.

#define UTF16_BE   "UTF-16BE"

Definition at line 2252 of file nsParser.cpp.

#define UTF16_LE   "UTF-16LE"

Definition at line 2253 of file nsParser.cpp.

#define UTF8   "UTF-8"

Definition at line 2258 of file nsParser.cpp.


Function Documentation

static PRBool DetectByteOrderMark ( const unsigned char *  aBytes,
PRInt32  aLen,
nsCString &  oCharset,
PRInt32 &  oCharsetSource 
) [static]

Definition at line 2273 of file nsParser.cpp.

                                                                                                                           {
 // Guess the character encoding of a document from its first bytes.
 //
 //   aBytes         - start of the raw data. aBytes[1..3] are read without
 //                    consulting aLen, so the caller must supply at least
 //                    four readable bytes.
 //   aLen           - number of bytes available at aBytes; bounds the scan
 //                    of the XML declaration.
 //   oCharset       - receives the detected charset name; left empty when
 //                    nothing was recognized.
 //   oCharsetSource - receives kCharsetFromByteOrderMark,
 //                    kCharsetFromMetaTag (encoding taken from the XML
 //                    declaration) or kCharsetFromAutoDetection (initial
 //                    value).
 //
 // Returns a true value when oCharset is non-empty on exit.
 oCharsetSource= kCharsetFromAutoDetection;
 oCharset.Truncate();
 // See http://www.w3.org/TR/2000/REC-xml-20001006#sec-guessing
 // for details
 // Also, MS Win2K notepad now generate 3 bytes BOM in UTF8 as UTF8 signature
 // We need to check that
 // UCS2 BOM FEFF = UTF8 EF BB BF
 switch(aBytes[0])
        {
   case 0x00:
     if(0x00==aBytes[1]) {
        // 00 00
        if((0xFE==aBytes[2]) && (0xFF==aBytes[3])) {
           // 00 00 FE FF UCS-4, big-endian machine (1234 order)
           oCharset.Assign(UCS4_BE);
        } else if((0x00==aBytes[2]) && (0x3C==aBytes[3])) {
           // 00 00 00 3C UCS-4, big-endian machine (1234 order)
           oCharset.Assign(UCS4_BE);
        } else if((0xFF==aBytes[2]) && (0xFE==aBytes[3])) {
           // 00 00 FF FE UCS-4, unusual octet order (2143)
           oCharset.Assign(UCS4_2143);
        } else if((0x3C==aBytes[2]) && (0x00==aBytes[3])) {
           // 00 00 3C 00 UCS-4, unusual octet order (2143)
           oCharset.Assign(UCS4_2143);
        } 
        oCharsetSource = kCharsetFromByteOrderMark;
     } else if((0x3C==aBytes[1]) && (0x00==aBytes[2])) {
        // 00 3C 00
        if(IsSecondMarker(aBytes[3])) {
           // 00 3C 00 SM UTF-16,  big-endian, no Byte Order Mark 
           oCharset.Assign(UTF16_BE); 
        } else if((0x00==aBytes[3])) {
           // 00 3C 00 00 UCS-4, unusual octet order (3412)
           oCharset.Assign(UCS4_3412);
        } 
        oCharsetSource = kCharsetFromByteOrderMark;
     }
   break;
   case 0x3C:
     if(0x00==aBytes[1] && (0x00==aBytes[3])) {
        // 3C 00 XX 00
        if(IsSecondMarker(aBytes[2])) {
           // 3C 00 SM 00 UTF-16,  little-endian, no Byte Order Mark 
           oCharset.Assign(UTF16_LE); 
        } else if((0x00==aBytes[2])) {
           // 3C 00 00 00 UCS-4, little-endian machine (4321 order)
           oCharset.Assign(UCS4_LE); 
        } 
        oCharsetSource = kCharsetFromByteOrderMark;
     // For html, meta tag detector is invoked before this so that we have 
     // to deal only with XML here.
     } else if(                     (0x3F==aBytes[1]) &&
               (0x78==aBytes[2]) && (0x6D==aBytes[3]) &&
               // The comparison below touches a fifth byte, so make sure
               // the buffer really has one before looking at it.
               (aLen >= 5) &&
               (0 == PL_strncmp("<?xml", (char*)aBytes, 5 ))) {
       // 3C 3F 78 6D
       // ASCII characters are in their normal positions, so we can safely
       // deal with the XML declaration in the old C way
       // XXX This part could be made simpler by using CWordTokenizer<char>,
       //     but bug 104479 must be fixed first.
       // The shortest string so far (strlen==5):
       // <?xml
       PRInt32 i;
       PRBool versionFound = PR_FALSE, encodingFound = PR_FALSE;
       for (i=6; i < aLen && !encodingFound; ++i) {
         // end of XML declaration?
         if ((((char*)aBytes)[i] == '?') && 
           ((i+1) < aLen) &&
           (((char*)aBytes)[i+1] == '>')) {
           break;
         }
         // Version is required.
         if (!versionFound) {
           // Want to avoid string comparisons, hence looking for 'n'
           // and only if found check the string leading to it. Not
           // foolproof, but fast.
           // The shortest string allowed before this is  (strlen==13):
           // <?xml version
           if ((((char*)aBytes)[i] == 'n') &&
             (i >= 12) && 
             (0 == PL_strncmp("versio", (char*)(aBytes+i-6), 6 ))) {
             // Fast forward through version
             char q = 0;
             for (++i; i < aLen; ++i) {
               char qi = ((char*)aBytes)[i];
               if (qi == '\'' || qi == '"') {
                 if (q && q == qi) {
                   //  ending quote
                   versionFound = PR_TRUE;
                   break;
                 } else {
                   // Starting quote
                   q = qi;
                 }
               }
             }
           }
         } else {
           // encoding must follow version
           // Want to avoid string comparisons, hence looking for 'g'
           // and only if found check the string leading to it. Not
           // foolproof, but fast.
           // The shortest allowed string before this (strlen==26):
           // <?xml version="1" encoding
           if ((((char*)aBytes)[i] == 'g') &&
             (i >= 25) && 
             (0 == PL_strncmp("encodin", (char*)(aBytes+i-7), 7 ))) {
             PRInt32 encStart = 0;
             char q = 0;
             for (++i; i < aLen; ++i) {
               char qi = ((char*)aBytes)[i];
               if (qi == '\'' || qi == '"') {
                 if (q && q == qi) {
                   PRInt32 count = i - encStart;
                   // encoding value is invalid if it is UTF-16
                   // The value is the `count` bytes between the quotes and
                   // is NOT NUL-terminated there, so compare exactly those
                   // bytes instead of running an unbounded string compare
                   // into the rest of the buffer.
                   PRBool isUTF16 =
                     (count == 6) &&
                     (0 == PL_strncmp("UTF-16", (char*)(aBytes+encStart), 6));
                   if (count > 0 && !isUTF16) {
                     oCharset.Assign((char*)(aBytes+encStart),count);
                     oCharsetSource = kCharsetFromMetaTag;
                   }
                   encodingFound = PR_TRUE;
                   break;
                 } else {
                   encStart = i+1;
                   q = qi;
                 }
               }
             }
           }
         } // if (!versionFound)
       } // for
     }
   break;
   case 0xEF:  
     if((0xBB==aBytes[1]) && (0xBF==aBytes[2])) {
        // EF BB BF
        // Win2K UTF-8 BOM
        oCharset.Assign(UTF8); 
        oCharsetSource= kCharsetFromByteOrderMark;
     }
   break;
   case 0xFE:
     if(0xFF==aBytes[1]) {
        if(0x00==aBytes[2] && 0x00==aBytes[3]) {
          // FE FF 00 00  UCS-4, unusual octet order (3412)
          oCharset.Assign(UCS4_3412);
        } else {
          // FE FF UTF-16, big-endian 
          oCharset.Assign(UTF16_BE); 
        }
        oCharsetSource= kCharsetFromByteOrderMark;
     }
   break;
   case 0xFF:
     if(0xFE==aBytes[1]) {
        if(0x00==aBytes[2] && 0x00==aBytes[3]) 
         // FF FE 00 00  UTF-32, little-endian
           oCharset.Assign(UCS4_LE); 
        else
        // FF FE
        // UTF-16, little-endian 
           oCharset.Assign(UTF16_LE); 
        oCharsetSource= kCharsetFromByteOrderMark;
     }
   break;
   // case 0x4C: if((0x6F==aBytes[1]) && ((0xA7==aBytes[2] && (0x94==aBytes[3])) {
   //   We do not care EBCIDIC here....
   // }
   // break;
 }  // switch
 return !oCharset.IsEmpty();
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void DetermineHTMLParseMode ( const nsString &  aBuffer,
nsDTDMode &  aParseMode,
eParserDocType &  aDocType 
) [static]

Definition at line 1050 of file nsParser.cpp.

{
  // Chooses the DTD mode and document type for an HTML document by
  // inspecting the DOCTYPE declaration (if any) found in aBuffer. Both
  // aParseMode and aDocType are written on every path.
#ifdef DEBUG
  VerifyPublicIDs();
#endif
  PRInt32 flags;
  nsAutoString widePublicID, wideSystemID;

  // badly formed DOCTYPE -> quirks
  if (!ParseDocTypeDecl(aBuffer, &flags, widePublicID, wideSystemID)) {
    aParseMode = eDTDMode_quirks;
    aDocType = eHTML3_Quirks;
    return;
  }

  // no DOCTYPE
  if (!(flags & PARSE_DTD_HAVE_DOCTYPE)) {
    aParseMode = eDTDMode_quirks;
    aDocType = eHTML_Quirks;
    return;
  }

  const PRBool hasInternalSubset =
    (flags & PARSE_DTD_HAVE_INTERNAL_SUBSET) != 0;

  if (hasInternalSubset || !(flags & PARSE_DTD_HAVE_PUBLIC_ID)) {
    // A doctype with an internal subset is always full_standards.
    // A doctype without a public ID is always full_standards.
    // The one exception is the special hack for IBM's custom DOCTYPE,
    // which is recognized by its system ID and forced into quirks.
    if (!hasInternalSubset &&
        wideSystemID == NS_LITERAL_STRING(
               "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) {
      aParseMode = eDTDMode_quirks;
      aDocType = eHTML_Quirks;
    } else {
      aParseMode = eDTDMode_full_standards;
      aDocType = eHTML_Strict;
    }
    return;
  }

  // From here on we have a public ID and must consult kPublicIDs.

  // Yes, we want UCS2 to ASCII lossy conversion.
  nsCAutoString narrowPublicID;
  narrowPublicID.AssignWithConversion(widePublicID);

  // See comment above definition of kPublicIDs about case
  // sensitivity.
  ToLowerCase(narrowPublicID);

  // Binary search over kPublicIDs. The bounds must be signed because
  // the upper bound can drop below zero.
  PRInt32 low = 0;
  PRInt32 high = ELEMENTS_OF(kPublicIDs) - 1;
  PRInt32 probe;
  PRBool matched = PR_FALSE;
  do {
    probe = (low + high) / 2;
    const PRInt32 order =
        nsCRT::strcmp(narrowPublicID.get(), kPublicIDs[probe].name);
    if (order == 0) {
      matched = PR_TRUE;
    } else if (order < 0) {
      high = probe - 1;
    } else {
      low = probe + 1;
    }
  } while (!matched && low <= high);

  if (!matched) {
    // The DOCTYPE is not in our list, so it must be full_standards.
    aParseMode = eDTDMode_full_standards;
    aDocType = eHTML_Strict;
    return;
  }

  // The table stores one mode for "system ID present" and another for
  // "system ID absent"; pick the one that applies.
  switch ((flags & PARSE_DTD_HAVE_SYSTEM_ID)
            ? kPublicIDs[probe].mode_if_sysid
            : kPublicIDs[probe].mode_if_no_sysid)
  {
    case PubIDInfo::eQuirks3:
      aParseMode = eDTDMode_quirks;
      aDocType = eHTML3_Quirks;
      break;
    case PubIDInfo::eQuirks:
      aParseMode = eDTDMode_quirks;
      aDocType = eHTML_Quirks;
      break;
    case PubIDInfo::eAlmostStandards:
      aParseMode = eDTDMode_almost_standards;
      aDocType = eHTML_Strict;
      break;
    case PubIDInfo::eFullStandards:
      aParseMode = eDTDMode_full_standards;
      aDocType = eHTML_Strict;
      break;
    default:
      NS_NOTREACHED("no other cases!");
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void DetermineParseMode ( const nsString &  aBuffer,
nsDTDMode &  aParseMode,
eParserDocType &  aDocType,
const nsACString &  aMimeType 
) [static]

Definition at line 1152 of file nsParser.cpp.

{
  // Dispatch on the MIME type: HTML gets DOCTYPE sniffing, a fixed set of
  // text/script/CSS types is treated as plain text, and everything else
  // is assumed to be XML.
  if (aMimeType.EqualsLiteral(kHTMLTextContentType)) {
    DetermineHTMLParseMode(aBuffer, aParseMode, aDocType);
    return;
  }

  const PRBool treatAsPlainText =
    aMimeType.EqualsLiteral(kPlainTextContentType) ||
    aMimeType.EqualsLiteral(kTextCSSContentType) ||
    aMimeType.EqualsLiteral(kApplicationJSContentType) ||
    aMimeType.EqualsLiteral(kApplicationXJSContentType) ||
    aMimeType.EqualsLiteral(kTextECMAScriptContentType) ||
    aMimeType.EqualsLiteral(kApplicationECMAScriptContentType) ||
    aMimeType.EqualsLiteral(kTextJSContentType);

  if (treatAsPlainText) {
    aDocType = ePlainText;
    aParseMode = eDTDMode_quirks;
    return;
  }

  // Some form of XML
  aDocType = eXML;
  aParseMode = eDTDMode_full_standards;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static nsresult FindSuitableDTD ( CParserContext &  aParserContext,
PRBool *  aReturn 
) [static]

gess 5/13/98

Parameters:
@return

Definition at line 1183 of file nsParser.cpp.

{
  // Selects a DTD for aParserContext.
  //
  // *aReturn starts out PR_FALSE and is set to PR_TRUE only at the end,
  // after a candidate DTD was found in the shared deque and
  // CreateNewInstance() on it succeeded. The nsresult returned on that
  // path is the one from GetSharedObjects()/CreateNewInstance().
  *aReturn = PR_FALSE;
  //Let's start by trying the defaultDTD, if one exists...
  if(aParserContext.mDTD) {
    eAutoDetectResult canParse = aParserContext.mDTD->CanParse(aParserContext);
    // NOTE(review): this function returns nsresult, yet PR_TRUE is returned
    // here and *aReturn is left at PR_FALSE. Callers are not visible from
    // this block -- confirm whether they rely on this before changing it.
    if(canParse != eUnknownDetect && canParse != eInvalidDetect)
      return PR_TRUE;
  }

  CSharedParserObjects* sharedObjects;
  nsresult rv = GetSharedObjects(&sharedObjects);
  NS_ENSURE_SUCCESS(rv, rv);

  aParserContext.mAutoDetectStatus = eUnknownDetect;
  PRInt32 theDTDIndex = 0;
  nsIDTD* theBestDTD  = 0;
  nsIDTD* theDTD      = 0;
  PRBool  thePrimaryFound = PR_FALSE;

  // The condition uses <=, so ObjectAt() can be asked for index == size;
  // the `if (theDTD)` below skips a null result. Presumably this extra
  // pass exists so entries pushed inside the loop get examined -- TODO
  // confirm ObjectAt() returns null for an out-of-range index.
  while ((theDTDIndex <= sharedObjects->mDTDDeque.GetSize()) && 
         (aParserContext.mAutoDetectStatus != ePrimaryDetect)){
    theDTD = NS_STATIC_CAST(nsIDTD*, sharedObjects->mDTDDeque.ObjectAt(theDTDIndex++));
    if (theDTD) {
      // Store detect status in temp ( theResult ) to avoid bugs such as
      // 36233, 36754, 36491, 36323. Basically, we should avoid calling DTD's
      // WillBuildModel() multiple times, i.e., we shouldn't leave auto-detect-status
      // unknown.
      eAutoDetectResult theResult = theDTD->CanParse(aParserContext);
      if (eValidDetect == theResult){
        // A "valid" match is remembered but the search continues, so a
        // later "primary" match can still replace it.
        aParserContext.mAutoDetectStatus = eValidDetect;
        theBestDTD = theDTD;
      }
      else if (ePrimaryDetect == theResult) {  
        theBestDTD = theDTD;
        thePrimaryFound = PR_TRUE;
        aParserContext.mAutoDetectStatus = ePrimaryDetect;
      }
    }
    // Every existing entry has been tried without a primary match: lazily
    // create one more DTD and append it so the next iteration tries it.
    // The mHas* flags ensure each kind is created at most once.
    if (theDTDIndex == sharedObjects->mDTDDeque.GetSize() && !thePrimaryFound) {
      if (!sharedObjects->mHasXMLDTD) {
        rv = NS_NewExpatDriver(&theDTD); //do this to view XML files...
        NS_ENSURE_SUCCESS(rv, rv);

        sharedObjects->mDTDDeque.Push(theDTD);
        sharedObjects->mHasXMLDTD = PR_TRUE;
      }
#ifdef MOZ_VIEW_SOURCE
      else if (!sharedObjects->mHasViewSourceDTD) {
        rv = NS_NewViewSourceHTML(&theDTD);  //do this so all non-html files can be viewed...
        NS_ENSURE_SUCCESS(rv, rv);
        
        sharedObjects->mDTDDeque.Push(theDTD);
        sharedObjects->mHasViewSourceDTD = PR_TRUE;
      }
#endif
    }
  }

  // The deque entry is not installed directly; a new instance is created
  // from it and stored in the context.
  if(theBestDTD) {
    rv = theBestDTD->CreateNewInstance(&aParserContext.mDTD);
    NS_ENSURE_SUCCESS(rv, rv);

    *aReturn = PR_TRUE;
  }

  return rv;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void FreeSharedObjects ( void  ) [static]

Definition at line 268 of file nsParser.cpp.

Here is the caller graph for this function:

const char GetNextChar ( nsACString::const_iterator &  aStart,
nsACString::const_iterator &  aEnd 
) [inline]

Definition at line 2446 of file nsParser.cpp.

{
  // Advances aStart by one position and returns the character now under
  // it, or '\0' when the advance lands on aEnd. The caller must not pass
  // an iterator that already equals aEnd.
  NS_ASSERTION(aStart != aEnd, "end of buffer");
  ++aStart;
  if (aStart == aEnd) {
    return '\0';
  }
  return *aStart;
}

Here is the caller graph for this function:

static nsresult GetSharedObjects ( CSharedParserObjects **  aSharedParserObjects) [static]

Definition at line 256 of file nsParser.cpp.

Here is the call graph for this function:

Here is the caller graph for this function:

static PRBool IsSecondMarker ( unsigned char  aChar) [inline, static]

Definition at line 2260 of file nsParser.cpp.

{
  // True only for the four characters '!', '?', 'h' and 'H'.
  if (aChar == '!' || aChar == '?' || aChar == 'h' || aChar == 'H') {
    return PR_TRUE;
  }
  return PR_FALSE;
}

Here is the caller graph for this function:

static NS_DEFINE_CID ( kCParserCID  ,
NS_PARSER_CID   
) [static]
static NS_DEFINE_IID ( kISupportsIID  ,
NS_ISUPPORTS_IID   
) [static]
static NS_DEFINE_IID ( kIParserIID  ,
NS_IPARSER_IID   
) [static]

gess 01/04/99

Open up the parser for tokenization, building up content model..etc.

Stops parsing temporarily.

This method gets called in order to set the content sink for this parser to dump nodes to.

Call this method once you've created a parser, and want to instruct it about the command which caused the parser to be constructed.

Parameters:
@return

For example, this allows us to select a DTD which can do, say, view-source.

gess 01/04/99

Parameters:
aCommand  the command string to set

For example, this allows us to select a DTD which can do, say, view-source.

gess 01/04/99

Parameters:
aParserCommand  the command to set

gess 01/04/99

Parameters:
nsIContentSink  interface for node receiver
Returns:

That is, it will prevent the parser from building up the content model.

Returns:

However, this method does not resume parsing automatically. It's the callers' responsibility to restart the parsing engine.

Returns:

Definition at line 503 of file nsParser.cpp.

{
  // Remember the filter passed in as aFilter in the mParserFilter member.
  mParserFilter = aFilter;
}

Retrieve the sink set into the parser. gess 5/11/98

Returns:
current sink

Definition at line 595 of file nsParser.cpp.

{
  // Hand back the mSink member as-is.
  return mSink;
}

Retrieve parsemode from topmost parser context.

gess 01/04/99

Returns:
parsemode

Definition at line 623 of file nsParser.cpp.

{
  // Without a parser context there is no mode to report; that situation
  // is flagged as unexpected and eDTDMode_unknown is returned.
  if (!mParserContext) {
    NS_NOTREACHED("no parser context");
    return eDTDMode_unknown;
  }
  return mParserContext->mDTDMode;
}

Call this to query whether the parser is enabled or not.

Call this to query whether the parser thinks it's done with parsing.

vidur 4/12/99

Returns:
current state

rickg 5/12/01

Returns:
complete state

Definition at line 1529 of file nsParser.cpp.

{
  // Yields the raw masked bit, so the result is either zero or the value
  // of NS_PARSER_FLAG_PARSER_ENABLED itself -- treat it as zero/non-zero
  // rather than comparing it against PR_TRUE.
  return mFlags & NS_PARSER_FLAG_PARSER_ENABLED;
}
static PRBool ParseDocTypeDecl ( const nsString &  aBuffer,
PRInt32 *  aResultFlags,
nsString &  aPublicID,
nsString &  aSystemID 
) [static]

Definition at line 792 of file nsParser.cpp.

{
  // Scans aBuffer for an HTML DOCTYPE declaration and reports what it
  // contains.
  //
  //   aResultFlags - reset to 0, then OR-ed with PARSE_DTD_HAVE_* bits as
  //                  the corresponding parts are recognized. Bits set
  //                  before an early PR_FALSE return stay set.
  //   aPublicID    - receives the whitespace-compressed public identifier
  //                  when one is present and the PUBLIC branch completes.
  //   aSystemID    - receives the system identifier when one is found.
  //
  // Returns PR_TRUE when there is no DOCTYPE at all (flags stay 0) or the
  // declaration was understood; PR_FALSE when a DOCTYPE was found but is
  // malformed.
  PRBool haveDoctype = PR_FALSE;
  *aResultFlags = 0;

  // Skip through any comments and processing instructions
  // The PI-skipping is a bit of a hack.
  PRInt32 theIndex = 0;
  do {
    theIndex = aBuffer.FindChar('<', theIndex);
    if (theIndex == kNotFound) break;
    PRUnichar nextChar = aBuffer.CharAt(theIndex+1);
    if (nextChar == PRUnichar('!')) {
      PRInt32 tmpIndex = theIndex + 2;
      // NOTE(review): the trailing arguments presumably request a
      // case-insensitive match anchored at tmpIndex -- confirm against
      // the nsString::Find signature.
      if (kNotFound !=
          (theIndex=aBuffer.Find("DOCTYPE", PR_TRUE, tmpIndex, 1))) {
        haveDoctype = PR_TRUE;
        theIndex += 7; // skip "DOCTYPE"
        break;
      }
      // "<!" that is not a DOCTYPE: skip whitespace/comments after it and
      // move on to the closing '>'.
      theIndex = ParsePS(aBuffer, tmpIndex);
      theIndex = aBuffer.FindChar('>', theIndex);
    } else if (nextChar == PRUnichar('?')) {
      theIndex = aBuffer.FindChar('>', theIndex);
    } else {
      // Any other tag ends the search.
      break;
    }
  } while (theIndex != kNotFound);

  // Absence of a DOCTYPE is not an error; the caller sees flags == 0.
  if (!haveDoctype)
    return PR_TRUE;
  *aResultFlags |= PARSE_DTD_HAVE_DOCTYPE;

  theIndex = ParsePS(aBuffer, theIndex);
  theIndex = aBuffer.Find("HTML", PR_TRUE, theIndex, 1);
  if(kNotFound == theIndex)
    return PR_FALSE;
  theIndex = ParsePS(aBuffer, theIndex+4);
  PRInt32 tmpIndex = aBuffer.Find("PUBLIC", PR_TRUE, theIndex, 1);

  if (kNotFound != tmpIndex) {
    theIndex = ParsePS(aBuffer, tmpIndex+6);

    // We get here only if we've read <!DOCTYPE HTML PUBLIC
    // (not case sensitive) possibly with comments within.

    // Now find the beginning and end of the public identifier
    // and the system identifier (if present).

    PRUnichar lit = aBuffer.CharAt(theIndex);
    if ((lit != PRUnichar('\"')) && (lit != PRUnichar('\'')))
      return PR_FALSE;

    // Start is the first character, excluding the quote, and End is
    // the final quote, so there are (end-start) characters.

    PRInt32 PublicIDStart = theIndex + 1;
    PRInt32 PublicIDEnd = aBuffer.FindChar(lit, PublicIDStart);
    if (kNotFound == PublicIDEnd)
      return PR_FALSE;
    theIndex = ParsePS(aBuffer, PublicIDEnd + 1);
    PRUnichar next = aBuffer.CharAt(theIndex);
    if (next == PRUnichar('>')) {
      // There was a public identifier, but no system
      // identifier,
      // so do nothing.
      // This is needed to avoid the else at the end, and it's
      // also the most common case.
    } else if ((next == PRUnichar('\"')) ||
               (next == PRUnichar('\''))) {
      // We found a system identifier.
      // The flag is raised before the closing quote is located, so it
      // remains set even if the function returns PR_FALSE just below.
      *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
      PRInt32 SystemIDStart = theIndex + 1;
      PRInt32 SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);
      if (kNotFound == SystemIDEnd)
        return PR_FALSE;
      aSystemID =
        Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
    } else if (next == PRUnichar('[')) {
      // We found an internal subset.
      *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
    } else {
      // Something's wrong.
      return PR_FALSE;
    }

    // Since a public ID is a minimum literal, we must trim
    // and collapse whitespace
    aPublicID = Substring(aBuffer, PublicIDStart, PublicIDEnd - PublicIDStart);
    aPublicID.CompressWhitespace(PR_TRUE, PR_TRUE);
    *aResultFlags |= PARSE_DTD_HAVE_PUBLIC_ID;
  } else {
    tmpIndex=aBuffer.Find("SYSTEM", PR_TRUE, theIndex, 1);
    if (kNotFound != tmpIndex) {
      // DOCTYPES with system ID but no Public ID
      *aResultFlags |= PARSE_DTD_HAVE_SYSTEM_ID;
      
      theIndex = ParsePS(aBuffer, tmpIndex+6);
      PRUnichar next = aBuffer.CharAt(theIndex);
      if (next != PRUnichar('\"') && next != PRUnichar('\''))
        return PR_FALSE;

      PRInt32 SystemIDStart = theIndex + 1;
      PRInt32 SystemIDEnd = aBuffer.FindChar(next, SystemIDStart);

      if (kNotFound == SystemIDEnd)
        return PR_FALSE;
      aSystemID =
        Substring(aBuffer, SystemIDStart, SystemIDEnd - SystemIDStart);
      theIndex = ParsePS(aBuffer, SystemIDEnd + 1);
    }

    // With neither PUBLIC nor (any longer) SYSTEM material pending, the
    // declaration must continue with an internal subset or end with '>'.
    PRUnichar nextChar = aBuffer.CharAt(theIndex);
    if (nextChar == PRUnichar('['))
      *aResultFlags |= PARSE_DTD_HAVE_INTERNAL_SUBSET;
    else if (nextChar != PRUnichar('>'))
      return PR_FALSE;
  }
  return PR_TRUE;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static PRInt32 ParsePS ( const nsString &  aBuffer,
PRInt32  aIndex 
) [static]

Determine what DTD mode (and thus what layout nsCompatibility mode) to use for this document based on the first chunk of data received from the network (each parsercontext can have its own mode).

(No, this is not an optimal solution -- we really don't need to know until after we've received the DOCTYPE, and this could easily be part of the regular parsing process if the parser were designed in a way that made such modifications easy.)

Definition at line 765 of file nsParser.cpp.

{
  // Starting at aIndex, step over any run of whitespace and "--...--"
  // comments, and return the index of the first character that is
  // neither. An unterminated comment stops the scan at its first '-'.
  for (;;) {
    const PRUnichar current = aBuffer.CharAt(aIndex);

    const PRBool isWhitespace =
      (current == PRUnichar(' ')) || (current == PRUnichar('\t')) ||
      (current == PRUnichar('\n')) || (current == PRUnichar('\r'));
    if (isWhitespace) {
      ++aIndex;
      continue;
    }

    // Anything other than the start of a "--" comment ends the scan.
    if (current != PRUnichar('-')) {
      return aIndex;
    }
    if (aBuffer.CharAt(aIndex+1) != PRUnichar('-')) {
      return aIndex;
    }

    // Look for the closing "--" after the opening pair.
    const PRInt32 closing = aBuffer.Find("--",PR_FALSE,aIndex+2,-1);
    if (kNotFound == closing) {
      return aIndex;
    }
    aIndex = closing + 2;
  }
}

Here is the caller graph for this function:

static NS_METHOD ParserWriteFunc ( nsIInputStream *  in,
void *  closure,
const char *  fromRawSegment,
PRUint32  toOffset,
PRUint32  count,
PRUint32 *  writeCount 
) [static]

Definition at line 2581 of file nsParser.cpp.

{
  // Segment-writer callback: receives one raw chunk of stream data and
  // appends it to the scanner, sniffing the charset on the first chunk.
  //
  //   in             - unused here.
  //   closure        - must point at a ParserWriteStruct; a null closure
  //                    yields NS_ERROR_FAILURE.
  //   fromRawSegment - start of the chunk.
  //   toOffset       - unused here.
  //   count          - number of bytes in the chunk.
  //   writeCount     - set to count when the scanner accepted the data;
  //                    left untouched otherwise.
  //
  // Returns the result of nsScanner::Append(), or NS_ERROR_FAILURE when
  // closure is null.
  nsresult result;
  ParserWriteStruct* pws = NS_STATIC_CAST(ParserWriteStruct*, closure);
  const char* buf = fromRawSegment;
  PRUint32 theNumRead = count;

  if (!pws) {
    return NS_ERROR_FAILURE;
  }

  if (pws->mNeedCharsetCheck) {
    PRInt32 guessSource;
    nsCAutoString guess;
    nsCAutoString preferred;

    // The check is attempted once only: the flag is cleared before
    // detection runs, whatever the outcome.
    pws->mNeedCharsetCheck = PR_FALSE;
    // DetectByteOrderMark reads the first four bytes unconditionally,
    // hence the count >= 4 guard.
    if (pws->mParser->DetectMetaTag(buf, theNumRead, guess, guessSource) ||
        ((count >= 4) &&
         DetectByteOrderMark((const unsigned char*)buf,
                             theNumRead, guess, guessSource))) {
      nsCOMPtr<nsICharsetAlias> alias(do_GetService(NS_CHARSETALIAS_CONTRACTID));
      // The service lookup can come back empty; treat that like an
      // unrecognized charset instead of dereferencing a null pointer.
      result = alias ? alias->GetPreferred(guess, preferred)
                     : NS_ERROR_FAILURE;
      // Only continue if it's a recognized charset and not
      // one of a designated set that we ignore.
      // (A UTF-16/UTF-32 name is honoured only when it came from an actual
      // byte order mark.)
      if (NS_SUCCEEDED(result) &&
          ((kCharsetFromByteOrderMark == guessSource) ||
           (!preferred.EqualsLiteral("UTF-16") &&
            !preferred.EqualsLiteral("UTF-16BE") &&
            !preferred.EqualsLiteral("UTF-16LE") &&
            !preferred.EqualsLiteral("UTF-32BE") &&
            !preferred.EqualsLiteral("UTF-32LE")))) {
        guess = preferred;
        pws->mParser->SetDocumentCharset(guess, guessSource);
        pws->mParser->SetSinkCharset(preferred);
        // When the request is a caching channel with a cache entry,
        // record the charset in the entry's "charset" metadata element.
        nsCOMPtr<nsICachingChannel> channel(do_QueryInterface(pws->mRequest));
        if (channel) {
          nsCOMPtr<nsISupports> cacheToken;
          channel->GetCacheToken(getter_AddRefs(cacheToken));
          if (cacheToken) {
            nsCOMPtr<nsICacheEntryDescriptor> cacheDescriptor(do_QueryInterface(cacheToken));
            if (cacheDescriptor) {
              // rv exists only in DEBUG builds; it is consumed solely by
              // the assertion below.
#ifdef DEBUG
              nsresult rv =
#endif
                cacheDescriptor->SetMetaDataElement("charset",
                                                    guess.get());
              NS_ASSERTION(NS_SUCCEEDED(rv),"cannot SetMetaDataElement");
            }
          }
        }
      }
    }
  }

  // The filter is handed the buffer and a pointer to the byte count
  // before the data reaches the scanner.
  if (pws->mParserFilter)
    pws->mParserFilter->RawBuffer(buf, &theNumRead);

  result = pws->mScanner->Append(buf, theNumRead, pws->mRequest);
  if (NS_SUCCEEDED(result)) {
    *writeCount = count;
  }

  return result;
}

Here is the call graph for this function:


Variable Documentation

Definition at line 250 of file nsParser.cpp.

Definition at line 945 of file nsParser.cpp.