Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Protected Member Functions | Protected Attributes
nsUniversalDetector Class Reference

#include <nsUniversalDetector.h>

Inheritance diagram for nsUniversalDetector:
Inheritance graph
[legend]
Collaboration diagram for nsUniversalDetector:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 nsUniversalDetector ()
virtual ~nsUniversalDetector ()
virtual nsresult HandleData (const char *aBuf, PRUint32 aLen)
virtual void DataEnd (void)

Protected Member Functions

virtual void Report (const char *aCharset)=0
virtual void Reset ()

Protected Attributes

nsInputState mInputState
PRBool mDone
PRBool mInTag
PRBool mStart
PRBool mGotData
char mLastChar
const char * mDetectedCharset
PRInt32 mBestGuess
nsCharSetProber * mCharSetProbers [NUM_OF_CHARSET_PROBERS]
nsCharSetProber * mEscCharSetProber

Detailed Description

Definition at line 51 of file nsUniversalDetector.h.


Constructor & Destructor Documentation

Definition at line 48 of file nsUniversalDetector.cpp.

Definition at line 66 of file nsUniversalDetector.cpp.


Member Function Documentation

void nsUniversalDetector::DataEnd ( void  ) [virtual]

Definition at line 236 of file nsUniversalDetector.cpp.

{
  // Called when the caller has no more data to feed. Reports the detected
  // charset through Report() if one is known, otherwise (in the high-byte
  // state) reports the most confident prober's guess when that confidence
  // exceeds MINIMUM_THRESHOLD. Reports nothing in every other case.
  if (!mGotData)
  {
    // We have not received any data yet, so there is nothing to report.
    // Callers sometimes invoke DataEnd before anything was sent to the detector.
    return;
  }

  if (mDetectedCharset)
  {
    // A charset was already determined while handling data; just report it.
    mDone = PR_TRUE;
    Report(mDetectedCharset);
    return;
  }

  switch (mInputState)
  {
  case eHighbyte:
    {
      float proberConfidence;
      float maxProberConfidence = (float)0.0;
      PRInt32 maxProber = 0;

      for (PRInt32 i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
      {
        // HandleData enters the eHighbyte state before it has verified that
        // every prober was allocated, so a slot may still be null after an
        // out-of-memory failure. Skip such slots instead of dereferencing them.
        if (!mCharSetProbers[i])
          continue;

        proberConfidence = mCharSetProbers[i]->GetConfidence();
        if (proberConfidence > maxProberConfidence)
        {
          maxProberConfidence = proberConfidence;
          maxProber = i;
        }
      }
      // Do not report anything when we are not confident enough; staying
      // silent is in fact a negative answer. The null check covers the case
      // where no prober raised the confidence and slot 0 was never allocated.
      if (maxProberConfidence > MINIMUM_THRESHOLD && mCharSetProbers[maxProber])
        Report(mCharSetProbers[maxProber]->GetCharSetName());
    }
    break;
  case eEscAscii:
    // Nothing is reported for escape-sequence input that was never resolved.
    break;
  default:
    ;
  }
  return;
}

Here is the call graph for this function:

Here is the caller graph for this function:

nsresult nsUniversalDetector::HandleData ( const char *  aBuf,
PRUint32  aLen 
) [virtual]

Definition at line 101 of file nsUniversalDetector.cpp.

{
  // Feeds aLen bytes from aBuf into the detector.
  //
  // On the first call the buffer is checked for a byte-order mark. After that
  // the bytes are scanned to classify the input (pure ASCII, escape/HZ
  // sequences, or high-byte data) and the buffer is handed to the probers that
  // match the current input state.
  //
  // Returns NS_OK normally, or NS_ERROR_OUT_OF_MEMORY if a prober could not be
  // allocated. Once mDone is set, further calls return NS_OK immediately.
  if (mDone)
    return NS_OK;

  if (aLen > 0)
    mGotData = PR_TRUE;

  // If the data starts with a BOM, we know it is a Unicode encoding.
  if (mStart)
  {
    mStart = PR_FALSE;
    // NOTE: all BOM checks (including the 2- and 3-byte ones) are only
    // attempted when the first buffer holds at least four bytes.
    if (aLen > 3)
    {
      switch (aBuf[0])
      {
      case '\xEF':
        if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
          // EF BB BF  UTF-8 encoded BOM
          mDetectedCharset = "UTF-8";
        break;
      case '\xFE':
        if (('\xFF' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
          // FE FF 00 00  UCS-4, unusual octet order BOM (3412)
          mDetectedCharset = "X-ISO-10646-UCS-4-3412";
        else if ('\xFF' == aBuf[1])
          // FE FF  UTF-16, big endian BOM
          mDetectedCharset = "UTF-16BE";
        break;
      case '\x00':
        if (('\x00' == aBuf[1]) && ('\xFE' == aBuf[2]) && ('\xFF' == aBuf[3]))
          // 00 00 FE FF  UTF-32, big-endian BOM
          mDetectedCharset = "UTF-32BE";
        else if (('\x00' == aBuf[1]) && ('\xFF' == aBuf[2]) && ('\xFE' == aBuf[3]))
          // 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
          mDetectedCharset = "X-ISO-10646-UCS-4-2143";
        break;
      case '\xFF':
        if (('\xFE' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
          // FF FE 00 00  UTF-32, little-endian BOM
          mDetectedCharset = "UTF-32LE";
        else if ('\xFE' == aBuf[1])
          // FF FE  UTF-16, little endian BOM
          mDetectedCharset = "UTF-16LE";
        break;
      }  // switch
    }

    // This check runs on the first call regardless of the buffer length.
    if (mDetectedCharset)
    {
      mDone = PR_TRUE;
      return NS_OK;
    }
  }

  PRUint32 i;
  for (i = 0; i < aLen; i++)
  {
    // Other than 0xA0, if every other character is ASCII, the page is ASCII.
    // 0xA0 is exempt because many ASCII-only pages contain NBSP.
    if (aBuf[i] & '\x80' && aBuf[i] != '\xA0')
    {
      // We got a non-ASCII (high) byte.
      if (mInputState != eHighbyte)
      {
        // Kill mEscCharSetProber if it is active.
        if (mEscCharSetProber) {
          delete mEscCharSetProber;
          mEscCharSetProber = nsnull;
        }

        // Start the multibyte, single-byte and Latin-1 charset probers.
        if (nsnull == mCharSetProbers[0])
          mCharSetProbers[0] = new nsMBCSGroupProber;
        if (nsnull == mCharSetProbers[1])
          mCharSetProbers[1] = new nsSBCSGroupProber;
        if (nsnull == mCharSetProbers[2])
          mCharSetProbers[2] = new nsLatin1Prober;

        if ((nsnull == mCharSetProbers[0]) ||
            (nsnull == mCharSetProbers[1]) ||
            (nsnull == mCharSetProbers[2]))
          return NS_ERROR_OUT_OF_MEMORY;

        // Switch state only after every prober exists. Committing the state
        // before the allocation check would make a later call skip this
        // allocation block and dereference a null prober below.
        mInputState = eHighbyte;
      }
    }
    else
    {
      // OK, just pure ASCII so far.
      if ( ePureAscii == mInputState &&
        (aBuf[i] == '\033' || (aBuf[i] == '{' && mLastChar == '~')) )
      {
        // Found an escape character or the HZ "~{" sequence.
        mInputState = eEscAscii;
      }
      mLastChar = aBuf[i];
    }
  }

  nsProbingState st;
  switch (mInputState)
  {
  case eEscAscii:
    // The escape prober is created lazily the first time it is needed.
    if (nsnull == mEscCharSetProber) {
      mEscCharSetProber = new nsEscCharSetProber;
      if (nsnull == mEscCharSetProber)
        return NS_ERROR_OUT_OF_MEMORY;
    }
    st = mEscCharSetProber->HandleData(aBuf, aLen);
    if (st == eFoundIt)
    {
      mDone = PR_TRUE;
      mDetectedCharset = mEscCharSetProber->GetCharSetName();
    }
    break;
  case eHighbyte:
    for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
    {
      // Defensive: never dereference a prober slot that was not allocated.
      if (nsnull == mCharSetProbers[i])
        continue;

      st = mCharSetProbers[i]->HandleData(aBuf, aLen);
      if (st == eFoundIt)
      {
        mDone = PR_TRUE;
        mDetectedCharset = mCharSetProbers[i]->GetCharSetName();
        return NS_OK;
      }
    }
    break;

  default:  // pure ASCII
    ;       // do nothing here
  }
  return NS_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:

virtual void nsUniversalDetector::Report ( const char *  aCharset) [protected, pure virtual]

Implemented in nsUniversalXPCOMStringDetector, and nsUniversalXPCOMDetector.

Here is the caller graph for this function:

void nsUniversalDetector::Reset ( void  ) [protected, virtual]

Definition at line 76 of file nsUniversalDetector.cpp.

Here is the call graph for this function:

Here is the caller graph for this function:


Member Data Documentation

Definition at line 68 of file nsUniversalDetector.h.

Definition at line 70 of file nsUniversalDetector.h.

Definition at line 67 of file nsUniversalDetector.h.

Definition at line 62 of file nsUniversalDetector.h.

Definition at line 71 of file nsUniversalDetector.h.

Definition at line 65 of file nsUniversalDetector.h.

Definition at line 61 of file nsUniversalDetector.h.

Definition at line 63 of file nsUniversalDetector.h.

Definition at line 66 of file nsUniversalDetector.h.

Definition at line 64 of file nsUniversalDetector.h.


The documentation for this class was generated from the following files: