Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Protected Member Functions | Protected Attributes | Private Attributes
nsUniversalXPCOMStringDetector Class Reference

#include <nsUdetXPCOMWrapper.h>

Inheritance diagram for nsUniversalXPCOMStringDetector:
Inheritance graph
[legend]
Collaboration diagram for nsUniversalXPCOMStringDetector:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 nsUniversalXPCOMStringDetector ()
virtual ~nsUniversalXPCOMStringDetector ()
NS_IMETHOD DoIt (const char *aBuf, PRUint32 aLen, const char **oCharset, nsDetectionConfident &oConf)
virtual nsresult HandleData (const char *aBuf, PRUint32 aLen)
virtual void DataEnd (void)

Protected Member Functions

virtual void Report (const char *aCharset)
virtual void Reset ()

Protected Attributes

nsInputState mInputState
PRBool mDone
PRBool mInTag
PRBool mStart
PRBool mGotData
char mLastChar
const char * mDetectedCharset
PRInt32 mBestGuess
nsCharSetProber * mCharSetProbers [NUM_OF_CHARSET_PROBERS]
nsCharSetProber * mEscCharSetProber

Private Attributes

nsCOMPtr< nsICharsetDetectionObserver > mObserver
const char * mResult

Detailed Description

Definition at line 77 of file nsUdetXPCOMWrapper.h.


Constructor & Destructor Documentation

nsUniversalXPCOMStringDetector::nsUniversalXPCOMStringDetector ( )

Definition at line 131 of file nsUdetXPCOMWrapper.cpp.

nsUniversalXPCOMStringDetector::~nsUniversalXPCOMStringDetector ( ) [virtual]

Definition at line 136 of file nsUdetXPCOMWrapper.cpp.

{
  // Empty body: no statements are executed here.
}

Member Function Documentation

void nsUniversalDetector::DataEnd ( void  ) [virtual, inherited]

Definition at line 236 of file nsUniversalDetector.cpp.

{
  // Signals that no more input will arrive and reports the final verdict
  // (if any) through Report().
  //
  //  - If no data was ever received, nothing is reported.
  //  - If a charset was already pinned down (mDetectedCharset), it is
  //    reported and the detector is marked done.
  //  - Otherwise, in the high-byte state, the prober with the highest
  //    confidence is reported, but only when that confidence exceeds
  //    MINIMUM_THRESHOLD.
  //  - In every other state nothing is reported.
  if (!mGotData)
  {
    // We haven't got any data yet, so return immediately. Callers
    // sometimes invoke DataEnd before anything has been sent to the
    // detector.
    return;
  }

  if (mDetectedCharset)
  {
    mDone = PR_TRUE;
    Report(mDetectedCharset);
    return;
  }

  switch (mInputState)
  {
  case eHighbyte:
    {
      float maxProberConfidence = (float)0.0;
      PRInt32 maxProber = 0;

      for (PRInt32 i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
      {
        // A slot can still be null if HandleData bailed out with
        // NS_ERROR_OUT_OF_MEMORY after switching to the high-byte state;
        // skip it instead of dereferencing a null pointer.
        if (nsnull == mCharSetProbers[i])
          continue;

        const float proberConfidence = mCharSetProbers[i]->GetConfidence();
        if (proberConfidence > maxProberConfidence)
        {
          maxProberConfidence = proberConfidence;
          maxProber = i;
        }
      }

      // Do not report anything when we are not confident enough; staying
      // silent is in fact a negative answer.
      if ((maxProberConfidence > MINIMUM_THRESHOLD) &&
          (nsnull != mCharSetProbers[maxProber]))
        Report(mCharSetProbers[maxProber]->GetCharSetName());
    }
    break;
  case eEscAscii:
    break;
  default:
    ;
  }
  return;
}

Here is the call graph for this function:

Here is the caller graph for this function:

NS_IMETHODIMP nsUniversalXPCOMStringDetector::DoIt ( const char *  aBuf,
PRUint32  aLen,
const char **  oCharset,
nsDetectionConfident &  oConf 
) [virtual]

Implements nsIStringCharsetDetector.

Definition at line 150 of file nsUdetXPCOMWrapper.cpp.

{
  // One-shot detection over a complete buffer: clear any previous result,
  // reset the detector, feed it aBuf/aLen, then finish the stream.
  // Report() stores its argument in mResult, so a non-null mResult after
  // DataEnd() means a charset was reported.
  //
  // On a HandleData failure its error code is returned as-is. Otherwise
  // NS_OK is returned; oCharset and oConf are written only when a charset
  // was reported and are left untouched when nothing was detected.
  mResult = nsnull;
  Reset();

  const nsresult status = HandleData(aBuf, aLen);
  if (NS_FAILED(status))
    return status;

  DataEnd();

  // Nothing was reported: leave the out-parameters as the caller set them.
  if (!mResult)
    return NS_OK;

  *oCharset = mResult;
  oConf = eBestAnswer;
  return NS_OK;
}

Here is the call graph for this function:

nsresult nsUniversalDetector::HandleData ( const char *  aBuf,
PRUint32  aLen 
) [virtual, inherited]

Definition at line 101 of file nsUniversalDetector.cpp.

{
  // Feeds one chunk of raw bytes (aBuf, aLen) to the detector.
  //
  // Steps:
  //  1. On the first call after a reset (mStart), look for a Unicode BOM at
  //     the start of the chunk; a match settles the charset immediately.
  //  2. Scan the chunk to classify the input as pure ASCII, ASCII with
  //     escape sequences (ESC or HZ "~{"), or high-byte data, creating the
  //     probers for the high-byte state on first need.
  //  3. Hand the chunk to the prober(s) of the current state; a prober that
  //     answers eFoundIt settles the charset.
  //
  // Returns NS_OK, or NS_ERROR_OUT_OF_MEMORY when a prober could not be
  // allocated. Once mDone is set, further calls are no-ops.
  if (mDone)
    return NS_OK;

  if (aLen > 0)
    mGotData = PR_TRUE;

  // If the data starts with a BOM, we know it is UTF.
  if (mStart)
  {
    mStart = PR_FALSE;
    // Every signature is tested against the number of bytes actually
    // present, so a chunk holding only a 2-byte (UTF-16) or 3-byte (UTF-8)
    // BOM is still recognised. 4-byte signatures are tested before the
    // 2-byte ones that share their prefix.
    // NOTE: a BOM split across two chunks is not reassembled here.
    if (aLen > 0)
      switch (aBuf[0])
        {
        case '\xEF':
          if ((aLen >= 3) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
            // EF BB BF  UTF-8 encoded BOM
            mDetectedCharset = "UTF-8";
        break;
        case '\xFE':
          if ((aLen >= 4) && ('\xFF' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
            // FE FF 00 00  UCS-4, unusual octet order BOM (3412)
            mDetectedCharset = "X-ISO-10646-UCS-4-3412";
          else if ((aLen >= 2) && ('\xFF' == aBuf[1]))
            // FE FF  UTF-16, big endian BOM
            mDetectedCharset = "UTF-16BE";
        break;
        case '\x00':
          if ((aLen >= 4) && ('\x00' == aBuf[1]) && ('\xFE' == aBuf[2]) && ('\xFF' == aBuf[3]))
            // 00 00 FE FF  UTF-32, big-endian BOM
            mDetectedCharset = "UTF-32BE";
          else if ((aLen >= 4) && ('\x00' == aBuf[1]) && ('\xFF' == aBuf[2]) && ('\xFE' == aBuf[3]))
            // 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
            mDetectedCharset = "X-ISO-10646-UCS-4-2143";
        break;
        case '\xFF':
          if ((aLen >= 4) && ('\xFE' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
            // FF FE 00 00  UTF-32, little-endian BOM
            mDetectedCharset = "UTF-32LE";
          else if ((aLen >= 2) && ('\xFE' == aBuf[1]))
            // FF FE  UTF-16, little endian BOM
            mDetectedCharset = "UTF-16LE";
        break;
      }  // switch

      if (mDetectedCharset)
      {
        mDone = PR_TRUE;
        return NS_OK;
      }
  }
  
  PRUint32 i;
  for (i = 0; i < aLen; i++)
  {
    //other than 0xa0, if every other character is ascii, the page is ascii
    if (aBuf[i] & '\x80' && aBuf[i] != '\xA0')  //Since many Ascii only page contains NBSP 
    {
      //we got a non-ascii byte (high-byte)
      if (mInputState != eHighbyte)
      {
        //start multibyte and singlebyte charset prober
        //Allocate (and verify) the probers BEFORE committing to the
        //high-byte state, so an allocation failure cannot leave the
        //detector in eHighbyte with null probers that a later HandleData
        //or DataEnd call would dereference.
        if (nsnull == mCharSetProbers[0])
          mCharSetProbers[0] = new nsMBCSGroupProber;
        if (nsnull == mCharSetProbers[1])
          mCharSetProbers[1] = new nsSBCSGroupProber;
        if (nsnull == mCharSetProbers[2])
          mCharSetProbers[2] = new nsLatin1Prober; 

        if ((nsnull == mCharSetProbers[0]) ||
            (nsnull == mCharSetProbers[1]) ||
            (nsnull == mCharSetProbers[2]))
            return NS_ERROR_OUT_OF_MEMORY;

        //kill mEscCharSetProber if it is active
        if (mEscCharSetProber) {
          delete mEscCharSetProber;
          mEscCharSetProber = nsnull;
        }

        //adjust state
        mInputState = eHighbyte;
      }
    }
    else
    {
      //ok, just pure ascii so far
      if ( ePureAscii == mInputState &&
        (aBuf[i] == '\033' || (aBuf[i] == '{' && mLastChar == '~')) )
      {
        //found escape character or HZ "~{"
        mInputState = eEscAscii;
      }
      //only bytes taking this branch are remembered as the last char
      mLastChar = aBuf[i];
    }
  }

  nsProbingState st;
  switch (mInputState)
  {
  case eEscAscii:
    //the escape prober is created lazily, on first use
    if (nsnull == mEscCharSetProber) {
      mEscCharSetProber = new nsEscCharSetProber;
      if (nsnull == mEscCharSetProber)
        return NS_ERROR_OUT_OF_MEMORY;
    }
    st = mEscCharSetProber->HandleData(aBuf, aLen);
    if (st == eFoundIt)
    {
      mDone = PR_TRUE;
      mDetectedCharset = mEscCharSetProber->GetCharSetName();
    }
    break;
  case eHighbyte:
    //feed every prober; the first one that is certain wins
    for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
    {
      st = mCharSetProbers[i]->HandleData(aBuf, aLen);
      if (st == eFoundIt) 
      {
        mDone = PR_TRUE;
        mDetectedCharset = mCharSetProbers[i]->GetCharSetName();
        return NS_OK;
      } 
    }
    break;

  default:  //pure ascii
    ;//do nothing here
  }
  return NS_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void nsUniversalXPCOMStringDetector::Report ( const char *  aCharset) [protected, virtual]

Implements nsUniversalDetector.

Definition at line 142 of file nsUdetXPCOMWrapper.cpp.

{
  // Callback for a detection verdict: remembers the reported charset name
  // in mResult. In DEBUG_chardet builds the name is also written to stdout.
#ifdef DEBUG_chardet
  printf("New Charset Prober report charset %s . \r\n", aCharset);
#endif
  mResult = aCharset;
}
void nsUniversalDetector::Reset ( void  ) [protected, virtual, inherited]

Definition at line 76 of file nsUniversalDetector.cpp.

Here is the call graph for this function:

Here is the caller graph for this function:


Member Data Documentation

PRInt32 nsUniversalDetector::mBestGuess [protected, inherited]

Definition at line 68 of file nsUniversalDetector.h.

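nsCharSetProber* nsUniversalDetector::mCharSetProbers[NUM_OF_CHARSET_PROBERS] [protected, inherited]

Definition at line 70 of file nsUniversalDetector.h.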

const char* nsUniversalDetector::mDetectedCharset [protected, inherited]

Definition at line 67 of file nsUniversalDetector.h.

PRBool nsUniversalDetector::mDone [protected, inherited]

Definition at line 62 of file nsUniversalDetector.h.

nsCharSetProber* nsUniversalDetector::mEscCharSetProber [protected, inherited]

Definition at line 71 of file nsUniversalDetector.h.

PRBool nsUniversalDetector::mGotData [protected, inherited]

Definition at line 65 of file nsUniversalDetector.h.

nsInputState nsUniversalDetector::mInputState [protected, inherited]

Definition at line 61 of file nsUniversalDetector.h.

PRBool nsUniversalDetector::mInTag [protected, inherited]

Definition at line 63 of file nsUniversalDetector.h.

char nsUniversalDetector::mLastChar [protected, inherited]

Definition at line 66 of file nsUniversalDetector.h.

nsCOMPtr<nsICharsetDetectionObserver> nsUniversalXPCOMStringDetector::mObserver [private]

Definition at line 90 of file nsUdetXPCOMWrapper.h.

const char* nsUniversalXPCOMStringDetector::mResult [private]

Definition at line 91 of file nsUdetXPCOMWrapper.h.

PRBool nsUniversalDetector::mStart [protected, inherited]

Definition at line 64 of file nsUniversalDetector.h.


The documentation for this class was generated from the following files: