Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Protected Member Functions | Private Member Functions | Private Attributes
nsXPCOMStringDetector Class Reference

#include <nsPSMDetectors.h>

Inheritance diagram for nsXPCOMStringDetector:
Inheritance graph
[legend]
Collaboration diagram for nsXPCOMStringDetector:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 nsXPCOMStringDetector (PRUint8 aItems, nsVerifier *const *aVer, nsEUCStatistics *const *aStatisticsSet)
virtual ~nsXPCOMStringDetector ()
NS_IMETHOD DoIt (const char *aBuf, PRUint32 aLen, const char **oCharset, nsDetectionConfident &oConfident)

Protected Member Functions

virtual void Report (const char *charset)

Private Member Functions

virtual PRBool HandleData (const char *aBuf, PRUint32 aLen)
virtual void DataEnd ()
void Reset ()
void Sample (const char *aBuf, PRUint32 aLen, PRBool aLastChance=PR_FALSE)

Private Attributes

const char * mResult
PRUint8 mItems
PRUint8 mClassItems
PRUint8 mState [MAX_VERIFIERS]
PRUint8 mItemIdx [MAX_VERIFIERS]
nsVerifier *const * mVerifier
nsEUCStatistics *const * mStatisticsData
PRBool mDone
PRBool mRunSampler
PRBool mClassRunSampler

Detailed Description

Definition at line 274 of file nsPSMDetectors.h.


Constructor & Destructor Documentation

Definition at line 536 of file nsPSMDetectors.cpp.

   : nsPSMDetector( aItems, aVer, aStatisticsSet)
{
  // Empty body: all three arguments are forwarded unchanged to the
  // nsPSMDetector base-class constructor; nothing else is initialized here.
}

Definition at line 541 of file nsPSMDetectors.cpp.

{
  // Empty body: this destructor performs no cleanup of its own.
}

Member Function Documentation

void nsPSMDetector::DataEnd ( void  ) [virtual, inherited]

Definition at line 302 of file nsPSMDetectors.cpp.

{
  // End-of-data hook: try to settle on a charset with what is left.
  //
  // GB18030 accepts nearly every byte sequence that Big5, SJIS and the
  // EUC-xx family accept, so it tends to survive alongside the real
  // answer while rarely being the real answer itself. When exactly two
  // candidates remain and one of them is GB18030, report the other one.
  if (2 == mItems) {
    // Slot (0 or 1) of the candidate to report; -1 means neither slot
    // holds the GB18030 verifier and nothing is reported here.
    PRInt32 survivor = -1;
    if (mVerifier[mItemIdx[0]] == (&nsGB18030Verifier)) {
      survivor = 1;
    } else if (mVerifier[mItemIdx[1]] == (&nsGB18030Verifier)) {
      survivor = 0;
    }

    if (survivor >= 0) {
      Report(mVerifier[mItemIdx[survivor]]->charset);
      mDone = PR_TRUE;
    }
  }

  // Give the sampler a final pass with no new bytes and the
  // "last chance" flag set.
  if (mRunSampler) {
    Sample(nsnull, 0, PR_TRUE);
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

NS_IMETHODIMP nsXPCOMStringDetector::DoIt ( const char *  aBuf,
PRUint32  aLen,
const char **  oCharset,
nsDetectionConfident &  oConfident 
) [virtual]

Implements nsIStringCharsetDetector.

Definition at line 550 of file nsPSMDetectors.cpp.

{
  // One-shot detection over a complete buffer.
  //   aBuf/aLen  - bytes to examine.
  //   oCharset   - receives the reported charset name, or nsnull.
  //   oConfident - receives eSureAnswer, eBestAnswer or eNoAnswerMatch.
  // Always returns NS_OK; the detector is Reset() before returning.
  mResult = nsnull;
  HandleData(aBuf, aLen);

  if (mResult) {
    // A charset was reported while the data was being consumed.
    *oCharset = mResult;
    oConfident = eSureAnswer;
  } else if (mDone) {
    // The detector finished without reporting anything: no match.
    *oCharset = nsnull;
    oConfident = eNoAnswerMatch;
  } else {
    // Undecided so far: signal end of data, which may still report a
    // charset; if it does, that counts only as a best guess.
    DataEnd();
    *oCharset = mResult;
    if (mResult) {
      oConfident = eBestAnswer;
    } else {
      oConfident = eNoAnswerMatch;
    }
  }

  Reset();
  return NS_OK;
}

Here is the call graph for this function:

PRBool nsPSMDetector::HandleData ( const char *  aBuf,
PRUint32  aLen 
) [virtual, inherited]

Definition at line 325 of file nsPSMDetectors.cpp.

{
  // Feeds aLen bytes from aBuf, one at a time, to every verifier that is
  // still a candidate. Returns PR_TRUE (with mDone set) as soon as a
  // decision is reached, or PR_FALSE if the whole buffer was consumed
  // without one.
  PRUint32 i,j;
  PRUint32 st;
  for(i=0; i < aLen; i++)
  {
     char b = aBuf[i];
     // Note: j is only advanced in the "keep this candidate" branch below,
     // because removing a candidate moves a different one into slot j.
     for(j = 0; j < mItems; )
     {
#ifdef ftang_TRACE_STATE
       if(  mVerifier[mItemIdx[j]] == & TRACE_VERIFIER )
       {
           printf("%d = %d\n", i + mDbgLen, mState[j]);
       }
#endif
#ifdef DETECTOR_DEBUG
        mDbgTest++;
#endif 
        // Advance candidate j's state with the current byte.
        st = GETNEXTSTATE( mVerifier[mItemIdx[j]], b, mState[j] );
        if(eItsMe == st) 
        {
            // This verifier positively identified its charset: report and stop.
#ifdef DETECTOR_DEBUG
            printf("It's %s- byte %d(%x) test %d\n", 
                    mVerifier[mItemIdx[j]]->charset,
                    i+mDbgLen,
                    i+mDbgLen,
                    mDbgTest
                  );
#endif
            Report( mVerifier[mItemIdx[j]]->charset);
            mDone = PR_TRUE;
            return mDone;
        } else if (eError == st) 
        {
            // This verifier rejected the byte: drop it from the candidate list.
#ifdef DETECTOR_DEBUG
            printf("It's NOT %s- byte %d(%x)\n", 
                    mVerifier[mItemIdx[j]]->charset,
                    i+mDbgLen,
                    i+mDbgLen);
#endif
            mItems--;
            if(j < mItems )
            {
                // Fill the hole with the last candidate (index and state);
                // slot j is re-examined on the next pass of the inner loop.
                mItemIdx[j] = mItemIdx[mItems];
                mState[j] = mState[mItems];
            } 
        } else {
            // Still plausible: remember the new state and move to the next one.
            mState[j++] = st;
        } 
     }
     if( mItems <= 1) 
     {
         // Zero or one candidate left: report the survivor (if any) and
         // finish either way.
         if( 1 == mItems) {
#ifdef DETECTOR_DEBUG
             printf("It's %s- byte %d (%x) Test %d. The only left\n", 
                       mVerifier[mItemIdx[0]]->charset,
                       i+mDbgLen,
                       i+mDbgLen,
                       mDbgTest);
#endif
             Report( mVerifier[mItemIdx[0]]->charset);
         }
         mDone = PR_TRUE;
         return mDone;
     } else {
        // If the only charset left is UCS2LE/UCS2BE and another, report the other
        PRInt32 nonUCS2Num=0;   // how many remaining candidates are not UCS2BE/LE
        PRInt32 nonUCS2Idx=0;   // slot of the last such candidate seen
        for(j = 0; j < mItems; j++) {
             if(((&nsUCS2BEVerifier) != mVerifier[mItemIdx[j]]) &&
                ((&nsUCS2LEVerifier) != mVerifier[mItemIdx[j]])) {
                  nonUCS2Num++;
                  nonUCS2Idx = j;
             }
        }
        if(1 == nonUCS2Num) {
#ifdef DETECTOR_DEBUG
             printf("It's %s- byte %d (%x) Test %d. The only left except UCS2LE/BE\n", 
                       mVerifier[mItemIdx[nonUCS2Idx]]->charset,
                       i+mDbgLen,
                       i+mDbgLen,
                       mDbgTest);
#endif
            Report( mVerifier[mItemIdx[nonUCS2Idx]]->charset);
            mDone = PR_TRUE;
            return mDone;
        }
     }
  }
  // No decision from the verifiers alone: hand the buffer to the sampler
  // if it is still enabled.
  if(mRunSampler)
     Sample(aBuf, aLen);

#ifdef DETECTOR_DEBUG
  mDbgLen += aLen;
#endif
  return PR_FALSE;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void nsXPCOMStringDetector::Report ( const char *  charset) [protected, virtual]

Implements nsPSMDetector.

Definition at line 545 of file nsPSMDetectors.cpp.

{
  // Record the reported charset name so that DoIt() can hand it back
  // to its caller.
  this->mResult = charset;
}
void nsPSMDetector::Reset ( void  ) [protected, inherited]

Definition at line 286 of file nsPSMDetectors.cpp.

{
  // Return the detector to its starting state: restore the candidate
  // count and sampler flag from mClassItems / mClassRunSampler and clear
  // the done flag.
  mItems = mClassItems;
  mRunSampler = mClassRunSampler;
  mDone = PR_FALSE;
  NS_ASSERTION(MAX_VERIFIERS >= mItems, "MAX_VERIFIERS is too small!");

  // Every candidate slot starts in state 0 and maps to the verifier
  // with the same index.
  PRUint8 slot = 0;
  while (slot < mItems) {
    mItemIdx[slot] = slot;
    mState[slot] = 0;
    ++slot;
  }

#ifdef DETECTOR_DEBUG
  mDbgTest = 0;
  mDbgLen = 0;
#endif
}

Here is the caller graph for this function:

void nsPSMDetector::Sample ( const char *  aBuf,
PRUint32  aLen,
PRBool  aLastChance = PR_FALSE 
) [protected, inherited]

Definition at line 423 of file nsPSMDetectors.cpp.

{
     // Statistical fallback: feeds the buffer to mSampler and, once there
     // is enough data (or this is the last chance), reports the remaining
     // candidate whose statistics score lowest against the sampled data.
     PRInt32 possibleCandidateNum=0;   // candidates that are not UCS2BE/LE or GB18030
     PRInt32 j;
     PRInt32 eucNum=0;                 // candidates that have a statistics table
     for(j = 0; j < mItems; j++) {
        if(nsnull != mStatisticsData[mItemIdx[j]]) 
             eucNum++;
        if(((&nsUCS2BEVerifier) != mVerifier[mItemIdx[j]]) &&
                ((&nsUCS2LEVerifier) != mVerifier[mItemIdx[j]]) &&
                ((&nsGB18030Verifier) != mVerifier[mItemIdx[j]]) ) {
                  possibleCandidateNum++;
        }
     }
     // Sampling is only kept on while more than one remaining candidate
     // has statistics to compare against.
     mRunSampler = (eucNum > 1);
     if(mRunSampler) {
        // The sampler's own return value decides whether sampling stays enabled.
        mRunSampler = mSampler.Sample(aBuf, aLen);
        // Decide only when the sampler has enough data (or this is the
        // final call and it has some), and every candidate counted in
        // possibleCandidateNum has a statistics table.
        if(((aLastChance && mSampler.GetSomeData()) || 
            mSampler.EnoughData())
           && (eucNum == possibleCandidateNum)) {
          mSampler.CalFreq();
#ifdef DETECTOR_DEBUG
          printf("We cannot figure out charset from the encoding, "
                 "All EUC based charset share the same encoding structure.\n"
                 "Detect based on statistics"); 
          if(aLastChance) {
             printf(" after we receive all the data.\n"); 
          } else {
             printf(" after we receive enough data.\n");
          }
#endif
          PRInt32 bestIdx = -1;      // slot of the best-scoring candidate, -1 if none scored
          PRInt32 eucCnt=0;          // number of candidates scored so far
          float bestScore = 0.0f;
          for(j = 0; j < mItems; j++) {
             // Score only candidates that have statistics; the Big5
             // statistics table is explicitly excluded here.
             if((nsnull != mStatisticsData[mItemIdx[j]])  &&
                (&gBig5Statistics != mStatisticsData[mItemIdx[j]]))
             {
                float score = mSampler.GetScore(
                   mStatisticsData[mItemIdx[j]]->mFirstByteFreq,
                   mStatisticsData[mItemIdx[j]]->mFirstByteWeight,
                   mStatisticsData[mItemIdx[j]]->mSecoundByteFreq,
                   mStatisticsData[mItemIdx[j]]->mSecoundByteWeight );
#ifdef DETECTOR_DEBUG
                printf("Differences between %s and this data is %2.8f\n",
                       mVerifier[mItemIdx[j]]->charset,
                       score);
#endif
                // The first scored candidate always becomes the best so
                // far; after that a strictly lower score wins.
                if(( 0 == eucCnt++) || (bestScore > score )) {
                   bestScore = score;
                   bestIdx = j;
                } // if(( 0 == eucCnt++) || (bestScore > score )) 
            } // if(nsnull != ...)
         } // for
         // Report only if at least one candidate was actually scored.
         if (bestIdx >= 0)
         {
#ifdef DETECTOR_DEBUG
           printf("Based on the statistic, we decide it is %s",
            mVerifier[mItemIdx[bestIdx]]->charset);
#endif
           Report( mVerifier[mItemIdx[bestIdx]]->charset);
           mDone = PR_TRUE;
         }
       } // if (eucNum == possibleCandidateNum)
     } // if(mRunSampler)
}

Here is the call graph for this function:

Here is the caller graph for this function:


Member Data Documentation

PRUint8 nsPSMDetector::mClassItems [protected, inherited]

Definition at line 234 of file nsPSMDetectors.h.

PRBool nsPSMDetector::mClassRunSampler [protected, inherited]

Definition at line 242 of file nsPSMDetectors.h.

PRBool nsPSMDetector::mDone [protected, inherited]

Definition at line 239 of file nsPSMDetectors.h.

PRUint8 nsPSMDetector::mItemIdx[MAX_VERIFIERS] [protected, inherited]

Definition at line 236 of file nsPSMDetectors.h.

PRUint8 nsPSMDetector::mItems [protected, inherited]

Definition at line 233 of file nsPSMDetectors.h.

const char* nsXPCOMStringDetector::mResult [private]

Definition at line 288 of file nsPSMDetectors.h.

PRBool nsPSMDetector::mRunSampler [protected, inherited]

Definition at line 241 of file nsPSMDetectors.h.

PRUint8 nsPSMDetector::mState[MAX_VERIFIERS] [protected, inherited]

Definition at line 235 of file nsPSMDetectors.h.

nsEUCStatistics* const* nsPSMDetector::mStatisticsData [protected, inherited]

Definition at line 238 of file nsPSMDetectors.h.

nsVerifier* const* nsPSMDetector::mVerifier [protected, inherited]

Definition at line 237 of file nsPSMDetectors.h.


The documentation for this class was generated from the following files: