Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Protected Member Functions | Protected Attributes | Private Attributes
nsPSMDetector Class Reference

#include <nsPSMDetectors.h>

Inheritance diagram for nsPSMDetector:
Inheritance graph
[legend]
Collaboration diagram for nsPSMDetector:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 nsPSMDetector (PRUint8 aItems, nsVerifier *const *aVerifierSet, nsEUCStatistics *const *aStatisticsSet)
virtual ~nsPSMDetector ()
virtual PRBool HandleData (const char *aBuf, PRUint32 aLen)
virtual void DataEnd ()

Protected Member Functions

virtual void Report (const char *charset)=0
void Reset ()
void Sample (const char *aBuf, PRUint32 aLen, PRBool aLastChance=PR_FALSE)

Protected Attributes

PRUint8 mItems
PRUint8 mClassItems
PRUint8 mState [MAX_VERIFIERS]
PRUint8 mItemIdx [MAX_VERIFIERS]
nsVerifier *const * mVerifier
nsEUCStatistics *const * mStatisticsData
PRBool mDone
PRBool mRunSampler
PRBool mClassRunSampler

Private Attributes

nsEUCSampler mSampler

Detailed Description

Definition at line 222 of file nsPSMDetectors.h.


Constructor & Destructor Documentation

nsPSMDetector::nsPSMDetector ( PRUint8  aItems,
nsVerifier *const * aVerifierSet,
nsEUCStatistics *const * aStatisticsSet 
)

Definition at line 277 of file nsPSMDetectors.cpp.

{
  // Keep the caller-supplied tables; both are indexed through mItemIdx[]
  // by the other member functions.
  mVerifier = aVerifierSet;
  mStatisticsData = aStatisticsSet;

  // "Class" values are the defaults that Reset() copies into the
  // per-run state (mItems / mRunSampler).
  mClassItems = aItems;
  mClassRunSampler = (aStatisticsSet != nsnull);

  // Must come last: Reset() reads mClassItems and mClassRunSampler.
  Reset();
}
virtual nsPSMDetector::~nsPSMDetector ( ) [inline, virtual]

Definition at line 225 of file nsPSMDetectors.h.

{}; // Empty body: the destructor performs no explicit cleanup.

Member Function Documentation

void nsPSMDetector::DataEnd ( void  ) [virtual]

Definition at line 302 of file nsPSMDetectors.cpp.

{
  // GB18030 covers almost every code point used by Big5, SJIS and the
  // EUC-xx encodings, so its verifier practically never rejects input and
  // would otherwise keep the other verifiers from ever winning. GB18030
  // itself is not very popular, yet it could still reach the ItsMe state.
  // Therefore, when exactly two candidates remain and one of them is
  // GB18030, drop it and report the other one.
  if (2 == mItems) {
    // Slot (0 or 1) of the candidate to report; -1 when neither of the two
    // remaining candidates is GB18030.
    PRInt32 keep = -1;
    if (mVerifier[mItemIdx[0]] == (&nsGB18030Verifier)) {
      keep = 1;
    } else if (mVerifier[mItemIdx[1]] == (&nsGB18030Verifier)) {
      keep = 0;
    }

    if (keep >= 0) {
      Report(mVerifier[mItemIdx[keep]]->charset);
      mDone = PR_TRUE;
    }
  }

  // Give the statistical sampler its last chance, with no new data.
  if (mRunSampler) {
    Sample(nsnull, 0, PR_TRUE);
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

PRBool nsPSMDetector::HandleData ( const char *  aBuf,
PRUint32  aLen 
) [virtual]

Definition at line 325 of file nsPSMDetectors.cpp.

{
  // Runs the aLen bytes of aBuf through every verifier that is still a
  // candidate. Returns mDone (set to PR_TRUE) as soon as a charset has been
  // reported or no more than one candidate is left; returns PR_FALSE when
  // the whole buffer was consumed without reaching a decision.
  PRUint32 i,j;
  PRUint32 st;
  for(i=0; i < aLen; i++)
  {
     char b = aBuf[i];
     // No increment in the loop header: j only advances when candidate j
     // survives this byte. When a candidate is eliminated, slot j is
     // refilled from the tail and has to be checked against the same byte.
     for(j = 0; j < mItems; )
     {
#ifdef ftang_TRACE_STATE
       if(  mVerifier[mItemIdx[j]] == & TRACE_VERIFIER )
       {
           printf("%d = %d\n", i + mDbgLen, mState[j]);
       }
#endif
#ifdef DETECTOR_DEBUG
        mDbgTest++;
#endif 
        // Advance this candidate's state machine by one byte.
        st = GETNEXTSTATE( mVerifier[mItemIdx[j]], b, mState[j] );
        if(eItsMe == st) 
        {
#ifdef DETECTOR_DEBUG
            printf("It's %s- byte %d(%x) test %d\n", 
                    mVerifier[mItemIdx[j]]->charset,
                    i+mDbgLen,
                    i+mDbgLen,
                    mDbgTest
                  );
#endif
            // Positive identification: report it and stop immediately.
            Report( mVerifier[mItemIdx[j]]->charset);
            mDone = PR_TRUE;
            return mDone;
        } else if (eError == st) 
        {
#ifdef DETECTOR_DEBUG
            printf("It's NOT %s- byte %d(%x)\n", 
                    mVerifier[mItemIdx[j]]->charset,
                    i+mDbgLen,
                    i+mDbgLen);
#endif
            // Eliminate candidate j: shrink the active range and, unless j
            // was the last slot, move the former last candidate (index and
            // state) into slot j.
            mItems--;
            if(j < mItems )
            {
                mItemIdx[j] = mItemIdx[mItems];
                mState[j] = mState[mItems];
            } 
        } else {
            // Still a candidate: store the new state and move on.
            mState[j++] = st;
        } 
     }
     if( mItems <= 1) 
     {
         // At most one candidate is left. A sole survivor is reported; with
         // zero survivors nothing is reported, but detection still ends.
         if( 1 == mItems) {
#ifdef DETECTOR_DEBUG
             printf("It's %s- byte %d (%x) Test %d. The only left\n", 
                       mVerifier[mItemIdx[0]]->charset,
                       i+mDbgLen,
                       i+mDbgLen,
                       mDbgTest);
#endif
             Report( mVerifier[mItemIdx[0]]->charset);
         }
         mDone = PR_TRUE;
         return mDone;
     } else {
        // If the only candidates left are UCS2LE/UCS2BE plus exactly one
        // other charset, report that other charset.
        PRInt32 nonUCS2Num=0;
        PRInt32 nonUCS2Idx=0;
        for(j = 0; j < mItems; j++) {
             if(((&nsUCS2BEVerifier) != mVerifier[mItemIdx[j]]) &&
                ((&nsUCS2LEVerifier) != mVerifier[mItemIdx[j]])) {
                  nonUCS2Num++;
                  nonUCS2Idx = j;
             }
        }
        if(1 == nonUCS2Num) {
#ifdef DETECTOR_DEBUG
             printf("It's %s- byte %d (%x) Test %d. The only left except UCS2LE/BE\n", 
                       mVerifier[mItemIdx[nonUCS2Idx]]->charset,
                       i+mDbgLen,
                       i+mDbgLen,
                       mDbgTest);
#endif
            Report( mVerifier[mItemIdx[nonUCS2Idx]]->charset);
            mDone = PR_TRUE;
            return mDone;
        }
     }
  }
  // No decision from the state machines for this buffer: hand the same
  // data to the statistical sampler if it is still enabled.
  if(mRunSampler)
     Sample(aBuf, aLen);

#ifdef DETECTOR_DEBUG
  mDbgLen += aLen;
#endif
  return PR_FALSE;
}

Here is the call graph for this function:

Here is the caller graph for this function:

virtual void nsPSMDetector::Report ( const char *  charset) [protected, pure virtual]

Implemented in nsXPCOMStringDetector, and nsXPCOMDetector.

Here is the caller graph for this function:

void nsPSMDetector::Reset ( void  ) [protected]

Definition at line 286 of file nsPSMDetectors.cpp.

{
  // Restore the per-run state from the defaults captured by the
  // constructor, so that every verifier is a candidate again.
  mRunSampler = mClassRunSampler;
  mDone= PR_FALSE;
  mItems = mClassItems;
  NS_ASSERTION(MAX_VERIFIERS >= mItems , "MAX_VERIFIERS is too small!");
  // The assertion above is only a diagnostic and does not stop execution.
  // mState and mItemIdx hold exactly MAX_VERIFIERS entries, so clamp the
  // count rather than let the loop below write past the end of the arrays.
  if (mItems > MAX_VERIFIERS)
  {
     mItems = MAX_VERIFIERS;
  }
  for(PRUint8 i = 0; i < mItems ; i++)
  {
     mState[i] = 0;    // every state machine restarts in state 0
     mItemIdx[i] = i;  // slot i refers to verifier i again
  }
#ifdef DETECTOR_DEBUG
  mDbgLen = mDbgTest = 0;
#endif   
}

Here is the caller graph for this function:

void nsPSMDetector::Sample ( const char *  aBuf,
PRUint32  aLen,
PRBool  aLastChance = PR_FALSE 
) [protected]

Definition at line 423 of file nsPSMDetectors.cpp.

{
  // Census of the remaining candidates:
  //   withStats - candidates that have a statistics table
  //   plausible - candidates other than UCS2BE, UCS2LE and GB18030
  PRInt32 withStats = 0;
  PRInt32 plausible = 0;
  PRInt32 k;
  for (k = 0; k < mItems; ++k) {
    if (mStatisticsData[mItemIdx[k]] != nsnull) {
      ++withStats;
    }
    PRBool excluded = (mVerifier[mItemIdx[k]] == (&nsUCS2BEVerifier)) ||
                      (mVerifier[mItemIdx[k]] == (&nsUCS2LEVerifier)) ||
                      (mVerifier[mItemIdx[k]] == (&nsGB18030Verifier));
    if (!excluded) {
      ++plausible;
    }
  }

  // Statistics can only discriminate when at least two candidates have a
  // table; otherwise the sampler is switched off.
  mRunSampler = (withStats > 1);
  if (!mRunSampler) {
    return;
  }

  // Feed the sampler; its return value decides whether sampling continues.
  mRunSampler = mSampler.Sample(aBuf, aLen);

  // Decide only once the sampler has enough data (or, on the last chance,
  // some data) and every plausible candidate has a statistics table.
  PRBool sampled = (aLastChance && mSampler.GetSomeData()) ||
                   mSampler.EnoughData();
  if (!sampled || (withStats != plausible)) {
    return;
  }

  mSampler.CalFreq();
#ifdef DETECTOR_DEBUG
  printf("We cannot figure out charset from the encoding, "
         "All EUC based charset share the same encoding structure.\n"
         "Detect based on statistics"); 
  if (aLastChance) {
    printf(" after we receive all the data.\n"); 
  } else {
    printf(" after we receive enough data.\n");
  }
#endif

  // Pick the candidate with the lowest score; Big5 statistics are skipped.
  PRInt32 winner = -1;
  PRBool haveScore = PR_FALSE;
  float lowest = 0.0f;
  for (k = 0; k < mItems; ++k) {
    nsEUCStatistics* stats = mStatisticsData[mItemIdx[k]];
    if ((nsnull == stats) || (&gBig5Statistics == stats)) {
      continue;
    }

    float score = mSampler.GetScore(stats->mFirstByteFreq,
                                    stats->mFirstByteWeight,
                                    stats->mSecoundByteFreq,
                                    stats->mSecoundByteWeight);
#ifdef DETECTOR_DEBUG
    printf("Differences between %s and this data is %2.8f\n",
           mVerifier[mItemIdx[k]]->charset,
           score);
#endif
    // The first scored candidate always becomes the provisional winner.
    if (!haveScore || (lowest > score)) {
      lowest = score;
      winner = k;
    }
    haveScore = PR_TRUE;
  }

  if (winner < 0) {
    return;
  }

#ifdef DETECTOR_DEBUG
  printf("Based on the statistic, we decide it is %s",
         mVerifier[mItemIdx[winner]]->charset);
#endif
  Report(mVerifier[mItemIdx[winner]]->charset);
  mDone = PR_TRUE;
}

Here is the call graph for this function:

Here is the caller graph for this function:


Member Data Documentation

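PRUint8 nsPSMDetector::mClassItems [protected]

Definition at line 234 of file nsPSMDetectors.h.

PRBool nsPSMDetector::mClassRunSampler [protected]

Definition at line 242 of file nsPSMDetectors.h.

PRBool nsPSMDetector::mDone [protected]

Definition at line 239 of file nsPSMDetectors.h.

PRUint8 nsPSMDetector::mItemIdx[MAX_VERIFIERS] [protected]

Definition at line 236 of file nsPSMDetectors.h.

PRUint8 nsPSMDetector::mItems [protected]

Definition at line 233 of file nsPSMDetectors.h.

PRBool nsPSMDetector::mRunSampler [protected]

Definition at line 241 of file nsPSMDetectors.h.

nsEUCSampler nsPSMDetector::mSampler [private]

Definition at line 251 of file nsPSMDetectors.h.

PRUint8 nsPSMDetector::mState[MAX_VERIFIERS] [protected]

Definition at line 235 of file nsPSMDetectors.h.

nsEUCStatistics *const * nsPSMDetector::mStatisticsData [protected]

Definition at line 238 of file nsPSMDetectors.h.

nsVerifier *const * nsPSMDetector::mVerifier [protected]

Definition at line 237 of file nsPSMDetectors.h.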


The documentation for this class was generated from the following files: