Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Static Public Member Functions | Protected Attributes
nsSingleByteCharSetProber Class Reference

#include <nsSBCharSetProber.h>

Inheritance diagram for nsSingleByteCharSetProber:
Inheritance graph
[legend]
Collaboration diagram for nsSingleByteCharSetProber:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 nsSingleByteCharSetProber (SequenceModel *model)
 nsSingleByteCharSetProber (SequenceModel *model, PRBool reversed, nsCharSetProber *nameProber)
virtual const char * GetCharSetName ()
virtual nsProbingState HandleData (const char *aBuf, PRUint32 aLen)
virtual nsProbingState GetState (void)
virtual void Reset (void)
virtual float GetConfidence (void)
virtual void SetOpion ()
PRBool KeepEnglishLetters ()

Static Public Member Functions

static PRBool FilterWithoutEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)
static PRBool FilterWithEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)

Protected Attributes

nsProbingState mState
const SequenceModel * mModel
const PRBool mReversed
unsigned char mLastOrder
PRUint32 mTotalSeqs
PRUint32 mSeqCounters [NUMBER_OF_SEQ_CAT]
PRUint32 mTotalChar
PRUint32 mFreqChar
nsCharSetProber * mNameProber

Detailed Description

Definition at line 62 of file nsSBCharSetProber.h.


Constructor & Destructor Documentation

nsSingleByteCharSetProber::nsSingleByteCharSetProber ( SequenceModel *  model ) [inline]

Definition at line 64 of file nsSBCharSetProber.h.

    // Initializer list of the single-argument constructor: stores the model,
    // sets mReversed to PR_FALSE and mNameProber to 0, then calls Reset().
    :mModel(model), mReversed(PR_FALSE), mNameProber(0) { Reset(); }

Here is the call graph for this function:

nsSingleByteCharSetProber::nsSingleByteCharSetProber ( SequenceModel *  model,
PRBool  reversed,
nsCharSetProber *  nameProber 
) [inline]

Definition at line 66 of file nsSBCharSetProber.h.

    // Initializer list of the three-argument constructor: stores the model,
    // the reversed flag and the name prober as passed in, then calls Reset().
    :mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); }

Here is the call graph for this function:


Member Function Documentation

PRBool nsCharSetProber::FilterWithEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen 
) [static, inherited]

Definition at line 83 of file nsCharSetProber.cpp.

{
  // Builds a filtered copy of aBuf in a newly PR_Malloc'ed buffer (*newBuf).
  // Runs of ASCII letters and high-bit bytes are copied; the delimiter that
  // ends a copied run is written as a single space. Runs that end while
  // isInTag is set are dropped. newLen receives the number of bytes written.
  // Returns PR_FALSE when the allocation fails, PR_TRUE otherwise.
  // The buffer is not freed in this function.
  //do filtering to reduce load to probers
  char *newptr;
  char *prevPtr, *curPtr;
  PRBool isInTag = PR_FALSE;

  // aLen bytes are enough: every byte written is either a copied input byte
  // or the space written in place of one input delimiter.
  newptr = *newBuf = (char*)PR_Malloc(aLen);
  if (!newptr)
    return PR_FALSE;

  // prevPtr marks the start of the current segment, curPtr the byte examined.
  for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
  {
    // Track '<' ... '>' pairs.
    // NOTE(review): isInTag is cleared on '>' before the segment test below,
    // so the run immediately preceding '>' is copied even though it lies
    // inside the tag - confirm whether that is intended.
    if (*curPtr == '>')
      isInTag = PR_FALSE;
    else if (*curPtr == '<')
      isInTag = PR_TRUE;

    // A delimiter is any byte without the high bit that is not in A-Z / a-z.
    if (!(*curPtr & 0x80) &&
        (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') )
    {
      if (curPtr > prevPtr && !isInTag) // Current segment contains more than just a symbol 
                                        // and it is not inside a tag, keep it.
      {
        while (prevPtr < curPtr) *newptr++ = *prevPtr++;  
        // Step past the delimiter itself and emit a space in its place.
        prevPtr++;
        *newptr++ = ' ';
      }
      else
        // Empty segment or inside a tag: restart after the delimiter.
        prevPtr = curPtr+1;
    }
  }

  // If the current segment contains more than just a symbol 
  // and it is not inside a tag then keep it.
  if (!isInTag)
    while (prevPtr < curPtr)
      *newptr++ = *prevPtr++;  

  // Number of bytes actually written to the output buffer.
  newLen = newptr - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

PRBool nsCharSetProber::FilterWithoutEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen 
) [static, inherited]

Definition at line 43 of file nsCharSetProber.cpp.

{
  // Builds a filtered copy of aBuf in a newly PR_Malloc'ed buffer (*newBuf),
  // keeping only segments that contain at least one byte with the high bit
  // set. The delimiter ending a kept segment is written as a single space.
  // newLen receives the number of bytes written. Returns PR_FALSE when the
  // allocation fails, PR_TRUE otherwise. The buffer is not freed in this
  // function.
  char *newptr;
  char *prevPtr, *curPtr;
  
  // Set once the current segment has produced a byte with bit 0x80.
  PRBool meetMSB = PR_FALSE;   
  newptr = *newBuf = (char*)PR_Malloc(aLen);
  if (!newptr)
    return PR_FALSE;

  // prevPtr marks the start of the current segment, curPtr the byte examined.
  for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
  {
    if (*curPtr & 0x80)
    {
      meetMSB = PR_TRUE;
    }
    else if (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') 
    {
      //current char is a symbol, most likely a punctuation. we treat it as segment delimiter
      if (meetMSB && curPtr > prevPtr) 
      //this segment contains more than single symbol, and it has upper ASCII, we need to keep it
      {
        while (prevPtr < curPtr) *newptr++ = *prevPtr++;  
        // Step past the delimiter itself and emit a space in its place.
        prevPtr++;
        *newptr++ = ' ';
        meetMSB = PR_FALSE;
      }
      else //ignore current segment. (either because it is just a symbol or just an English word)
        prevPtr = curPtr+1;
    }
  }
  // Flush a trailing segment that was not closed by a delimiter.
  if (meetMSB && curPtr > prevPtr) 
    while (prevPtr < curPtr) *newptr++ = *prevPtr++;  

  // Number of bytes actually written to the output buffer.
  newLen = newptr - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

const char * nsSingleByteCharSetProber::GetCharSetName ( ) [virtual]

Implements nsCharSetProber.

Definition at line 114 of file nsSBCharSetProber.cpp.

Here is the call graph for this function:

float nsSingleByteCharSetProber::GetConfidence ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 93 of file nsSBCharSetProber.cpp.

{
  // Computes a confidence value from the sequence and character counters.
  // Both variants return 0.01 when no sequences have been counted.
#ifdef NEGATIVE_APPROACH
  // Negative variant: penalise by ten times the NEGATIVE_CAT count, then
  // scale by mFreqChar / mTotalChar. Falls through to 0.01 when the penalty
  // is at least mTotalSeqs.
  if (mTotalSeqs > 0)
    if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT]*10 )
      return ((float)(mTotalSeqs - mSeqCounters[NEGATIVE_CAT]*10))/mTotalSeqs * mFreqChar / mTotalChar;
  return (float)0.01;
#else  //POSITIVE_APPROACH
  float r;

  if (mTotalSeqs > 0) {
    // Share of POSITIVE_CAT sequences, divided by the model's
    // mTypicalPositiveRatio.
    r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;
    // Scale by mFreqChar / mTotalChar.
    r = r*mFreqChar/mTotalChar;
    // Cap the result at 0.99.
    if (r >= (float)1.00)
      r = (float)0.99;
    return r;
  }
  return (float)0.01;
#endif
}

Here is the caller graph for this function:

virtual nsProbingState nsSingleByteCharSetProber::GetState ( void  ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 71 of file nsSBCharSetProber.h.

// Returns the current value of mState.
{return mState;};
nsProbingState nsSingleByteCharSetProber::HandleData ( const char *  aBuf,
PRUint32  aLen 
) [virtual]

Implements nsCharSetProber.

Definition at line 41 of file nsSBCharSetProber.cpp.

{
  // Feeds aLen bytes of aBuf through the model, updating the character and
  // sequence counters, then possibly moves mState out of eDetecting based on
  // GetConfidence(). Returns the resulting mState.
  unsigned char order;

  for (PRUint32 i = 0; i < aLen; i++)
  {
    // Map the byte (as unsigned) to its order via the model's table.
    order = mModel->charToOrderMap[(unsigned char)aBuf[i]];

    // Bytes whose order is below SYMBOL_CAT_ORDER count toward mTotalChar.
    if (order < SYMBOL_CAT_ORDER)
      mTotalChar++;
    // Bytes whose order is below SAMPLE_SIZE count toward mFreqChar.
    if (order < SAMPLE_SIZE)
    {
        mFreqChar++;

      // A sequence is counted only when the previous byte's order was also
      // below SAMPLE_SIZE; the pair indexes the precedence matrix.
      if (mLastOrder < SAMPLE_SIZE)
      {
        mTotalSeqs++;
        if (!mReversed)
          ++(mSeqCounters[mModel->precedenceMatrix[mLastOrder*SAMPLE_SIZE+order]]);
        else // reverse the order of the letters in the lookup
          ++(mSeqCounters[mModel->precedenceMatrix[order*SAMPLE_SIZE+mLastOrder]]);
      }
    }
    // Remember this byte's order for the next iteration (and the next call).
    mLastOrder = order;
  }

  // Only decide once more than SB_ENOUGH_REL_THRESHOLD sequences were seen.
  if (mState == eDetecting)
    if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD)
    {
      float cf = GetConfidence();
      if (cf > POSITIVE_SHORTCUT_THRESHOLD)
        mState = eFoundIt;
      else if (cf < NEGATIVE_SHORTCUT_THRESHOLD)
        mState = eNotMe;
    }

  return mState;
}

Here is the call graph for this function:

PRBool nsSingleByteCharSetProber::KeepEnglishLetters ( ) [inline]

Definition at line 82 of file nsSBCharSetProber.h.

// Returns the keepEnglishLetter field of the model.
{return mModel->keepEnglishLetter;}; // (not implemented)

void nsSingleByteCharSetProber::Reset ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 80 of file nsSBCharSetProber.cpp.

{
  // Puts the prober back into its initial detecting state and zeroes every
  // counter.
  mState = eDetecting;
  // presumably 255 is outside the sampled order range, so the first byte
  // after a reset starts no sequence - TODO confirm against SAMPLE_SIZE
  mLastOrder = 255;
  for (PRUint32 i = 0; i < NUMBER_OF_SEQ_CAT; i++)
    mSeqCounters[i] = 0;
  mTotalSeqs = 0;
  mTotalChar = 0;
  mFreqChar = 0;
}

Here is the caller graph for this function:

virtual void nsSingleByteCharSetProber::SetOpion ( ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 74 of file nsSBCharSetProber.h.

// Empty body: this override takes no action.
{};

Member Data Documentation

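PRUint32 nsSingleByteCharSetProber::mFreqChar [protected]

Definition at line 101 of file nsSBCharSetProber.h.

unsigned char nsSingleByteCharSetProber::mLastOrder [protected]

Definition at line 94 of file nsSBCharSetProber.h.

const SequenceModel * nsSingleByteCharSetProber::mModel [protected]

Definition at line 90 of file nsSBCharSetProber.h.

nsCharSetProber * nsSingleByteCharSetProber::mNameProber [protected]

Definition at line 104 of file nsSBCharSetProber.h.

const PRBool nsSingleByteCharSetProber::mReversed [protected]

Definition at line 91 of file nsSBCharSetProber.h.

PRUint32 nsSingleByteCharSetProber::mSeqCounters[NUMBER_OF_SEQ_CAT] [protected]

Definition at line 97 of file nsSBCharSetProber.h.

nsProbingState nsSingleByteCharSetProber::mState [protected]

Definition at line 82 of file nsSBCharSetProber.h.

PRUint32 nsSingleByteCharSetProber::mTotalChar [protected]

Definition at line 99 of file nsSBCharSetProber.h.

PRUint32 nsSingleByteCharSetProber::mTotalSeqs [protected]

Definition at line 96 of file nsSBCharSetProber.h.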


The documentation for this class was generated from the following files: