Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Static Public Member Functions | Protected Member Functions | Protected Attributes
nsEUCTWProber Class Reference

#include <nsEUCTWProber.h>

Inheritance diagram for nsEUCTWProber:
Inheritance graph
[legend]
Collaboration diagram for nsEUCTWProber:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 nsEUCTWProber (void)
virtual ~nsEUCTWProber (void)
nsProbingState HandleData (const char *aBuf, PRUint32 aLen)
const char * GetCharSetName ()
nsProbingState GetState (void)
void Reset (void)
float GetConfidence (void)
void SetOpion ()

Static Public Member Functions

static PRBool FilterWithoutEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)
static PRBool FilterWithEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)

Protected Member Functions

void GetDistribution (PRUint32 aCharLen, const char *aStr)

Protected Attributes

nsCodingStateMachine * mCodingSM
nsProbingState mState
EUCTWDistributionAnalysis mDistributionAnalyser
char mLastChar [2]

Detailed Description

Definition at line 45 of file nsEUCTWProber.h.


Constructor & Destructor Documentation

nsEUCTWProber::nsEUCTWProber ( void  ) [inline]

Definition at line 47 of file nsEUCTWProber.h.

Here is the call graph for this function:

virtual nsEUCTWProber::~nsEUCTWProber ( void  ) [inline, virtual]

Definition at line 49 of file nsEUCTWProber.h.

// Destructor body: frees the coding state machine object that mCodingSM points to.
{delete mCodingSM;};

Member Function Documentation

PRBool nsCharSetProber::FilterWithEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen 
) [static, inherited]

Definition at line 83 of file nsCharSetProber.cpp.

{
  // Filters aBuf[0..aLen) to reduce the load on the probers.
  //
  // The input is cut into segments at every 7-bit byte that is not an ASCII
  // letter (A-Z / a-z). A non-empty segment is copied to the output, followed
  // by a single space, unless isInTag is set at the moment its delimiter is
  // reached. Bytes with the high bit set and ASCII letters never act as
  // delimiters, so both are kept.
  //
  // Output: *newBuf receives a buffer obtained from PR_Malloc(aLen) (it is not
  // freed here), newLen receives the number of bytes written into it.
  // Returns PR_FALSE when the allocation yields a null pointer, PR_TRUE otherwise.
  //
  // The output cannot outgrow the aLen-byte buffer: every kept segment plus its
  // trailing space replaces the segment plus the delimiter byte that ended it.
  //
  // NOTE(review): for aLen == 0 the outcome depends on what PR_Malloc(0)
  // returns - confirm against the NSPR documentation.
  char *newptr;              // write cursor into the output buffer
  char *prevPtr, *curPtr;    // start of the pending segment / current read position
  PRBool isInTag = PR_FALSE; // set by '<', cleared by '>'

  newptr = *newBuf = (char*)PR_Malloc(aLen);
  if (!newptr)
    return PR_FALSE;

  for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
  {
    // NOTE(review): the flag is cleared on '>' *before* the segment test below,
    // so the run of letters immediately preceding '>' (e.g. the tag name in
    // "<b>") is still copied to the output - confirm whether that is intended.
    if (*curPtr == '>')
      isInTag = PR_FALSE;
    else if (*curPtr == '<')
      isInTag = PR_TRUE;

    // 7-bit byte that is not an ASCII letter: treat it as a segment delimiter.
    if (!(*curPtr & 0x80) &&
        (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') )
    {
      if (curPtr > prevPtr && !isInTag) // Current segment contains more than just a symbol 
                                        // and it is not inside a tag, keep it.
      {
        while (prevPtr < curPtr) *newptr++ = *prevPtr++;  
        prevPtr++;        // step over the delimiter itself
        *newptr++ = ' ';  // the delimiter is replaced by one space
      }
      else
        prevPtr = curPtr+1; // drop the segment; the next one starts after the delimiter
    }
  }

  // Flush the trailing segment (the one not terminated by a delimiter).
  // It is kept only if the input did not end inside a tag; no space is appended.
  if (!isInTag)
    while (prevPtr < curPtr)
      *newptr++ = *prevPtr++;  

  // Number of bytes actually written to *newBuf.
  newLen = newptr - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

PRBool nsCharSetProber::FilterWithoutEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen 
) [static, inherited]

Definition at line 43 of file nsCharSetProber.cpp.

{
  // Filters aBuf[0..aLen) so that only segments containing at least one byte
  // with the high bit set survive.
  //
  // The input is cut into segments at every 7-bit byte that is not an ASCII
  // letter (A-Z / a-z). A segment is copied to the output, followed by a single
  // space, only if a high-bit byte was seen in it; segments made purely of
  // ASCII letters, and lone delimiters, are dropped.
  //
  // Output: *newBuf receives a buffer obtained from PR_Malloc(aLen) (it is not
  // freed here), newLen receives the number of bytes written into it.
  // Returns PR_FALSE when the allocation yields a null pointer, PR_TRUE otherwise.
  //
  // NOTE(review): for aLen == 0 the outcome depends on what PR_Malloc(0)
  // returns - confirm against the NSPR documentation.
  char *newptr;            // write cursor into the output buffer
  char *prevPtr, *curPtr;  // start of the pending segment / current read position
  
  PRBool meetMSB = PR_FALSE;   // a high-bit byte has been seen in the pending segment
  newptr = *newBuf = (char*)PR_Malloc(aLen);
  if (!newptr)
    return PR_FALSE;

  for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
  {
    if (*curPtr & 0x80)
    {
      meetMSB = PR_TRUE;
    }
    else if (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') 
    {
      //current char is a symbol, most likely a punctuation. we treat it as segment delimiter
      if (meetMSB && curPtr > prevPtr) 
      //this segment contains more than single symbol, and it has upper ASCII, we need to keep it
      {
        while (prevPtr < curPtr) *newptr++ = *prevPtr++;  
        prevPtr++;          // step over the delimiter itself
        *newptr++ = ' ';    // the delimiter is replaced by one space
        meetMSB = PR_FALSE; // start tracking the next segment afresh
      }
      else //ignore current segment. (either because it is just a symbol or just an English word)
        prevPtr = curPtr+1;
    }
  }
  // Flush the trailing segment (not terminated by a delimiter) if it contained
  // a high-bit byte; no space is appended after it.
  if (meetMSB && curPtr > prevPtr) 
    while (prevPtr < curPtr) *newptr++ = *prevPtr++;  

  // Number of bytes actually written to *newBuf.
  newLen = newptr - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

const char* nsEUCTWProber::GetCharSetName ( ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 51 of file nsEUCTWProber.h.

// Always returns the fixed string literal "x-euc-tw".
{return "x-euc-tw";};
float nsEUCTWProber::GetConfidence ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 90 of file nsEUCTWProber.cpp.

{
  // The confidence of this prober is exactly the value reported by the
  // distribution analyser, converted to float.
  return static_cast<float>(mDistributionAnalyser.GetConfidence());
}

Here is the call graph for this function:

Here is the caller graph for this function:

void nsEUCTWProber::GetDistribution ( PRUint32  aCharLen,
const char *  aStr 
) [protected]
nsProbingState nsEUCTWProber::GetState ( void  ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 52 of file nsEUCTWProber.h.

// Returns the current value of mState; nothing is modified.
{return mState;};
nsProbingState nsEUCTWProber::HandleData ( const char *  aBuf,
PRUint32  aLen 
) [virtual]

Implements nsCharSetProber.

Definition at line 48 of file nsEUCTWProber.cpp.

{
  // Feeds aBuf[0..aLen) byte by byte through the coding state machine and
  // hands each character reported by it to the distribution analyser.
  //
  // aBuf  bytes to examine
  // aLen  number of bytes in aBuf; may be 0, in which case no byte is read
  //
  // Returns the value of mState after processing:
  //   eNotMe    - the state machine reported eError
  //   eFoundIt  - the state machine reported eItsMe, or the analyser has
  //               enough data and the confidence exceeds SHORTCUT_THRESHOLD
  //   otherwise - mState is returned unchanged
  nsSMState codingState;

  for (PRUint32 i = 0; i < aLen; i++)
  {
    codingState = mCodingSM->NextState(aBuf[i]);
    if (codingState == eError)
    {
      mState = eNotMe;
      break;
    }
    if (codingState == eItsMe)
    {
      mState = eFoundIt;
      break;
    }
    if (codingState == eStart)
    {
      // presumably eStart here means a complete character has just been
      // consumed - verify against nsCodingStateMachine
      PRUint32 charLen = mCodingSM->GetCurrentCharLen();

      if (i == 0)
      {
        // There is no aBuf[-1]: pair the byte saved from the previous call
        // (mLastChar[0]) with the first byte of this buffer.
        mLastChar[1] = aBuf[0];
        mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
      }
      else
      {
        mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
      }
    }
  }

  // Save the final byte for the i == 0 case of the next call.
  // aLen is unsigned, so for an empty buffer aLen-1 would wrap around and
  // index far outside aBuf; keep the previously saved byte in that case.
  if (aLen > 0)
    mLastChar[0] = aBuf[aLen-1];

  // Shortcut: stop detecting as soon as the analyser is confident enough.
  // Short-circuit order matches the original nested ifs.
  if (mState == eDetecting &&
      mDistributionAnalyser.GotEnoughData() &&
      GetConfidence() > SHORTCUT_THRESHOLD)
  {
    mState = eFoundIt;
  }

  return mState;
}

Here is the call graph for this function:

void nsEUCTWProber::Reset ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 40 of file nsEUCTWProber.cpp.

{
  // Puts the prober back into its starting condition: resets the coding state
  // machine, sets mState to eDetecting and resets the distribution analyser.
  // mLastChar is not touched here.
  mCodingSM->Reset(); 
  mState = eDetecting;
  mDistributionAnalyser.Reset();
  //mContextAnalyser.Reset();
}

Here is the call graph for this function:

Here is the caller graph for this function:

void nsEUCTWProber::SetOpion ( ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 55 of file nsEUCTWProber.h.

// Empty body: this implementation does nothing.
{};

Member Data Documentation

nsCodingStateMachine* nsEUCTWProber::mCodingSM [protected]

Definition at line 60 of file nsEUCTWProber.h.

EUCTWDistributionAnalysis nsEUCTWProber::mDistributionAnalyser [protected]

Definition at line 64 of file nsEUCTWProber.h.

char nsEUCTWProber::mLastChar[2] [protected]

Definition at line 65 of file nsEUCTWProber.h.

nsProbingState nsEUCTWProber::mState [protected]

Definition at line 61 of file nsEUCTWProber.h.


The documentation for this class was generated from the following files: