Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Static Public Member Functions | Protected Attributes
nsEUCJPProber Class Reference

#include <nsEUCJPProber.h>

Inheritance diagram for nsEUCJPProber:
Inheritance graph
[legend]
Collaboration diagram for nsEUCJPProber:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 nsEUCJPProber (void)
virtual ~nsEUCJPProber (void)
nsProbingState HandleData (const char *aBuf, PRUint32 aLen)
const char * GetCharSetName ()
nsProbingState GetState (void)
void Reset (void)
float GetConfidence (void)
void SetOpion ()

Static Public Member Functions

static PRBool FilterWithoutEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)
static PRBool FilterWithEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)

Protected Attributes

nsCodingStateMachine * mCodingSM
nsProbingState mState
EUCJPContextAnalysis mContextAnalyser
EUCJPDistributionAnalysis mDistributionAnalyser
char mLastChar [2]

Detailed Description

Definition at line 51 of file nsEUCJPProber.h.


Constructor & Destructor Documentation

nsEUCJPProber::nsEUCJPProber ( void  ) [inline]

Definition at line 53 of file nsEUCJPProber.h.

Here is the call graph for this function:

virtual nsEUCJPProber::~nsEUCJPProber ( void  ) [inline, virtual]

Definition at line 55 of file nsEUCJPProber.h.

// Destructor body: deletes the object that mCodingSM points to.
{delete mCodingSM;};

Member Function Documentation

PRBool nsCharSetProber::FilterWithEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen
) [static, inherited]

Definition at line 83 of file nsCharSetProber.cpp.

{
  // Builds a reduced copy of aBuf for the probers. The input is split into
  // segments at every 7-bit byte that is not an ASCII letter. A segment is
  // copied to the output, followed by a single space, when it is non-empty
  // and the '<' / '>' tracking says we are not inside a tag at the delimiter.
  // On success *newBuf holds the PR_Malloc'ed result, newLen its length, and
  // PR_TRUE is returned; PR_FALSE is returned when the allocation fails.
  char *out = (char*)PR_Malloc(aLen);
  *newBuf = out;
  if (!out)
    return PR_FALSE;

  const char *const end = aBuf + aLen;
  const char *segStart = aBuf;   // first byte of the segment being collected
  const char *cur = aBuf;
  PRBool isInTag = PR_FALSE;

  while (cur < end)
  {
    const char ch = *cur;

    // Tag tracking is updated before the delimiter test below, so a '>'
    // already counts as "outside the tag" for the segment it terminates.
    switch (ch)
    {
      case '>':
        isInTag = PR_FALSE;
        break;
      case '<':
        isInTag = PR_TRUE;
        break;
      default:
        break;
    }

    const bool hasHighBit = (ch & 0x80) != 0;
    const bool isAsciiLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');

    if (!hasHighBit && !isAsciiLetter)
    {
      // ch is a delimiter. Keep the segment that ends here only when it has
      // at least one byte and we are not inside a tag.
      if (cur > segStart && !isInTag)
      {
        for (; segStart < cur; ++segStart)
          *out++ = *segStart;
        *out++ = ' ';
      }
      // Either way the next segment starts right after the delimiter.
      segStart = cur + 1;
    }

    ++cur;
  }

  // Flush the trailing segment (no delimiter followed it) unless the input
  // ended while still inside a tag.
  if (!isInTag)
  {
    for (; segStart < cur; ++segStart)
      *out++ = *segStart;
  }

  newLen = out - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

PRBool nsCharSetProber::FilterWithoutEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen
) [static, inherited]

Definition at line 43 of file nsCharSetProber.cpp.

{
  // Builds a reduced copy of aBuf that keeps only segments containing at
  // least one byte with the high bit set. Segments are delimited by 7-bit
  // bytes that are not ASCII letters; each kept segment is followed by one
  // space in the output. On success *newBuf holds the PR_Malloc'ed result,
  // newLen its length, and PR_TRUE is returned; PR_FALSE is returned when
  // the allocation fails.
  char *out = (char*)PR_Malloc(aLen);
  *newBuf = out;
  if (!out)
    return PR_FALSE;

  const char *const end = aBuf + aLen;
  const char *segStart = aBuf;   // first byte of the segment being collected
  const char *cur = aBuf;
  PRBool meetMSB = PR_FALSE;     // set once the current segment has a high-bit byte

  while (cur < end)
  {
    const char ch = *cur;

    if (ch & 0x80)
    {
      meetMSB = PR_TRUE;
    }
    else
    {
      const bool isAsciiLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
      if (!isAsciiLetter)
      {
        // ch is a symbol and acts as a segment delimiter.
        if (meetMSB && cur > segStart)
        {
          // Non-empty segment with high-bit bytes: copy it and separate it
          // from the next kept segment with a space.
          for (; segStart < cur; ++segStart)
            *out++ = *segStart;
          *out++ = ' ';
          meetMSB = PR_FALSE;
        }
        // Kept or dropped, the next segment starts after the delimiter.
        segStart = cur + 1;
      }
    }

    ++cur;
  }

  // Flush a trailing segment that was not closed by a delimiter.
  if (meetMSB && cur > segStart)
  {
    for (; segStart < cur; ++segStart)
      *out++ = *segStart;
  }

  newLen = out - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

const char* nsEUCJPProber::GetCharSetName ( ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 57 of file nsEUCJPProber.h.

// Returns the fixed charset label string for this prober.
{return "EUC-JP";};
float nsEUCJPProber::GetConfidence ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 97 of file nsEUCJPProber.cpp.

{
  // Report the larger of the two analysers' confidence values. The
  // distribution value is returned whenever the context value is not
  // strictly greater.
  const float contextConfidence = mContextAnalyser.GetConfidence();
  const float distributionConfidence = mDistributionAnalyser.GetConfidence();

  if (contextConfidence > distributionConfidence)
    return contextConfidence;

  return distributionConfidence;
}

Here is the call graph for this function:

Here is the caller graph for this function:

nsProbingState nsEUCJPProber::GetState ( void  ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 58 of file nsEUCJPProber.h.

// Returns the current value of mState.
{return mState;};
nsProbingState nsEUCJPProber::HandleData ( const char *  aBuf,
PRUint32  aLen 
) [virtual]

Implements nsCharSetProber.

Definition at line 53 of file nsEUCJPProber.cpp.

{
  // Feeds the aLen bytes at aBuf through the coding state machine and the
  // context/distribution analysers, updates mState accordingly and returns it.
  //
  //   aBuf - bytes to examine
  //   aLen - number of bytes in aBuf; may be 0, in which case no byte is read
  //          and mLastChar is left unchanged
  nsSMState codingState;

  for (PRUint32 i = 0; i < aLen; i++)
  {
    codingState = mCodingSM->NextState(aBuf[i]);
    if (codingState == eError)
    {
      // The state machine reported an error for this byte: give up.
      mState = eNotMe;
      break;
    }
    if (codingState == eItsMe)
    {
      // The state machine reported a positive identification: stop early.
      mState = eFoundIt;
      break;
    }
    if (codingState == eStart)
    {
      // NOTE(review): eStart presumably means a complete character has just
      // been consumed -- confirm against nsCodingStateMachine.
      PRUint32 charLen = mCodingSM->GetCurrentCharLen();

      if (i == 0)
      {
        // There is no aBuf[-1] to point at, so hand the analysers mLastChar:
        // mLastChar[0] is the final byte saved by an earlier call (below) and
        // mLastChar[1] is the first byte of this buffer.
        mLastChar[1] = aBuf[0];
        mContextAnalyser.HandleOneChar(mLastChar, charLen);
        mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
      }
      else
      {
        // Point the analysers at the byte preceding the current one.
        mContextAnalyser.HandleOneChar(aBuf+i-1, charLen);
        mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
      }
    }
  }

  // Save the final byte for the i == 0 case of the next call. Guarded because
  // with aLen == 0 the unsigned expression aLen-1 wraps around and aBuf[aLen-1]
  // would read far outside the buffer.
  if (aLen > 0)
    mLastChar[0] = aBuf[aLen-1];

  // Shortcut: once the context analyser has seen enough data and the
  // confidence exceeds SHORTCUT_THRESHOLD, declare the charset found.
  if (mState == eDetecting)
    if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
      mState = eFoundIt;

  return mState;
}

Here is the call graph for this function:

void nsEUCJPProber::Reset ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 45 of file nsEUCJPProber.cpp.

Here is the call graph for this function:

Here is the caller graph for this function:

void nsEUCJPProber::SetOpion ( ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 61 of file nsEUCJPProber.h.

// Empty body: this override performs no work.
{};

Member Data Documentation

Definition at line 61 of file nsEUCJPProber.h.

Definition at line 67 of file nsEUCJPProber.h.

Definition at line 68 of file nsEUCJPProber.h.

char nsEUCJPProber::mLastChar[2] [protected]

Definition at line 70 of file nsEUCJPProber.h.

Definition at line 65 of file nsEUCJPProber.h.


The documentation for this class was generated from the following files: