Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Static Public Member Functions | Protected Attributes
nsUTF8Prober Class Reference

#include <nsUTF8Prober.h>

Inheritance diagram for nsUTF8Prober:
Inheritance graph
[legend]
Collaboration diagram for nsUTF8Prober:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 nsUTF8Prober ()
virtual ~nsUTF8Prober ()
nsProbingState HandleData (const char *aBuf, PRUint32 aLen)
const char * GetCharSetName ()
nsProbingState GetState (void)
void Reset (void)
float GetConfidence (void)
void SetOpion ()

Static Public Member Functions

static PRBool FilterWithoutEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)
static PRBool FilterWithEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)

Protected Attributes

nsCodingStateMachine * mCodingSM
nsProbingState mState
PRUint32 mNumOfMBChar

Detailed Description

Definition at line 44 of file nsUTF8Prober.h.


Constructor & Destructor Documentation

nsUTF8Prober::nsUTF8Prober ( )

Definition at line 46 of file nsUTF8Prober.h.

Here is the call graph for this function:

virtual nsUTF8Prober::~nsUTF8Prober ( ) [inline, virtual]

Definition at line 49 of file nsUTF8Prober.h.

// Destructor body: frees the coding state machine held in mCodingSM.
{delete mCodingSM;};

Member Function Documentation

PRBool nsCharSetProber::FilterWithEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen 
) [static, inherited]

Definition at line 83 of file nsCharSetProber.cpp.

{
  // Pre-filter the input so the probers have less data to look at.
  // Runs of letters / high-bit bytes are copied into a freshly allocated
  // buffer (returned through *newBuf), separated by single spaces; the number
  // of bytes written is returned through newLen.
  char *out = (char*)PR_Malloc(aLen);
  *newBuf = out;
  if (!out)
    return PR_FALSE;   // allocation failed

  const char *const end = aBuf + aLen;
  const char *segStart = aBuf;      // first byte of the segment being collected
  const char *cur = aBuf;
  PRBool insideTag = PR_FALSE;

  while (cur < end)
  {
    const char ch = *cur;

    // Track '<' ... '>' pairs. The flag is updated before the delimiter test
    // below, so a '>' is already seen as "outside a tag".
    if (ch == '<')
      insideTag = PR_TRUE;
    else if (ch == '>')
      insideTag = PR_FALSE;

    const PRBool isLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');

    // Any 7-bit byte that is not an English letter terminates the segment.
    if (!(ch & 0x80) && !isLetter)
    {
      if (!insideTag && segStart < cur)
      {
        // Non-empty segment outside a tag: keep it and replace the
        // delimiter with a single space.
        for (; segStart < cur; ++segStart)
          *out++ = *segStart;
        *out++ = ' ';
      }
      // Either way the next segment starts right after the delimiter.
      segStart = cur + 1;
    }

    ++cur;
  }

  // Flush the trailing segment unless the input ended inside a tag.
  if (!insideTag)
  {
    for (; segStart < end; ++segStart)
      *out++ = *segStart;
  }

  newLen = out - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

PRBool nsCharSetProber::FilterWithoutEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen 
) [static, inherited]

Definition at line 43 of file nsCharSetProber.cpp.

{
  // Copies into a freshly allocated buffer (returned through *newBuf) only
  // those segments that contain at least one byte with the high bit set.
  // Segments are delimited by 7-bit non-letter bytes; each kept segment is
  // followed by a single space. newLen receives the number of bytes written.
  char *out = (char*)PR_Malloc(aLen);
  *newBuf = out;
  if (!out)
    return PR_FALSE;   // allocation failed

  const char *const end = aBuf + aLen;
  const char *segStart = aBuf;        // first byte of the segment being collected
  PRBool sawHighByte = PR_FALSE;      // segment contains a byte >= 0x80

  for (const char *cur = aBuf; cur < end; ++cur)
  {
    const char ch = *cur;

    if (ch & 0x80)
    {
      sawHighByte = PR_TRUE;
      continue;
    }

    const PRBool isLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
    if (isLetter)
      continue;

    // ch is a 7-bit symbol (most likely punctuation): treat it as a
    // segment delimiter.
    if (sawHighByte && segStart < cur)
    {
      // The segment holds more than the symbol itself and has upper-ASCII
      // content, so keep it and emit a space in place of the delimiter.
      for (; segStart < cur; ++segStart)
        *out++ = *segStart;
      *out++ = ' ';
      sawHighByte = PR_FALSE;
    }
    // Otherwise the segment is dropped (a lone symbol or a plain English
    // word). In both cases the next segment starts after the delimiter.
    segStart = cur + 1;
  }

  // Flush a trailing segment that was not closed by a delimiter.
  if (sawHighByte && segStart < end)
  {
    for (; segStart < end; ++segStart)
      *out++ = *segStart;
  }

  newLen = out - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

const char* nsUTF8Prober::GetCharSetName ( ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 51 of file nsUTF8Prober.h.

// Returns the charset name reported by this prober as a string literal.
{return "UTF-8";};
float nsUTF8Prober::GetConfidence ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 79 of file nsUTF8Prober.cpp.

{
  // With six or more multi-byte characters seen, report the fixed maximum.
  if (mNumOfMBChar >= 6)
    return (float)0.99;

  // Otherwise start from 0.99 "unlikeliness" and scale it down by
  // ONE_CHAR_PROB once for every multi-byte character counted so far.
  float unlike = (float)0.99;
  PRUint32 remaining = mNumOfMBChar;
  while (remaining > 0)
  {
    unlike *= ONE_CHAR_PROB;
    --remaining;
  }

  return (float)1.0 - unlike;
}

Here is the caller graph for this function:

nsProbingState nsUTF8Prober::GetState ( void  ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 52 of file nsUTF8Prober.h.

// Returns the prober's current probing state (the mState member).
{return mState;};
nsProbingState nsUTF8Prober::HandleData ( const char *  aBuf,
PRUint32  aLen 
) [virtual]

Implements nsCharSetProber.

Definition at line 47 of file nsUTF8Prober.cpp.

{
  // Feed the buffer to the coding state machine one byte at a time.
  for (PRUint32 pos = 0; pos < aLen; ++pos)
  {
    const nsSMState smState = mCodingSM->NextState(aBuf[pos]);

    if (smState == eError)
    {
      // The state machine rejected the input: stop and report "not me".
      mState = eNotMe;
      break;
    }
    else if (smState == eItsMe)
    {
      // The state machine is certain: stop and report a positive match.
      mState = eFoundIt;
      break;
    }
    else if (smState == eStart && mCodingSM->GetCurrentCharLen() >= 2)
    {
      // Back at the start state after a character of two or more bytes:
      // count it as one multi-byte character.
      mNumOfMBChar++;
    }
  }

  // Still undecided: promote to "found" once confidence passes the shortcut
  // threshold.
  if (mState == eDetecting && GetConfidence() > SHORTCUT_THRESHOLD)
    mState = eFoundIt;

  return mState;
}

Here is the call graph for this function:

void nsUTF8Prober::Reset ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 40 of file nsUTF8Prober.cpp.

Here is the call graph for this function:

Here is the caller graph for this function:

void nsUTF8Prober::SetOpion ( ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 55 of file nsUTF8Prober.h.

// No-op: the body is empty.
{};

Member Data Documentation

Definition at line 55 of file nsUTF8Prober.h.

Definition at line 60 of file nsUTF8Prober.h.

Definition at line 59 of file nsUTF8Prober.h.


The documentation for this class was generated from the following files: