Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Static Public Member Functions | Protected Attributes
nsMBCSGroupProber Class Reference

#include <nsMBCSGroupProber.h>

Inheritance diagram for nsMBCSGroupProber:
Inheritance graph
[legend]
Collaboration diagram for nsMBCSGroupProber:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 nsMBCSGroupProber ()
virtual ~nsMBCSGroupProber ()
nsProbingState HandleData (const char *aBuf, PRUint32 aLen)
const char * GetCharSetName ()
nsProbingState GetState (void)
void Reset (void)
float GetConfidence (void)
void SetOpion ()

Static Public Member Functions

static PRBool FilterWithoutEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)
static PRBool FilterWithEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)

Protected Attributes

nsProbingState mState
nsCharSetProbermProbers [NUM_OF_PROBERS]
PRBool mIsActive [NUM_OF_PROBERS]
PRInt32 mBestGuess
PRUint32 mActiveNum

Detailed Description

Definition at line 51 of file nsMBCSGroupProber.h.


Constructor & Destructor Documentation

Definition at line 57 of file nsMBCSGroupProber.cpp.

{
  mProbers[0] = new nsUTF8Prober();
  mProbers[1] = new nsSJISProber();
  mProbers[2] = new nsEUCJPProber();
  mProbers[3] = new nsGB18030Prober();
  mProbers[4] = new nsEUCKRProber();
  mProbers[5] = new nsBig5Prober();
  mProbers[6] = new nsEUCTWProber();
  Reset();
}

Here is the call graph for this function:

Definition at line 69 of file nsMBCSGroupProber.cpp.

{
  for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
  {
    delete mProbers[i];
  }
}

Member Function Documentation

PRBool nsCharSetProber::FilterWithEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 newLen 
) [static, inherited]

Definition at line 83 of file nsCharSetProber.cpp.

{
  //do filtering to reduce load to probers
  char *newptr;
  char *prevPtr, *curPtr;
  PRBool isInTag = PR_FALSE;

  newptr = *newBuf = (char*)PR_Malloc(aLen);
  if (!newptr)
    return PR_FALSE;

  for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
  {
    if (*curPtr == '>')
      isInTag = PR_FALSE;
    else if (*curPtr == '<')
      isInTag = PR_TRUE;

    if (!(*curPtr & 0x80) &&
        (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') )
    {
      if (curPtr > prevPtr && !isInTag) // Current segment contains more than just a symbol 
                                        // and it is not inside a tag, keep it.
      {
        while (prevPtr < curPtr) *newptr++ = *prevPtr++;  
        prevPtr++;
        *newptr++ = ' ';
      }
      else
        prevPtr = curPtr+1;
    }
  }

  // If the current segment contains more than just a symbol 
  // and it is not inside a tag then keep it.
  if (!isInTag)
    while (prevPtr < curPtr)
      *newptr++ = *prevPtr++;  

  newLen = newptr - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

PRBool nsCharSetProber::FilterWithoutEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 newLen 
) [static, inherited]

Definition at line 43 of file nsCharSetProber.cpp.

{
  char *newptr;
  char *prevPtr, *curPtr;
  
  PRBool meetMSB = PR_FALSE;   
  newptr = *newBuf = (char*)PR_Malloc(aLen);
  if (!newptr)
    return PR_FALSE;

  for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
  {
    if (*curPtr & 0x80)
    {
      meetMSB = PR_TRUE;
    }
    else if (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') 
    {
      //current char is a symbol, most likely a punctuation. we treat it as segment delimiter
      if (meetMSB && curPtr > prevPtr) 
      //this segment contains more than single symbol, and it has upper ASCII, we need to keep it
      {
        while (prevPtr < curPtr) *newptr++ = *prevPtr++;  
        prevPtr++;
        *newptr++ = ' ';
        meetMSB = PR_FALSE;
      }
      else //ignore current segment. (either because it is just a symbol or just an English word)
        prevPtr = curPtr+1;
    }
  }
  if (meetMSB && curPtr > prevPtr) 
    while (prevPtr < curPtr) *newptr++ = *prevPtr++;  

  newLen = newptr - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

Implements nsCharSetProber.

Definition at line 77 of file nsMBCSGroupProber.cpp.

{
  if (mBestGuess == -1)
  {
    GetConfidence();
    if (mBestGuess == -1)
      mBestGuess = 0;
  }
  return mProbers[mBestGuess]->GetCharSetName();
}

Here is the call graph for this function:

float nsMBCSGroupProber::GetConfidence ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 164 of file nsMBCSGroupProber.cpp.

{
  PRUint32 i;
  float bestConf = 0.0, cf;

  switch (mState)
  {
  case eFoundIt:
    return (float)0.99;
  case eNotMe:
    return (float)0.01;
  default:
    for (i = 0; i < NUM_OF_PROBERS; i++)
    {
      if (!mIsActive[i])
        continue;
      cf = mProbers[i]->GetConfidence();
      if (bestConf < cf)
      {
        bestConf = cf;
        mBestGuess = i;
      }
    }
  }
  return bestConf;
}

Here is the call graph for this function:

Here is the caller graph for this function:

Implements nsCharSetProber.

Definition at line 57 of file nsMBCSGroupProber.h.

{return mState;};
nsProbingState nsMBCSGroupProber::HandleData ( const char *  aBuf,
PRUint32  aLen 
) [virtual]

Implements nsCharSetProber.

Definition at line 106 of file nsMBCSGroupProber.cpp.

{
  nsProbingState st;
  PRUint32 i;

  //do filtering to reduce load to probers
  char *highbyteBuf;
  char *hptr;
  PRBool keepNext = PR_TRUE;   //assume previous is not ascii, it will do no harm except add some noise
  hptr = highbyteBuf = (char*)PR_Malloc(aLen);
  if (!hptr)
      return mState;
  for (i = 0; i < aLen; i++)
  {
    if (aBuf[i] & 0x80)
    {
      *hptr++ = aBuf[i];
      keepNext = PR_TRUE;
    }
    else
    {
      //if previous is highbyte, keep this even it is a ASCII
      if (keepNext)
      {
          *hptr++ = aBuf[i];
          keepNext = PR_FALSE;
      }
    }
  }

  for (i = 0; i < NUM_OF_PROBERS; i++)
  {
     if (!mIsActive[i])
       continue;
     st = mProbers[i]->HandleData(highbyteBuf, hptr - highbyteBuf);
     if (st == eFoundIt)
     {
       mBestGuess = i;
       mState = eFoundIt;
       break;
     }
     else if (st == eNotMe)
     {
       mIsActive[i] = PR_FALSE;
       mActiveNum--;
       if (mActiveNum <= 0)
       {
         mState = eNotMe;
         break;
       }
     }
  }

  PR_FREEIF(highbyteBuf);

  return mState;
}

Here is the call graph for this function:

Implements nsCharSetProber.

Definition at line 88 of file nsMBCSGroupProber.cpp.

{
  mActiveNum = 0;
  for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
  {
    if (mProbers[i])
    {
      mProbers[i]->Reset();
      mIsActive[i] = PR_TRUE;
      ++mActiveNum;
    }
    else
      mIsActive[i] = PR_FALSE;
  }
  mBestGuess = -1;
  mState = eDetecting;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void nsMBCSGroupProber::SetOpion ( ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 60 of file nsMBCSGroupProber.h.

{};

Member Data Documentation

Definition at line 71 of file nsMBCSGroupProber.h.

Definition at line 70 of file nsMBCSGroupProber.h.

Definition at line 69 of file nsMBCSGroupProber.h.

Definition at line 68 of file nsMBCSGroupProber.h.

Definition at line 60 of file nsMBCSGroupProber.h.


The documentation for this class was generated from the following files: