Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Static Public Member Functions | Protected Attributes
nsSBCSGroupProber Class Reference

#include <nsSBCSGroupProber.h>

Inheritance diagram for nsSBCSGroupProber:
Inheritance graph
[legend]
Collaboration diagram for nsSBCSGroupProber:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 nsSBCSGroupProber ()
virtual ~nsSBCSGroupProber ()
nsProbingState HandleData (const char *aBuf, PRUint32 aLen)
const char * GetCharSetName ()
nsProbingState GetState (void)
void Reset (void)
float GetConfidence (void)
void SetOpion ()

Static Public Member Functions

static PRBool FilterWithoutEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)
static PRBool FilterWithEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)

Protected Attributes

nsProbingState mState
nsCharSetProbermProbers [NUM_OF_SBCS_PROBERS]
PRBool mIsActive [NUM_OF_SBCS_PROBERS]
PRInt32 mBestGuess
PRUint32 mActiveNum

Detailed Description

Definition at line 46 of file nsSBCSGroupProber.h.


Constructor & Destructor Documentation

Definition at line 47 of file nsSBCSGroupProber.cpp.

{
  mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model);
  mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel);
  mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model);
  mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel);
  mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model);
  mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model);
  mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model);
  mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
  mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
  mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);

  nsHebrewProber *hebprober = new nsHebrewProber();
  // Notice: Any change in these indexes - 10,11,12 must be reflected
  // in the code below as well.
  mProbers[10] = hebprober;
  mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
  mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
  // Tell the Hebrew prober about the logical and visual probers
  if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null
  {
    hebprober->SetModelProbers(mProbers[11], mProbers[12]);
  }
  else // One or more is null. avoid any Hebrew probing, null them all
  {
    for (PRUint32 i = 10; i <= 12; ++i)
    { 
      delete mProbers[i]; 
      mProbers[i] = 0; 
    }
  }

  // disable latin2 before latin1 is available, otherwise all latin1 
  // will be detected as latin2 because of their similarity.
  //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
  //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);

  Reset();
}

Here is the call graph for this function:

Definition at line 88 of file nsSBCSGroupProber.cpp.

{
  for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
  {
    delete mProbers[i];
  }
}

Member Function Documentation

PRBool nsCharSetProber::FilterWithEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 newLen 
) [static, inherited]

Definition at line 83 of file nsCharSetProber.cpp.

{
  //do filtering to reduce load to probers
  char *newptr;
  char *prevPtr, *curPtr;
  PRBool isInTag = PR_FALSE;

  newptr = *newBuf = (char*)PR_Malloc(aLen);
  if (!newptr)
    return PR_FALSE;

  for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
  {
    if (*curPtr == '>')
      isInTag = PR_FALSE;
    else if (*curPtr == '<')
      isInTag = PR_TRUE;

    if (!(*curPtr & 0x80) &&
        (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') )
    {
      if (curPtr > prevPtr && !isInTag) // Current segment contains more than just a symbol 
                                        // and it is not inside a tag, keep it.
      {
        while (prevPtr < curPtr) *newptr++ = *prevPtr++;  
        prevPtr++;
        *newptr++ = ' ';
      }
      else
        prevPtr = curPtr+1;
    }
  }

  // If the current segment contains more than just a symbol 
  // and it is not inside a tag then keep it.
  if (!isInTag)
    while (prevPtr < curPtr)
      *newptr++ = *prevPtr++;  

  newLen = newptr - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

PRBool nsCharSetProber::FilterWithoutEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 newLen 
) [static, inherited]

Definition at line 43 of file nsCharSetProber.cpp.

{
  char *newptr;
  char *prevPtr, *curPtr;
  
  PRBool meetMSB = PR_FALSE;   
  newptr = *newBuf = (char*)PR_Malloc(aLen);
  if (!newptr)
    return PR_FALSE;

  for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
  {
    if (*curPtr & 0x80)
    {
      meetMSB = PR_TRUE;
    }
    else if (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') 
    {
      //current char is a symbol, most likely a punctuation. we treat it as segment delimiter
      if (meetMSB && curPtr > prevPtr) 
      //this segment contains more than single symbol, and it has upper ASCII, we need to keep it
      {
        while (prevPtr < curPtr) *newptr++ = *prevPtr++;  
        prevPtr++;
        *newptr++ = ' ';
        meetMSB = PR_FALSE;
      }
      else //ignore current segment. (either because it is just a symbol or just an English word)
        prevPtr = curPtr+1;
    }
  }
  if (meetMSB && curPtr > prevPtr) 
    while (prevPtr < curPtr) *newptr++ = *prevPtr++;  

  newLen = newptr - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

Implements nsCharSetProber.

Definition at line 97 of file nsSBCSGroupProber.cpp.

{
  //if we have no answer yet
  if (mBestGuess == -1)
  {
    GetConfidence();
    //no charset seems positive
    if (mBestGuess == -1)
      //we will use default.
      mBestGuess = 0;
  }
  return mProbers[mBestGuess]->GetCharSetName();
}

Here is the call graph for this function:

float nsSBCSGroupProber::GetConfidence ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 178 of file nsSBCSGroupProber.cpp.

{
  PRUint32 i;
  float bestConf = 0.0, cf;

  switch (mState)
  {
  case eFoundIt:
    return (float)0.99; //sure yes
  case eNotMe:
    return (float)0.01;  //sure no
  default:
    for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
    {
      if (!mIsActive[i])
        continue;
      cf = mProbers[i]->GetConfidence();
      if (bestConf < cf)
      {
        bestConf = cf;
        mBestGuess = i;
      }
    }
  }
  return bestConf;
}

Here is the call graph for this function:

Here is the caller graph for this function:

Implements nsCharSetProber.

Definition at line 52 of file nsSBCSGroupProber.h.

{return mState;};
nsProbingState nsSBCSGroupProber::HandleData ( const char *  aBuf,
PRUint32  aLen 
) [virtual]

Implements nsCharSetProber.

Definition at line 130 of file nsSBCSGroupProber.cpp.

{
  nsProbingState st;
  PRUint32 i;
  char *newBuf1 = 0;
  PRUint32 newLen1 = 0;

  //apply filter to original buffer, and we got new buffer back
  //depend on what script it is, we will feed them the new buffer 
  //we got after applying proper filter
  //this is done without any consideration to KeepEnglishLetters
  //of each prober since as of now, there are no probers here which
  //recognize languages with English characters.
  if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1))
    goto done;
  
  if (newLen1 == 0)
    goto done; // Nothing to see here, move on.

  for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
  {
     if (!mIsActive[i])
       continue;
     st = mProbers[i]->HandleData(newBuf1, newLen1);
     if (st == eFoundIt)
     {
       mBestGuess = i;
       mState = eFoundIt;
       break;
     }
     else if (st == eNotMe)
     {
       mIsActive[i] = PR_FALSE;
       mActiveNum--;
       if (mActiveNum <= 0)
       {
         mState = eNotMe;
         break;
       }
     }
  }

done:
  PR_FREEIF(newBuf1);

  return mState;
}

Here is the call graph for this function:

Implements nsCharSetProber.

Definition at line 111 of file nsSBCSGroupProber.cpp.

{
  mActiveNum = 0;
  for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
  {
    if (mProbers[i]) // not null
    {
      mProbers[i]->Reset();
      mIsActive[i] = PR_TRUE;
      ++mActiveNum;
    }
    else
      mIsActive[i] = PR_FALSE;
  }
  mBestGuess = -1;
  mState = eDetecting;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void nsSBCSGroupProber::SetOpion ( ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 55 of file nsSBCSGroupProber.h.

{};

Member Data Documentation

Definition at line 66 of file nsSBCSGroupProber.h.

Definition at line 65 of file nsSBCSGroupProber.h.

Definition at line 64 of file nsSBCSGroupProber.h.

Definition at line 63 of file nsSBCSGroupProber.h.

Definition at line 55 of file nsSBCSGroupProber.h.


The documentation for this class was generated from the following files: