Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Static Public Member Functions | Protected Attributes
nsLatin1Prober Class Reference

#include <nsLatin1Prober.h>

Inheritance diagram for nsLatin1Prober:
Inheritance graph
[legend]
Collaboration diagram for nsLatin1Prober:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 nsLatin1Prober (void)
virtual ~nsLatin1Prober (void)
nsProbingState HandleData (const char *aBuf, PRUint32 aLen)
const char * GetCharSetName ()
nsProbingState GetState (void)
void Reset (void)
float GetConfidence (void)
void SetOpion ()

Static Public Member Functions

static PRBool FilterWithoutEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)
static PRBool FilterWithEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)

Protected Attributes

nsProbingState mState
char mLastCharClass
PRUint32 mFreqCounter [FREQ_CAT_NUM]

Detailed Description

Definition at line 46 of file nsLatin1Prober.h.


Constructor & Destructor Documentation

nsLatin1Prober::nsLatin1Prober ( void  ) [inline]

Definition at line 48 of file nsLatin1Prober.h.

// Inline constructor body: calls Reset() so a newly built prober starts from its initial state.
{Reset();};

Here is the call graph for this function:

virtual nsLatin1Prober::~nsLatin1Prober ( void  ) [inline, virtual]

Definition at line 49 of file nsLatin1Prober.h.

// Inline destructor body: empty, performs no work of its own.
{};

Member Function Documentation

PRBool nsCharSetProber::FilterWithEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen 
) [static, inherited]

Definition at line 83 of file nsCharSetProber.cpp.

{
  // Pre-filter that shrinks the input before it is handed to a prober.
  //
  // The input is cut into segments at every 7-bit byte that is not an ASCII
  // letter (A-Z / a-z). Bytes with the high bit set never act as delimiters.
  // Each non-empty segment that ends outside an HTML-style tag is copied to
  // the output followed by a single space; everything else is dropped.
  //
  // aBuf/aLen : input bytes.
  // newBuf    : receives a PR_Malloc'ed buffer of aLen bytes (may be null on
  //             allocation failure). It is not freed here.
  // newLen    : receives the number of bytes written to *newBuf.
  // Returns PR_FALSE if the allocation failed, PR_TRUE otherwise.
  //
  // The output never exceeds aLen bytes: every space that is added replaces
  // a delimiter byte that was consumed.
  char *out = static_cast<char*>(PR_Malloc(aLen));
  *newBuf = out;
  if (!out)
    return PR_FALSE;

  const char *const end = aBuf + aLen;
  const char *segStart = aBuf;   // first byte of the segment not yet emitted
  const char *scan = aBuf;
  PRBool insideTag = PR_FALSE;

  while (scan < end)
  {
    const char ch = *scan;

    // Tag tracking happens before the delimiter test, so a closing '>' is
    // already "outside the tag" when the segment in front of it is judged.
    // NOTE: as a consequence the last word before '>' is kept ("<a>" -> "a ").
    if (ch == '>')
      insideTag = PR_FALSE;
    else if (ch == '<')
      insideTag = PR_TRUE;

    const bool highBit = (ch & 0x80) != 0;
    const bool asciiLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');

    if (!highBit && !asciiLetter)
    {
      // ch is a delimiter: emit the pending segment if it is non-empty and
      // we are not inside a tag, then restart just past the delimiter.
      if (segStart < scan && !insideTag)
      {
        while (segStart < scan)
          *out++ = *segStart++;
        *out++ = ' ';
      }
      segStart = scan + 1;
    }

    ++scan;
  }

  // Flush the trailing segment (no delimiter follows it, so no space is
  // appended) unless the input ended in the middle of a tag.
  if (!insideTag)
  {
    while (segStart < scan)
      *out++ = *segStart++;
  }

  newLen = out - *newBuf;
  return PR_TRUE;
}

Here is the caller graph for this function:

PRBool nsCharSetProber::FilterWithoutEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen 
) [static, inherited]

Definition at line 43 of file nsCharSetProber.cpp.

{
  // Pre-filter that keeps only the segments containing at least one byte
  // with the high bit set.
  //
  // Segments are delimited by 7-bit bytes that are not ASCII letters
  // (A-Z / a-z). A segment made only of ASCII letters, or an empty one, is
  // discarded; a kept segment is copied to the output followed by one space.
  //
  // aBuf/aLen : input bytes.
  // newBuf    : receives a PR_Malloc'ed buffer of aLen bytes (may be null on
  //             allocation failure). It is not freed here.
  // newLen    : receives the number of bytes written to *newBuf.
  // Returns PR_FALSE if the allocation failed, PR_TRUE otherwise.
  char *out = static_cast<char*>(PR_Malloc(aLen));
  *newBuf = out;
  if (!out)
    return PR_FALSE;

  const char *const end = aBuf + aLen;
  const char *segStart = aBuf;    // first byte of the segment not yet emitted
  const char *scan = aBuf;
  PRBool sawHighBit = PR_FALSE;   // current segment has a byte >= 0x80

  for (; scan < end; ++scan)
  {
    const char ch = *scan;

    if (ch & 0x80)
    {
      sawHighBit = PR_TRUE;
      continue;
    }

    const bool asciiLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
    if (asciiLetter)
      continue;

    // ch is a symbol (most likely punctuation) and ends the current segment.
    if (sawHighBit && segStart < scan)
    {
      // Segment is more than a lone symbol and contains upper-ASCII: keep it.
      while (segStart < scan)
        *out++ = *segStart++;
      *out++ = ' ';
      sawHighBit = PR_FALSE;
    }
    // In either case the next segment starts right after the delimiter.
    segStart = scan + 1;
  }

  // Trailing segment without a closing delimiter: same keep rule, no space.
  if (sawHighBit && segStart < scan)
  {
    while (segStart < scan)
      *out++ = *segStart++;
  }

  newLen = out - *newBuf;
  return PR_TRUE;
}

Here is the caller graph for this function:

const char* nsLatin1Prober::GetCharSetName ( ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 51 of file nsLatin1Prober.h.

// Name of the charset this prober reports; always the literal "windows-1252".
{return "windows-1252";};
float nsLatin1Prober::GetConfidence ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 147 of file nsLatin1Prober.cpp.

{
  // Turns the frequency-category counters gathered by HandleData() into a
  // confidence value. Returns 0.01 once the prober has been ruled out,
  // otherwise a value in [0, 0.5].
  if (mState == eNotMe)
    return 0.01f;

  // Sum the counters of all frequency categories.
  PRUint32 total = 0;
  for (const PRUint32 *counter = mFreqCounter;
       counter != mFreqCounter + FREQ_CAT_NUM;
       ++counter)
  {
    total += *counter;
  }

  // Nothing counted yet: no evidence either way.
  if (total == 0)
    return 0.0f;

  // Share of category-3 transitions, with category-1 transitions weighted
  // twenty-fold against it.
  // NOTE(review): presumably category 3 is "very likely" and category 1 is
  // "very unlikely" -- confirm against the class model table.
  float confidence = mFreqCounter[3]*1.0f / total;
  confidence -= mFreqCounter[1]*20.0f/total;

  // Clamp at zero, then halve: latin1 confidence is lowered on purpose so
  // that other, more accurate detectors can take priority.
  return (confidence < 0.0f ? 0.0f : confidence) * 0.50f;
}
nsProbingState nsLatin1Prober::GetState ( void  ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 52 of file nsLatin1Prober.h.

// Returns the current value of mState without modifying the prober.
{return mState;};
nsProbingState nsLatin1Prober::HandleData ( const char *  aBuf,
PRUint32  aLen 
) [virtual]

Implements nsCharSetProber.

Definition at line 117 of file nsLatin1Prober.cpp.

{
  // Feeds a chunk of input to the prober and returns the resulting state.
  //
  // The chunk is first reduced with FilterWithEnglishLetters(); if that
  // fails, the raw input is analysed instead. Each byte is mapped to a
  // character class, and the (previous class, current class) pair is looked
  // up in the class model to get a frequency category, which is counted.
  // A category of 0 rules this prober out (eNotMe) and stops the scan.
  char *filtered = 0;
  PRUint32 filteredLen = 0;
  const PRBool haveFiltered =
      FilterWithEnglishLetters(aBuf, aLen, &filtered, filteredLen);

  // Pick the buffer to analyse: the filtered copy, or the caller's bytes.
  const char *data = haveFiltered ? filtered : aBuf;
  const PRUint32 dataLen = haveFiltered ? filteredLen : aLen;

  const char *const stop = data + dataLen;
  for (const char *cursor = data; cursor < stop; ++cursor)
  {
    const unsigned char charClass =
        Latin1_CharToClass[static_cast<unsigned char>(*cursor)];
    const unsigned char freq =
        Latin1ClassModel[mLastCharClass*CLASS_NUM + charClass];

    if (freq == 0)
    {
      mState = eNotMe;
      break;
    }

    mFreqCounter[freq]++;
    // Remembered across calls so the next byte pairs with this class.
    mLastCharClass = charClass;
  }

  // Release the filtered copy only; the caller's buffer is never freed.
  if (data != aBuf)
    PR_FREEIF(filtered);

  return mState;
}

Here is the call graph for this function:

void nsLatin1Prober::Reset ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 108 of file nsLatin1Prober.cpp.

{
  // Puts the prober back into its starting condition: all frequency
  // counters zeroed, previous character class set to OTH, state eDetecting.
  PRUint32 *counter = mFreqCounter;
  PRUint32 *const counterEnd = mFreqCounter + FREQ_CAT_NUM;
  while (counter != counterEnd)
    *counter++ = 0;

  mLastCharClass = OTH;
  mState = eDetecting;
}

Here is the caller graph for this function:

void nsLatin1Prober::SetOpion ( ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 55 of file nsLatin1Prober.h.

// No-op: the body is empty.
{};

Member Data Documentation

Definition at line 65 of file nsLatin1Prober.h.

Definition at line 64 of file nsLatin1Prober.h.

Definition at line 55 of file nsLatin1Prober.h.


The documentation for this class was generated from the following files: