Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Static Public Member Functions | Static Protected Member Functions | Protected Attributes
nsHebrewProber Class Reference

#include <nsHebrewProber.h>

Inheritance diagram for nsHebrewProber:
Inheritance graph
[legend]
Collaboration diagram for nsHebrewProber:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 nsHebrewProber (void)
virtual ~nsHebrewProber (void)
virtual nsProbingState HandleData (const char *aBuf, PRUint32 aLen)
 HandleData Final letter analysis for logical-visual decision.
virtual const char * GetCharSetName ()
virtual void Reset (void)
virtual nsProbingState GetState (void)
virtual float GetConfidence (void)
virtual void SetOpion ()
void SetModelProbers (nsCharSetProber *logicalPrb, nsCharSetProber *visualPrb)

Static Public Member Functions

static PRBool FilterWithoutEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)
static PRBool FilterWithEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)

Static Protected Member Functions

static PRBool isFinal (char c)
static PRBool isNonFinal (char c)

Protected Attributes

PRInt32 mFinalCharLogicalScore
PRInt32 mFinalCharVisualScore
char mPrev
char mBeforePrev
nsCharSetProber * mLogicalProb
nsCharSetProber * mVisualProb

Detailed Description

Definition at line 45 of file nsHebrewProber.h.


Constructor & Destructor Documentation

nsHebrewProber::nsHebrewProber ( void  ) [inline]

Definition at line 48 of file nsHebrewProber.h.

Here is the call graph for this function:

virtual nsHebrewProber::~nsHebrewProber ( void  ) [inline, virtual]

Definition at line 50 of file nsHebrewProber.h.

{} // Empty body: the destructor performs no explicit work.

Member Function Documentation

PRBool nsCharSetProber::FilterWithEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen 
) [static, inherited]

Definition at line 83 of file nsCharSetProber.cpp.

{
  // Reduces the load on the probers by copying only the interesting parts of
  // aBuf into a freshly allocated buffer:
  //   - aBuf/aLen : input bytes and their count.
  //   - newBuf    : receives the PR_Malloc'ed output buffer (aLen bytes).
  //   - newLen    : receives the number of bytes written to *newBuf.
  // Returns PR_FALSE if the allocation fails, PR_TRUE otherwise.
  //
  // The input is split into segments at every low-ASCII character that is not
  // an English letter. A non-empty segment that lies outside a <...> tag is
  // copied, followed by a single space; everything else is dropped.
  char *newptr;
  char *prevPtr, *curPtr;
  PRBool isInTag = PR_FALSE;

  newptr = *newBuf = (char*)PR_Malloc(aLen);
  if (!newptr)
    return PR_FALSE;

  for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
  {
    // Tag state under which the segment [prevPtr, curPtr) was scanned, i.e.
    // before the current character is taken into account. Deciding with the
    // state *after* the delimiter would drop text that directly precedes '<'
    // and would leak the last word of a tag when its closing '>' is reached.
    const PRBool segmentInTag = isInTag;

    if (*curPtr == '>')
      isInTag = PR_FALSE;
    else if (*curPtr == '<')
      isInTag = PR_TRUE;

    if (!(*curPtr & 0x80) &&
        (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') )
    {
      // Current char is a delimiter. Keep the segment before it only if it
      // contains more than just a symbol and was not inside a tag.
      if (curPtr > prevPtr && !segmentInTag)
      {
        while (prevPtr < curPtr) *newptr++ = *prevPtr++;
        *newptr++ = ' ';
      }
      // In every case the next segment starts right after the delimiter.
      prevPtr = curPtr+1;
    }
  }

  // Flush the trailing segment (the one not terminated by a delimiter) unless
  // the buffer ended in the middle of a tag.
  if (!isInTag)
    while (prevPtr < curPtr)
      *newptr++ = *prevPtr++;

  newLen = newptr - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

PRBool nsCharSetProber::FilterWithoutEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen 
) [static, inherited]

Definition at line 43 of file nsCharSetProber.cpp.

{
  // Copies into a newly PR_Malloc'ed buffer (*newBuf, aLen bytes) only those
  // segments of aBuf that contain at least one byte with the high bit set.
  // Segments are delimited by low-ASCII characters that are not English
  // letters; each kept segment is followed by a single space. newLen receives
  // the number of bytes written. Returns PR_FALSE when allocation fails.
  char *out = (char*)PR_Malloc(aLen);
  *newBuf = out;
  if (!out)
    return PR_FALSE;

  const char *const end = aBuf + aLen;
  const char *segStart = aBuf;   // first byte of the segment being scanned
  const char *scan = aBuf;       // current read position
  PRBool sawHighBit = PR_FALSE;  // segment contains an upper-ASCII byte

  while (scan < end)
  {
    const char ch = *scan;
    if (ch & 0x80)
    {
      sawHighBit = PR_TRUE;
    }
    else
    {
      const PRBool isEnglishLetter =
        (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
      if (!isEnglishLetter)
      {
        // A symbol (most likely punctuation) closes the current segment.
        if (sawHighBit && scan > segStart)
        {
          // Non-empty segment with upper-ASCII content: keep it.
          for (; segStart < scan; ++segStart)
            *out++ = *segStart;
          *out++ = ' ';
          sawHighBit = PR_FALSE;
        }
        // Otherwise the segment was a lone symbol or a plain English word
        // and is ignored. Either way, restart just past the delimiter.
        segStart = scan + 1;
      }
    }
    ++scan;
  }

  // Trailing segment without a closing delimiter.
  if (sawHighBit && scan > segStart)
    for (; segStart < scan; ++segStart)
      *out++ = *segStart;

  newLen = out - *newBuf;

  return PR_TRUE;
}

Here is the caller graph for this function:

const char * nsHebrewProber::GetCharSetName ( ) [virtual]

Implements nsCharSetProber.

Definition at line 145 of file nsHebrewProber.cpp.

{
  // Decision order:
  //   1. final-letter score gap, when it is large enough;
  //   2. otherwise the gap between the two model probers' confidences;
  //   3. otherwise the sign of the final-letter gap, defaulting to Logical.
  const PRInt32 finalGap = mFinalCharLogicalScore - mFinalCharVisualScore;

  if (finalGap >= MIN_FINAL_CHAR_DISTANCE)
  {
    return LOGICAL_HEBREW_NAME;
  }
  if (finalGap <= -(MIN_FINAL_CHAR_DISTANCE))
  {
    return VISUAL_HEBREW_NAME;
  }

  // The final-letter evidence is not dominant; consult the model probers.
  const float modelGap = mLogicalProb->GetConfidence() - mVisualProb->GetConfidence();

  if (modelGap > MIN_MODEL_DISTANCE)
  {
    return LOGICAL_HEBREW_NAME;
  }
  if (modelGap < -(MIN_MODEL_DISTANCE))
  {
    return VISUAL_HEBREW_NAME;
  }

  // Neither is decisive: a negative final-letter gap still tips to Visual.
  if (finalGap < 0)
  {
    return VISUAL_HEBREW_NAME;
  }

  // Positive gap, or a complete tie: fall back to Logical.
  return LOGICAL_HEBREW_NAME;
}

Here is the call graph for this function:

virtual float nsHebrewProber::GetConfidence ( void  ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 57 of file nsHebrewProber.h.

{
  // This prober always reports a confidence of zero.
  return 0.0f;
}

nsProbingState nsHebrewProber::GetState ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 181 of file nsHebrewProber.cpp.

{
  // Keep detecting while at least one model prober has not given up;
  // report eNotMe only once both of them have.
  if (mLogicalProb->GetState() != eNotMe)
    return eDetecting;
  if (mVisualProb->GetState() != eNotMe)
    return eDetecting;
  return eNotMe;
}

Here is the call graph for this function:

Here is the caller graph for this function:

nsProbingState nsHebrewProber::HandleData ( const char *  aBuf,
PRUint32  aLen 
) [virtual]

HandleData Final letter analysis for logical-visual decision.

Look for evidence that the received buffer is either logical Hebrew or visual Hebrew. The following cases are checked: 1) A word longer than 1 letter, ending with a final letter. This is an indication that the text is laid out "naturally" since the final letter really appears at the end. +1 for logical score. 2) A word longer than 1 letter, ending with a Non-Final letter. In normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi, should not end with the Non-Final form of that letter. Exceptions to this rule are mentioned above in isNonFinal(). This is an indication that the text is laid out backwards. +1 for visual score 3) A word longer than 1 letter, starting with a final letter. Final letters should not appear at the beginning of a word. This is an indication that the text is laid out backwards. +1 for visual score.

The visual score and logical score are accumulated throughout the text and are finally checked against each other in GetCharSetName(). No checking for final letters in the middle of words is done since that case is not an indication for either Logical or Visual text.

The input buffer should not contain any white spaces that are not (' ') or any low-ascii punctuation marks.

Implements nsCharSetProber.

Definition at line 109 of file nsHebrewProber.cpp.

{
  // Nothing to do once both model probers have ruled themselves out.
  if (GetState() == eNotMe)
    return eNotMe;

  const char *const bufEnd = aBuf + aLen;

  for (const char *pos = aBuf; pos < bufEnd; ++pos)
  {
    const char ch = *pos;
    // True when the character two positions back was a space, i.e. mPrev is
    // the first letter of a word.
    const PRBool prevIsWordStart = (mBeforePrev == ' ');

    if (ch == ' ')
    {
      // A word just ended at mPrev. Only words longer than one letter count.
      if (!prevIsWordStart)
      {
        if (isFinal(mPrev))
          ++mFinalCharLogicalScore;   // case (1): word ends with a final letter
        else if (isNonFinal(mPrev))
          ++mFinalCharVisualScore;    // case (2): word ends with a Non-Final letter
      }
    }
    else if (prevIsWordStart && isFinal(mPrev))
    {
      // case (3): [space][final letter][non-space] - word starts with a final letter
      ++mFinalCharVisualScore;
    }

    // Slide the two-character history window.
    mBeforePrev = mPrev;
    mPrev = ch;
  }

  // Keep detecting until the end of the data, or until both model probers
  // return eNotMe (checked at the top of this function).
  return eDetecting;
}

Here is the call graph for this function:

PRBool nsHebrewProber::isFinal ( char  c) [static, protected]

Definition at line 64 of file nsHebrewProber.cpp.

{
  // True when c is one of the five final-form letters:
  // Kaf, Mem, Nun, Pe or Tsadi.
  const PRBool isKafOrMem = (c == FINAL_KAF) || (c == FINAL_MEM);
  const PRBool isNunOrPe = (c == FINAL_NUN) || (c == FINAL_PE);
  return isKafOrMem || isNunOrPe || (c == FINAL_TSADI);
}

Here is the caller graph for this function:

PRBool nsHebrewProber::isNonFinal ( char  c) [static, protected]

Definition at line 69 of file nsHebrewProber.cpp.

{
  // True when c is the normal (Non-Final) form of Kaf, Mem, Nun or Pe.
  //
  // The normal Tsadi is deliberately left out: words like 'lechotet'
  // (to chat) have an apostrophe after the tsadi, and
  // FilterWithoutEnglishLetters turns that apostrophe into a space. The
  // Non-Final tsadi would then look as if it ended a word although it does
  // not in the original text.
  //
  // Pe and Kaf occasionally show a related weakness: words such as 'Pop',
  // 'Winamp' and 'Mubarak' legally end with a Non-Final Pe or Kaf. Such
  // words are rare, so the benefit of treating these letters as Non-Final
  // outweighs the damage.
  const PRBool isKafOrMem = (c == NORMAL_KAF) || (c == NORMAL_MEM);
  const PRBool isNunOrPe = (c == NORMAL_NUN) || (c == NORMAL_PE);
  return isKafOrMem || isNunOrPe;
}

Here is the caller graph for this function:

void nsHebrewProber::Reset ( void  ) [virtual]

Implements nsCharSetProber.

Definition at line 170 of file nsHebrewProber.cpp.

{
  // Start the two-character history with spaces so that the beginning of the
  // data behaves as if it were preceded by a word delimiter.
  mBeforePrev = ' ';
  mPrev = ' ';

  // Clear both accumulated final-letter scores.
  mFinalCharVisualScore = 0;
  mFinalCharLogicalScore = 0;
}

Here is the caller graph for this function:

void nsHebrewProber::SetModelProbers ( nsCharSetProber *  logicalPrb,
nsCharSetProber *  visualPrb 
) [inline]

Definition at line 60 of file nsHebrewProber.h.

  {
    // Store the two model probers that GetState() and GetCharSetName() query.
    mVisualProb = visualPrb;
    mLogicalProb = logicalPrb;
  }

Here is the caller graph for this function:

virtual void nsHebrewProber::SetOpion ( ) [inline, virtual]

Implements nsCharSetProber.

Definition at line 58 of file nsHebrewProber.h.

{}; // Empty body: this override does nothing.

Member Data Documentation

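char nsHebrewProber::mBeforePrev [protected]

Definition at line 74 of file nsHebrewProber.h.

PRInt32 nsHebrewProber::mFinalCharLogicalScore [protected]

Definition at line 71 of file nsHebrewProber.h.

PRInt32 nsHebrewProber::mFinalCharVisualScore [protected]

Definition at line 71 of file nsHebrewProber.h.

nsCharSetProber * nsHebrewProber::mLogicalProb [protected]

Definition at line 77 of file nsHebrewProber.h.

char nsHebrewProber::mPrev [protected]

Definition at line 74 of file nsHebrewProber.h.

nsCharSetProber * nsHebrewProber::mVisualProb [protected]

Definition at line 77 of file nsHebrewProber.h.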


The documentation for this class was generated from the following files: