Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Protected Member Functions
nsSemanticUnitScanner Class Reference

#include <nsSemanticUnitScanner.h>

Inheritance diagram for nsSemanticUnitScanner:
Inheritance graph
[legend]
Collaboration diagram for nsSemanticUnitScanner:
Collaboration graph
[legend]

List of all members.

Public Member Functions

NS_DECL_ISUPPORTS
NS_DECL_NSISEMANTICUNITSCANNER 
nsSemanticUnitScanner ()
virtual ~nsSemanticUnitScanner ()
void start (in string characterSet)
 start()
boolean next (in wstring text, in long length, in long pos, in boolean isLastBuffer, out long begin, out long end)
 next() Get the begin / end offset of the next unit in the current text
NS_IMETHOD BreakInBetween (const PRUnichar *aText1, PRUint32 aTextLen1, const PRUnichar *aText2, PRUint32 aTextLen2, PRBool *oCanBreak)
NS_IMETHOD FindWord (const PRUnichar *aText1, PRUint32 aTextLen1, PRUint32 aOffset, PRUint32 *oWordBegin, PRUint32 *oWordEnd)
NS_IMETHOD NextWord (const PRUnichar *aText, PRUint32 aLen, PRUint32 aPos, PRUint32 *oNext, PRBool *oNeedMoreText)
NS_IMETHOD PrevWord (const PRUnichar *aText, PRUint32 aLen, PRUint32 aPos, PRUint32 *oPrev, PRBool *oNeedMoreText)

Protected Member Functions

PRUint8 GetClass (PRUnichar aChar)

Detailed Description

Definition at line 45 of file nsSemanticUnitScanner.h.


Constructor & Destructor Documentation

Definition at line 43 of file nsSemanticUnitScanner.cpp.

                                             : nsSampleWordBreaker()
{
  /* member initializers and constructor code */
  // Placeholder body: no statements run here. The only initialization
  // visible for this constructor is the nsSampleWordBreaker() base
  // initializer that precedes the opening brace.
}

Definition at line 48 of file nsSemanticUnitScanner.cpp.

{
  /* destructor code */
  // Placeholder body: no explicit cleanup is performed here.
}

Member Function Documentation

nsresult nsSampleWordBreaker::BreakInBetween ( const PRUnichar * aText1,
PRUint32  aTextLen1,
const PRUnichar * aText2,
PRUint32  aTextLen2,
PRBool * oCanBreak 
) [virtual, inherited]

Implements nsIWordBreaker.

Definition at line 52 of file nsSampleWordBreaker.cpp.

{
  // Decides whether a word break is allowed at the seam between two text
  // runs: aText1 (aTextLen1 units) immediately followed by aText2
  // (aTextLen2 units). The answer is written to *oCanBreak.
  //
  // Returns NS_ERROR_NULL_POINTER if any pointer argument is null,
  // NS_OK otherwise.
  NS_PRECONDITION( nsnull != aText1, "null ptr");
  NS_PRECONDITION( nsnull != aText2, "null ptr");
  NS_PRECONDITION( nsnull != oCanBreak, "null ptr");

  // The out-parameter is validated together with the inputs so that a null
  // oCanBreak yields an error code instead of a null dereference below.
  if((nsnull == aText1) || (nsnull == aText2) || (nsnull == oCanBreak))
    return NS_ERROR_NULL_POINTER;

  // An empty run on either side has no boundary character to compare,
  // so no break is reported.
  if((0 == aTextLen1) || (0 == aTextLen2))
  {
    *oCanBreak = PR_FALSE;
    return NS_OK;
  }

  // A break is possible exactly when the last character of the first run
  // and the first character of the second run fall in different classes.
  *oCanBreak = (this->GetClass(aText1[aTextLen1-1]) != this->GetClass(aText2[0]));

  return NS_OK;
}
nsresult nsSampleWordBreaker::FindWord ( const PRUnichar * aText1,
PRUint32  aTextLen1,
PRUint32  aOffset,
PRUint32 * oWordBegin,
PRUint32 * oWordEnd 
) [virtual, inherited]

Implements nsIWordBreaker.

Definition at line 122 of file nsSampleWordBreaker.cpp.

{
  // Finds the run of same-class characters ("word") that contains aOffset.
  // On success *oWordBegin receives the index of the first character of the
  // run and *oWordEnd the index one past its last character.
  //
  // Returns NS_ERROR_NULL_POINTER if aText, oWordBegin or oWordEnd is null,
  // NS_ERROR_ILLEGAL_VALUE if aOffset > aTextLen, NS_OK otherwise.
  NS_PRECONDITION( nsnull != aText, "null ptr");
  NS_PRECONDITION( 0 != aTextLen, "len = 0");
  NS_PRECONDITION( nsnull != oWordBegin, "null ptr");
  NS_PRECONDITION( nsnull != oWordEnd, "null ptr");
  NS_PRECONDITION( aOffset <= aTextLen, "aOffset > aTextLen");

  if((nsnull == aText ) || (nsnull == oWordBegin) || (nsnull == oWordEnd))
    return NS_ERROR_NULL_POINTER;

  if( aOffset > aTextLen )
    return NS_ERROR_ILLEGAL_VALUE;

  // NOTE(review): aOffset == aTextLen passes the check above, so this reads
  // aText[aTextLen]. Presumably callers pass buffers with a readable
  // terminator at that index -- confirm against callers before tightening.
  PRUint8 c = this->GetClass(aText[aOffset]);
  PRUint32 i;

  // Scan forward for the first character whose class differs. The bound is
  // strictly less than aTextLen: index aTextLen is outside the stated text,
  // and *oWordEnd already defaults to aTextLen, so stopping there yields the
  // same result without reading past the end.
  *oWordEnd = aTextLen;
  for(i = aOffset + 1; i < aTextLen; i++)
  {
     if( c != this->GetClass(aText[i]))
     {
       *oWordEnd = i;
       break;
     }
  }

  // Scan backward: the word begins just after the nearest preceding
  // character of a different class, or at 0 if there is none.
  *oWordBegin = 0;
  for(i = aOffset; i > 0; i--)
  {
     if( c != this->GetClass(aText[i-1]))
     {
       *oWordBegin = i;
       break;
     }
  }
  if(kWbClassThaiLetter == c)
  {
       // need to call Thai word breaker from here
       // we should pass the whole Thai segment to the thai word breaker to find a shorter answer
  }
  return NS_OK;
}
PRUint8 nsSampleWordBreaker::GetClass ( PRUnichar  aChar) [protected, inherited]

Definition at line 88 of file nsSampleWordBreaker.cpp.

{
  // Maps the character `c` to one of the kWbClass* word-break classes.
  // NOTE(review): `c` is presumably the character parameter that the
  // declaration above names aChar -- confirm against the .cpp definition.

  // Scripts not treated as alphabetical: pick the matching CJK class and
  // fall back to the generic letter class.
  if (!IS_ALPHABETICAL_SCRIPT(c)) {
    if (IS_HAN(c))
      return kWbClassHanLetter;
    if (IS_KATAKANA(c))
      return kWbClassKatakanaLetter;
    if (IS_HIRAGANA(c))
      return kWbClassHiraganaLetter;
    if (IS_HALFWIDTHKATAKANA(c))
      return kWbClassHWKatakanaLetter;
    return kWbClassAlphaLetter;
  }

  // Alphabetical script outside ASCII: Thai gets its own class, everything
  // else counts as a letter.
  if (!IS_ASCII(c)) {
    if (IS_THAI(c))
      return kWbClassThaiLetter;
    return kWbClassAlphaLetter;
  }

  // ASCII: whitespace, then letters/digits, and punctuation for the rest.
  if (ASCII_IS_SPACE(c))
    return kWbClassSpace;
  if (ASCII_IS_ALPHA(c) || ASCII_IS_DIGIT(c))
    return kWbClassAlphaLetter;
  return kWbClassPunct;
}
boolean nsISemanticUnitScanner::next ( in wstring  text,
in long  length,
in long  pos,
in boolean  isLastBuffer,
out long  begin,
out long  end 
) [inherited]

next() Get the begin / end offset of the next unit in the current text

Parameters:
text: the text to be scanned
length: the number of characters in the text to be processed
pos: the current position
isLastBuffer: whether the buffer is the last one
begin: the begin offset of the next unit
end: the end offset of the next unit
Returns:
whether there are more units in the current text
nsresult nsSampleWordBreaker::NextWord ( const PRUnichar * aText,
PRUint32  aLen,
PRUint32  aPos,
PRUint32 * oNext,
PRBool * oNeedMoreText 
) [virtual, inherited]

Implements nsIWordBreaker.

Definition at line 172 of file nsSampleWordBreaker.cpp.

{
  // Finds the next word boundary after aPos: the index of the first
  // character following aPos whose class differs from that of aText[aPos].
  // The boundary is written to *oNext; *oNeedMoreText is PR_TRUE when the
  // scan reached aLen without finding a class change.
  // Always returns NS_OK.

  // Nothing to classify at or beyond the end of the buffer. Without this
  // guard aText[aPos] would be read out of bounds and *oNext would be
  // reported past aLen with *oNeedMoreText set to PR_FALSE.
  if (aPos >= aLen)
  {
    *oNext = aLen;
    *oNeedMoreText = PR_TRUE;
    return NS_OK;
  }

  // PRUint8 matches the return type of GetClass.
  const PRUint8 startClass = this->GetClass(aText[aPos]);

  // Advance while the characters stay in the starting class.
  PRUint32 cur = aPos + 1;
  while (cur < aLen && this->GetClass(aText[cur]) == startClass)
    cur++;

  if(kWbClassThaiLetter == startClass)
  {
       // need to call Thai word breaker from here
       // we should pass the whole Thai segment to the thai word breaker to find a shorter answer
  }
  *oNext = cur;
  *oNeedMoreText = (cur == aLen) ? PR_TRUE : PR_FALSE;
  return NS_OK;
}
nsresult nsSampleWordBreaker::PrevWord ( const PRUnichar * aText,
PRUint32  aLen,
PRUint32  aPos,
PRUint32 * oPrev,
PRBool * oNeedMoreText 
) [virtual, inherited]

Implements nsIWordBreaker.

Definition at line 196 of file nsSampleWordBreaker.cpp.

{
  // Walks backward from aPos to the start of the run of characters that
  // share the class of aText[aPos]. The start index is written to *oPrev;
  // *oNeedMoreText is PR_TRUE when the walk reached index 0.
  // Always returns NS_OK. aLen is not consulted by this body.
  const PRInt8 startClass = this->GetClass(aText[aPos]);

  PRUint32 cur = aPos;
  while (cur > 0)
  {
     // Stop as soon as the preceding character belongs to another class.
     const PRInt8 prevClass = this->GetClass(aText[cur - 1]);
     if (prevClass != startClass)
       break;
     --cur;
  }

  if(kWbClassThaiLetter == startClass)
  {
       // need to call Thai word breaker from here
       // we should pass the whole Thai segment to the thai word breaker to find a shorter answer
  }

  *oPrev = cur;
  *oNeedMoreText = (0 == cur) ? PR_TRUE : PR_FALSE;
  return NS_OK;
}
void nsISemanticUnitScanner::start ( in string  characterSet) [inherited]

start()

Starts up the semantic unit scanner with an optional character set, which acts as a hint to optimize the heuristics used to determine the language(s) of the processed text.

Parameters:
characterSet: the character set the text was originally encoded in (can be NULL)

The documentation for this class was generated from the following files: