Back to index

lightning-sunbird  0.9+nobinonly
Public Member Functions | Protected Member Functions
nsJISx4051LineBreaker Class Reference

#include <nsJISx4501LineBreaker.h>

Inheritance diagram for nsJISx4051LineBreaker:
Inheritance graph
[legend]
Collaboration diagram for nsJISx4051LineBreaker:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 nsJISx4051LineBreaker (const PRUnichar *aNoBegin, PRInt32 aNoBeginLen, const PRUnichar *aNoEnd, PRInt32 aNoEndLen)
virtual ~nsJISx4051LineBreaker ()
NS_IMETHOD BreakInBetween (const PRUnichar *aText1, PRUint32 aTextLen1, const PRUnichar *aText2, PRUint32 aTextLen2, PRBool *oCanBreak)
NS_IMETHOD Next (const PRUnichar *aText, PRUint32 aLen, PRUint32 aPos, PRUint32 *oNext, PRBool *oNeedMoreText)
NS_IMETHOD Prev (const PRUnichar *aText, PRUint32 aLen, PRUint32 aPos, PRUint32 *oPrev, PRBool *oNeedMoreText)

Protected Member Functions

PRInt8 GetClass (PRUnichar u)
PRInt8 ContextualAnalysis (PRUnichar prev, PRUnichar cur, PRUnichar next)
PRBool GetPair (PRInt8 c1, PRInt8 c2)

Detailed Description

Definition at line 43 of file nsJISx4501LineBreaker.h.


Constructor & Destructor Documentation

nsJISx4051LineBreaker::nsJISx4051LineBreaker ( const PRUnichar * aNoBegin,
PRInt32  aNoBeginLen,
const PRUnichar * aNoEnd,
PRInt32  aNoEndLen 
)

Definition at line 346 of file nsJISx4501LineBreaker.cpp.

{
  // The body contains no statements, so none of the constructor arguments
  // (aNoBegin, aNoBeginLen, aNoEnd, aNoEndLen) are used here.
}

nsJISx4051LineBreaker::~nsJISx4051LineBreaker ( ) [virtual]

Definition at line 353 of file nsJISx4501LineBreaker.cpp.

{
  // Empty body (presumably the destructor listed in the member summary):
  // no cleanup is performed here.
}

Member Function Documentation

NS_IMETHODIMP nsJISx4051LineBreaker::BreakInBetween ( const PRUnichar * aText1,
PRUint32  aTextLen1,
const PRUnichar * aText2,
PRUint32  aTextLen2,
PRBool * oCanBreak 
) [virtual]

Implements nsILineBreaker.

Definition at line 406 of file nsJISx4501LineBreaker.cpp.

{
  // Decides whether a line break may be placed between the last character of
  // aText1 and the first character of aText2, and stores the answer in
  // *oCanBreak.
  //
  // Returns NS_ERROR_NULL_POINTER when any pointer argument is null,
  // NS_OK otherwise.
  NS_ENSURE_TRUE(aText1, NS_ERROR_NULL_POINTER);
  NS_ENSURE_TRUE(aText2, NS_ERROR_NULL_POINTER);
  // The result pointer is written on every path below; reject null up front
  // (Next() and Prev() validate their out-parameters the same way).
  NS_ENSURE_TRUE(oCanBreak, NS_ERROR_NULL_POINTER);

  // No break when either side is empty, or when the boundary would fall
  // inside a surrogate pair. The length tests come first so the indexing in
  // the surrogate test is only evaluated for non-empty buffers.
  if((0 == aTextLen1) || (0==aTextLen2) ||
     (IS_HIGH_SURROGATE(aText1[aTextLen1-1]) &&
      IS_LOW_SURROGATE(aText2[0])) )  //Do not separate a surrogate pair
  {
     *oCanBreak = PR_FALSE;
     return NS_OK;
  }

  //search for CJK characters until a space is found. 
  //if CJK char is found before space, use 4051, otherwise western
  PRInt32 cur;

  // Walk backwards through aText1 from the boundary...
  for (cur= aTextLen1-1; cur>=0; cur--)
  {
    if (IS_SPACE(aText1[cur]))
      break;
    if (IS_CJK_CHAR(aText1[cur]))
      goto ROUTE_CJK_BETWEEN;
  }

  // ...then forwards through aText2 from the boundary.
  for (cur= 0; cur < (PRInt32)aTextLen2; cur++)
  {
    if (IS_SPACE(aText2[cur]))
      break;
    if (IS_CJK_CHAR(aText2[cur]))
      goto ROUTE_CJK_BETWEEN;
  }

  //now apply western rule: break only next to a space.
  *oCanBreak = IS_SPACE(aText1[aTextLen1-1]) || IS_SPACE(aText2[0]);
  return NS_OK;

ROUTE_CJK_BETWEEN:

  // c1 is the class of the character before the boundary, c2 the class of the
  // character after it. Characters flagged by NEED_CONTEXTUAL_ANALYSIS are
  // classified using their neighbours; 0 stands in for a missing neighbour.
  PRInt8 c1, c2;
  if(NEED_CONTEXTUAL_ANALYSIS(aText1[aTextLen1-1]))
    c1 = this->ContextualAnalysis((aTextLen1>1)?aText1[aTextLen1-2]:0,
                                  aText1[aTextLen1-1],
                                  aText2[0]);
  else 
    c1 = this->GetClass(aText1[aTextLen1-1]);

  if(NEED_CONTEXTUAL_ANALYSIS(aText2[0]))
    c2 = this->ContextualAnalysis(aText1[aTextLen1-1],
                                  aText2[0],
                                  (aTextLen2>1)?aText2[1]:0);
  else 
    c2 = this->GetClass(aText2[0]);

  /* Handle cases for THAI */
  if((CLASS_THAI == c1) && (CLASS_THAI == c2))
  {
     // Thai on both sides: a break is allowed only when TrbWordBreakPos
     // returns 0 for this boundary.
     *oCanBreak = (0 == TrbWordBreakPos(aText1, aTextLen1, aText2, aTextLen2));
  }
  else 
  {
     // Everything else is decided by the class-pair table.
     *oCanBreak = GetPair(c1,c2);
  }
  return NS_OK;
}

Here is the call graph for this function:

PRInt8 nsJISx4051LineBreaker::ContextualAnalysis ( PRUnichar  prev,
PRUnichar  cur,
PRUnichar  next 
) [protected]

Definition at line 370 of file nsJISx4501LineBreaker.cpp.

{
   // Classifies `cur` taking its neighbours `prev` and `next` into account.
   // Only comma, period and the right single quotation mark get special
   // treatment; whenever no special rule fires, the context-free class from
   // GetClass(cur) is returned.

   if(U_COMMA == cur)
   {
     // A comma sandwiched between two ASCII digits is part of a number.
     const PRBool betweenDigits = IS_ASCII_DIGIT (prev) && IS_ASCII_DIGIT (next);
     if(betweenDigits)
       return NUMERIC_CLASS;
     return this->GetClass(cur);
   }

   if(U_PERIOD == cur)
   {
     // A period followed by a digit and preceded by a digit or a space
     // (U+0020) is numeric.
     const PRBool afterDigitOrSpace = IS_ASCII_DIGIT (prev) || (0x0020 == prev);
     if(afterDigitOrSpace && IS_ASCII_DIGIT (next))
       return NUMERIC_CLASS;

     // Otherwise the period is treated as an ordinary character when the
     // following class is above 5 (numeric, 7, character; Thai does not
     // matter either way) and the preceding class is 0 or above 5. This
     // keeps lines from breaking around such a full stop, still lets it end
     // a line before CJK characters, and keeps it from starting a line
     // (see bug 164759).
     const PRUint8 prevClass = GetClass(prev);
     const PRBool prevQualifies = (0 == prevClass) || (prevClass > 5);
     if(prevQualifies && GetClass(next) > 5)
       return CHARACTER_CLASS;
     return this->GetClass(cur);
   }

   // The right single quotation mark is sometimes used as the apostrophe in
   // words like "it's"; unless a space follows, treat it as a character.
   if((U_RIGHT_SINGLE_QUOTATION_MARK == cur) && (U_SPACE != next))
     return CHARACTER_CLASS;

   return this->GetClass(cur);
}

Here is the call graph for this function:

Here is the caller graph for this function:

PRInt8 nsJISx4051LineBreaker::GetClass ( PRUnichar  u) [protected]

Definition at line 246 of file nsJISx4501LineBreaker.cpp.

{
   // Maps a UTF-16 code unit to its line-breaking class. The high byte picks
   // the range, the low byte is the offset inside that range. The checks
   // below are order-sensitive: the first matching range wins.
   const PRUint16 hi = u & 0xFF00;
   const PRUint16 lo = u & 0x00ff;

   // Ranges backed by lookup tables, with the Thai test slotted in right
   // after the U+00xx table.
   if(0x0000 == hi)
   {
     return GETCLASSFROMTABLE(gLBClass00, lo);
   }
   if(th_isthai(u))
   {
     return CLASS_THAI;
   }
   if(0x2000 == hi)
   {
     return GETCLASSFROMTABLE(gLBClass20, lo);
   }
   if(0x2100 == hi)
   {
     return GETCLASSFROMTABLE(gLBClass21, lo);
   }
   if(0x3000 == hi)
   {
     return GETCLASSFROMTABLE(gLBClass30, lo);
   }

   const PRBool cjkOrYi      = (0x3200 <= u) && (u <= 0xA4CF);   // CJK and Yi
   const PRBool hangul       = (0xAC00 <= hi) && (hi <= 0xD7FF); // Hangul
   const PRBool compatBlocks = (0xf900 <= hi) && (hi <= 0xfaff);
   if(cjkOrYi || hangul || compatBlocks)
   {
     return 5; // CJK character, Han, and Han Compatibility
   }

   if(0xff00 == hi)
   {
     if(lo < 0x0060)
     {
       // Fullwidth ASCII variant: reuse the class of the ASCII counterpart,
       // which sits 0x20 higher in the U+00xx table.
       return GETCLASSFROMTABLE(gLBClass00, (lo+0x20));
     }

     if(lo < 0x00a0)
     {
       // Halfwidth Katakana variants: a handful take the class of a
       // specific U+30xx character, the rest are class 1 or 5.
       switch(lo)
       {
         case 0x61: return GetClass(0x3002);
         case 0x62: return GetClass(0x300c);
         case 0x63: return GetClass(0x300d);
         case 0x64: return GetClass(0x3001);
         case 0x65: return GetClass(0x30fb);
         case 0x9e: return GetClass(0x309b);
         case 0x9f: return GetClass(0x309c);
         default:
           break;
       }
       if(IS_HALFWIDTH_IN_JISx4051_CLASS3(u))
         return 1; // jis x4051 class 3
       return 5;   // jis x4051 class 11
     }

     if(lo < 0x00e0)
     {
       return 8; // Halfwidth Hangul variants
     }

     if(lo < 0x00f0)
     {
       // U+FFE0..U+FFEF: classify via the listed counterpart character
       // (0x0000 entries fall through to the class of U+0000).
       static PRUnichar counterpart[16] =
       {
         0x00A2, 0x00A3, 0x00AC, 0x00AF, 0x00A6, 0x00A5, 0x20A9, 0x0000,
         0x2502, 0x2190, 0x2191, 0x2192, 0x2193, 0x25A0, 0x25CB, 0x0000
       };
       return GetClass(counterpart[lo - 0x00e0]);
     }

     return 8;
   }

   if(0x3100 == hi)
   {
     if(lo <= 0xbf)
     {
       // Hangul Compatibility Jamo, Bopomofo, Kanbun
       // XXX: This is per UAX #14, but UAX #14 may change
       // the line breaking rules about Kanbun and Bopomofo.
       return 5;
     }
     if(lo >= 0xf0)
     {
       return 1; // Katakana small letters for Ainu
     }
     return 8;   // unassigned
   }

   return 8; // others
}

Here is the call graph for this function:

Here is the caller graph for this function:

PRBool nsJISx4051LineBreaker::GetPair ( PRInt8  c1,
PRInt8  c2 
) [protected]

Definition at line 337 of file nsJISx4501LineBreaker.cpp.

{
  // Looks up the class pair (c1, c2) in gPair: row c1, bit c2.
  // The result is true exactly when that bit is clear.
  NS_ASSERTION( c1 < MAX_CLASSES ,"illegal classes 1");
  NS_ASSERTION( c2 < MAX_CLASSES ,"illegal classes 2");

  if ((gPair[c1] >> c2) & 0x0001)
    return PR_FALSE;
  return PR_TRUE;
}

Here is the caller graph for this function:

NS_IMETHODIMP nsJISx4051LineBreaker::Next ( const PRUnichar * aText,
PRUint32  aLen,
PRUint32  aPos,
PRUint32 * oNext,
PRBool * oNeedMoreText 
) [virtual]

Implements nsILineBreaker.

Definition at line 476 of file nsJISx4501LineBreaker.cpp.

{
  // Finds the next break position at or after aPos in aText[0..aLen).
  // On success *oNext receives the position; *oNeedMoreText is PR_TRUE when
  // the end of the buffer was reached without finding a break (in which case
  // *oNext is aLen).
  NS_ENSURE_TRUE(aText, NS_ERROR_NULL_POINTER);
  NS_ENSURE_TRUE(oNext, NS_ERROR_NULL_POINTER);
  NS_ENSURE_TRUE(oNeedMoreText, NS_ERROR_NULL_POINTER);
  NS_ENSURE_TRUE(aPos <= aLen, NS_ERROR_ILLEGAL_VALUE);

  // Look ahead: a space seen before any CJK character means the western rule
  // applies and the space itself is the break position. A CJK character seen
  // first switches to the JIS X 4051 pair rules below.
  PRBool useJISx4051 = PR_FALSE;
  for (PRUint32 scan = aPos; scan < aLen; ++scan)
  {
    if (IS_SPACE(aText[scan]))
    {
      *oNext = scan;
      *oNeedMoreText = PR_FALSE;
      return NS_OK;
    }
    if (IS_CJK_CHAR(aText[scan]))
    {
      useJISx4051 = PR_TRUE;
      break;
    }
  }

  if (!useJISx4051)
  {
    // Neither a space nor a CJK character up to the end of the buffer.
    *oNext = aLen;
    *oNeedMoreText = PR_TRUE;
    return NS_OK;
  }

  // Class of the character at aPos; 0 stands in for a missing neighbour.
  PRUint32 idx = aPos;
  PRInt8 leftClass;
  if(NEED_CONTEXTUAL_ANALYSIS(aText[idx]))
  {
    const PRUnichar before = (idx>0) ? aText[idx-1] : 0;
    const PRUnichar after  = (idx<(aLen-1)) ? aText[idx+1] : 0;
    leftClass = this->ContextualAnalysis(before, aText[idx], after);
  }
  else
  {
    leftClass = this->GetClass(aText[idx]);
  }

  // Thai text is delegated to TrbFollowing.
  if(CLASS_THAI == leftClass)
  {
     *oNext = PRUint32(TrbFollowing(aText, aLen, aPos));
     *oNeedMoreText = PR_FALSE;
     return NS_OK;
  }

  // Slide a (left, right) class pair forward until the pair table allows a
  // break between the two characters. idx is at least 1 inside this loop, so
  // the preceding character always exists.
  for(++idx; idx < aLen; ++idx)
  {
     PRInt8 rightClass;
     if(NEED_CONTEXTUAL_ANALYSIS(aText[idx]))
     {
       const PRUnichar after = (idx<(aLen-1)) ? aText[idx+1] : 0;
       rightClass = this->ContextualAnalysis(aText[idx-1], aText[idx], after);
     }
     else
     {
       rightClass = this->GetClass(aText[idx]);
     }

     if(GetPair(leftClass, rightClass))
     {
       *oNext = idx;
       *oNeedMoreText = PR_FALSE;
       return NS_OK;
     }
     leftClass = rightClass;
  }

  // Ran off the end without finding a break opportunity.
  *oNext = aLen;
  *oNeedMoreText = PR_TRUE;
  return NS_OK;
}

Here is the call graph for this function:

NS_IMETHODIMP nsJISx4051LineBreaker::Prev ( const PRUnichar * aText,
PRUint32  aLen,
PRUint32  aPos,
PRUint32 * oPrev,
PRBool * oNeedMoreText 
) [virtual]

Implements nsILineBreaker.

Definition at line 545 of file nsJISx4501LineBreaker.cpp.

{
  // Finds the previous break position before aPos in aText[0..aLen).
  // On success *oPrev receives the position; *oNeedMoreText is PR_TRUE when
  // the start of the buffer was reached without finding a break (in which
  // case *oPrev is 0).
  //
  // Returns NS_ERROR_NULL_POINTER for a null pointer argument and
  // NS_ERROR_ILLEGAL_VALUE when aPos lies beyond aLen.
  NS_ENSURE_TRUE(aText, NS_ERROR_NULL_POINTER);
  NS_ENSURE_TRUE(oPrev, NS_ERROR_NULL_POINTER);
  NS_ENSURE_TRUE(oNeedMoreText, NS_ERROR_NULL_POINTER);
  // Same bound that Next() enforces: everything below indexes aText[aPos-1],
  // which would be past the end of the buffer for aPos > aLen.
  NS_ENSURE_TRUE(aPos <= aLen, NS_ERROR_ILLEGAL_VALUE);

  // aPos is unsigned, so "aPos - 1" would wrap around to a huge index when
  // aPos is 0 and the scan below would read far outside the buffer. Nothing
  // precedes position 0, so report the same result as reaching the start.
  if (0 == aPos)
  {
    *oPrev = 0;
    *oNeedMoreText = PR_TRUE;
    return NS_OK;
  }

  //backward check for CJK characters until a space is found. 
  //if CJK char is found before space, use 4051, otherwise western
  // NOTE(review): the scan stops before index 0, so the first character of
  // the buffer is never examined here - confirm this is intended.
  PRUint32 cur;
  for (cur = aPos - 1; cur > 0; --cur)
  {
    if (IS_SPACE(aText[cur]))
    {
      if (cur != aPos - 1) // XXXldb Why?
        ++cur;
      *oPrev = cur;
      *oNeedMoreText = PR_FALSE;
      return NS_OK;
    }
    if (IS_CJK_CHAR(aText[cur]))
      goto ROUTE_CJK_PREV;
  }

  // Reached the start without seeing a space or a CJK character.
  *oPrev = 0;
  *oNeedMoreText = PR_TRUE;
  return NS_OK;

ROUTE_CJK_PREV:
  // c2 is the class of the character just before aPos; 0 stands in for a
  // missing neighbour in the contextual analysis.
  cur = aPos;
  PRInt8 c1, c2;
  if(NEED_CONTEXTUAL_ANALYSIS(aText[cur-1]))
  {
    c2 = this->ContextualAnalysis(((cur-1)>0)?aText[cur-2]:0,
                                  aText[cur-1],
                                  (cur<aLen) ?aText[cur]:0);
  } else  {
    c2 = this->GetClass(aText[cur-1]);
  }
  // To Do: 
  //
  // Should handle CLASS_THAI here
  //
  // Slide a (c1, c2) class pair backwards until the pair table allows a
  // break between aText[cur-1] and aText[cur].
  for(cur--; cur > 0; cur--)
  {
     if(NEED_CONTEXTUAL_ANALYSIS(aText[cur-1]))
     {
       c1= this->ContextualAnalysis(((cur-1)>0)?aText[cur-2]:0,
                                  aText[cur-1],
                                  (cur<aLen) ?aText[cur]:0);
     } else {
       c1 = this->GetClass(aText[cur-1]);
     }

     if(GetPair(c1, c2)) {
       *oPrev = cur;
       *oNeedMoreText = PR_FALSE;
       return NS_OK;
     }
     c2 = c1;
  }
  // Ran off the start without finding a break opportunity.
  *oPrev = 0;
  *oNeedMoreText = PR_TRUE;
  return NS_OK;
}

Here is the call graph for this function:


The documentation for this class was generated from the following files: