Back to index

lightning-sunbird  0.9+nobinonly
nsBidi.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
00002  *
00003  * ***** BEGIN LICENSE BLOCK *****
00004  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00005  *
00006  * The contents of this file are subject to the Mozilla Public License Version
00007  * 1.1 (the "License"); you may not use this file except in compliance with
00008  * the License. You may obtain a copy of the License at
00009  * http://www.mozilla.org/MPL/
00010  *
00011  * Software distributed under the License is distributed on an "AS IS" basis,
00012  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00013  * for the specific language governing rights and limitations under the
00014  * License.
00015  *
00016  * The Original Code is mozilla.org code.
00017  *
00018  * The Initial Developer of the Original Code is
00019  * IBM Corporation.
00020  * Portions created by the Initial Developer are Copyright (C) 2000
00021  * the Initial Developer. All Rights Reserved.
00022  *
00023  * Contributor(s):
00024  *   Simon Montagu
00025  *
00026  * Alternatively, the contents of this file may be used under the terms of
00027  * either of the GNU General Public License Version 2 or later (the "GPL"),
00028  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00029  * in which case the provisions of the GPL or the LGPL are applicable instead
00030  * of those above. If you wish to allow use of your version of this file only
00031  * under the terms of either the GPL or the LGPL, and not to allow others to
00032  * use your version of this file under the terms of the MPL, indicate your
00033  * decision by deleting the provisions above and replace them with the notice
00034  * and other provisions required by the GPL or the LGPL. If you do not delete
00035  * the provisions above, a recipient may use your version of this file under
00036  * the terms of any one of the MPL, the GPL or the LGPL.
00037  *
00038  * ***** END LICENSE BLOCK ***** */
00039 #ifdef IBMBIDI
00040 
00041 #include "prmem.h"
00042 #include "nsBidi.h"
00043 #include "nsBidiUtils.h"
00044 #include "bidicattable.h"
00045 #include "symmtable.h"
00046 #include "nsCRT.h"
00047 
00048 static nsCharType ebc2ucd[15] = {
00049   eCharType_OtherNeutral, /* Placeholder -- there will never be a 0 index value */
00050   eCharType_LeftToRight,
00051   eCharType_RightToLeft,
00052   eCharType_RightToLeftArabic,
00053   eCharType_ArabicNumber,
00054   eCharType_EuropeanNumber,
00055   eCharType_EuropeanNumberSeparator,
00056   eCharType_EuropeanNumberTerminator,
00057   eCharType_CommonNumberSeparator,
00058   eCharType_OtherNeutral,
00059   eCharType_DirNonSpacingMark,
00060   eCharType_BoundaryNeutral,
00061   eCharType_BlockSeparator,
00062   eCharType_SegmentSeparator,
00063   eCharType_WhiteSpaceNeutral
00064 };
00065 
00066 static nsCharType cc2ucd[5] = {
00067   eCharType_LeftToRightEmbedding,
00068   eCharType_RightToLeftEmbedding,
00069   eCharType_PopDirectionalFormat,
00070   eCharType_LeftToRightOverride,
00071   eCharType_RightToLeftOverride
00072 };
00073 
00074 /*  Comparing the description of the Bidi algorithm with this implementation
00075     is easier with the same names for the Bidi types in the code as there.
00076 */
00077 enum { 
00078     L =   eCharType_LeftToRight,
00079     R =   eCharType_RightToLeft,
00080     EN =  eCharType_EuropeanNumber,
00081     ES =  eCharType_EuropeanNumberSeparator,
00082     ET =  eCharType_EuropeanNumberTerminator,
00083     AN =  eCharType_ArabicNumber,
00084     CS =  eCharType_CommonNumberSeparator,
00085     B =   eCharType_BlockSeparator,
00086     S =   eCharType_SegmentSeparator,
00087     WS =  eCharType_WhiteSpaceNeutral,
00088     O_N = eCharType_OtherNeutral,
00089     LRE = eCharType_LeftToRightEmbedding,
00090     LRO = eCharType_LeftToRightOverride,
00091     AL =  eCharType_RightToLeftArabic,
00092     RLE = eCharType_RightToLeftEmbedding,
00093     RLO = eCharType_RightToLeftOverride,
00094     PDF = eCharType_PopDirectionalFormat,
00095     NSM = eCharType_DirNonSpacingMark,
00096     BN =  eCharType_BoundaryNeutral,
00097     dirPropCount
00098 };
00099 
00100 /* to avoid some conditional statements, use tiny constant arrays */
00101 static Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
00102 static Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
00103 static Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
00104 
00105 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
00106 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
00107 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
00108 
00109 /*
00110  * General implementation notes:
00111  *
00112  * Throughout the implementation, there are comments like (W2) that refer to
00113  * rules of the Bidi algorithm in its version 5, in this example to the second
00114  * rule of the resolution of weak types.
00115  *
00116  * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
00117  * character according to UTF-16, the second UChar gets the directional property of
00118  * the entire character assigned, while the first one gets a BN, a boundary
00119  * neutral, type, which is ignored by most of the algorithm according to
00120  * rule (X9) and the implementation suggestions of the Bidi algorithm.
00121  *
00122  * Later, AdjustWSLevels() will set the level for each BN to that of the
00123  * following character (UChar), which results in surrogate pairs getting the
00124  * same level on each of their surrogates.
00125  *
00126  * In a UTF-8 implementation, the same thing could be done: the last byte of
00127  * a multi-byte sequence would get the "real" property, while all previous
00128  * bytes of that sequence would get BN.
00129  *
00130  * It is not possible to assign all those parts of a character the same real
00131  * property because this would fail in the resolution of weak types with rules
00132  * that look at immediately surrounding types.
00133  *
00134  * As a related topic, this implementation does not remove Boundary Neutral
00135  * types from the input, but ignores them whereever this is relevant.
00136  * For example, the loop for the resolution of the weak types reads
00137  * types until it finds a non-BN.
00138  * Also, explicit embedding codes are neither changed into BN nor removed.
00139  * They are only treated the same way real BNs are.
00140  * As stated before, AdjustWSLevels() takes care of them at the end.
00141  * For the purpose of conformance, the levels of all these codes
00142  * do not matter.
00143  *
00144  * Note that this implementation never modifies the dirProps
00145  * after the initial setup.
00146  *
00147  *
00148  * In this implementation, the resolution of weak types (Wn),
00149  * neutrals (Nn), and the assignment of the resolved level (In)
00150  * are all done in one single loop, in ResolveImplicitLevels().
00151  * Changes of dirProp values are done on the fly, without writing
00152  * them back to the dirProps array.
00153  *
00154  *
00155  * This implementation contains code that allows to bypass steps of the
00156  * algorithm that are not needed on the specific paragraph
00157  * in order to speed up the most common cases considerably,
00158  * like text that is entirely LTR, or RTL text without numbers.
00159  *
00160  * Most of this is done by setting a bit for each directional property
00161  * in a flags variable and later checking for whether there are
00162  * any LTR characters or any RTL characters, or both, whether
00163  * there are any explicit embedding codes, etc.
00164  *
00165  * If the (Xn) steps are performed, then the flags are re-evaluated,
00166  * because they will then not contain the embedding codes any more
00167  * and will be adjusted for override codes, so that subsequently
00168  * more bypassing may be possible than what the initial flags suggested.
00169  *
00170  * If the text is not mixed-directional, then the
00171  * algorithm steps for the weak type resolution are not performed,
00172  * and all levels are set to the paragraph level.
00173  *
00174  * If there are no explicit embedding codes, then the (Xn) steps
00175  * are not performed.
00176  *
00177  * If embedding levels are supplied as a parameter, then all
00178  * explicit embedding codes are ignored, and the (Xn) steps
00179  * are not performed.
00180  *
00181  * White Space types could get the level of the run they belong to,
00182  * and are checked with a test of (flags&MASK_EMBEDDING) to
00183  * consider if the paragraph direction should be considered in
00184  * the flags variable.
00185  *
00186  * If there are no White Space types in the paragraph, then
00187  * (L1) is not necessary in AdjustWSLevels().
00188  */
00189 nsBidi::nsBidi()
00190 {
00191   Init();
00192 
00193   mMayAllocateText=PR_TRUE;
00194   mMayAllocateRuns=PR_TRUE;
00195 }
00196 
00197 nsBidi::nsBidi(PRUint32 aMaxLength, PRUint32 aMaxRunCount)
00198 {
00199   Init();
00200   nsresult rv = NS_OK;
00201 
00202   /* allocate memory for arrays as requested */
00203   if(aMaxLength>0) {
00204     if( !GETINITIALDIRPROPSMEMORY(aMaxLength) ||
00205         !GETINITIALLEVELSMEMORY(aMaxLength)
00206       ) {
00207       mMayAllocateText=PR_FALSE;
00208       rv = NS_ERROR_OUT_OF_MEMORY;
00209     }
00210   } else {
00211     mMayAllocateText=PR_TRUE;
00212   }
00213 
00214   if(aMaxRunCount>0) {
00215     if(aMaxRunCount==1) {
00216       /* use simpleRuns[] */
00217       mRunsSize=sizeof(Run);
00218     } else if(!GETINITIALRUNSMEMORY(aMaxRunCount)) {
00219       mMayAllocateRuns=PR_FALSE;
00220       rv = NS_ERROR_OUT_OF_MEMORY;
00221     }
00222   } else {
00223     mMayAllocateRuns=PR_TRUE;
00224   }
00225 
00226   if(NS_FAILED(rv)) {
00227     Free();
00228   }
00229 }
00230 
00231 nsBidi::~nsBidi()
00232 {
00233   Free();
00234 }
00235 
00236 void nsBidi::Init()
00237 {
00238   /* reset the object, all pointers NULL, all flags PR_FALSE, all sizes 0 */
00239   mLength = 0;
00240   mParaLevel = 0;
00241   mFlags = 0;
00242   mDirection = NSBIDI_LTR;
00243   mTrailingWSStart = 0;
00244 
00245   mDirPropsSize = 0;
00246   mLevelsSize = 0;
00247   mRunsSize = 0;
00248   mRunCount = -1;
00249 
00250   mDirProps=NULL;
00251   mLevels=NULL;
00252   mRuns=NULL;
00253 
00254   mDirPropsMemory=NULL;
00255   mLevelsMemory=NULL;
00256   mRunsMemory=NULL;
00257 
00258   mMayAllocateText=PR_FALSE;
00259   mMayAllocateRuns=PR_FALSE;
00260   
00261 }
00262 
00263 /*
00264  * We are allowed to allocate memory if aMemory==NULL or
00265  * aMayAllocate==PR_TRUE for each array that we need.
00266  * We also try to grow and shrink memory as needed if we
00267  * allocate it.
00268  *
00269  * Assume aSizeNeeded>0.
00270  * If *aMemory!=NULL, then assume *aSize>0.
00271  *
00272  * ### this realloc() may unnecessarily copy the old data,
00273  * which we know we don't need any more;
00274  * is this the best way to do this??
00275  */
00276 PRBool nsBidi::GetMemory(void **aMemory, PRSize *aSize, PRBool aMayAllocate, PRSize aSizeNeeded)
00277 {
00278   /* check for existing memory */
00279   if(*aMemory==NULL) {
00280     /* we need to allocate memory */
00281     if(!aMayAllocate) {
00282       return PR_FALSE;
00283     } else {
00284       *aMemory=PR_MALLOC(aSizeNeeded);
00285       if (*aMemory!=NULL) {
00286         *aSize=aSizeNeeded;
00287         return PR_TRUE;
00288       } else {
00289         *aSize=0;
00290         return PR_FALSE;
00291       }
00292     }
00293   } else {
00294     /* there is some memory, is it enough or too much? */
00295     if(aSizeNeeded>*aSize && !aMayAllocate) {
00296       /* not enough memory, and we must not allocate */
00297       return PR_FALSE;
00298     } else if(aSizeNeeded!=*aSize && aMayAllocate) {
00299       /* we may try to grow or shrink */
00300       void *memory=PR_REALLOC(*aMemory, aSizeNeeded);
00301 
00302       if(memory!=NULL) {
00303         *aMemory=memory;
00304         *aSize=aSizeNeeded;
00305         return PR_TRUE;
00306       } else {
00307         /* we failed to grow */
00308         return PR_FALSE;
00309       }
00310     } else {
00311       /* we have at least enough memory and must not allocate */
00312       return PR_TRUE;
00313     }
00314   }
00315 }
00316 
00317 void nsBidi::Free()
00318 {
00319   PR_FREEIF(mDirPropsMemory);
00320   PR_FREEIF(mLevelsMemory);
00321   PR_FREEIF(mRunsMemory);
00322 }
00323 
00324 /* SetPara ------------------------------------------------------------ */
00325 
00326 nsresult nsBidi::SetPara(const PRUnichar *aText, PRInt32 aLength,
00327                          nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels)
00328 {
00329   nsBidiDirection direction;
00330 
00331   /* check the argument values */
00332   if(aText==NULL ||
00333      (NSBIDI_MAX_EXPLICIT_LEVEL<aParaLevel) && !IS_DEFAULT_LEVEL(aParaLevel) ||
00334      aLength<-1
00335     ) {
00336     return NS_ERROR_INVALID_ARG;
00337   }
00338 
00339   if(aLength==-1) {
00340     aLength=nsCRT::strlen(aText);
00341   }
00342 
00343   /* initialize member data */
00344   mLength=aLength;
00345   mParaLevel=aParaLevel;
00346   mDirection=NSBIDI_LTR;
00347   mTrailingWSStart=aLength;  /* the levels[] will reflect the WS run */
00348 
00349   mDirProps=NULL;
00350   mLevels=NULL;
00351   mRuns=NULL;
00352 
00353   if(aLength==0) {
00354     /*
00355      * For an empty paragraph, create an nsBidi object with the aParaLevel and
00356      * the flags and the direction set but without allocating zero-length arrays.
00357      * There is nothing more to do.
00358      */
00359     if(IS_DEFAULT_LEVEL(aParaLevel)) {
00360       mParaLevel&=1;
00361     }
00362     if(aParaLevel&1) {
00363       mFlags=DIRPROP_FLAG(R);
00364       mDirection=NSBIDI_RTL;
00365     } else {
00366       mFlags=DIRPROP_FLAG(L);
00367       mDirection=NSBIDI_LTR;
00368     }
00369 
00370     mRunCount=0;
00371     return NS_OK;
00372   }
00373 
00374   mRunCount=-1;
00375 
00376   /*
00377    * Get the directional properties,
00378    * the flags bit-set, and
00379    * determine the partagraph level if necessary.
00380    */
00381   if(GETDIRPROPSMEMORY(aLength)) {
00382     mDirProps=mDirPropsMemory;
00383     GetDirProps(aText);
00384   } else {
00385     return NS_ERROR_OUT_OF_MEMORY;
00386   }
00387 
00388   /* are explicit levels specified? */
00389   if(aEmbeddingLevels==NULL) {
00390     /* no: determine explicit levels according to the (Xn) rules */\
00391     if(GETLEVELSMEMORY(aLength)) {
00392       mLevels=mLevelsMemory;
00393       direction=ResolveExplicitLevels();
00394     } else {
00395       return NS_ERROR_OUT_OF_MEMORY;
00396     }
00397   } else {
00398     /* set BN for all explicit codes, check that all levels are aParaLevel..NSBIDI_MAX_EXPLICIT_LEVEL */
00399     mLevels=aEmbeddingLevels;
00400     nsresult rv = CheckExplicitLevels(&direction);
00401     if(NS_FAILED(rv)) {
00402       return rv;
00403     }
00404   }
00405 
00406   /*
00407    * The steps after (X9) in the Bidi algorithm are performed only if
00408    * the paragraph text has mixed directionality!
00409    */
00410   switch(direction) {
00411     case NSBIDI_LTR:
00412       /* make sure paraLevel is even */
00413       mParaLevel=(mParaLevel+1)&~1;
00414 
00415       /* all levels are implicitly at paraLevel (important for GetLevels()) */
00416       mTrailingWSStart=0;
00417       break;
00418     case NSBIDI_RTL:
00419       /* make sure paraLevel is odd */
00420       mParaLevel|=1;
00421 
00422       /* all levels are implicitly at paraLevel (important for GetLevels()) */
00423       mTrailingWSStart=0;
00424       break;
00425     default:
00426       /*
00427        * If there are no external levels specified and there
00428        * are no significant explicit level codes in the text,
00429        * then we can treat the entire paragraph as one run.
00430        * Otherwise, we need to perform the following rules on runs of
00431        * the text with the same embedding levels. (X10)
00432        * "Significant" explicit level codes are ones that actually
00433        * affect non-BN characters.
00434        * Examples for "insignificant" ones are empty embeddings
00435        * LRE-PDF, LRE-RLE-PDF-PDF, etc.
00436        */
00437       if(aEmbeddingLevels==NULL && !(mFlags&DIRPROP_FLAG_MULTI_RUNS)) {
00438         ResolveImplicitLevels(0, aLength,
00439                     GET_LR_FROM_LEVEL(mParaLevel),
00440                     GET_LR_FROM_LEVEL(mParaLevel));
00441       } else {
00442         /* sor, eor: start and end types of same-level-run */
00443         nsBidiLevel *levels=mLevels;
00444         PRInt32 start, limit=0;
00445         nsBidiLevel level, nextLevel;
00446         DirProp sor, eor;
00447 
00448         /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
00449         level=mParaLevel;
00450         nextLevel=levels[0];
00451         if(level<nextLevel) {
00452           eor=GET_LR_FROM_LEVEL(nextLevel);
00453         } else {
00454           eor=GET_LR_FROM_LEVEL(level);
00455         }
00456 
00457         do {
00458           /* determine start and limit of the run (end points just behind the run) */
00459 
00460           /* the values for this run's start are the same as for the previous run's end */
00461           sor=eor;
00462           start=limit;
00463           level=nextLevel;
00464 
00465           /* search for the limit of this run */
00466           while(++limit<aLength && levels[limit]==level) {}
00467 
00468           /* get the correct level of the next run */
00469           if(limit<aLength) {
00470             nextLevel=levels[limit];
00471           } else {
00472             nextLevel=mParaLevel;
00473           }
00474 
00475           /* determine eor from max(level, nextLevel); sor is last run's eor */
00476           if((level&~NSBIDI_LEVEL_OVERRIDE)<(nextLevel&~NSBIDI_LEVEL_OVERRIDE)) {
00477             eor=GET_LR_FROM_LEVEL(nextLevel);
00478           } else {
00479             eor=GET_LR_FROM_LEVEL(level);
00480           }
00481 
00482           /* if the run consists of overridden directional types, then there
00483           are no implicit types to be resolved */
00484           if(!(level&NSBIDI_LEVEL_OVERRIDE)) {
00485             ResolveImplicitLevels(start, limit, sor, eor);
00486           }
00487         } while(limit<aLength);
00488       }
00489 
00490       /* reset the embedding levels for some non-graphic characters (L1), (X9) */
00491       AdjustWSLevels();
00492       break;
00493   }
00494 
00495   mDirection=direction;
00496   return NS_OK;
00497 }
00498 
00499 /* perform (P2)..(P3) ------------------------------------------------------- */
00500 
00501 /*
00502  * Get the directional properties for the text,
00503  * calculate the flags bit-set, and
00504  * determine the partagraph level if necessary.
00505  */
00506 void nsBidi::GetDirProps(const PRUnichar *aText)
00507 {
00508   DirProp *dirProps=mDirPropsMemory;    /* mDirProps is const */
00509 
00510   PRInt32 i=0, length=mLength;
00511   Flags flags=0;      /* collect all directionalities in the text */
00512   PRUnichar uchar;
00513   DirProp dirProp;
00514 
00515   if(IS_DEFAULT_LEVEL(mParaLevel)) {
00516     /* determine the paragraph level (P2..P3) */
00517     for(;;) {
00518       uchar=aText[i];
00519       if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(aText[i+1])) {
00520         /* not a surrogate pair */
00521         flags|=DIRPROP_FLAG(dirProps[i]=dirProp=GetCharType((PRUint32)uchar));
00522       } else {
00523         /* a surrogate pair */
00524         dirProps[i++]=BN;   /* first surrogate in the pair gets the BN type */
00525         flags|=DIRPROP_FLAG(dirProps[i]=dirProp=GetCharType(GET_UTF_32(uchar, aText[i])))|DIRPROP_FLAG(BN);
00526       }
00527       ++i;
00528       if(dirProp==L) {
00529         mParaLevel=0;
00530         break;
00531       } else if(dirProp==R || dirProp==AL) {
00532         mParaLevel=1;
00533         break;
00534       } else if(i==length) {
00535         /*
00536          * see comment in nsIBidi.h:
00537          * the DEFAULT_XXX values are designed so that
00538          * their bit 0 alone yields the intended default
00539          */
00540         mParaLevel&=1;
00541         break;
00542       }
00543     }
00544   }
00545 
00546   /* get the rest of the directional properties and the flags bits */
00547   while(i<length) {
00548     uchar=aText[i];
00549     if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(aText[i+1])) {
00550       /* not a surrogate pair */
00551       flags|=DIRPROP_FLAG(dirProps[i]=GetCharType((PRUint32)uchar));
00552     } else {
00553       /* a surrogate pair */
00554       dirProps[i++]=BN;   /* second surrogate in the pair gets the BN type */
00555       flags|=DIRPROP_FLAG(dirProps[i]=GetCharType(GET_UTF_32(uchar, aText[i])))|DIRPROP_FLAG(BN);
00556     }
00557     ++i;
00558   }
00559   if(flags&MASK_EMBEDDING) {
00560     flags|=DIRPROP_FLAG_LR(mParaLevel);
00561   }
00562   mFlags=flags;
00563 }
00564 
00565 /* perform (X1)..(X9) ------------------------------------------------------- */
00566 
00567 /*
00568  * Resolve the explicit levels as specified by explicit embedding codes.
00569  * Recalculate the flags to have them reflect the real properties
00570  * after taking the explicit embeddings into account.
00571  *
00572  * The Bidi algorithm is designed to result in the same behavior whether embedding
00573  * levels are externally specified (from "styled text", supposedly the preferred
00574  * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
00575  * That is why (X9) instructs to remove all explicit codes (and BN).
00576  * However, in a real implementation, this removal of these codes and their index
00577  * positions in the plain text is undesirable since it would result in
00578  * reallocated, reindexed text.
00579  * Instead, this implementation leaves the codes in there and just ignores them
00580  * in the subsequent processing.
00581  * In order to get the same reordering behavior, positions with a BN or an
00582  * explicit embedding code just get the same level assigned as the last "real"
00583  * character.
00584  *
00585  * Some implementations, not this one, then overwrite some of these
00586  * directionality properties at "real" same-level-run boundaries by
00587  * L or R codes so that the resolution of weak types can be performed on the
00588  * entire paragraph at once instead of having to parse it once more and
00589  * perform that resolution on same-level-runs.
00590  * This limits the scope of the implicit rules in effectively
00591  * the same way as the run limits.
00592  *
00593  * Instead, this implementation does not modify these codes.
00594  * On one hand, the paragraph has to be scanned for same-level-runs, but
00595  * on the other hand, this saves another loop to reset these codes,
00596  * or saves making and modifying a copy of dirProps[].
00597  *
00598  *
00599  * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm.
00600  *
00601  *
00602  * Handling the stack of explicit levels (Xn):
00603  *
00604  * With the Bidi stack of explicit levels,
00605  * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
00606  * the explicit level must never exceed NSBIDI_MAX_EXPLICIT_LEVEL==61.
00607  *
00608  * In order to have a correct push-pop semantics even in the case of overflows,
00609  * there are two overflow counters:
00610  * - countOver60 is incremented with each LRx at level 60
00611  * - from level 60, one RLx increases the level to 61
00612  * - countOver61 is incremented with each LRx and RLx at level 61
00613  *
00614  * Popping levels with PDF must work in the opposite order so that level 61
00615  * is correct at the correct point. Underflows (too many PDFs) must be checked.
00616  *
00617  * This implementation assumes that NSBIDI_MAX_EXPLICIT_LEVEL is odd.
00618  */
00619 
00620 nsBidiDirection nsBidi::ResolveExplicitLevels()
00621 {
00622   const DirProp *dirProps=mDirProps;
00623   nsBidiLevel *levels=mLevels;
00624 
00625   PRInt32 i=0, length=mLength;
00626   Flags flags=mFlags;       /* collect all directionalities in the text */
00627   DirProp dirProp;
00628   nsBidiLevel level=mParaLevel;
00629 
00630   nsBidiDirection direction;
00631 
00632   /* determine if the text is mixed-directional or single-directional */
00633   direction=DirectionFromFlags(flags);
00634 
00635   /* we may not need to resolve any explicit levels */
00636   if(direction!=NSBIDI_MIXED) {
00637     /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
00638   } else if(!(flags&MASK_EXPLICIT)) {
00639     /* mixed, but all characters are at the same embedding level */
00640     /* set all levels to the paragraph level */
00641     for(i=0; i<length; ++i) {
00642       levels[i]=level;
00643     }
00644   } else {
00645     /* continue to perform (Xn) */
00646 
00647     /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
00648     /* both variables may carry the NSBIDI_LEVEL_OVERRIDE flag to indicate the override status */
00649     nsBidiLevel embeddingLevel=level, newLevel, stackTop=0;
00650 
00651     nsBidiLevel stack[NSBIDI_MAX_EXPLICIT_LEVEL];        /* we never push anything >=NSBIDI_MAX_EXPLICIT_LEVEL */
00652     PRUint32 countOver60=0, countOver61=0;  /* count overflows of explicit levels */
00653 
00654     /* recalculate the flags */
00655     flags=0;
00656 
00657     /* since we assume that this is a single paragraph, we ignore (X8) */
00658     for(i=0; i<length; ++i) {
00659       dirProp=dirProps[i];
00660       switch(dirProp) {
00661         case LRE:
00662         case LRO:
00663           /* (X3, X5) */
00664           newLevel=(embeddingLevel+2)&~(NSBIDI_LEVEL_OVERRIDE|1);    /* least greater even level */
00665           if(newLevel<=NSBIDI_MAX_EXPLICIT_LEVEL) {
00666             stack[stackTop]=embeddingLevel;
00667             ++stackTop;
00668             embeddingLevel=newLevel;
00669             if(dirProp==LRO) {
00670               embeddingLevel|=NSBIDI_LEVEL_OVERRIDE;
00671             } else {
00672               embeddingLevel&=~NSBIDI_LEVEL_OVERRIDE;
00673             }
00674           } else if((embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)==NSBIDI_MAX_EXPLICIT_LEVEL) {
00675             ++countOver61;
00676           } else /* (embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)==NSBIDI_MAX_EXPLICIT_LEVEL-1 */ {
00677             ++countOver60;
00678           }
00679           flags|=DIRPROP_FLAG(BN);
00680           break;
00681         case RLE:
00682         case RLO:
00683           /* (X2, X4) */
00684           newLevel=((embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)+1)|1;    /* least greater odd level */
00685           if(newLevel<=NSBIDI_MAX_EXPLICIT_LEVEL) {
00686             stack[stackTop]=embeddingLevel;
00687             ++stackTop;
00688             embeddingLevel=newLevel;
00689             if(dirProp==RLO) {
00690               embeddingLevel|=NSBIDI_LEVEL_OVERRIDE;
00691             } else {
00692               embeddingLevel&=~NSBIDI_LEVEL_OVERRIDE;
00693             }
00694           } else {
00695             ++countOver61;
00696           }
00697           flags|=DIRPROP_FLAG(BN);
00698           break;
00699         case PDF:
00700           /* (X7) */
00701           /* handle all the overflow cases first */
00702           if(countOver61>0) {
00703             --countOver61;
00704           } else if(countOver60>0 && (embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)!=NSBIDI_MAX_EXPLICIT_LEVEL) {
00705             /* handle LRx overflows from level 60 */
00706             --countOver60;
00707           } else if(stackTop>0) {
00708             /* this is the pop operation; it also pops level 61 while countOver60>0 */
00709             --stackTop;
00710             embeddingLevel=stack[stackTop];
00711             /* } else { (underflow) */
00712           }
00713           flags|=DIRPROP_FLAG(BN);
00714           break;
00715         case B:
00716           /*
00717            * We do not really expect to see a paragraph separator (B),
00718            * but we should do something reasonable with it,
00719            * especially at the end of the text.
00720            */
00721           stackTop=0;
00722           countOver60=countOver61=0;
00723           embeddingLevel=level=mParaLevel;
00724           flags|=DIRPROP_FLAG(B);
00725           break;
00726         case BN:
00727           /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
00728           /* they will get their levels set correctly in AdjustWSLevels() */
00729           flags|=DIRPROP_FLAG(BN);
00730           break;
00731         default:
00732           /* all other types get the "real" level */
00733           if(level!=embeddingLevel) {
00734             level=embeddingLevel;
00735             if(level&NSBIDI_LEVEL_OVERRIDE) {
00736               flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
00737             } else {
00738               flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
00739             }
00740           }
00741           if(!(level&NSBIDI_LEVEL_OVERRIDE)) {
00742             flags|=DIRPROP_FLAG(dirProp);
00743           }
00744           break;
00745       }
00746 
00747       /*
00748        * We need to set reasonable levels even on BN codes and
00749        * explicit codes because we will later look at same-level runs (X10).
00750        */
00751       levels[i]=level;
00752     }
00753     if(flags&MASK_EMBEDDING) {
00754       flags|=DIRPROP_FLAG_LR(mParaLevel);
00755     }
00756 
00757     /* subsequently, ignore the explicit codes and BN (X9) */
00758 
00759     /* again, determine if the text is mixed-directional or single-directional */
00760     mFlags=flags;
00761     direction=DirectionFromFlags(flags);
00762   }
00763   return direction;
00764 }
00765 
00766 /*
00767  * Use a pre-specified embedding levels array:
00768  *
00769  * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
00770  * ignore all explicit codes (X9),
00771  * and check all the preset levels.
00772  *
00773  * Recalculate the flags to have them reflect the real properties
00774  * after taking the explicit embeddings into account.
00775  */
00776 nsresult nsBidi::CheckExplicitLevels(nsBidiDirection *aDirection)
00777 {
00778   const DirProp *dirProps=mDirProps;
00779   nsBidiLevel *levels=mLevels;
00780 
00781   PRInt32 i, length=mLength;
00782   Flags flags=0;  /* collect all directionalities in the text */
00783   nsBidiLevel level, paraLevel=mParaLevel;
00784 
00785   for(i=0; i<length; ++i) {
00786     level=levels[i];
00787     if(level&NSBIDI_LEVEL_OVERRIDE) {
00788       /* keep the override flag in levels[i] but adjust the flags */
00789       level&=~NSBIDI_LEVEL_OVERRIDE;     /* make the range check below simpler */
00790       flags|=DIRPROP_FLAG_O(level);
00791     } else {
00792       /* set the flags */
00793       flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProps[i]);
00794     }
00795     if(level<paraLevel || NSBIDI_MAX_EXPLICIT_LEVEL<level) {
00796       /* level out of bounds */
00797       *aDirection = NSBIDI_LTR;
00798       return NS_ERROR_INVALID_ARG;
00799     }
00800   }
00801   if(flags&MASK_EMBEDDING) {
00802     flags|=DIRPROP_FLAG_LR(mParaLevel);
00803   }
00804 
00805   /* determine if the text is mixed-directional or single-directional */
00806   mFlags=flags;
00807   *aDirection = DirectionFromFlags(flags);
00808   return NS_OK;
00809 }
00810 
00811 /* determine if the text is mixed-directional or single-directional */
00812 nsBidiDirection nsBidi::DirectionFromFlags(Flags aFlags)
00813 {
00814   /* if the text contains AN and neutrals, then some neutrals may become RTL */
00815   if(!(aFlags&MASK_RTL || aFlags&DIRPROP_FLAG(AN) && aFlags&MASK_POSSIBLE_N)) {
00816     return NSBIDI_LTR;
00817   } else if(!(aFlags&MASK_LTR)) {
00818     return NSBIDI_RTL;
00819   } else {
00820     return NSBIDI_MIXED;
00821   }
00822 }
00823 
00824 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
00825 
00826 /*
00827  * This implementation of the (Wn) rules applies all rules in one pass.
00828  * In order to do so, it needs a look-ahead of typically 1 character
00829  * (except for W5: sequences of ET) and keeps track of changes
00830  * in a rule Wp that affect a later Wq (p<q).
00831  *
00832  * historyOfEN is a variable-saver: it contains 4 boolean states;
00833  * a bit in it set to 1 means:
00834  *  bit 0: the current code is an EN after W2
00835  *  bit 1: the current code is an EN after W4
00836  *  bit 2: the previous code was an EN after W2
00837  *  bit 3: the previous code was an EN after W4
00838  * In other words, b0..1 have transitions of EN in the current iteration,
00839  * while b2..3 have the transitions of EN in the previous iteration.
00840  * A simple historyOfEN<<=2 suffices for the propagation.
00841  *
00842  * The (Nn) and (In) rules are also performed in that same single loop,
00843  * but effectively one iteration behind for white space.
00844  *
00845  * Since all implicit rules are performed in one step, it is not necessary
00846  * to actually store the intermediate directional properties in dirProps[].
00847  */
00848 
00849 #define EN_SHIFT 2
00850 #define EN_AFTER_W2 1
00851 #define EN_AFTER_W4 2
00852 #define EN_ALL 3
00853 #define PREV_EN_AFTER_W2 4
00854 #define PREV_EN_AFTER_W4 8
00855 
00856 void nsBidi::ResolveImplicitLevels(PRInt32 aStart, PRInt32 aLimit,
00857                    DirProp aSOR, DirProp aEOR)
00858 {
00859   const DirProp *dirProps=mDirProps;
00860   nsBidiLevel *levels=mLevels;
00861 
00862   PRInt32 i, next, neutralStart=-1;
00863   DirProp prevDirProp, dirProp, nextDirProp, lastStrong, beforeNeutral;
00864   PRUint8 historyOfEN;
00865 
00866   /* initialize: current at aSOR, next at aStart (it is aStart<aLimit) */
00867   next=aStart;
00868   beforeNeutral=dirProp=lastStrong=aSOR;
00869   nextDirProp=dirProps[next];
00870   historyOfEN=0;
00871 
00872   /*
00873    * In all steps of this implementation, BN and explicit embedding codes
00874    * must be treated as if they didn't exist (X9).
00875    * They will get levels set before a non-neutral character, and remain
00876    * undefined before a neutral one, but AdjustWSLevels() will take care
00877    * of all of them.
00878    */
00879   while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT) {
00880     if(++next<aLimit) {
00881       nextDirProp=dirProps[next];
00882     } else {
00883       nextDirProp=aEOR;
00884       break;
00885     }
00886   }
00887 
00888   /* loop for entire run */
00889   while(next<aLimit) {
00890     /* advance */
00891     prevDirProp=dirProp;
00892     dirProp=nextDirProp;
00893     i=next;
00894     do {
00895       if(++next<aLimit) {
00896         nextDirProp=dirProps[next];
00897       } else {
00898         nextDirProp=aEOR;
00899         break;
00900       }
00901     } while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT);
00902     historyOfEN<<=EN_SHIFT;
00903 
00904     /* (W1..W7) */
00905     switch(dirProp) {
00906       case L:
00907         lastStrong=L;
00908         break;
00909       case R:
00910         lastStrong=R;
00911         break;
00912       case AL:
00913         /* (W3) */
00914         lastStrong=AL;
00915         dirProp=R;
00916         break;
00917       case EN:
00918         /* we have to set historyOfEN correctly */
00919         if(lastStrong==AL) {
00920           /* (W2) */
00921           dirProp=AN;
00922         } else {
00923           if(lastStrong==L) {
00924             /* (W7) */
00925             dirProp=L;
00926           }
00927           /* this EN stays after (W2) and (W4) - at least before (W7) */
00928           historyOfEN|=EN_ALL;
00929         }
00930         break;
00931       case ES:
00932         if( historyOfEN&PREV_EN_AFTER_W2 &&     /* previous was EN before (W4) */
00933             nextDirProp==EN && lastStrong!=AL   /* next is EN and (W2) won't make it AN */
00934           ) {
00935           /* (W4) */
00936           if(lastStrong!=L) {
00937             dirProp=EN;
00938           } else {
00939             /* (W7) */
00940             dirProp=L;
00941           }
00942           historyOfEN|=EN_AFTER_W4;
00943         } else {
00944           /* (W6) */
00945           dirProp=O_N;
00946         }
00947         break;
00948       case CS:
00949         if( historyOfEN&PREV_EN_AFTER_W2 &&     /* previous was EN before (W4) */
00950             nextDirProp==EN && lastStrong!=AL   /* next is EN and (W2) won't make it AN */
00951           ) {
00952           /* (W4) */
00953           if(lastStrong!=L) {
00954             dirProp=EN;
00955           } else {
00956             /* (W7) */
00957             dirProp=L;
00958           }
00959           historyOfEN|=EN_AFTER_W4;
00960         } else if(prevDirProp==AN &&                    /* previous was AN */
00961               (nextDirProp==AN ||                   /* next is AN */
00962                nextDirProp==EN && lastStrong==AL)   /* or (W2) will make it one */
00963              ) {
00964           /* (W4) */
00965           dirProp=AN;
00966         } else {
00967           /* (W6) */
00968           dirProp=O_N;
00969         }
00970         break;
00971       case ET:
00972         /* get sequence of ET; advance only next, not current, previous or historyOfEN */
00973         while(next<aLimit && DIRPROP_FLAG(nextDirProp)&MASK_ET_NSM_BN /* (W1), (X9) */) {
00974           if(++next<aLimit) {
00975             nextDirProp=dirProps[next];
00976           } else {
00977             nextDirProp=aEOR;
00978             break;
00979           }
00980         }
00981 
00982         if( historyOfEN&PREV_EN_AFTER_W4 ||     /* previous was EN before (W5) */
00983             nextDirProp==EN && lastStrong!=AL   /* next is EN and (W2) won't make it AN */
00984           ) {
00985           /* (W5) */
00986           if(lastStrong!=L) {
00987             dirProp=EN;
00988           } else {
00989             /* (W7) */
00990             dirProp=L;
00991           }
00992         } else {
00993           /* (W6) */
00994           dirProp=O_N;
00995         }
00996 
00997         /* apply the result of (W1), (W5)..(W7) to the entire sequence of ET */
00998         break;
00999       case NSM:
01000         /* (W1) */
01001         dirProp=prevDirProp;
01002         /* set historyOfEN back to prevDirProp's historyOfEN */
01003         historyOfEN>>=EN_SHIFT;
01004         /*
01005          * Technically, this should be done before the switch() in the form
01006          *      if(nextDirProp==NSM) {
01007          *          dirProps[next]=nextDirProp=dirProp;
01008          *      }
01009          *
01010          * - effectively one iteration ahead.
01011          * However, whether the next dirProp is NSM or is equal to the current dirProp
01012          * does not change the outcome of any condition in (W2)..(W7).
01013          */
01014         break;
01015       default:
01016         break;
01017     }
01018 
01019     /* here, it is always [prev,this,next]dirProp!=BN; it may be next>i+1 */
01020 
01021     /* perform (Nn) - here, only L, R, EN, AN, and neutrals are left */
01022     /* this is one iteration late for the neutrals */
01023     if(DIRPROP_FLAG(dirProp)&MASK_N) {
01024       if(neutralStart<0) {
01025         /* start of a sequence of neutrals */
01026         neutralStart=i;
01027         beforeNeutral=prevDirProp;
01028       }
01029     } else /* not a neutral, can be only one of { L, R, EN, AN } */ {
01030       /*
01031        * Note that all levels[] values are still the same at this
01032        * point because this function is called for an entire
01033        * same-level run.
01034        * Therefore, we need to read only one actual level.
01035        */
01036       nsBidiLevel level=levels[i];
01037 
01038       if(neutralStart>=0) {
01039         nsBidiLevel final;
01040         /* end of a sequence of neutrals (dirProp is "afterNeutral") */
01041         if(beforeNeutral==L) {
01042           if(dirProp==L) {
01043             final=0;                /* make all neutrals L (N1) */
01044           } else {
01045             final=level;            /* make all neutrals "e" (N2) */
01046           }
01047         } else /* beforeNeutral is one of { R, EN, AN } */ {
01048           if(dirProp==L) {
01049             final=level;            /* make all neutrals "e" (N2) */
01050           } else {
01051             final=1;                /* make all neutrals R (N1) */
01052           }
01053         }
01054         /* perform (In) on the sequence of neutrals */
01055         if((level^final)&1) {
01056           /* do something only if we need to _change_ the level */
01057           do {
01058             ++levels[neutralStart];
01059           } while(++neutralStart<i);
01060         }
01061         neutralStart=-1;
01062       }
01063 
01064       /* perform (In) on the non-neutral character */
01065       /*
01066        * in the cases of (W5), processing a sequence of ET,
01067        * and of (X9), skipping BN,
01068        * there may be multiple characters from i to <next
01069        * that all get (virtually) the same dirProp and (really) the same level
01070        */
01071       if(dirProp==L) {
01072         if(level&1) {
01073           ++level;
01074         } else {
01075           i=next;     /* we keep the levels */
01076         }
01077       } else if(dirProp==R) {
01078         if(!(level&1)) {
01079           ++level;
01080         } else {
01081           i=next;     /* we keep the levels */
01082         }
01083       } else /* EN or AN */ {
01084         level=(level+2)&~1;     /* least greater even level */
01085       }
01086 
01087       /* apply the new level to the sequence, if necessary */
01088       while(i<next) {
01089         levels[i++]=level;
01090       }
01091     }
01092   }
01093 
01094   /* perform (Nn) - here,
01095   the character after the the neutrals is aEOR, which is either L or R */
01096   /* this is one iteration late for the neutrals */
01097   if(neutralStart>=0) {
01098     /*
01099      * Note that all levels[] values are still the same at this
01100      * point because this function is called for an entire
01101      * same-level run.
01102      * Therefore, we need to read only one actual level.
01103      */
01104     nsBidiLevel level=levels[neutralStart], final;
01105 
01106     /* end of a sequence of neutrals (aEOR is "afterNeutral") */
01107     if(beforeNeutral==L) {
01108       if(aEOR==L) {
01109         final=0;                /* make all neutrals L (N1) */
01110       } else {
01111         final=level;            /* make all neutrals "e" (N2) */
01112       }
01113     } else /* beforeNeutral is one of { R, EN, AN } */ {
01114       if(aEOR==L) {
01115         final=level;            /* make all neutrals "e" (N2) */
01116       } else {
01117         final=1;                /* make all neutrals R (N1) */
01118       }
01119     }
01120     /* perform (In) on the sequence of neutrals */
01121     if((level^final)&1) {
01122       /* do something only if we need to _change_ the level */
01123       do {
01124         ++levels[neutralStart];
01125       } while(++neutralStart<aLimit);
01126     }
01127   }
01128 }
01129 
01130 /* perform (L1) and (X9) ---------------------------------------------------- */
01131 
01132 /*
01133  * Reset the embedding levels for some non-graphic characters (L1).
01134  * This function also sets appropriate levels for BN, and
01135  * explicit embedding types that are supposed to have been removed
01136  * from the paragraph in (X9).
01137  */
01138 void nsBidi::AdjustWSLevels()
01139 {
01140   const DirProp *dirProps=mDirProps;
01141   nsBidiLevel *levels=mLevels;
01142   PRInt32 i;
01143 
01144   if(mFlags&MASK_WS) {
01145     nsBidiLevel paraLevel=mParaLevel;
01146     Flags flag;
01147 
01148     i=mTrailingWSStart;
01149     while(i>0) {
01150       /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
01151       while(i>0 && DIRPROP_FLAG(dirProps[--i])&MASK_WS) {
01152         levels[i]=paraLevel;
01153       }
01154 
01155       /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
01156       /* here, i+1 is guaranteed to be <length */
01157       while(i>0) {
01158         flag=DIRPROP_FLAG(dirProps[--i]);
01159         if(flag&MASK_BN_EXPLICIT) {
01160           levels[i]=levels[i+1];
01161         } else if(flag&MASK_B_S) {
01162           levels[i]=paraLevel;
01163           break;
01164         }
01165       }
01166     }
01167   }
01168 
01169   /* now remove the NSBIDI_LEVEL_OVERRIDE flags, if any */
01170   /* (a separate loop can be optimized more easily by a compiler) */
01171   if(mFlags&MASK_OVERRIDE) {
01172     for(i=mTrailingWSStart; i>0;) {
01173       levels[--i]&=~NSBIDI_LEVEL_OVERRIDE;
01174     }
01175   }
01176 }
01177 #ifdef FULL_BIDI_ENGINE
01178 
01179 /* -------------------------------------------------------------------------- */
01180 
01181 nsresult nsBidi::GetDirection(nsBidiDirection* aDirection)
01182 {
01183   *aDirection = mDirection;
01184   return NS_OK;
01185 }
01186 
01187 nsresult nsBidi::GetLength(PRInt32* aLength)
01188 {
01189   *aLength = mLength;
01190   return NS_OK;
01191 }
01192 
01193 nsresult nsBidi::GetParaLevel(nsBidiLevel* aParaLevel)
01194 {
01195   *aParaLevel = mParaLevel;
01196   return NS_OK;
01197 }
01198 
01199 /*
01200  * General remarks about the functions in this section:
01201  *
01202  * These functions deal with the aspects of potentially mixed-directional
01203  * text in a single paragraph or in a line of a single paragraph
01204  * which has already been processed according to
01205  * the Unicode 3.0 Bidi algorithm as defined in
01206  * http://www.unicode.org/unicode/reports/tr9/ , version 5,
01207  * also described in The Unicode Standard, Version 3.0 .
01208  *
01209  * This means that there is a nsBidi object with a levels
01210  * and a dirProps array.
01211  * paraLevel and direction are also set.
01212  * Only if the length of the text is zero, then levels==dirProps==NULL.
01213  *
01214  * The overall directionality of the paragraph
01215  * or line is used to bypass the reordering steps if possible.
01216  * Even purely RTL text does not need reordering there because
01217  * the getLogical/VisualIndex() functions can compute the
01218  * index on the fly in such a case.
01219  *
01220  * The implementation of the access to same-level-runs and of the reordering
01221  * do attempt to provide better performance and less memory usage compared to
01222  * a direct implementation of especially rule (L2) with an array of
01223  * one (32-bit) integer per text character.
01224  *
01225  * Here, the levels array is scanned as soon as necessary, and a vector of
01226  * same-level-runs is created. Reordering then is done on this vector.
01227  * For each run of text positions that were resolved to the same level,
01228  * only 8 bytes are stored: the first text position of the run and the visual
01229  * position behind the run after reordering.
01230  * One sign bit is used to hold the directionality of the run.
01231  * This is inefficient if there are many very short runs. If the average run
01232  * length is <2, then this uses more memory.
01233  *
01234  * In a further attempt to save memory, the levels array is never changed
01235  * after all the resolution rules (Xn, Wn, Nn, In).
01236  * Many functions have to consider the field trailingWSStart:
01237  * if it is less than length, then there is an implicit trailing run
01238  * at the paraLevel,
01239  * which is not reflected in the levels array.
01240  * This allows a line nsBidi object to use the same levels array as
01241  * its paragraph parent object.
01242  *
01243  * When a nsBidi object is created for a line of a paragraph, then the
01244  * paragraph's levels and dirProps arrays are reused by way of setting
01245  * a pointer into them, not by copying. This again saves memory and forbids to
01246  * change the now shared levels for (L1).
01247  */
01248 nsresult nsBidi::SetLine(nsIBidi* aParaBidi, PRInt32 aStart, PRInt32 aLimit)
01249 {
01250   nsBidi* pParent = (nsBidi*)aParaBidi;
01251   PRInt32 length;
01252 
01253   /* check the argument values */
01254   if(pParent==NULL) {
01255     return NS_ERROR_INVALID_POINTER;
01256   } else if(aStart<0 || aStart>aLimit || aLimit>pParent->mLength) {
01257     return NS_ERROR_INVALID_ARG;
01258   }
01259 
01260   /* set members from our aParaBidi parent */
01261   length=mLength=aLimit-aStart;
01262   mParaLevel=pParent->mParaLevel;
01263 
01264   mRuns=NULL;
01265   mFlags=0;
01266 
01267   if(length>0) {
01268     mDirProps=pParent->mDirProps+aStart;
01269     mLevels=pParent->mLevels+aStart;
01270     mRunCount=-1;
01271 
01272     if(pParent->mDirection!=NSBIDI_MIXED) {
01273       /* the parent is already trivial */
01274       mDirection=pParent->mDirection;
01275 
01276       /*
01277        * The parent's levels are all either
01278        * implicitly or explicitly ==paraLevel;
01279        * do the same here.
01280        */
01281       if(pParent->mTrailingWSStart<=aStart) {
01282         mTrailingWSStart=0;
01283       } else if(pParent->mTrailingWSStart<aLimit) {
01284         mTrailingWSStart=pParent->mTrailingWSStart-aStart;
01285       } else {
01286         mTrailingWSStart=length;
01287       }
01288     } else {
01289       const nsBidiLevel *levels=mLevels;
01290       PRInt32 i, trailingWSStart;
01291       nsBidiLevel level;
01292       Flags flags=0;
01293 
01294       SetTrailingWSStart();
01295       trailingWSStart=mTrailingWSStart;
01296 
01297       /* recalculate pLineBidi->direction */
01298       if(trailingWSStart==0) {
01299         /* all levels are at paraLevel */
01300         mDirection=(nsBidiDirection)(mParaLevel&1);
01301       } else {
01302         /* get the level of the first character */
01303         level=levels[0]&1;
01304 
01305         /* if there is anything of a different level, then the line is mixed */
01306         if(trailingWSStart<length && (mParaLevel&1)!=level) {
01307           /* the trailing WS is at paraLevel, which differs from levels[0] */
01308           mDirection=NSBIDI_MIXED;
01309         } else {
01310           /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
01311           i=1;
01312           for(;;) {
01313             if(i==trailingWSStart) {
01314               /* the direction values match those in level */
01315               mDirection=(nsBidiDirection)level;
01316               break;
01317             } else if((levels[i]&1)!=level) {
01318               mDirection=NSBIDI_MIXED;
01319               break;
01320             }
01321             ++i;
01322           }
01323         }
01324       }
01325 
01326       switch(mDirection) {
01327         case NSBIDI_LTR:
01328           /* make sure paraLevel is even */
01329           mParaLevel=(mParaLevel+1)&~1;
01330 
01331           /* all levels are implicitly at paraLevel (important for GetLevels()) */
01332           mTrailingWSStart=0;
01333           break;
01334         case NSBIDI_RTL:
01335           /* make sure paraLevel is odd */
01336           mParaLevel|=1;
01337 
01338           /* all levels are implicitly at paraLevel (important for GetLevels()) */
01339           mTrailingWSStart=0;
01340           break;
01341         default:
01342           break;
01343       }
01344     }
01345   } else {
01346     /* create an object for a zero-length line */
01347     mDirection=mParaLevel&1 ? NSBIDI_RTL : NSBIDI_LTR;
01348     mTrailingWSStart=mRunCount=0;
01349 
01350     mDirProps=NULL;
01351     mLevels=NULL;
01352   }
01353   return NS_OK;
01354 }
01355 
01356 /* handle trailing WS (L1) -------------------------------------------------- */
01357 
01358 /*
01359  * SetTrailingWSStart() sets the start index for a trailing
01360  * run of WS in the line. This is necessary because we do not modify
01361  * the paragraph's levels array that we just point into.
01362  * Using trailingWSStart is another form of performing (L1).
01363  *
01364  * To make subsequent operations easier, we also include the run
01365  * before the WS if it is at the paraLevel - we merge the two here.
01366  */
01367 void nsBidi::SetTrailingWSStart() {
01368   /* mDirection!=NSBIDI_MIXED */
01369 
01370   const DirProp *dirProps=mDirProps;
01371   nsBidiLevel *levels=mLevels;
01372   PRInt32 start=mLength;
01373   nsBidiLevel paraLevel=mParaLevel;
01374 
01375   /* go backwards across all WS, BN, explicit codes */
01376   while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) {
01377     --start;
01378   }
01379 
01380   /* if the WS run can be merged with the previous run then do so here */
01381   while(start>0 && levels[start-1]==paraLevel) {
01382     --start;
01383   }
01384 
01385   mTrailingWSStart=start;
01386 }
01387 
01388 nsresult nsBidi::GetLevelAt(PRInt32 aCharIndex, nsBidiLevel* aLevel)
01389 {
01390   /* return paraLevel if in the trailing WS run, otherwise the real level */
01391   if(aCharIndex<0 || mLength<=aCharIndex) {
01392     *aLevel = 0;
01393   } else if(mDirection!=NSBIDI_MIXED || aCharIndex>=mTrailingWSStart) {
01394     *aLevel = mParaLevel;
01395   } else {
01396     *aLevel = mLevels[aCharIndex];
01397   }
01398   return NS_OK;
01399 }
01400 
01401 nsresult nsBidi::GetLevels(nsBidiLevel** aLevels)
01402 {
01403   PRInt32 start, length;
01404 
01405   length = mLength;
01406   if(length<=0) {
01407     *aLevels = NULL;
01408     return NS_ERROR_INVALID_ARG;
01409   }
01410 
01411   start = mTrailingWSStart;
01412   if(start==length) {
01413     /* the current levels array reflects the WS run */
01414     *aLevels = mLevels;
01415     return NS_OK;
01416   }
01417 
01418   /*
01419    * After the previous if(), we know that the levels array
01420    * has an implicit trailing WS run and therefore does not fully
01421    * reflect itself all the levels.
01422    * This must be a nsBidi object for a line, and
01423    * we need to create a new levels array.
01424    */
01425 
01426   if(GETLEVELSMEMORY(length)) {
01427     nsBidiLevel *levels=mLevelsMemory;
01428 
01429     if(start>0 && levels!=mLevels) {
01430       memcpy(levels, mLevels, start);
01431     }
01432     memset(levels+start, mParaLevel, length-start);
01433 
01434     /* this new levels array is set for the line and reflects the WS run */
01435     mTrailingWSStart=length;
01436     *aLevels=mLevels=levels;
01437     return NS_OK;
01438   } else {
01439     /* out of memory */
01440     *aLevels = NULL;
01441     return NS_ERROR_OUT_OF_MEMORY;
01442   }
01443 }
01444 #endif // FULL_BIDI_ENGINE
01445 
01446 nsresult nsBidi::GetCharTypeAt(PRInt32 aCharIndex, nsCharType* pType)
01447 {
01448   if(aCharIndex<0 || mLength<=aCharIndex) {
01449     return NS_ERROR_INVALID_ARG;
01450   }
01451   *pType = (nsCharType)mDirProps[aCharIndex];
01452   return NS_OK;
01453 }
01454 
01455 nsresult nsBidi::GetLogicalRun(PRInt32 aLogicalStart, PRInt32 *aLogicalLimit, nsBidiLevel *aLevel)
01456 {
01457   PRInt32 length = mLength;
01458 
01459   if(aLogicalStart<0 || length<=aLogicalStart) {
01460     return NS_ERROR_INVALID_ARG;
01461   }
01462 
01463   if(mDirection!=NSBIDI_MIXED || aLogicalStart>=mTrailingWSStart) {
01464     if(aLogicalLimit!=NULL) {
01465       *aLogicalLimit=length;
01466     }
01467     if(aLevel!=NULL) {
01468       *aLevel=mParaLevel;
01469     }
01470   } else {
01471     nsBidiLevel *levels=mLevels;
01472     nsBidiLevel level=levels[aLogicalStart];
01473 
01474     /* search for the end of the run */
01475     length=mTrailingWSStart;
01476     while(++aLogicalStart<length && level==levels[aLogicalStart]) {}
01477 
01478     if(aLogicalLimit!=NULL) {
01479       *aLogicalLimit=aLogicalStart;
01480     }
01481     if(aLevel!=NULL) {
01482       *aLevel=level;
01483     }
01484   }
01485   return NS_OK;
01486 }
01487 
01488 /* runs API functions ------------------------------------------------------- */
01489 
01490 nsresult nsBidi::CountRuns(PRInt32* aRunCount)
01491 {
01492   if(mRunCount<0 && !GetRuns()) {
01493     return NS_ERROR_OUT_OF_MEMORY;
01494   } else {
01495     if (aRunCount)
01496       *aRunCount = mRunCount;
01497     return NS_OK;
01498   }
01499 }
01500 
01501 nsresult nsBidi::GetVisualRun(PRInt32 aRunIndex, PRInt32 *aLogicalStart, PRInt32 *aLength, nsBidiDirection *aDirection)
01502 {
01503   if( aRunIndex<0 ||
01504       mRunCount==-1 && !GetRuns() ||
01505       aRunIndex>=mRunCount
01506     ) {
01507     *aDirection = NSBIDI_LTR;
01508     return NS_OK;
01509   } else {
01510     PRInt32 start=mRuns[aRunIndex].logicalStart;
01511     if(aLogicalStart!=NULL) {
01512       *aLogicalStart=GET_INDEX(start);
01513     }
01514     if(aLength!=NULL) {
01515       if(aRunIndex>0) {
01516         *aLength=mRuns[aRunIndex].visualLimit-
01517              mRuns[aRunIndex-1].visualLimit;
01518       } else {
01519         *aLength=mRuns[0].visualLimit;
01520       }
01521     }
01522     *aDirection = (nsBidiDirection)GET_ODD_BIT(start);
01523     return NS_OK;
01524   }
01525 }
01526 
01527 /* compute the runs array --------------------------------------------------- */
01528 
01529 /*
01530  * Compute the runs array from the levels array.
01531  * After GetRuns() returns PR_TRUE, runCount is guaranteed to be >0
01532  * and the runs are reordered.
01533  * Odd-level runs have visualStart on their visual right edge and
01534  * they progress visually to the left.
01535  */
01536 PRBool nsBidi::GetRuns()
01537 {
01538   if(mDirection!=NSBIDI_MIXED) {
01539     /* simple, single-run case - this covers length==0 */
01540     GetSingleRun(mParaLevel);
01541   } else /* NSBIDI_MIXED, length>0 */ {
01542     /* mixed directionality */
01543     PRInt32 length=mLength, limit=length;
01544 
01545     /*
01546      * If there are WS characters at the end of the line
01547      * and the run preceding them has a level different from
01548      * paraLevel, then they will form their own run at paraLevel (L1).
01549      * Count them separately.
01550      * We need some special treatment for this in order to not
01551      * modify the levels array which a line nsBidi object shares
01552      * with its paragraph parent and its other line siblings.
01553      * In other words, for the trailing WS, it may be
01554      * levels[]!=paraLevel but we have to treat it like it were so.
01555      */
01556     limit=mTrailingWSStart;
01557     if(limit==0) {
01558       /* there is only WS on this line */
01559       GetSingleRun(mParaLevel);
01560     } else {
01561       nsBidiLevel *levels=mLevels;
01562       PRInt32 i, runCount;
01563       nsBidiLevel level=NSBIDI_DEFAULT_LTR;   /* initialize with no valid level */
01564 
01565       /* count the runs, there is at least one non-WS run, and limit>0 */
01566       runCount=0;
01567       for(i=0; i<limit; ++i) {
01568         /* increment runCount at the start of each run */
01569         if(levels[i]!=level) {
01570           ++runCount;
01571           level=levels[i];
01572         }
01573       }
01574 
01575       /*
01576        * We don't need to see if the last run can be merged with a trailing
01577        * WS run because SetTrailingWSStart() would have done that.
01578        */
01579       if(runCount==1 && limit==length) {
01580         /* There is only one non-WS run and no trailing WS-run. */
01581         GetSingleRun(levels[0]);
01582       } else /* runCount>1 || limit<length */ {
01583         /* allocate and set the runs */
01584         Run *runs;
01585         PRInt32 runIndex, start;
01586         nsBidiLevel minLevel=NSBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0;
01587 
01588         /* now, count a (non-mergable) WS run */
01589         if(limit<length) {
01590           ++runCount;
01591         }
01592 
01593         /* runCount>1 */
01594         if(GETRUNSMEMORY(runCount)) {
01595           runs=mRunsMemory;
01596         } else {
01597           return PR_FALSE;
01598         }
01599 
01600         /* set the runs */
01601         /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */
01602         /* however, that would take longer and make other functions more complicated */
01603         runIndex=0;
01604 
01605         /* search for the run ends */
01606         start=0;
01607         level=levels[0];
01608         if(level<minLevel) {
01609           minLevel=level;
01610         }
01611         if(level>maxLevel) {
01612           maxLevel=level;
01613         }
01614 
01615         /* initialize visualLimit values with the run lengths */
01616         for(i=1; i<limit; ++i) {
01617           if(levels[i]!=level) {
01618             /* i is another run limit */
01619             runs[runIndex].logicalStart=start;
01620             runs[runIndex].visualLimit=i-start;
01621             start=i;
01622 
01623             level=levels[i];
01624             if(level<minLevel) {
01625               minLevel=level;
01626             }
01627             if(level>maxLevel) {
01628               maxLevel=level;
01629             }
01630             ++runIndex;
01631           }
01632         }
01633 
01634         /* finish the last run at i==limit */
01635         runs[runIndex].logicalStart=start;
01636         runs[runIndex].visualLimit=limit-start;
01637         ++runIndex;
01638 
01639         if(limit<length) {
01640           /* there is a separate WS run */
01641           runs[runIndex].logicalStart=limit;
01642           runs[runIndex].visualLimit=length-limit;
01643           if(mParaLevel<minLevel) {
01644             minLevel=mParaLevel;
01645           }
01646         }
01647 
01648         /* set the object fields */
01649         mRuns=runs;
01650         mRunCount=runCount;
01651 
01652         ReorderLine(minLevel, maxLevel);
01653 
01654         /* now add the direction flags and adjust the visualLimit's to be just that */
01655         ADD_ODD_BIT_FROM_LEVEL(runs[0].logicalStart, levels[runs[0].logicalStart]);
01656         limit=runs[0].visualLimit;
01657         for(i=1; i<runIndex; ++i) {
01658           ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]);
01659           limit=runs[i].visualLimit+=limit;
01660         }
01661 
01662         /* same for the trailing WS run */
01663         if(runIndex<runCount) {
01664           ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, mParaLevel);
01665           runs[runIndex].visualLimit+=limit;
01666         }
01667       }
01668     }
01669   }
01670   return PR_TRUE;
01671 }
01672 
01673 /* in trivial cases there is only one trivial run; called by GetRuns() */
01674 void nsBidi::GetSingleRun(nsBidiLevel aLevel)
01675 {
01676   /* simple, single-run case */
01677   mRuns=mSimpleRuns;
01678   mRunCount=1;
01679 
01680   /* fill and reorder the single run */
01681   mRuns[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, aLevel);
01682   mRuns[0].visualLimit=mLength;
01683 }
01684 
01685 /* reorder the runs array (L2) ---------------------------------------------- */
01686 
01687 /*
01688  * Reorder the same-level runs in the runs array.
01689  * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
01690  * All the visualStart fields=logical start before reordering.
01691  * The "odd" bits are not set yet.
01692  *
01693  * Reordering with this data structure lends itself to some handy shortcuts:
01694  *
01695  * Since each run is moved but not modified, and since at the initial maxLevel
01696  * each sequence of same-level runs consists of only one run each, we
01697  * don't need to do anything there and can predecrement maxLevel.
01698  * In many simple cases, the reordering is thus done entirely in the
01699  * index mapping.
01700  * Also, reordering occurs only down to the lowest odd level that occurs,
01701  * which is minLevel|1. However, if the lowest level itself is odd, then
01702  * in the last reordering the sequence of the runs at this level or higher
01703  * will be all runs, and we don't need the elaborate loop to search for them.
01704  * This is covered by ++minLevel instead of minLevel|=1 followed
01705  * by an extra reorder-all after the reorder-some loop.
01706  * About a trailing WS run:
01707  * Such a run would need special treatment because its level is not
01708  * reflected in levels[] if this is not a paragraph object.
01709  * Instead, all characters from trailingWSStart on are implicitly at
01710  * paraLevel.
01711  * However, for all maxLevel>paraLevel, this run will never be reordered
01712  * and does not need to be taken into account. maxLevel==paraLevel is only reordered
01713  * if minLevel==paraLevel is odd, which is done in the extra segment.
01714  * This means that for the main reordering loop we don't need to consider
01715  * this run and can --runCount. If it is later part of the all-runs
01716  * reordering, then runCount is adjusted accordingly.
01717  */
01718 void nsBidi::ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel)
01719 {
01720   Run *runs;
01721   nsBidiLevel *levels;
01722   PRInt32 firstRun, endRun, limitRun, runCount, temp;
01723 
01724   /* nothing to do? */
01725   if(aMaxLevel<=(aMinLevel|1)) {
01726     return;
01727   }
01728 
01729   /*
01730    * Reorder only down to the lowest odd level
01731    * and reorder at an odd aMinLevel in a separate, simpler loop.
01732    * See comments above for why aMinLevel is always incremented.
01733    */
01734   ++aMinLevel;
01735 
01736   runs=mRuns;
01737   levels=mLevels;
01738   runCount=mRunCount;
01739 
01740   /* do not include the WS run at paraLevel<=old aMinLevel except in the simple loop */
01741   if(mTrailingWSStart<mLength) {
01742     --runCount;
01743   }
01744 
01745   while(--aMaxLevel>=aMinLevel) {
01746     firstRun=0;
01747 
01748     /* loop for all sequences of runs */
01749     for(;;) {
01750       /* look for a sequence of runs that are all at >=aMaxLevel */
01751       /* look for the first run of such a sequence */
01752       while(firstRun<runCount && levels[runs[firstRun].logicalStart]<aMaxLevel) {
01753         ++firstRun;
01754       }
01755       if(firstRun>=runCount) {
01756         break;  /* no more such runs */
01757       }
01758 
01759       /* look for the limit run of such a sequence (the run behind it) */
01760       for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=aMaxLevel;) {}
01761 
01762       /* Swap the entire sequence of runs from firstRun to limitRun-1. */
01763       endRun=limitRun-1;
01764       while(firstRun<endRun) {
01765         temp=runs[firstRun].logicalStart;
01766         runs[firstRun].logicalStart=runs[endRun].logicalStart;
01767         runs[endRun].logicalStart=temp;
01768 
01769         temp=runs[firstRun].visualLimit;
01770         runs[firstRun].visualLimit=runs[endRun].visualLimit;
01771         runs[endRun].visualLimit=temp;
01772 
01773         ++firstRun;
01774         --endRun;
01775       }
01776 
01777       if(limitRun==runCount) {
01778         break;  /* no more such runs */
01779       } else {
01780         firstRun=limitRun+1;
01781       }
01782     }
01783   }
01784 
01785   /* now do aMaxLevel==old aMinLevel (==odd!), see above */
01786   if(!(aMinLevel&1)) {
01787     firstRun=0;
01788 
01789     /* include the trailing WS run in this complete reordering */
01790     if(mTrailingWSStart==mLength) {
01791       --runCount;
01792     }
01793 
01794     /* Swap the entire sequence of all runs. (endRun==runCount) */
01795     while(firstRun<runCount) {
01796       temp=runs[firstRun].logicalStart;
01797       runs[firstRun].logicalStart=runs[runCount].logicalStart;
01798       runs[runCount].logicalStart=temp;
01799 
01800       temp=runs[firstRun].visualLimit;
01801       runs[firstRun].visualLimit=runs[runCount].visualLimit;
01802       runs[runCount].visualLimit=temp;
01803 
01804       ++firstRun;
01805       --runCount;
01806     }
01807   }
01808 }
01809 
01810 nsresult nsBidi::ReorderVisual(const nsBidiLevel *aLevels, PRInt32 aLength, PRInt32 *aIndexMap)
01811 {
01812   PRInt32 start, end, limit, temp;
01813   nsBidiLevel minLevel, maxLevel;
01814 
01815   if(aIndexMap==NULL || !PrepareReorder(aLevels, aLength, aIndexMap, &minLevel, &maxLevel)) {
01816     return NS_OK;
01817   }
01818 
01819   /* nothing to do? */
01820   if(minLevel==maxLevel && (minLevel&1)==0) {
01821     return NS_OK;
01822   }
01823 
01824   /* reorder only down to the lowest odd level */
01825   minLevel|=1;
01826 
01827   /* loop maxLevel..minLevel */
01828   do {
01829     start=0;
01830 
01831     /* loop for all sequences of levels to reorder at the current maxLevel */
01832     for(;;) {
01833       /* look for a sequence of levels that are all at >=maxLevel */
01834       /* look for the first index of such a sequence */
01835       while(start<aLength && aLevels[start]<maxLevel) {
01836         ++start;
01837       }
01838       if(start>=aLength) {
01839         break;  /* no more such runs */
01840       }
01841 
01842       /* look for the limit of such a sequence (the index behind it) */
01843       for(limit=start; ++limit<aLength && aLevels[limit]>=maxLevel;) {}
01844 
01845       /*
01846        * Swap the entire interval of indexes from start to limit-1.
01847        * We don't need to swap the levels for the purpose of this
01848        * algorithm: the sequence of levels that we look at does not
01849        * move anyway.
01850        */
01851       end=limit-1;
01852       while(start<end) {
01853         temp=aIndexMap[start];
01854         aIndexMap[start]=aIndexMap[end];
01855         aIndexMap[end]=temp;
01856 
01857         ++start;
01858         --end;
01859       }
01860 
01861       if(limit==aLength) {
01862         break;  /* no more such sequences */
01863       } else {
01864         start=limit+1;
01865       }
01866     }
01867   } while(--maxLevel>=minLevel);
01868 
01869   return NS_OK;
01870 }
01871 
01872 PRBool nsBidi::PrepareReorder(const nsBidiLevel *aLevels, PRInt32 aLength,
01873                 PRInt32 *aIndexMap,
01874                 nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel)
01875 {
01876   PRInt32 start;
01877   nsBidiLevel level, minLevel, maxLevel;
01878 
01879   if(aLevels==NULL || aLength<=0) {
01880     return PR_FALSE;
01881   }
01882 
01883   /* determine minLevel and maxLevel */
01884   minLevel=NSBIDI_MAX_EXPLICIT_LEVEL+1;
01885   maxLevel=0;
01886   for(start=aLength; start>0;) {
01887     level=aLevels[--start];
01888     if(level>NSBIDI_MAX_EXPLICIT_LEVEL+1) {
01889       return PR_FALSE;
01890     }
01891     if(level<minLevel) {
01892       minLevel=level;
01893     }
01894     if(level>maxLevel) {
01895       maxLevel=level;
01896     }
01897   }
01898   *aMinLevel=minLevel;
01899   *aMaxLevel=maxLevel;
01900 
01901   /* initialize the index map */
01902   for(start=aLength; start>0;) {
01903     --start;
01904     aIndexMap[start]=start;
01905   }
01906 
01907   return PR_TRUE;
01908 }
01909 
01910 #ifdef FULL_BIDI_ENGINE
01911 /* API functions for logical<->visual mapping ------------------------------- */
01912 
01913 nsresult nsBidi::GetVisualIndex(PRInt32 aLogicalIndex, PRInt32* aVisualIndex) {
01914   if(aLogicalIndex<0 || mLength<=aLogicalIndex) {
01915     return NS_ERROR_INVALID_ARG;
01916   } else {
01917     /* we can do the trivial cases without the runs array */
01918     switch(mDirection) {
01919       case NSBIDI_LTR:
01920         *aVisualIndex = aLogicalIndex;
01921         return NS_OK;
01922       case NSBIDI_RTL:
01923         *aVisualIndex = mLength-aLogicalIndex-1;
01924         return NS_OK;
01925       default:
01926         if(mRunCount<0 && !GetRuns()) {
01927           return NS_ERROR_OUT_OF_MEMORY;
01928         } else {
01929           Run *runs=mRuns;
01930           PRInt32 i, visualStart=0, offset, length;
01931 
01932           /* linear search for the run, search on the visual runs */
01933           for(i=0;; ++i) {
01934             length=runs[i].visualLimit-visualStart;
01935             offset=aLogicalIndex-GET_INDEX(runs[i].logicalStart);
01936             if(offset>=0 && offset<length) {
01937               if(IS_EVEN_RUN(runs[i].logicalStart)) {
01938                 /* LTR */
01939                 *aVisualIndex = visualStart+offset;
01940                 return NS_OK;
01941               } else {
01942                 /* RTL */
01943                 *aVisualIndex = visualStart+length-offset-1;
01944                 return NS_OK;
01945               }
01946             }
01947             visualStart+=length;
01948           }
01949         }
01950     }
01951   }
01952 }
01953 
01954 nsresult nsBidi::GetLogicalIndex(PRInt32 aVisualIndex, PRInt32 *aLogicalIndex)
01955 {
01956   if(aVisualIndex<0 || mLength<=aVisualIndex) {
01957     return NS_ERROR_INVALID_ARG;
01958   } else {
01959     /* we can do the trivial cases without the runs array */
01960     switch(mDirection) {
01961       case NSBIDI_LTR:
01962         *aLogicalIndex = aVisualIndex;
01963         return NS_OK;
01964       case NSBIDI_RTL:
01965         *aLogicalIndex = mLength-aVisualIndex-1;
01966         return NS_OK;
01967       default:
01968         if(mRunCount<0 && !GetRuns()) {
01969           return NS_ERROR_OUT_OF_MEMORY;
01970         } else {
01971           Run *runs=mRuns;
01972           PRInt32 i, runCount=mRunCount, start;
01973 
01974           if(runCount<=10) {
01975             /* linear search for the run */
01976             for(i=0; aVisualIndex>=runs[i].visualLimit; ++i) {}
01977           } else {
01978             /* binary search for the run */
01979             PRInt32 start=0, limit=runCount;
01980 
01981             /* the middle if() will guaranteed find the run, we don't need a loop limit */
01982             for(;;) {
01983               i=(start+limit)/2;
01984               if(aVisualIndex>=runs[i].visualLimit) {
01985                 start=i+1;
01986               } else if(i==0 || aVisualIndex>=runs[i-1].visualLimit) {
01987                 break;
01988               } else {
01989                 limit=i;
01990               }
01991             }
01992           }
01993 
01994           start=runs[i].logicalStart;
01995           if(IS_EVEN_RUN(start)) {
01996             /* LTR */
01997             /* the offset in runs[i] is aVisualIndex-runs[i-1].visualLimit */
01998             if(i>0) {
01999               aVisualIndex-=runs[i-1].visualLimit;
02000             }
02001             *aLogicalIndex = GET_INDEX(start)+aVisualIndex;
02002             return NS_OK;
02003           } else {
02004             /* RTL */
02005             *aLogicalIndex = GET_INDEX(start)+runs[i].visualLimit-aVisualIndex-1;
02006             return NS_OK;
02007           }
02008         }
02009     }
02010   }
02011 }
02012 
02013 nsresult nsBidi::GetLogicalMap(PRInt32 *aIndexMap)
02014 {
02015   nsBidiLevel *levels;
02016   nsresult rv;
02017 
02018   /* GetLevels() checks all of its and our arguments */
02019   rv = GetLevels(&levels);
02020   if(NS_FAILED(rv)) {
02021     return rv;
02022   } else if(aIndexMap==NULL) {
02023     return NS_ERROR_INVALID_ARG;
02024   } else {
02025     return ReorderLogical(levels, mLength, aIndexMap);
02026   }
02027 }
02028 
02029 nsresult nsBidi::GetVisualMap(PRInt32 *aIndexMap)
02030 {
02031   PRInt32* runCount=NULL;
02032   nsresult rv;
02033 
02034   /* CountRuns() checks all of its and our arguments */
02035   rv = CountRuns(runCount);
02036   if(NS_FAILED(rv)) {
02037     return rv;
02038   } else if(aIndexMap==NULL) {
02039     return NS_ERROR_INVALID_ARG;
02040   } else {
02041     /* fill a visual-to-logical index map using the runs[] */
02042     Run *runs=mRuns, *runsLimit=runs+mRunCount;
02043     PRInt32 logicalStart, visualStart, visualLimit;
02044 
02045     visualStart=0;
02046     for(; runs<runsLimit; ++runs) {
02047       logicalStart=runs->logicalStart;
02048       visualLimit=runs->visualLimit;
02049       if(IS_EVEN_RUN(logicalStart)) {
02050         do { /* LTR */
02051           *aIndexMap++ = logicalStart++;
02052         } while(++visualStart<visualLimit);
02053       } else {
02054         REMOVE_ODD_BIT(logicalStart);
02055         logicalStart+=visualLimit-visualStart;  /* logicalLimit */
02056         do { /* RTL */
02057           *aIndexMap++ = --logicalStart;
02058         } while(++visualStart<visualLimit);
02059       }
02060       /* visualStart==visualLimit; */
02061     }
02062     return NS_OK;
02063   }
02064 }
02065 
02066 /* reorder a line based on a levels array (L2) ------------------------------ */
02067 
02068 nsresult nsBidi::ReorderLogical(const nsBidiLevel *aLevels, PRInt32 aLength, PRInt32 *aIndexMap)
02069 {
02070   PRInt32 start, limit, sumOfSosEos;
02071   nsBidiLevel minLevel, maxLevel;
02072 
02073   if(aIndexMap==NULL || !PrepareReorder(aLevels, aLength, aIndexMap, &minLevel, &maxLevel)) {
02074     return NS_OK;
02075   }
02076 
02077   /* nothing to do? */
02078   if(minLevel==maxLevel && (minLevel&1)==0) {
02079     return NS_OK;
02080   }
02081 
02082   /* reorder only down to the lowest odd level */
02083   minLevel|=1;
02084 
02085   /* loop maxLevel..minLevel */
02086   do {
02087     start=0;
02088 
02089     /* loop for all sequences of levels to reorder at the current maxLevel */
02090     for(;;) {
02091       /* look for a sequence of levels that are all at >=maxLevel */
02092       /* look for the first index of such a sequence */
02093       while(start<aLength && aLevels[start]<maxLevel) {
02094         ++start;
02095       }
02096       if(start>=aLength) {
02097         break;  /* no more such sequences */
02098       }
02099 
02100       /* look for the limit of such a sequence (the index behind it) */
02101       for(limit=start; ++limit<aLength && aLevels[limit]>=maxLevel;) {}
02102 
02103       /*
02104        * sos=start of sequence, eos=end of sequence
02105        *
02106        * The closed (inclusive) interval from sos to eos includes all the logical
02107        * and visual indexes within this sequence. They are logically and
02108        * visually contiguous and in the same range.
02109        *
02110        * For each run, the new visual index=sos+eos-old visual index;
02111        * we pre-add sos+eos into sumOfSosEos ->
02112        * new visual index=sumOfSosEos-old visual index;
02113        */
02114       sumOfSosEos=start+limit-1;
02115 
02116       /* reorder each index in the sequence */
02117       do {
02118         aIndexMap[start]=sumOfSosEos-aIndexMap[start];
02119       } while(++start<limit);
02120 
02121       /* start==limit */
02122       if(limit==aLength) {
02123         break;  /* no more such sequences */
02124       } else {
02125         start=limit+1;
02126       }
02127     }
02128   } while(--maxLevel>=minLevel);
02129 
02130   return NS_OK;
02131 }
02132 
02133 nsresult nsBidi::InvertMap(const PRInt32 *aSrcMap, PRInt32 *aDestMap, PRInt32 aLength)
02134 {
02135   if(aSrcMap!=NULL && aDestMap!=NULL) {
02136     aSrcMap+=aLength;
02137     while(aLength>0) {
02138       aDestMap[*--aSrcMap]=--aLength;
02139     }
02140   }
02141   return NS_OK;
02142 }
02143 
02144 #endif // FULL_BIDI_ENGINE
02145 PRInt32 nsBidi::doWriteReverse(const PRUnichar *src, PRInt32 srcLength,
02146                                PRUnichar *dest, PRUint16 options) {
02147   /*
02148    * RTL run -
02149    *
02150    * RTL runs need to be copied to the destination in reverse order
02151    * of code points, not code units, to keep Unicode characters intact.
02152    *
02153    * The general strategy for this is to read the source text
02154    * in backward order, collect all code units for a code point
02155    * (and optionally following combining characters, see below),
02156    * and copy all these code units in ascending order
02157    * to the destination for this run.
02158    *
02159    * Several options request whether combining characters
02160    * should be kept after their base characters,
02161    * whether Bidi control characters should be removed, and
02162    * whether characters should be replaced by their mirror-image
02163    * equivalent Unicode characters.
02164    */
02165   PRInt32 i, j, destSize;
02166   PRUint32 c;
02167 
02168   /* optimize for several combinations of options */
02169   switch(options&(NSBIDI_REMOVE_BIDI_CONTROLS|NSBIDI_DO_MIRRORING|NSBIDI_KEEP_BASE_COMBINING)) {
02170     case 0:
02171     /*
02172          * With none of the "complicated" options set, the destination
02173          * run will have the same length as the source run,
02174          * and there is no mirroring and no keeping combining characters
02175          * with their base characters.
02176          */
02177       destSize=srcLength;
02178 
02179     /* preserve character integrity */
02180       do {
02181       /* i is always after the last code unit known to need to be kept in this segment */
02182         i=srcLength;
02183 
02184       /* collect code units for one base character */
02185         UTF_BACK_1(src, 0, srcLength);
02186 
02187       /* copy this base character */
02188         j=srcLength;
02189         do {
02190           *dest++=src[j++];
02191         } while(j<i);
02192       } while(srcLength>0);
02193       break;
02194     case NSBIDI_KEEP_BASE_COMBINING:
02195     /*
02196          * Here, too, the destination
02197          * run will have the same length as the source run,
02198          * and there is no mirroring.
02199          * We do need to keep combining characters with their base characters.
02200          */
02201       destSize=srcLength;
02202 
02203     /* preserve character integrity */
02204       do {
02205       /* i is always after the last code unit known to need to be kept in this segment */
02206         i=srcLength;
02207 
02208       /* collect code units and modifier letters for one base character */
02209         do {
02210           UTF_PREV_CHAR(src, 0, srcLength, c);
02211         } while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM));
02212 
02213       /* copy this "user character" */
02214         j=srcLength;
02215         do {
02216           *dest++=src[j++];
02217         } while(j<i);
02218       } while(srcLength>0);
02219       break;
02220     default:
02221     /*
02222          * With several "complicated" options set, this is the most
02223          * general and the slowest copying of an RTL run.
02224          * We will do mirroring, remove Bidi controls, and
02225          * keep combining characters with their base characters
02226          * as requested.
02227          */
02228       if(!(options&NSBIDI_REMOVE_BIDI_CONTROLS)) {
02229         i=srcLength;
02230       } else {
02231       /* we need to find out the destination length of the run,
02232                which will not include the Bidi control characters */
02233         PRInt32 length=srcLength;
02234         PRUnichar ch;
02235 
02236         i=0;
02237         do {
02238           ch=*src++;
02239           if (!IsBidiControl((PRUint32)ch)) {
02240             ++i;
02241           }
02242         } while(--length>0);
02243         src-=srcLength;
02244       }
02245       destSize=i;
02246 
02247     /* preserve character integrity */
02248       do {
02249       /* i is always after the last code unit known to need to be kept in this segment */
02250         i=srcLength;
02251 
02252       /* collect code units for one base character */
02253         UTF_PREV_CHAR(src, 0, srcLength, c);
02254         if(options&NSBIDI_KEEP_BASE_COMBINING) {
02255         /* collect modifier letters for this base character */
02256           while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM)) {
02257             UTF_PREV_CHAR(src, 0, srcLength, c);
02258           }
02259         }
02260 
02261         if(options&NSBIDI_REMOVE_BIDI_CONTROLS && IsBidiControl(c)) {
02262         /* do not copy this Bidi control character */
02263           continue;
02264         }
02265 
02266       /* copy this "user character" */
02267         j=srcLength;
02268         if(options&NSBIDI_DO_MIRRORING) {
02269           /* mirror only the base character */
02270           c = SymmSwap(c);
02271 
02272           PRInt32 k=0;
02273           UTF_APPEND_CHAR_UNSAFE(dest, k, c);
02274           dest+=k;
02275           j+=k;
02276         }
02277         while(j<i) {
02278           *dest++=src[j++];
02279         }
02280       } while(srcLength>0);
02281       break;
02282   } /* end of switch */
02283   return destSize;
02284 }
02285 
02286 nsresult nsBidi::WriteReverse(const PRUnichar *aSrc, PRInt32 aSrcLength, PRUnichar *aDest, PRUint16 aOptions, PRInt32 *aDestSize)
02287 {
02288   if( aSrc==NULL || aSrcLength<0 ||
02289       aDest==NULL
02290     ) {
02291     return NS_ERROR_INVALID_ARG;
02292   }
02293 
02294   /* do input and output overlap? */
02295   if( aSrc>=aDest && aSrc<aDest+aSrcLength ||
02296       aDest>=aSrc && aDest<aSrc+aSrcLength
02297     ) {
02298     return NS_ERROR_INVALID_ARG;
02299   }
02300 
02301   if(aSrcLength>0) {
02302     *aDestSize = doWriteReverse(aSrc, aSrcLength, aDest, aOptions);
02303   }
02304   return NS_OK;
02305 }
02306 
02307 eBidiCategory nsBidi::GetBidiCategory(PRUint32 aChar)
02308 {
02309   eBidiCategory oResult = GetBidiCat(aChar);
02310   if (eBidiCat_CC == oResult)
02311     oResult = (eBidiCategory)(aChar & 0xFF); /* Control codes have special treatment to keep the tables smaller */
02312   return oResult;
02313 }
02314 
02315 PRBool nsBidi::IsBidiCategory(PRUint32 aChar, eBidiCategory aBidiCategory)
02316 {
02317   return (GetBidiCategory(aChar) == aBidiCategory);
02318 }
02319 
02320 #define LRM_CHAR 0x200e
02321 PRBool nsBidi::IsBidiControl(PRUint32 aChar)
02322 {
02323   // This method is used when stripping Bidi control characters for
02324   // display, so it will return TRUE for LRM and RLM as
02325   // well as the characters with category eBidiCat_CC
02326   return (eBidiCat_CC == GetBidiCat(aChar) || ((aChar)&0xfffffe)==LRM_CHAR);
02327 }
02328 
02329 nsCharType nsBidi::GetCharType(PRUint32 aChar)
02330 {
02331   nsCharType oResult;
02332   eBidiCategory bCat = GetBidiCat(aChar);
02333   if (eBidiCat_CC != bCat) {
02334     NS_ASSERTION(bCat < (sizeof(ebc2ucd)/sizeof(nsCharType)), "size mismatch");
02335     if(bCat < (sizeof(ebc2ucd)/sizeof(nsCharType)))
02336       oResult = ebc2ucd[bCat];
02337     else 
02338       oResult = ebc2ucd[0]; // something is very wrong, but we need to return a value
02339   } else {
02340     NS_ASSERTION((aChar-0x202a) < (sizeof(cc2ucd)/sizeof(nsCharType)), "size mismatch");
02341     if((aChar-0x202a) < (sizeof(cc2ucd)/sizeof(nsCharType)))
02342       oResult = cc2ucd[aChar - 0x202a];
02343     else 
02344       oResult = ebc2ucd[0]; // something is very wrong, but we need to return a value
02345   }
02346   return oResult;
02347 }
02348 
02349 PRUint32 nsBidi::SymmSwap(PRUint32 aChar)
02350 {
02351   return Mirrored(aChar);
02352 }
02353 
02354 #endif // IBMBIDI