Back to index

lightning-sunbird  0.9+nobinonly
nsTextTransformer.h
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is Mozilla Communicator client code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either of the GNU General Public License Version 2 or later (the "GPL"),
00026  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 #ifndef nsTextTransformer_h___
00038 #define nsTextTransformer_h___
00039 
00040 #include "nsTextFragment.h"
00041 #include "nsISupports.h"
00042 #include "nsPresContext.h"
00043 #include "nsIObserver.h"
00044 #ifdef IBMBIDI
00045 #include "nsBidi.h"
00046 #include "nsBidiUtils.h"
00047 #endif
00048 
00049 class nsIContent;
00050 class nsIFrame;
00051 class nsILineBreaker;
00052 class nsIWordBreaker;
00053 
// Special characters the text transformer knows about, given as decimal
// Unicode code points. Where one is known, the HTML entity is noted.
// XXX I'm sure there are other special characters
#define CH_NBSP   160   // U+00A0 (non-breaking space, going by the name)
#define CH_ENSP   8194  // &ensp;   &#8194;  en space,   U+2002
#define CH_EMSP   8195  // &emsp;   &#8195;  em space,   U+2003
// Was 8291 (U+2063), which contradicted the documented entity value
// &#8201; / U+2009 -- a digit transposition.
#define CH_THINSP 8201  // &thinsp; &#8201;  thin space, U+2009
#define CH_SHY    173   // U+00AD soft hyphen (discretionary hyphen)
00060 
00061 #ifdef IBMBIDI
// Bidirectional formatting characters (Unicode code points).
#define CH_LRM  0x200E  // &lrm; &#8206; left-to-right mark (RFC 2070)
#define CH_RLM  0x200F  // &rlm; &#8207; right-to-left mark (RFC 2070)
#define CH_LRE  0x202A  // &#8234; left-to-right embedding
#define CH_RLE  0x202B  // &#8235; right-to-left embedding
#define CH_PDF  0x202C  // &#8236; pop directional format
#define CH_LRO  0x202D  // &#8237; left-to-right override
#define CH_RLO  0x202E  // &#8238; right-to-left override

// True when _ch lies in one of the two contiguous ranges above:
// [CH_LRM, CH_RLM] or [CH_LRE, CH_RLO]. Note that _ch is evaluated
// more than once, so do not pass an expression with side effects.
#define IS_BIDI_CONTROL(_ch) \
  ((CH_LRM <= (_ch) && (_ch) <= CH_RLM) || \
   (CH_LRE <= (_ch) && (_ch) <= CH_RLO))
00073 #endif // IBMBIDI
00074 
// Characters that are stripped out of the text. There are only a couple
// of them for now; if the list grows, switch to a bitset lookup.
//   CH_SHY - soft hyphen (discretionary hyphen)
//   '\r'   - carriage return
//   bidi formatting codes (only when built with IBMBIDI)
// The argument is evaluated more than once.
#ifndef IBMBIDI
#define IS_DISCARDED(_ch) \
  ((CH_SHY == (_ch)) || ('\r' == (_ch)))
#else
#define IS_DISCARDED(_ch) \
  ((CH_SHY == (_ch)) || ('\r' == (_ch)) || IS_BIDI_CONTROL(_ch))
#endif
00086 
// True when none of bits 7..15 of ch are set, i.e. the value is at most 127.
// The argument is parenthesized so that expressions built from operators
// that bind looser than '&' (such as 'a | b') are masked as a whole.
#define IS_ASCII_CHAR(ch) ((((ch)) & 0xff80) == 0)
00088 
// Number of characters in nsAutoTextBuffer's built-in buffer
// (used to be 256).
#define NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE 128

// Flag bits. Each value below is a distinct single-bit mask.

// Indicates whether the transformed text should be left as ascii
#define NS_TEXT_TRANSFORMER_LEAVE_AS_ASCII            0x01

// Set if at any point during GetNextWord or GetPrevWord we run across
// a multibyte (> 127) unicode character.
#define NS_TEXT_TRANSFORMER_HAS_MULTIBYTE             0x02

// The text in the transform buffer is ascii
#define NS_TEXT_TRANSFORMER_TRANSFORMED_TEXT_IS_ASCII 0x04

#ifdef IBMBIDI
// The text in the transform buffer needs Arabic shaping
#define NS_TEXT_TRANSFORMER_DO_ARABIC_SHAPING         0x08

// The text in the transform buffer needs numeric shaping
#define NS_TEXT_TRANSFORMER_DO_NUMERIC_SHAPING        0x10
#endif
00108 
00109 // A growable text buffer that tries to avoid using malloc by having a
00110 // builtin buffer. Ideally used as an automatic variable.
00111 class nsAutoTextBuffer {
00112 public:
00113   nsAutoTextBuffer();
00114   ~nsAutoTextBuffer();
00115 
00116   nsresult GrowBy(PRInt32 aAtLeast, PRBool aCopyToHead = PR_TRUE);
00117 
00118   nsresult GrowTo(PRInt32 aNewSize, PRBool aCopyToHead = PR_TRUE);
00119 
00120   PRUnichar* GetBuffer() { return mBuffer; }
00121   PRUnichar* GetBufferEnd() { return mBuffer + mBufferLen; }
00122   PRInt32 GetBufferLength() const { return mBufferLen; }
00123 
00124   PRUnichar* mBuffer;
00125   PRInt32 mBufferLen;
00126   PRUnichar mAutoBuffer[NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE];
00127 };
00128 
00129 //----------------------------------------
00130 
00151 class nsTextTransformer {
00152 public:
00153   // Note: The text transformer does not hold a reference to the line
00154   // breaker and work breaker objects
00155   nsTextTransformer(nsILineBreaker* aLineBreaker,
00156                     nsIWordBreaker* aWordBreaker,
00157                     nsPresContext* aPresContext);
00158 
00159   ~nsTextTransformer();
00160 
00173   nsresult Init(nsIFrame* aFrame,
00174                 nsIContent* aContent,
00175                 PRInt32 aStartingOffset,
00176                 PRBool aForceArabicShaping = PR_FALSE,
00177                 PRBool aLeaveAsAscii = PR_FALSE);
00178 
00179   PRInt32 GetContentLength() const {
00180     return mFrag ? mFrag->GetLength() : 0;
00181   }
00182 
00183   PRUnichar GetContentCharAt(PRInt32 aIndex) {
00184     return (mFrag && aIndex < mFrag->GetLength()) ? mFrag->CharAt(aIndex) : 0;
00185   }
00186 
00199   PRUnichar* GetNextWord(PRBool aInWord,
00200                          PRInt32* aWordLenResult,
00201                          PRInt32* aContentLenResult,
00202                          PRBool* aIsWhitespaceResult,
00203                          PRBool* aWasTransformed,
00204                          PRBool aResetTransformBuf = PR_TRUE,
00205                          PRBool aForLineBreak = PR_TRUE,
00206                          PRBool aIsKeyboardSelect = PR_FALSE);
00207 
00208   PRUnichar* GetPrevWord(PRBool aInWord,
00209                          PRInt32* aWordLenResult,
00210                          PRInt32* aContentLenResult,
00211                          PRBool* aIsWhitespaceResult,
00212                          PRBool aForLineBreak = PR_TRUE,
00213                          PRBool aIsKeyboardSelect = PR_FALSE);
00214 
00215   
00216   // Returns PR_TRUE if the LEAVE_AS_ASCII flag is set
00217   PRBool LeaveAsAscii() const {
00218       return (mFlags & NS_TEXT_TRANSFORMER_LEAVE_AS_ASCII) != 0;
00219   }
00220 
00221   // Returns PR_TRUE if any of the characters are multibyte (greater than 127)
00222   PRBool HasMultibyte() const {
00223       return (mFlags & NS_TEXT_TRANSFORMER_HAS_MULTIBYTE) != 0;
00224   }
00225 
00226   // Returns PR_TRUE if the text in the transform bufer is ascii (i.e., it
00227   // doesn't contain any multibyte characters)
00228   PRBool TransformedTextIsAscii() const {
00229       return (mFlags & NS_TEXT_TRANSFORMER_TRANSFORMED_TEXT_IS_ASCII) != 0;
00230   }
00231 
00232 #ifdef IBMBIDI
00233   // Returns PR_TRUE if the text in the transform bufer needs Arabic
00234   // shaping
00235   PRBool NeedsArabicShaping() const {
00236     return (mFlags & NS_TEXT_TRANSFORMER_DO_ARABIC_SHAPING) != 0;
00237   }
00238   
00239   // Returns PR_TRUE if the text in the transform bufer needs numeric
00240   // shaping
00241   PRBool NeedsNumericShaping() const {
00242     return (mFlags & NS_TEXT_TRANSFORMER_DO_NUMERIC_SHAPING) != 0;
00243   }
00244 #endif
00245 
00246   // Set or clears the LEAVE_AS_ASCII bit
00247   void SetLeaveAsAscii(PRBool aValue) {
00248       aValue ? mFlags |= NS_TEXT_TRANSFORMER_LEAVE_AS_ASCII : 
00249                mFlags &= (~NS_TEXT_TRANSFORMER_LEAVE_AS_ASCII);
00250   }
00251       
00252   // Set or clears the NS_TEXT_TRANSFORMER_HAS_MULTIBYTE bit
00253   void SetHasMultibyte(PRBool aValue) {
00254       aValue ? mFlags |= NS_TEXT_TRANSFORMER_HAS_MULTIBYTE : 
00255                mFlags &= (~NS_TEXT_TRANSFORMER_HAS_MULTIBYTE);
00256   }
00257 
00258   // Set or clears the NS_TEXT_TRANSFORMER_TRANSFORMED_TEXT_IS_ASCII bit
00259   void SetTransformedTextIsAscii(PRBool aValue) {
00260       aValue ? mFlags |= NS_TEXT_TRANSFORMER_TRANSFORMED_TEXT_IS_ASCII : 
00261                mFlags &= (~NS_TEXT_TRANSFORMER_TRANSFORMED_TEXT_IS_ASCII);
00262   }
00263 
00264 #ifdef IBMBIDI
00265   // Set or clears the NS_TEXT_TRANSFORMER_TRANSFORMED_DO_ARABIC_SHAPING bit
00266   void SetNeedsArabicShaping(PRBool aValue) {
00267     aValue ? mFlags |= NS_TEXT_TRANSFORMER_DO_ARABIC_SHAPING : 
00268              mFlags &= (~NS_TEXT_TRANSFORMER_DO_ARABIC_SHAPING);
00269   }
00270 
00271   // Set or clears the NS_TEXT_TRANSFORMER_TRANSFORMED_DO_NUMERIC_SHAPING bit
00272   void SetNeedsNumericShaping(PRBool aValue) {
00273     aValue ? mFlags |= NS_TEXT_TRANSFORMER_DO_NUMERIC_SHAPING : 
00274                        mFlags &= (~NS_TEXT_TRANSFORMER_DO_NUMERIC_SHAPING);
00275   }
00276 #endif
00277   
00278   PRUnichar* GetWordBuffer() {
00279     return mTransformBuf.GetBuffer();
00280   }
00281 
00282   PRInt32 GetWordBufferLength() const {
00283     return mTransformBuf.GetBufferLength();
00284   }
00285 
00286   static PRBool GetWordSelectEatSpaceAfter() {
00287        return sWordSelectEatSpaceAfter;
00288   }
00289   
00290   static PRBool GetWordSelectStopAtPunctuation() {
00291        return sWordSelectStopAtPunctuation;
00292   }
00293   
00294   static nsresult Initialize();
00295   static void Shutdown();
00296 
00297 protected:
00298   // Helper methods for GetNextWord (F == forwards)
00299   PRInt32 ScanNormalWhiteSpace_F(PRInt32 aFragLen);
00300   PRInt32 ScanNormalAsciiText_F(PRInt32  aFragLen,
00301                                 PRInt32* aWordLen,
00302                                 PRBool*  aWasTransformed);
00303   PRInt32 ScanNormalAsciiText_F_ForWordBreak(PRInt32  aFragLen,
00304                                              PRInt32* aWordLen,
00305                                              PRBool*  aWasTransformed,
00306                                              PRBool aIsKeyboardSelect);
00307   PRInt32 ScanNormalUnicodeText_F(PRInt32  aFragLen,
00308                                   PRBool   aForLineBreak,
00309                                   PRInt32* aWordLen,
00310                                   PRBool*  aWasTransformed);
00311   PRInt32 ScanPreWrapWhiteSpace_F(PRInt32  aFragLen,
00312                                   PRInt32* aWordLen);
00313   PRInt32 ScanPreAsciiData_F(PRInt32  aFragLen,
00314                              PRInt32* aWordLen,
00315                              PRBool*  aWasTransformed);
00316   PRInt32 ScanPreData_F(PRInt32  aFragLen,
00317                         PRInt32* aWordLen,
00318                         PRBool*  aWasTransformed);
00319 
00320   // Helper methods for GetPrevWord (B == backwards)
00321   PRInt32 ScanNormalWhiteSpace_B();
00322   PRInt32 ScanNormalAsciiText_B(PRInt32* aWordLen, PRBool aIsKeyboardSelect);
00323   PRInt32 ScanNormalUnicodeText_B(PRBool aForLineBreak, PRInt32* aWordLen);
00324   PRInt32 ScanPreWrapWhiteSpace_B(PRInt32* aWordLen);
00325   PRInt32 ScanPreData_B(PRInt32* aWordLen);
00326 
00327   // Converts the current text in the transform buffer from ascii to
00328   // Unicode
00329   void ConvertTransformedTextToUnicode();
00330   
00331   void LanguageSpecificTransform(PRUnichar* aText, PRInt32 aLen,
00332                                  PRBool* aWasTransformed);
00333 
00334   void DoArabicShaping(PRUnichar* aText, PRInt32& aTextLength, PRBool* aWasTransformed);
00335 
00336   void DoNumericShaping(PRUnichar* aText, PRInt32& aTextLength, PRBool* aWasTransformed);
00337 
00338   // The text fragment that we are looking at
00339   const nsTextFragment* mFrag;
00340 
00341   // Our current offset into the text fragment
00342   PRInt32 mOffset;
00343 
00344   // The frame's white-space mode we are using to process text
00345   enum {
00346     eNormal,
00347     ePreformatted,
00348     ePreWrap
00349   } mMode;
00350   
00351   nsILineBreaker* mLineBreaker;  // [WEAK]
00352 
00353   nsIWordBreaker* mWordBreaker;  // [WEAK]
00354 
00355   nsLanguageSpecificTransformType mLanguageSpecificTransformType;
00356 
00357 #ifdef IBMBIDI
00358   nsPresContext* mPresContext;
00359   nsCharType      mCharType;
00360 #endif
00361 
00362   // Buffer used to hold the transformed words from GetNextWord or
00363   // GetPrevWord
00364   nsAutoTextBuffer mTransformBuf;
00365 
00366   // Our current position within the buffer. Used when iterating the next
00367   // word, because we may be requested to buffer across multiple words
00368   PRInt32 mBufferPos;
00369   
00370   // The frame's text-transform state
00371   PRUint8 mTextTransform;
00372 
00373   // Flag for controling mLeaveAsAscii, mHasMultibyte, mTransformedTextIsAscii
00374   PRUint8 mFlags;
00375 
00376   // prefs used to configure the double-click word selection behavior
00377   static int WordSelectPrefCallback(const char* aPref, void* aClosure);
00378   static PRBool sWordSelectListenerPrefChecked;  // have we read the prefs yet?
00379   static PRBool sWordSelectEatSpaceAfter;        // should we include whitespace up to next word? 
00380   static PRBool sWordSelectStopAtPunctuation;    // should we stop at punctuation?
00381 
00382 #ifdef DEBUG
00383   static void SelfTest(nsILineBreaker* aLineBreaker,
00384                        nsIWordBreaker* aWordBreaker,
00385                        nsPresContext* aPresContext);
00386 
00387   nsresult Init2(const nsTextFragment* aFrag,
00388                  PRInt32 aStartingOffset,
00389                  PRUint8 aWhiteSpace,
00390                  PRUint8 aTextTransform);
00391 #endif
00392 };
00393 
00394 #endif /* nsTextTransformer_h___ */