Back to index

lightning-sunbird  0.9+nobinonly
nsCompressedCharMap.h
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is Mozilla Communicator client code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 2001
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Brian Stell <bstell@netscape.com>
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either of the GNU General Public License Version 2 or later (the "GPL"),
00027  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 
00039 #ifndef NSCOMPRESSEDCHARMAP_H
00040 #define NSCOMPRESSEDCHARMAP_H
00041 #include "prtypes.h"
00042 #include "nsICharRepresentable.h"
00043 
00044 #define ALU_SIZE PR_BITS_PER_LONG
00045 //#define ALU_SIZE 16
00046 //#define ALU_SIZE 32
00047 //#define ALU_SIZE 64
00048 #if (ALU_SIZE==32)
00049 #   define ALU_TYPE                PRUint32
00050 #   define CCMAP_POW2(n)           (1L<<(n))
00051 #   define CCMAP_BITS_PER_ALU_LOG2 5
00052 #elif (ALU_SIZE==64)
00053 #   define ALU_TYPE                PRUint64
00054 #   define CCMAP_POW2(n)           (1L<<(n))
00055 #   define CCMAP_BITS_PER_ALU_LOG2 6
00056 #else
00057 #   define ALU_TYPE                PRUint16
00058 #   define CCMAP_POW2(n)           (1<<(n))
00059 #   define CCMAP_BITS_PER_ALU_LOG2 4
00060 #endif
00061 
00062 
00063 class nsICharRepresentable;
00064 
00065 extern PRUint16* CreateEmptyCCMap();
00066 extern PRUint16* MapToCCMap(PRUint32* aMap);
00067 extern PRUint16* MapperToCCMap(nsICharRepresentable *aMapper);
00068 extern void FreeCCMap(PRUint16* &aMap);
00069 extern PRBool NextNonEmptyCCMapPage(const PRUint16 *, PRUint32 *);
00070 extern PRBool IsSameCCMap(PRUint16* ccmap1, PRUint16* ccmap2);
00071 #ifdef DEBUG
00072 void printCCMap(PRUint16* aCCMap);
00073 #endif
00074 
00075 
00076 // surrogate support extension
00077 extern PRUint16*
00078 MapToCCMapExt(PRUint32* aBmpPlaneMap, PRUint32** aOtherPlaneMaps, PRUint32 aOtherPlaneNum);
00079 
00080 // 
00081 // nsCompressedCharMap
00082 // 
00083 // A Compressed Char Map (CCMap) saves memory by folding all
00084 // the empty portions of the map on top of each other.
00085 //
00086 // Building a Compressed Char Map (CCMap) is more complex than
00087 // accessing it.  We use the nsCompressedCharMap object to 
00088 // build the CCMap. Once nsCompressedCharMap has built the CCMap
00089 // we get a copy of the CCMap and discard the nsCompressedCharMap 
00090 // object.  The CCMap is an array of PRUint16 and is accessed by 
00091 // a macro.
00092 //
00093 // See "Character Map Compression" below for a discussion of
00094 // what the array looks like.
00095 
00096 //
00097 // The maximum size a CCMap:
00098 //  (16 upper pointers) + (16 empty mid pointers) +
00099 //  (16 empty page)     + (16*16 max mid pointers) +
00100 //  (256*16 max pages)  =  4400 PRUint16
00101 #define CCMAP_MAX_LEN (16+16+16+256+4096)
00102 
00103 // non-bmp unicode support extension
00104 #define EXTENDED_UNICODE_PLANES    16
00105 
00106 class nsCompressedCharMap {
00107 public:
00108   nsCompressedCharMap();
00109   ~nsCompressedCharMap();
00110 
00111   PRUint16* NewCCMap();
00112   PRUint16* FillCCMap(PRUint16* aCCMap);
00113   PRUint16  GetSize() {return mUsedLen;};
00114   void      SetChar(PRUint32);
00115   void      SetChars(PRUint16*);
00116   void      SetChars(PRUint16, ALU_TYPE*);
00117   void      SetChars(PRUint32*);
00118   void      Extend() {mExtended = PR_TRUE;} // enable surrogate area
00119 
00120 protected:
00121   union {
00122     PRUint16 mCCMap[CCMAP_MAX_LEN];
00123     ALU_TYPE used_for_align; // do not use; only here to cause
00124                              // alignment
00125   } u;
00126   PRUint16 mUsedLen;   // in PRUint16
00127   PRUint16 mAllOnesPage;
00128 
00129   PRBool mExtended;
00130 
00131   // for surrogate support
00132   PRUint32* mExtMap[EXTENDED_UNICODE_PLANES+1];
00133   PRUint32  mMap[UCS2_MAP_LEN];
00134 };
00135 
00136 //
00137 // Character Map Compression
00138 //
00139 // Each font requires its own 8k charmap.  On a system with 200 
00140 // fonts this would take: 200 * 8K = 1600K memory.
00141 //
00142 // Since most char maps are mostly empty a significant amount
00143 // of memory can be saved by not allocating the unused sections.
00144 //
00145 // If the map has one or more levels of indirection then the
00146 // the empty sections of the map can all be folded to a single
00147 // common empty element. In this way only the non-empty sections 
00148 // need space. Because the empty sections actually point to a
00149 // common empty section every entry in the map can be valid
00150 // without requiring actually allocating space. 
00151 // Some larger CJK fonts have large sections where every bit
00152 // is set.  In the same way that the empty sections are folded
00153 // onto one "empty page", the sections where all bits are set are 
00154 // folded on to one "all bits set page" .
00155 //
00156 // Break up the Unicode range bits 0x0000 - 0xFFFF
00157 // into 3 bit ranges:
00158 //
00159 // upper bits: bit15 - bit12
00160 //   mid bits: bit11 -  bit8
00161 //  page bits:  bit7 -  bit0
00162 //
00163 // within a page, (assumming a 4 byte ALU)
00164 //    bits 7-5 select one of the 8 longs
00165 //    bits 4-0 select one of the 32 bits within the long
00166 //
00167 // There is exactly one upper "pointers" array.
00168 //
00169 // The upper pointers each point to a mid array.  If there are no chars 
00170 // in an upper pointer's block that pointer points to the empty mid.
00171 // Thus all upper pointers are "valid" even if they do not have space
00172 // allocated; eg: the accessor macro does not need to test if the 
00173 // pointer is zero.
00174 //
00175 // Each mid pointer in the mid array points to a page. If there are no
00176 // chars in a mid pointer's page that pointer points to the empty page.
00177 // Thus all mid pointers are "valid" even if they do not have space
00178 // allocated; eg: the accessor macro does not need to test if the 
00179 // pointer is zero.
00180 //
00181 // Since the array will be less than 5K PRUint16 the "pointers" can
00182 // be implemented as 2 byte offsets from the base instead of
00183 // real pointers.
00184 //
00185 // the format of the CCMap is
00186 //     the upper pointers     (16 PRUint16)
00187 //     the empty mid pointers (16 PRUint16)
00188 //     the empty page         (16 PRUint16)
00189 //     non-empty mid pointers and pages as needed
00190 
00191 // One minor note: for a completely empty map it is actually 
00192 // possible to fold the upper, empty mid, and empty page
00193 // on top of each other and make a map of only 32 bytes.
00194 #define CCMAP_EMPTY_SIZE_PER_INT16    16
00195 
00196 // offsets to the empty mid and empty page
00197 #define CCMAP_EMPTY_MID  CCMAP_NUM_UPPER_POINTERS
00198 #define CCMAP_EMPTY_PAGE CCMAP_EMPTY_MID+CCMAP_NUM_MID_POINTERS
00199 
00200 // 
00201 // Because the table is offset based the code can build the table in a
00202 // temp space (max table size on the stack) and then do one alloc of 
00203 // the actual needed size and simply copy over the data.
00204 //
00205 
00206 //
00207 // Compressed Char map surrogate extension 
00208 //
00209 // The design goal of surrogate support extension is to keep efficiency 
00210 // and compatibility of existing compressed charmap operations. Most of 
00211 // existing operation are untouched. For BMP support, very little memory 
00212 // overhead (4 bytes) is added. Runtime efficiency of BMP support is 
00213 // unchanged. 
00214 //
00215 // structure of extended charmap:
00216 //    ccmap flag      1 PRUint16 (PRUint32) , indicates if this is extended or not
00217 //    bmp ccmap size  1 PRUint16 (PRUint32) , the size of BMP ccmap, 
00218 //    BMP ccmap       size varies, 
00219 //    plane index     16 PRUint32, use to index ccmap for non-BMP plane
00220 //    empty ccmap     16 PRUint16, a total empty ccmap
00221 //    non-BMP ccmaps  size varies, other non-empty, non-BMP ccmap
00222 //   
00223 // Changes to basic ccmap 
00224 //  2 PRUint16 (PRUint32 on 64bit machines) are added in the very beginning. 
00225 // One is used to specify the size 
00226 // of the ccmap, the other is used as a flag. But these 2 fields are indexed 
00227 // negatively so that all other operation remain unchanged to keep efficiency. 
00228 // ccmap memory allocation is moved from nsCompressedCharMap::NewCCMap to 
00229 // MapToCCMap.
00230 //
00231 // Extended ccmap 
00232 //  A 16*PRUint32 array was put at the end of basic ccmap, each PRUint32 either 
00233 // pointed to the empty ccmap or a independent plane ccmap. Directly after this 
00234 // array is a empty ccmap. All planes that has no character will share this ccmap. 
00235 // All non-empty plane will have a ccmap. 
00236 //  "MapToCCMapExt" is added to created an extended ccmap, each plane ccmap is 
00237 // created the same as basic one, but without 2 additional fields.
00238 //  "HasGlyphExt" is used to access extended ccmap, it first need to locate the 
00239 // plane ccmap, and then operated the same way as "HasGlyph". 
00240 //
00241 // Compatibility between old and new one
00242 // Because extended ccmap include an exactly identical basic ccmap in its head, 
00243 // basic ccmap operation (HasGlyph) can be applied on extended ccmap without 
00244 // problem. 
00245 // Because basic ccmap is now have a flag to indicate if it is a extended one, 
00246 // Extended ccmap operation (HasGlyphExt) can check the flag before it does 
00247 // extended ccmap specific operation. So HasGlyphExt can be applied to basic ccmap 
00248 // too.  
00249 //
00250 
00251 // Page bits
00252 //
00253 #define CCMAP_BITS_PER_PAGE_LOG2    8
00254 #define CCMAP_BITS_PER_PAGE         CCMAP_POW2(CCMAP_BITS_PER_PAGE_LOG2)
00255 #define CCMAP_BIT_INDEX(c)          ((c) & PR_BITMASK(CCMAP_BITS_PER_ALU_LOG2))
00256 #define CCMAP_ALU_INDEX(c)          (((c)>>CCMAP_BITS_PER_ALU_LOG2) \
00257                & PR_BITMASK(CCMAP_BITS_PER_PAGE_LOG2 - CCMAP_BITS_PER_ALU_LOG2))
00258 
00259 #define CCMAP_PAGE_MASK             PR_BITMASK(CCMAP_BITS_PER_PAGE_LOG2)
00260 #define CCMAP_NUM_PRUINT16S_PER_PAGE \
00261                          (CCMAP_BITS_PER_PAGE/CCMAP_BITS_PER_PRUINT16)
00262 // one bit per char
00263 #define CCMAP_NUM_ALUS_PER_PAGE     (CCMAP_BITS_PER_PAGE/CCMAP_BITS_PER_ALU)
00264 #define CCMAP_NUM_UCHARS_PER_PAGE   CCMAP_BITS_PER_PAGE
00265 
00266 //
00267 // Mid bits
00268 //
00269 #define CCMAP_BITS_PER_MID_LOG2     4
00270 #define CCMAP_MID_INDEX(c)          \
00271       (((c)>>CCMAP_BITS_PER_PAGE_LOG2) & PR_BITMASK(CCMAP_BITS_PER_MID_LOG2))
00272 #define CCMAP_NUM_MID_POINTERS    CCMAP_POW2(CCMAP_BITS_PER_MID_LOG2)
00273 #define CCMAP_NUM_UCHARS_PER_MID    \
00274                CCMAP_POW2(CCMAP_BITS_PER_MID_LOG2+CCMAP_BITS_PER_PAGE_LOG2)
00275 
00276 //
00277 // Upper bits
00278 //
00279 #define CCMAP_BITS_PER_UPPER_LOG2   4
00280 #define CCMAP_UPPER_INDEX(c)        \
00281       (((c)>>(CCMAP_BITS_PER_MID_LOG2+CCMAP_BITS_PER_PAGE_LOG2)) \
00282          & PR_BITMASK(CCMAP_BITS_PER_UPPER_LOG2))
00283 #define CCMAP_NUM_UPPER_POINTERS    CCMAP_POW2(CCMAP_BITS_PER_UPPER_LOG2)
00284 
00285 //
00286 // Misc
00287 //
00288 #define CCMAP_BITS_PER_PRUINT16_LOG2 4
00289 #define CCMAP_BITS_PER_PRUINT32_LOG2 5
00290 
00291 #define CCMAP_BITS_PER_PRUINT16 CCMAP_POW2(CCMAP_BITS_PER_PRUINT16_LOG2)
00292 #define CCMAP_BITS_PER_PRUINT32 CCMAP_POW2(CCMAP_BITS_PER_PRUINT32_LOG2)
00293 #define CCMAP_BITS_PER_ALU      CCMAP_POW2(CCMAP_BITS_PER_ALU_LOG2)
00294 
00295 #define CCMAP_ALUS_PER_PRUINT32  (CCMAP_BITS_PER_PRUINT32/CCMAP_BITS_PER_ALU)
00296 #define CCMAP_PRUINT32S_PER_ALU  (CCMAP_BITS_PER_ALU/CCMAP_BITS_PER_PRUINT32)
00297 #define CCMAP_PRUINT32S_PER_PAGE (CCMAP_BITS_PER_PAGE/CCMAP_BITS_PER_PRUINT32)
00298 
00299 #define CCMAP_ALU_MASK       PR_BITMASK(CCMAP_BITS_PER_ALU_LOG2)
00300 #define CCMAP_ALUS_PER_PAGE  CCMAP_POW2(CCMAP_BITS_PER_PAGE_LOG2 \
00301                                        - CCMAP_BITS_PER_ALU_LOG2)
00302 #define NUM_UNICODE_CHARS    CCMAP_POW2(CCMAP_BITS_PER_UPPER_LOG2 \
00303                                        +CCMAP_BITS_PER_MID_LOG2 \
00304                                        +CCMAP_BITS_PER_PAGE_LOG2)
00305 #define CCMAP_TOTAL_PAGES    CCMAP_POW2(CCMAP_BITS_PER_UPPER_LOG2 \
00306                                        +CCMAP_BITS_PER_MID_LOG2)
00307 
00308 #define CCMAP_BEGIN_AT_START_OF_MAP 0xFFFFFFFF
00309 
00310 //
00311 // Finally, build up the macro to test the bit for a given char
00312 //
00313 
00314 // offset from base to mid array
00315 #define CCMAP_TO_MID(m,c) ((m)[CCMAP_UPPER_INDEX(c)])
00316 
00317 // offset from base to page
00318 #define CCMAP_TO_PAGE(m,c) ((m)[CCMAP_TO_MID((m),(c)) + CCMAP_MID_INDEX(c)])
00319 
00320 // offset from base to alu
00321 #define CCMAP_TO_ALU(m,c) \
00322           (*((ALU_TYPE*)(&((m)[CCMAP_TO_PAGE((m),(c))])) + CCMAP_ALU_INDEX(c)))
00323 
00324 // test the bit
00325 #define CCMAP_HAS_CHAR(m,c) (((CCMAP_TO_ALU(m,c))>>CCMAP_BIT_INDEX(c)) & 1)
00326 
00327 // unset the bit
00328 #define CCMAP_UNSET_CHAR(m,c) (CCMAP_TO_ALU(m,c) &= ~(CCMAP_POW2(CCMAP_BIT_INDEX(c))))
00329 
00330 #define CCMAP_SIZE(m) (*((m)-1))
00331 #define CCMAP_FLAG(m) (*((m)-2))
00332 #define CCMAP_EXTRA    (sizeof(ALU_TYPE)/sizeof(PRUint16)>2? sizeof(ALU_TYPE)/sizeof(PRUint16): 2)
00333 #define CCMAP_SURROGATE_FLAG         0x0001  
00334 #define CCMAP_NONE_FLAG              0x0000
00335 
00336 // get plane number from ccmap, bmp excluded, so plane 1's number is 0.
00337 #define CCMAP_PLANE_FROM_SURROGATE(h)  ((((PRUint16)(h) - (PRUint16)0xd800) >> 6) + 1)
00338 
00339 // same as above, but get plane number from a ucs4 char
00340 #define CCMAP_PLANE(u)  ((((PRUint32)(u))>>16))
00341 
00342 // scalar value inside the plane
00343 #define CCMAP_INPLANE_OFFSET(h, l) (((((PRUint16)(h) - (PRUint16)0xd800) & 0x3f) << 10) + ((PRUint16)(l) - (PRUint16)0xdc00))
00344 
00345 // get ccmap for that plane
00346 #define CCMAP_FOR_PLANE_EXT(m, i)  ((m) + ((PRUint32*)((m) + CCMAP_SIZE(m)))[(i)-1])
00347 
00348 // test the bit for surrogate pair
00349 #define CCMAP_HAS_CHAR_EXT2(m, h, l)  (CCMAP_FLAG(m) & CCMAP_SURROGATE_FLAG && \
00350                                       CCMAP_HAS_CHAR(CCMAP_FOR_PLANE_EXT((m), CCMAP_PLANE_FROM_SURROGATE(h)), CCMAP_INPLANE_OFFSET(h, l)))
00351 // test the bit for a character in UCS4
00352 #define CCMAP_HAS_CHAR_EXT(m, ucs4)  (((ucs4)&0xffff0000) ?  \
00353                                       (CCMAP_FLAG(m) & CCMAP_SURROGATE_FLAG) && CCMAP_HAS_CHAR(CCMAP_FOR_PLANE_EXT((m), CCMAP_PLANE(ucs4)), (ucs4) & 0xffff) : \
00354                                       CCMAP_HAS_CHAR(m, (PRUnichar)(ucs4)) )
00355 
00356 // macros to ensure that the array defining a pre-compiled CCMap starts
00357 // at an ALU_TYPE boundary instead of just a PRUint16 boundary.
00358 // When invoking the macro, 'typequal' should be either 'const' 
00359 // or empty (NULL). see bug 224337.
00360      
00361 #define DEFINE_ANY_CCMAP(var, extra, typequal)              \
00362 static typequal union {                                     \
00363   PRUint16 array[var ## _SIZE];                             \
00364   ALU_TYPE align;                                           \
00365 } var ## Union =                                            \
00366 {                                                           \
00367   { var ## _INITIALIZER }                                   \
00368 };                                                          \
00369 static typequal PRUint16* var = var ## Union.array + extra
00370 
00371 #define DEFINE_CCMAP(var, typequal)   DEFINE_ANY_CCMAP(var, 0, typequal)
00372 #define DEFINE_X_CCMAP(var, typequal) DEFINE_ANY_CCMAP(var, CCMAP_EXTRA, typequal)
00373 
00374 #endif // NSCOMPRESSEDCHARMAP_H