Back to index

lightning-sunbird  0.9+nobinonly
nsUnicodeToJamoTTF.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* vim:expandtab:shiftwidth=2:tabstop=2:
00003  */
00004 /* ***** BEGIN LICENSE BLOCK *****
00005  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00006  *
00007  * The contents of this file are subject to the Mozilla Public License Version
00008  * 1.1 (the "License"); you may not use this file except in compliance with
00009  * the License. You may obtain a copy of the License at
00010  * http://www.mozilla.org/MPL/
00011  *
00012  * Software distributed under the License is distributed on an "AS IS" basis,
00013  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00014  * for the specific language governing rights and limitations under the
00015  * License.
00016  *
00017  * The Original Code is Mozilla Communicator client code.
00018  *
00019  * The Initial Developer of the Original Code is
00020  * Netscape Communications Corp.
00021  * Portions created by the Initial Developer are Copyright (C) 2003
00022  * the Initial Developer. All Rights Reserved.
00023  *
00024  * Contributor(s): 
00025  *   Jungshik Shin <jshin@mailaps.org> 
00026  *   Frank Tang <ftang@netscape.com>
00027  *   Jin-Hwan Cho <chofchof@ktug.or.kr>
00028  *   Won-Kyu Park  <wkpark@chem.skku.ac.kr>
00029  *
00030  * Alternatively, the contents of this file may be used under the terms of
00031  * either the GNU General Public License Version 2 or later (the "GPL"), or
00032  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00033  * in which case the provisions of the GPL or the LGPL are applicable instead
00034  * of those above. If you wish to allow use of your version of this file only
00035  * under the terms of either the GPL or the LGPL, and not to allow others to
00036  * use your version of this file under the terms of the MPL, indicate your
00037  * decision by deleting the provisions above and replace them with the notice
00038  * and other provisions required by the GPL or the LGPL. If you do not delete
00039  * the provisions above, a recipient may use your version of this file under
00040  * the terms of any one of the MPL, the GPL or the LGPL.
00041  *
00042  * ***** END LICENSE BLOCK ***** */
00043 
00044 /*
00045  * - Purposes:
00046  *    1. Enable rendering  over 1.5 million Hangul syllables with 
00047  *       UnBatang and other fonts made available by UN KoaungHi
00048  *       and PARK Won-kyu.
00049  */
00050 #include "nsUCvKODll.h"
00051 #include "nsUnicodeToJamoTTF.h"
00052 #include "prmem.h"
00053 #include "nsXPIDLString.h"
00054 #include "prtypes.h"
00055 #include "nscore.h"
00056 #include "nsISupportsUtils.h"
00057 #include "nsCOMPtr.h"
00058 #include "nsIUnicodeDecoder.h"
00059 #include "nsServiceManagerUtils.h"
00060 #include "nsICharsetConverterManager.h"
00061 #include "nsICharRepresentable.h"
00062 #include <string.h>
00063 
// One entry of a jamo cluster lookup table (the tables themselves are pulled
// in from jamoclusters.h right after this definition).
00064 typedef struct { 
// Lookup key: a sequence of two or three jamos. JamoSrchReplace() fills each
// slot with the jamo's value minus the block offset it was given, and leaves
// seq[2] at zero when it is matching two-jamo clusters.
00065   PRUint8 seq[3];
// Replacement for the whole sequence, stored in the same offset-relative
// form; JamoSrchReplace() adds the block offset back when writing it.
00066   PRUint8 liga;
00067 } JamoNormMap; 
00068 
00069 // cluster maps 
00070 #include "jamoclusters.h"
00071 
// Constants for Hangul Jamo/syllable handling taken from Unicode 3.0
// section 3.11

#define LBASE 0x1100   // first leading consonant (L)
#define VBASE 0x1161   // first vowel (V) used in precomposed syllables
#define TBASE 0x11A7   // one BEFORE the first trailing consonant (T);
                       // index 0 means "no trailing consonant"
#define TSTART 0x11A8  // first real trailing consonant
#define SBASE 0xAC00   // first precomposed syllable

#define LCOUNT 19
#define VCOUNT 21
#define TCOUNT 28      // 27 trailing consonants plus the "no T" slot
#define SCOUNT (LCOUNT * VCOUNT * TCOUNT)
#define SEND (SBASE + SCOUNT - 1)


#define LFILL 0x115F   // leading consonant filler
#define VFILL 0x1160   // vowel filler

// Jamo class tests over the whole U+1100 block. Note that the L range
// includes LFILL and the V range starts at VFILL.
#define IS_LC(wc) (LBASE <= (wc) && (wc) <  VFILL)
#define IS_VO(wc) (VFILL <= (wc) && (wc) <  TSTART)
#define IS_TC(wc) (TSTART <= (wc) && (wc) <= 0x11FF)
#define IS_JAMO(wc)   (IS_LC(wc) || IS_VO(wc) || IS_TC(wc))

// Jamos used in modern precomposed syllables
#define IS_SYL_LC(wc) (LBASE <= (wc) && (wc) <  LBASE + LCOUNT)
#define IS_SYL_VO(wc) (VBASE <= (wc) && (wc) <  VBASE + VCOUNT)
// Valid trailing consonants are TBASE+1 .. TBASE+TCOUNT-1. The upper bound
// must be exclusive: TBASE+TCOUNT would give a T index of TCOUNT in
// SYL_FROM_LVT() and silently produce the *next* syllable.
#define IS_SYL_TC(wc) (TBASE <  (wc) && (wc) <  TBASE + TCOUNT)

// Modern precomposed syllables.
#define IS_SYL(wc)   (SBASE <= (wc) && (wc) <= SEND)
#define IS_SYL_WO_TC(wc)  (((wc) - SBASE) % TCOUNT == 0)
#define IS_SYL_WITH_TC(wc)  (((wc) - SBASE) % TCOUNT)

// Compose precomposed syllables out of L, V, and T.
// Pass TBASE as 't' for a syllable without a trailing consonant.
#define SYL_FROM_LVT(l,v,t) (SBASE + \
                             (((l) - LBASE) * VCOUNT + (v) - VBASE) * TCOUNT + \
                             (t) - TBASE)
00110 
00111 // Hangul tone marks
00112 #define HTONE1 0x302E
00113 #define HTONE2 0x302F
00114 
00115 #define IS_TONE(wc) ((wc) == HTONE1 || (wc) == HTONE2)
00116 
00117 // Below are constants for rendering with UnBatang-like fonts.
00118 
// Each jamo class gets its own 0x100-aligned temporary block, so the class of
// a shifted code point can be read off its high byte (see IS_*_EXT below).
00119 #define LC_TMPPOS  0xF000 // temp. block for leading consonants
00120 #define VO_TMPPOS  0xF100 // temp. block for vowels
00121 #define TC_TMPPOS  0xF200 // temp. block for trailing consonants
// Distance between a temporary block and the first code point of the
// corresponding class in the U+1100 block. composeHangul() subtracts these
// to shift classified jamos back.
00122 #define LC_OFFSET  (LC_TMPPOS-LBASE)
00123 #define VO_OFFSET  (VO_TMPPOS-VFILL)
00124 #define TC_OFFSET  (TC_TMPPOS-TSTART)
00125 
00126 // Jamo class of *temporary* code points   in PUA for UnBatang-like fonts.
00127 #define IS_LC_EXT(wc) ( ((wc) & 0xFF00) == LC_TMPPOS )
00128 #define IS_VO_EXT(wc) ( ((wc) & 0xFF00) == VO_TMPPOS )
00129 #define IS_TC_EXT(wc) ( ((wc) & 0xFF00) == TC_TMPPOS )
00130 
00131 // Glyph code point bases for L,V, and T in  UnBatang-like fonts
// composeHangul() adds a (glyph index * shapes-per-jamo + shape number) term
// to these bases: 6 shapes per L, 2 per V and 4 per T.
00132 #define UP_LBASE 0xE000  // 0xE000 = Lfill, 0xE006 = Kiyeok 
00133 #define UP_VBASE 0xE300  // 0xE300 = Vfill, 0xE302 = Ah  
00134 #define UP_TBASE 0xE404  // 0xE400 = Tfill, 0xE404 = Kiyeok
00135 
00136 // EUC-KR decoder for FillInfo.
// Looked up and cached by GetDecoder() the first time it is needed;
// FillInfoEUCKR() obtains it through that function.
00137 static nsCOMPtr<nsIUnicodeDecoder> gDecoder = 0;
00138   
// Forward declarations of the file-local helper functions used below.
00139 static inline void FillInfoRange     (PRUint32* aInfo, PRUint32 aStart, 
00140                                       PRUint32 aEnd);
00141 static nsresult     JamoNormalize    (const PRUnichar* aInSeq, 
00142                                       PRUnichar** aOutSeq, PRInt32* aLength);
00143 static void         JamosToExtJamos  (PRUnichar* aInSeq,  PRInt32* aLength);
00144 static const JamoNormMap* JamoClusterSearch(JamoNormMap aKey, 
00145                                             const JamoNormMap* aClusters,
00146                                             PRInt16 aClustersSize);
00147 static nsresult     FillInfoEUCKR    (PRUint32 *aInfo, PRUint16 aHigh1, 
00148                                       PRUint16 aHigh2);
00149 
00150 static PRInt32      JamoNormMapComp  (const JamoNormMap& p1, 
00151                                       const JamoNormMap& p2);
00152 static PRInt16      JamoSrchReplace  (const JamoNormMap* aCluster, 
00153                                       PRUint16 aSize, PRUnichar *aIn, 
00154                                       PRInt32* aLength, PRUint16 aOffset);
00155 static nsresult     GetDecoder       (nsIUnicodeDecoder** aDecoder);
00156 static nsresult     ScanDecomposeSyllable (PRUnichar *aIn, PRInt32* aLength, 
00157                                            const PRInt32 aMaxLen);
00158 
00159 //----------------------------------------------------------------------
00160 // Class nsUnicodeToJamoTTF [implementation]
00161   
00162 NS_IMPL_ISUPPORTS2(nsUnicodeToJamoTTF, nsIUnicodeEncoder, nsICharRepresentable)
00163 
00164 NS_IMETHODIMP 
00165 nsUnicodeToJamoTTF::SetOutputErrorBehavior(PRInt32 aBehavior, 
00166                                            nsIUnicharEncoder *aEncoder, 
00167                                            PRUnichar aChar)
00168 {
00169   if (aBehavior == kOnError_CallBack && aEncoder == nsnull)
00170     return NS_ERROR_NULL_POINTER;
00171   mErrEncoder = aEncoder;
00172   mErrBehavior = aBehavior;
00173   mErrChar = aChar;
00174   return NS_OK;
00175 }
00176 
00177 // constructor and destructor
00178 
00179 nsUnicodeToJamoTTF::nsUnicodeToJamoTTF() 
00180 {
00181   mJamos = nsnull;
00182   Reset();
00183 }
00184 
00185 nsUnicodeToJamoTTF::~nsUnicodeToJamoTTF()
00186 {
00187   if (mJamos != nsnull && mJamos != mJamosStatic)
00188     PR_Free(mJamos);
00189 }
00190 
// Character classes used for syllable boundary detection. The enumerator
// values double as row/column indices of gIsBoundary, so their order must
// match the order of the rows and columns in that table.
00191 enum KoCharClass {
00192   KO_CHAR_CLASS_LC,
00193   KO_CHAR_CLASS_VO,  
00194   KO_CHAR_CLASS_TC,  
00195   KO_CHAR_CLASS_SYL1,   // modern precomposed syllable w/o TC (LV type syl.)
00196   KO_CHAR_CLASS_SYL2,   // modern precomposed syllable with TC (LVT type syl.)
00197   KO_CHAR_CLASS_TONE,   // Tone marks 
00198   KO_CHAR_CLASS_NOHANGUL, // Non-Hangul characters.
00199   KO_CHAR_CLASS_NUM
00200 } ;
00201 
// Classify a UTF-16 code unit into one of the KoCharClass values above.
// The tests are tried in order: L jamo, V jamo, T jamo, precomposed syllable
// (split by whether it has a trailing consonant), tone mark; anything else is
// non-Hangul. The argument is evaluated several times, so it must not have
// side effects.
00202 #define CHAR_CLASS(ch) \
00203   (IS_LC(ch) ? KO_CHAR_CLASS_LC   :  \
00204    IS_VO(ch) ? KO_CHAR_CLASS_VO   :  \
00205    IS_TC(ch) ? KO_CHAR_CLASS_TC   :  \
00206    IS_SYL(ch) ?                      \
00207     (IS_SYL_WITH_TC(ch) ? KO_CHAR_CLASS_SYL2 : KO_CHAR_CLASS_SYL1) : \
00208    IS_TONE(ch) ? KO_CHAR_CLASS_TONE : \
00209    KO_CHAR_CLASS_NOHANGUL)
00210 
00211 
00212 // Grapheme boundary checker : See UTR #29 and Unicode 3.2 section 3.11
// Convert() indexes this as [class of the last buffered char][class of the
// incoming char]; a non-zero entry means the buffered cluster is complete and
// is flushed before the incoming character is buffered.
// Column M (tone mark) is zero in every row, so a tone mark never starts a
// new cluster; the tone-after-tone case is handled separately in Convert().
00213 const static PRBool gIsBoundary[KO_CHAR_CLASS_NUM][KO_CHAR_CLASS_NUM] = 
00214 {// L  V  T  S1 S2 M  X
00215   { 0, 0, 1, 0, 0, 0, 1 }, // L  
00216   { 1, 0, 0, 1, 1, 0, 1 }, // V
00217   { 1, 1, 0, 1, 1, 0, 1 }, // T
00218   { 1, 0, 0, 1, 1, 0, 1 }, // S1
00219   { 1, 1, 0, 1, 1, 0, 1 }, // S2
00220   { 1, 1, 1, 1, 1, 0, 1 }, // M
00221   { 1, 1, 1, 1, 1, 0, 1 }  // X
00222 };
00223 
00224 
00225 NS_IMETHODIMP 
00226 nsUnicodeToJamoTTF::Convert(const PRUnichar * aSrc, 
00227                             PRInt32 * aSrcLength, char * aDest, 
00228                             PRInt32 * aDestLength)
00229 {
00230   nsresult rv = NS_OK;
00231   mByteOff = 0;
00232 
00233   // This should never happen, but it happens under MS Windows, somehow...
00234   if (mJamoCount > mJamosMaxLength) 
00235   {
00236     NS_WARNING("mJamoCount > mJamoMaxLength on entering Convert()");
00237     Reset();
00238   }
00239 
00240   for (PRInt32 charOff = 0; charOff < *aSrcLength; charOff++)
00241   {
00242     PRUnichar ch = aSrc[charOff];
00243 
00244     // Syllable boundary check. Ref. : Unicode 3.2 section 3.11 
00245     if (mJamoCount != 0 &&
00246         gIsBoundary[CHAR_CLASS(mJamos[mJamoCount - 1])][CHAR_CLASS(ch)])
00247     {
00248       composeHangul(aDest);
00249       mJamoCount = 0;
00250     }
00251     // Ignore tone marks other than the first in a sequence of tone marks.
00252     else if (mJamoCount != 0 && IS_TONE(mJamos[mJamoCount - 1]) && IS_TONE(ch))
00253     {
00254       --mJamoCount; 
00255       composeHangul(aDest);
00256       mJamoCount = 0;
00257 
00258       // skip over tone marks from the second on in a series.
00259       while (IS_TONE(ch) && ++charOff < *aSrcLength)
00260         ch = aSrc[charOff]; 
00261 
00262       if (!IS_TONE(ch)) 
00263       {
00264         mJamos[mJamoCount++] = ch; 
00265         continue;
00266       }
00267       else
00268         break;
00269     }
00270 
00271     if (mJamoCount == mJamosMaxLength)
00272     {
00273       mJamosMaxLength++;
00274       if (mJamos == mJamosStatic)
00275       {
00276         mJamos = (PRUnichar *) PR_Malloc(sizeof(PRUnichar) * mJamosMaxLength);
00277         if (!mJamos)
00278           return  NS_ERROR_OUT_OF_MEMORY;
00279         memcpy(mJamos, mJamosStatic, sizeof(PRUnichar) * mJamoCount);
00280       }
00281       else
00282       {
00283         mJamos = (PRUnichar *) PR_Realloc(mJamos, 
00284                                sizeof(PRUnichar) * mJamosMaxLength);
00285         if (!mJamos)
00286           return  NS_ERROR_OUT_OF_MEMORY;
00287       }
00288     }
00289 
00290     mJamos[mJamoCount++] = ch;
00291   }
00292     
00293   if (mJamoCount != 0)
00294     composeHangul(aDest);
00295   mJamoCount = 0;
00296   *aDestLength = mByteOff;
00297 
00298   return rv;
00299 }
00300 
00301 NS_IMETHODIMP 
00302 nsUnicodeToJamoTTF::Finish(char* aDest, PRInt32* aDestLength)
00303 {
00304   mByteOff = 0;
00305   if (mJamoCount != 0)
00306     composeHangul(aDest);
00307 
00308   *aDestLength = mByteOff;
00309 
00310   mByteOff = 0;
00311   mJamoCount = 0;
00312   return NS_OK;
00313 }
00314 
00315 //================================================================
00316 NS_IMETHODIMP 
00317 nsUnicodeToJamoTTF::Reset()
00318 {
00319 
00320   if (mJamos != nsnull && mJamos != mJamosStatic)
00321     PR_Free(mJamos);
00322   mJamos = mJamosStatic;
00323   mJamosMaxLength = sizeof(mJamosStatic) / sizeof(PRUnichar);
00324   memset(mJamos, 0, sizeof(mJamosStatic));
00325   mJamoCount = 0;
00326   mByteOff = 0;
00327 
00328   return NS_OK;
00329 }
00330 
00331 NS_IMETHODIMP 
00332 nsUnicodeToJamoTTF::GetMaxLength(const PRUnichar * aSrc, PRInt32 aSrcLength,
00333                                  PRInt32 * aDestLength)
00334 {
00335   // a precomposed Hangul syllable can be decomposed into 3 Jamos, each of
00336   // which takes 2bytes. 
00337   *aDestLength = aSrcLength *  6;
00338   return NS_OK;
00339 }
00340 
00341 
00342 NS_IMETHODIMP 
00343 nsUnicodeToJamoTTF::FillInfo(PRUint32* aInfo)
00344 {
00345   FillInfoRange(aInfo, SBASE, SEND);
00346 
00347   PRUnichar i;
00348 
00349   // Hangul Conjoining Jamos
00350   for(i = 0x1100; i<= 0x1159; i++)
00351      SET_REPRESENTABLE(aInfo, i);
00352   SET_REPRESENTABLE(aInfo, 0x115f);
00353   for(i = 0x1160; i <= 0x11a2; i++)
00354      SET_REPRESENTABLE(aInfo, i);
00355   for(i = 0x11a8; i <= 0x11f9; i++)
00356      SET_REPRESENTABLE(aInfo, i);
00357 
00358   // Hangul Tone marks
00359   SET_REPRESENTABLE(aInfo, HTONE1);
00360   SET_REPRESENTABLE(aInfo, HTONE2);
00361 
00362   // UnPark  fonts have US-ASCII chars.
00363   for(i=0x20; i < 0x7f; i++)
00364      SET_REPRESENTABLE(aInfo, i);
00365 
00366   nsresult rv;
00367 
00368   // UnPark fonts have Hanjas and symbols defined in KS X 1001 as well.
00369   
00370   // XXX: Do we need to exclude Cyrillic, Greek letters and some Latin letters 
00371   // included in KS X 1001 as 'symbol characters'? 
00372   // KS X 1001 has only a subset of Greek and Cyrillic alphabets and
00373   // Latin letters with diacritic marks so that including them may
00374   // result in ransom-note like effect if it is listed *before*
00375   // any genuine Greek/Russian/Latin fonts in CSS. 
00376     
00377   // Lead byte range for symbol chars. in EUC-KR : 0xA1 - 0xAF
00378   rv = FillInfoEUCKR(aInfo, 0xA1, 0xAF); 
00379   NS_ENSURE_SUCCESS(rv, rv);
00380 
00381   // Lead byte range for Hanja in EUC-KR : 0xCA - 0xFD.
00382   return FillInfoEUCKR(aInfo, 0xCA, 0xFD); 
00383 }
00384 
// Glyph index of each leading consonant. composeHangul() indexes this with
// (L - LBASE), multiplies the result by 6 (shapes per leading consonant),
// adds a shape number and UP_LBASE to get the glyph code point.
00410 const static PRUint8 gUnParkLcGlyphMap[130] = {
00411   1,  2,  4, 12, 14, 20, 36, 42, 46, 62, 70, 85,100,102,108,113,
00412 114,116,120,  5,  6,  7,  8, 13, 23, 26, 34, 35, 39, 41, 43, 44,
00413  45, 47, 48, 49, 50, 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 65,
00414  66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
00415  84, 86, 87, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99,101,104,105,
00416 106,107,109,110,111,112,117,119,122,123,  0,  0,  0,  0,  0,  0,
00417   3,  9, 10, 11, 15, 16, 17, 18, 19, 21, 22, 24, 25, 27, 28, 29,
00418  30, 31, 32, 33, 37, 38, 40, 53, 56, 59, 71, 88, 98,103,115,118,
00419 121, 124
00420 };
00421 
// Glyph index of each vowel. composeHangul() indexes this with (V - VFILL),
// multiplies the result by 2 (shapes per vowel) and adds UP_VBASE, plus 1
// when the syllable has a trailing consonant.
00428 const static PRUint8 gUnParkVoGlyphMap[95] = {
00429    0,  1,  5,  6, 10, 11, 15, 16, 20, 21, 22, 23, 33, 34, 43, 46, 
00430   48, 52, 54, 64, 71, 73,  2,  3,  7,  8, 12, 13, 14, 18, 19, 26, 
00431   27, 29, 30, 32, 37, 38, 40, 41, 42, 44, 45, 47, 50, 51, 55, 57, 
00432   58, 59, 60, 62, 63, 69, 70, 72, 74, 75, 80, 83, 85, 87, 88, 90, 
00433   92, 93, 94,  4,  9, 17, 24, 25, 28, 31, 35, 36, 39, 49, 53, 56, 
00434   61, 65, 66, 67, 68, 76, 77, 78, 79, 81, 82, 84, 86, 89, 91
00435 };
00436 
// Glyph index of each trailing consonant. composeHangul() indexes this with
// (T - TSTART), multiplies the result by 4 (shapes per trailing consonant),
// adds a shape number and UP_TBASE to get the glyph code point.
00444 const static PRUint8 gUnParkTcGlyphMap[141] = {
00445    0,  1,  5, 10, 17, 20, 21, 32, 33, 42, 46, 52, 57, 58, 59, 63,
00446   78, 84, 91, 98,109,123,127,128,129,130,135,  3,  6, 11, 13, 15,
00447   16, 19, 22, 25, 35, 37, 38, 39, 40, 43, 44, 48, 50, 51, 53, 54,
00448   56, 60, 64, 67, 69, 71, 72, 73, 75, 76, 77, 80, 88, 89, 90, 92,
00449   93, 94, 96,106,110,111,114,115,117,119,120,131,134,136,137,138,
00450  139,140,  2,  4,  7,  8,  9, 12, 14, 18, 23, 24, 26, 27, 28, 29,
00451   30, 31, 34, 36, 41, 45, 47, 49, 55, 61, 62, 65, 66, 68, 70, 74,
00452   79, 81, 82, 83, 85, 86, 87, 95, 97, 99,100,101,102,103,104,105,
00453  107,108,112,113,116,118,121,122,124,125,126,132,133
00454 };
00455 
00456 /* Which of six glyphs to use for choseong(L) depends on 
00457    the following vowel and whether or not jongseong(T) is present
00458    in a syllable. Note that The first(0th) element is for Vfill.
00459 
00460    shape Number of choseong(L) w.r.t. jungseong(V) without jongseong(T)
00461 
00462    95 = 1(Vfill) + 66 + 28 (extra)
00463 */
00464  
// Indexed with (V - VFILL) by composeHangul() for LV clusters; the values
// are 0..2 and are added to the leading consonant's glyph base.
00465 const static PRUint8 gUnParkVo2LcMap[95] = {
00466   0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 1, 2, 2, 1,
00467   1, 1, 2, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00468   1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1,
00469   1, 1, 1, 2, 1, 2, 2, 1, 0, 0, 1, 1, 1, 0, 2, 1,
00470   2, 1, 2, 1, 1, 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
00471   2, 1, 1, 1, 2, 1, 0, 0, 0, 1, 1, 1, 0, 2, 2
00472 };
00473 
00474 /* shape Number of choseong(L) w.r.t. jungseong(V) with jongseong(T) */
00475 
// Same indexing as gUnParkVo2LcMap, used for LVT clusters; every entry is
// the corresponding gUnParkVo2LcMap entry plus 3.
00476 const static PRUint8 gUnParkVo2LcMap2[95] = {
00477   3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 4, 4, 4, 5, 5, 4,
00478   4, 4, 5, 5, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00479   4, 4, 5, 5, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5, 4, 4,
00480   4, 4, 4, 5, 4, 5, 5, 4, 3, 3, 4, 4, 4, 3, 5, 4,
00481   5, 4, 5, 4, 4, 3, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4,
00482   5, 4, 4, 4, 5, 4, 3, 3, 3, 4, 4, 4, 3, 5, 5
00483 };
00484 
00485 /* shape Number of jongseong(T) w.r.t. jungseong(V)
00486    Which of four glyphs to use for jongseong(T) depends on 
00487    the preceding vowel. */
00488 
// Indexed with (V - VFILL) by composeHangul() for LVT clusters; the values
// are 0..3 and are added to the trailing consonant's glyph base.
00489 const static PRUint8 gUnParkVo2TcMap[95] = {
00490   3, 0, 2, 0, 2, 1, 2, 1, 2, 3, 0, 2, 1, 3, 3, 1,
00491   2, 1, 3, 3, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
00492   2, 2, 3, 3, 0, 2, 1, 3, 1, 0, 2, 1, 2, 3, 0, 1,
00493   2, 1, 2, 3, 1, 3, 3, 1, 2, 2, 1, 1, 1, 1, 3, 1,
00494   3, 1, 3, 0, 1, 0, 0, 0, 2, 3, 0, 2, 1, 1, 2, 2,
00495   3, 0, 0, 0, 3, 0, 2, 2, 2, 1, 0, 1, 2, 1, 1
00496 };
00497 
// Emit the glyph codes for the cluster currently buffered in
// mJamos[0 .. mJamoCount) into aResult, starting at mByteOff. Every code is
// written as two bytes, high byte first, and mByteOff is advanced.
//
// Returns NS_ERROR_UNEXPECTED for an empty buffer, NS_ERROR_NULL_POINTER for
// a null aResult, a failure code from JamoNormalize(), and NS_OK otherwise.
//
// NOTE(review): the capacity of aResult is not known here and no write is
// bounds-checked; callers presumably size the buffer via GetMaxLength() -
// confirm.
00498 NS_IMETHODIMP 
00499 nsUnicodeToJamoTTF::composeHangul(char* aResult)
00500 {
00501   PRInt32 length = mJamoCount, i;
00502   nsresult rv = NS_OK;
00503 
00504   if (!length)
00505   {
00506     NS_WARNING("composeHangul() : zero length string comes in ! \n");
00507     return NS_ERROR_UNEXPECTED;
00508   }
00509 
00510   if (!aResult) 
00511     return NS_ERROR_NULL_POINTER;
00512 
00513   // Put Hangul tone mark first as it should be to the left of 
00514   // the character it follows.
00515   // XXX : What should we do when a tone mark come by itself?
00516   
// Only the LAST buffered character is checked; when it is a tone mark it is
// written out as is and removed from the cluster.
00517   if (IS_TONE(mJamos[length - 1])) 
00518   {
00519     aResult[mByteOff++] = PRUint8(mJamos[length - 1] >> 8);
00520     aResult[mByteOff++] = PRUint8(mJamos[length - 1] & 0xff); 
00521     if (--length == 0)
00522       return rv;
00523   }
00524 
00525   // no more processing is necessary for precomposed modern Hangul syllables.
00526   if (length == 1 && IS_SYL(mJamos[0])) 
00527   {
00528     aResult[mByteOff++] = PRUint8(mJamos[0] >> 8);
00529     aResult[mByteOff++] = PRUint8(mJamos[0] & 0xff); 
00530     return rv;
00531   }
00532 
// Non-Hangul characters are passed through unchanged. Only mJamos[0] is
// written, even if the assertion below does not hold.
00533   if (CHAR_CLASS(mJamos[0]) == KO_CHAR_CLASS_NOHANGUL) 
00534   {
00535     NS_ASSERTION(length == 1, "A non-Hangul should come by itself !!\n");
00536     aResult[mByteOff++] = PRUint8(mJamos[0] >> 8);
00537     aResult[mByteOff++] = PRUint8(mJamos[0] & 0xff); 
00538     return rv;
00539   }
00540 
// 'buffer' takes ownership of the sequence JamoNormalize() hands back.
00541   nsXPIDLString buffer;
00542 
00543   rv =  JamoNormalize(mJamos, getter_Copies(buffer), &length);
00544 
00545   // safe to cast away const.
// NOTE(review): BeginWriting() is called before rv is checked; 'text' is not
// used if JamoNormalize() failed, but checking first would be tidier.
00546   PRUnichar* text = buffer.BeginWriting();
00547   NS_ENSURE_SUCCESS(rv, rv);
00548 
// If the normalized sequence is a modern LV or LVT syllable, this writes the
// precomposed syllable, reduces 'length' by the number of jamos used and
// returns that number, so 'text' ends up pointing past them.
00549   text += RenderAsPrecompSyllable(text, &length, aResult);
00550 
00551   if (!length)
00552     return rv;
00553 
00554   // convert to extended Jamo sequence
00555   JamosToExtJamos(text, &length);
00556 
00557 
00558   // Check if not in LV or LVT form after the conversion
// i.e. fall back unless length is 2 or 3 AND text[0] is L, text[1] is V and
// (for length 3) text[2] is T. '&&' binds tighter than '||' here.
00559   if (length != 2 && length != 3 ||
00560       (!IS_LC_EXT(text[0]) || !IS_VO_EXT(text[1]) ||
00561        (length == 3 && !IS_TC_EXT(text[2]))))
00562     goto fallback;
00563 
00564 //  Now that text[0..2] are identified as L,V, and T, it's safe to 
00565 //  shift them back to U+1100 block although their ranges overlap each other.
00566   
00567   text[0] -= LC_OFFSET; 
00568   text[1] -= VO_OFFSET; 
00569   if (length == 3)
00570     text[2] -= TC_OFFSET;
00571 
// Map each jamo to a font glyph code: glyph index * shapes-per-jamo + shape
// number + base. The shape numbers depend on the vowel and on whether a
// trailing consonant is present. text[1] must be rewritten LAST because the
// other two lookups still need the original vowel.
// NOTE(review): the table indices are not range-checked here; presumably
// JamosToExtJamos() guarantees they fit the tables - confirm.
00572   if (length != 3)
00573   {
00574     text[0] = gUnParkLcGlyphMap[text[0] - LBASE] * 6 + 
00575               gUnParkVo2LcMap[text[1] - VFILL] + UP_LBASE;
00576     text[1] = gUnParkVoGlyphMap[text[1] - VFILL] * 2 + UP_VBASE;
00577   }
00578   else 
00579   {
00580     text[0] = gUnParkLcGlyphMap[text[0] - LBASE] * 6 + 
00581               gUnParkVo2LcMap2[text[1] - VFILL] + UP_LBASE;
00582     text[2] = gUnParkTcGlyphMap[text[2] - TSTART] * 4 + 
00583               gUnParkVo2TcMap[text[1] - VFILL] + UP_TBASE; 
00584     text[1] = gUnParkVoGlyphMap[text[1] - VFILL] * 2 + UP_VBASE + 1; 
00585   }
00586 
00587   // Xft doesn't like blank glyphs at code points other than listed in 
00588   // the blank glyph list. Replace Lfill glyph code points of UnPark
00589   // fonts with standard LFILL code point (U+115F).
00590     
00591   if (UP_LBASE <= text[0] && text[0] < UP_LBASE + 6)
00592     text[0] = LFILL;
00593 
00594   // The same is true of glyph code points corresponding to VFILL
00595   // in UnBatang-like fonts. VFILL is not only blank but also non-advancing
00596   // so that we can just skip it. 
00597   if (UP_VBASE <= text[1] && text[1] < UP_VBASE + 2)
00598   {
// Drop the vowel slot; for an LVT cluster the trailing consonant moves up
// into its place.
00599     --length;
00600     if (length == 2) 
00601       text[1] = text[2]; 
00602   }
00603 
00604   for (i = 0 ; i < length; i++)
00605   {
00606     aResult[mByteOff++] = PRUint8(text[i] >> 8);
00607     aResult[mByteOff++] = PRUint8(text[i] & 0xff);
00608   }
00609 
00610   return rv;
00611 
00612 
00613   /* If jamo sequence is not convertible to a jamo cluster,
00614    * just enumerate stand-alone jamos. Prepend V and T with  Lf.
00615    *
00616    * XXX: It might be better to search for a sub-sequence (not just at the
00617    * beginning of a cluster but also in the middle or at the end.) 
00618    * that can be rendered as precomposed and render it as such and enumerate
00619    * jamos in the rest. This approach is useful when a simple Xkb-based input
00620    * is used. 
00621    */
00622 
00623 fallback: 
// In this path 'text' still holds the shifted (temporary block) code points,
// so the offsets are subtracted again for every table lookup.
00624   for (i = 0; i < length; i++)
00625   {
00626     PRUnichar wc=0, wc2=0;
00627     /* skip Lfill and Vfill if they're not the sole char. in a cluster */
00628     if (length > 1 && 
00629          (text[i] - LC_OFFSET == LFILL || text[i] - VO_OFFSET == VFILL))
00630       continue;
00631     else if (IS_LC_EXT (text[i]))
00632        wc = gUnParkLcGlyphMap[text[i] - LC_OFFSET - LBASE] * 6 + UP_LBASE;
00633     else 
00634     {
00635   /* insert Lfill glyph to advance cursor pos. for V and T */
// NOTE(review): the comment says Lfill, but LBASE is U+1100 whereas the
// filler constant is LFILL (U+115F) - confirm which one is intended.
00636       wc = LBASE;
00637   /* don't have to draw Vfill. Drawing Lfill is sufficient. */ 
// Anything that is not in the vowel block is treated as a trailing consonant.
00638       if (text[i] - VO_OFFSET != VFILL) 
00639         wc2 = IS_VO_EXT (text[i]) ? 
00640         gUnParkVoGlyphMap[text[i] - VO_OFFSET - VFILL] * 2 + UP_VBASE:
00641         gUnParkTcGlyphMap[text[i] - TC_OFFSET - TSTART] * 4 + UP_TBASE + 3;
00642     }
00643     aResult[mByteOff++] = PRUint8(wc >> 8);
00644     aResult[mByteOff++] = PRUint8(wc & 0xff);
00645 
00646     if (wc2) 
00647     {
00648       aResult[mByteOff++] = wc2 >> 8;
00649       aResult[mByteOff++] = wc2 & 0xff; 
00650     }
00651   }
00652 
00653   return rv;
00654 }
00655 
00656 int
00657 nsUnicodeToJamoTTF::RenderAsPrecompSyllable (PRUnichar* aSrc, 
00658                                              PRInt32* aSrcLength, char* aResult)
00659 {
00660 
00661   int composed = 0;
00662 
00663   if (*aSrcLength == 3 && IS_SYL_LC(aSrc[0]) && IS_SYL_VO(aSrc[1]) && 
00664       IS_SYL_TC(aSrc[2]))
00665     composed = 3;
00666   else if (*aSrcLength == 2 && IS_SYL_LC(aSrc[0]) && IS_SYL_VO(aSrc[1]))
00667     composed = 2;
00668   else
00669     composed = 0;
00670 
00671   if (composed)
00672   {
00673     PRUnichar wc;
00674     if (composed == 3)
00675       wc = SYL_FROM_LVT(aSrc[0], aSrc[1], aSrc[2]);
00676     else
00677       wc = SYL_FROM_LVT(aSrc[0], aSrc[1], TBASE);
00678     aResult[mByteOff++] = PRUint8(wc >> 8);
00679     aResult[mByteOff++] = PRUint8(wc & 0xff);
00680   }
00681 
00682   *aSrcLength -= composed;
00683 
00684   return composed;
00685 }
00686 
00687 // Fill up Cmap array quickly for a rather large range.
00688 /* static */
00689 inline void FillInfoRange(PRUint32* aInfo, PRUint32 aStart, PRUint32 aEnd)
00690 {
00691 
00692   PRUint32 b = aStart >> 5; 
00693   PRUint32 e = aEnd >> 5;
00694 
00695   if (aStart & 0x1f)
00696     aInfo[b++] |= ~ (0xFFFFFFFFL >> (32 - ((aStart) & 0x1f)));
00697 
00698   for( ; b < e ; b++)
00699     aInfo[b] |= 0xFFFFFFFFL;
00700 
00701   aInfo[e] |= (0xFFFFFFFFL >> (31 - ((aEnd) & 0x1f)));
00702 }
00703 
00704 
00705 #define ROWLEN 94
00706 #define IS_GR94(x) (0xA0 < (x) && (x) < 0xFF)
00707 
00708 // Given a range [aHigh1, aHigh2] in high bytes of EUC-KR, convert 
00709 // rows of 94 characters in the range (row by row) to Unicode and set 
00710 // representability if the result is not 0xFFFD (Unicode replacement char.).
00711 /* static */
00712 nsresult FillInfoEUCKR (PRUint32 *aInfo, PRUint16 aHigh1, PRUint16 aHigh2)
00713 {
00714   char row[ROWLEN * 2];
00715   PRUnichar dest[ROWLEN];
00716   nsresult rv = NS_OK;
00717 
00718   NS_ENSURE_TRUE(aInfo, NS_ERROR_NULL_POINTER);
00719   NS_ENSURE_TRUE(IS_GR94(aHigh1) && IS_GR94(aHigh2), NS_ERROR_INVALID_ARG);
00720 
00721   nsCOMPtr<nsIUnicodeDecoder> decoder;
00722   rv = GetDecoder(getter_AddRefs(decoder));
00723   NS_ENSURE_SUCCESS(rv,rv);
00724 
00725   for (PRUint16 i = aHigh1 ; i <= aHigh2; i++)
00726   {
00727     PRUint16 j;
00728     // handle a row of 94 char. at a time.
00729     for (j = 0 ; j < ROWLEN; j++)
00730     {
00731       row[j * 2] = char(i);
00732       row[j * 2 + 1] = char(j + 0xa1);
00733     }
00734     PRInt32 srcLen = ROWLEN * 2;
00735     PRInt32 destLen = ROWLEN;
00736     rv = decoder->Convert(row, &srcLen, dest, &destLen);
00737     NS_ENSURE_SUCCESS(rv, rv);
00738 
00739     // set representability according to the conversion result.
00740     for (j = 0 ; j < ROWLEN; j++)
00741       if (dest[j] != 0xFFFD)
00742         SET_REPRESENTABLE(aInfo, dest[j]);
00743   }
00744   return rv;
00745 }
00746 
00747 /* static */
00748 nsresult GetDecoder(nsIUnicodeDecoder** aDecoder)
00749 {
00750   nsresult rv; 
00751 
00752   if (gDecoder) {
00753     *aDecoder = gDecoder.get();
00754     NS_ADDREF(*aDecoder);
00755     return NS_OK;
00756   }
00757 
00758   nsCOMPtr<nsICharsetConverterManager> charsetConverterManager;
00759   charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
00760   NS_ENSURE_SUCCESS(rv,rv);
00761   rv = charsetConverterManager->GetUnicodeDecoderRaw("EUC-KR", getter_AddRefs(gDecoder));
00762   NS_ENSURE_SUCCESS(rv,rv);
00763 
00764   *aDecoder = gDecoder.get();
00765   NS_ADDREF(*aDecoder);
00766   return NS_OK;
00767 }
00768 
00769 
00770 /* static */
00771 PRInt32 JamoNormMapComp (const JamoNormMap& p1, const JamoNormMap& p2)
00772 {
00773   if (p1.seq[0] != p2.seq[0]) 
00774     return p1.seq[0] - p2.seq[0];
00775   if (p1.seq[1] != p2.seq[1]) 
00776     return p1.seq[1] - p2.seq[1];
00777   return p1.seq[2] - p2.seq[2];
00778 }
00779 
00780 /* static */
00781 const JamoNormMap* JamoClusterSearch (JamoNormMap aKey, 
00782                                 const JamoNormMap* aClusters, 
00783                                 PRInt16 aClustersSize)
00784 {
00785 
00786   if (aClustersSize <= 0 || !aClusters)
00787   {
00788     NS_WARNING("aClustersSize <= 0 || !aClusters");
00789     return nsnull;
00790   }
00791 
00792   if (aClustersSize < 9) 
00793   {
00794     PRInt16 i;
00795     for (i = 0; i < aClustersSize; i++)
00796       if (JamoNormMapComp (aKey, aClusters[i]) == 0) 
00797         return aClusters + i; 
00798     return nsnull;
00799   }
00800    
00801   PRUint16 l = 0, u = aClustersSize - 1;
00802   PRUint16 h = (l + u) / 2;
00803 
00804   if (JamoNormMapComp (aKey, aClusters[h]) < 0) 
00805     return JamoClusterSearch(aKey, &(aClusters[l]), h - l);   
00806   else if (JamoNormMapComp (aKey, aClusters[h]) > 0) 
00807     return JamoClusterSearch(aKey, &(aClusters[h + 1]), u - h);   
00808   else
00809     return aClusters + h;
00810 
00811 }
00812 
00813 
00814 /*
00815  *  look up cluster array for all possible matching Jamo sequences 
00816  *  in 'aIn' and  replace all matching substrings with match->liga in place. 
00817  *  returns the difference in aLength between before and after the replacement.
00818  *  XXX : 1. Do we need caching here? 
00819  **/
00820 
00821 /* static */
00822 PRInt16 JamoSrchReplace (const JamoNormMap* aClusters, 
00823                          PRUint16 aClustersSize, PRUnichar* aIn, 
00824                          PRInt32* aLength, PRUint16 aOffset)
00825 {
00826   PRInt32 origLen = *aLength; 
00827 
00828   // non-zero third element => clusternLen = 3. otherwise, it's 2.
00829   PRUint16 clusterLen = aClusters[0].seq[2] ? 3 : 2; 
00830 
00831   PRInt32 start = 0, end;
00832 
00833   // identify the substring of aIn with values in [aOffset, aOffset + 0x100).
00834   while (start < origLen && (aIn[start] & 0xff00) != aOffset)
00835     ++start;
00836   for (end=start; end < origLen && (aIn[end] & 0xff00) == aOffset; ++end);
00837 
00838   // now process the substring aIn[start] .. aIn[end] 
00839   // we don't need a separate range check here because the one in 
00840   // for-loop is sufficient.
00841   for (PRInt32 i = start; i <= end - clusterLen; i++)
00842   {
00843     const JamoNormMap *match;
00844     JamoNormMap key;
00845 
00846     // cluster array is made up of PRUint8's to save memory
00847     // and we have to subtract aOffset from the input before looking it up.
00848     key.seq[0] = aIn[i] - aOffset;
00849     key.seq[1] = aIn[i + 1] - aOffset;
00850     key.seq[2] = clusterLen == 3 ? (aIn[i + 2] - aOffset) : 0;
00851 
00852     match = JamoClusterSearch (key, aClusters, aClustersSize);
00853 
00854     if (match) 
00855     {
00856       aIn[i] = match->liga + aOffset; // add back aOffset. 
00857 
00858       // move up the 'tail'
00859       for (PRInt32 j = i + clusterLen ; j < *aLength; j++)
00860         aIn[j - clusterLen + 1] = aIn[j];
00861 
00862       end -= (clusterLen - 1);
00863       *aLength -= (clusterLen - 1);
00864     }
00865   }
00866 
00867   return *aLength - origLen;
00868 }
00869 
00870 /* static */
00871 nsresult ScanDecomposeSyllable(PRUnichar* aIn, PRInt32 *aLength, 
00872                                const PRInt32 maxLength)
00873 {
00874   nsresult rv = NS_OK;
00875 
00876   if (!aIn || *aLength < 1 || maxLength < *aLength + 2)
00877     return NS_ERROR_INVALID_ARG;
00878 
00879   PRInt32 i = 0;
00880   while (i < *aLength && !IS_SYL(aIn[i]))
00881     i++;
00882 
00883   // Convert a precomposed syllable to an LV or LVT sequence.
00884   if (i < *aLength && IS_SYL(aIn[i]))
00885   {
00886     PRUint16 j = IS_SYL_WITH_TC(aIn[i]) ? 1 : 0; 
00887     aIn[i] -= SBASE;
00888     memmove(aIn + i + 2 + j, aIn + i + 1, *aLength - i - 1);
00889     if (j)
00890       aIn[i + 2] = aIn[i] % TCOUNT + TBASE;
00891     aIn[i + 1] = (aIn[i] / TCOUNT) % VCOUNT + VBASE;
00892     aIn[i] = aIn[i] / (TCOUNT * VCOUNT) + LBASE;
00893     *aLength += 1 + j;
00894   }
00895 
00896   return rv;
00897 }
00898 
00899 /*
00900  *  1. Normalize (regularize) a jamo sequence to the regular
00901  *     syllable form defined in Unicode 3.2 section 3.11 to the extent
00902  *     that it's useful in rendering by render_func's().
00903  *
00904  *  2. Replace a compatibly decomposed Jamo sequence (unicode 2.0 
00905  *     definition) with a 'precomposed' Jamo cluster (with codepoint
00906  *     of its own in U+1100 block). For instance, a seq.
00907  *     of U+1100, U+1100 is replaced by U+1101. It actually
00908  *     more than Unicode 2.0 decomposition map suggests.
00909  *     For a Jamo cluster made up of three basic Jamos
00910  *     (e.g. U+1133 : Sios, Piup, Kiyeok), not only
00911  *      a sequence of Sios(U+1109), Piup(U+1107) and 
00912  *     Kiyeok(U+1100) but also two more sequences,
00913  *     {U+1132(Sios-Pieup), U+1100(Kiyeok) and {Sios(U+1109),
00914  *      U+111E(Piup-Kiyeok)} are mapped to U+1133.
00915  *
00916  *  3. the result is returned in a newly malloced
00917  *     PRUnichar*. Callers have to delete it, which 
00918  *     is taken care of by using nsXPIDLString in caller.
00919  */
00920 
00921 /* static */
00922 nsresult JamoNormalize(const PRUnichar* aInSeq, PRUnichar** aOutSeq, 
00923                        PRInt32* aLength) 
00924 {
00925   if (!aInSeq || !aOutSeq || *aLength <= 0)
00926     return NS_ERROR_INVALID_ARG;
00927 
00928   // 4 more slots : 2 for Lf and Vf, 2 for decomposing a modern precomposed 
00929   // syllable into a Jamo sequence of LVT?. 
00930   *aOutSeq = new PRUnichar[*aLength + 4]; 
00931   if (!*aOutSeq)
00932     return NS_ERROR_OUT_OF_MEMORY;
00933   memcpy(*aOutSeq, aInSeq, *aLength * sizeof(PRUnichar));
00934 
00935   nsresult rv = ScanDecomposeSyllable(*aOutSeq, aLength, *aLength + 4);
00936   NS_ENSURE_SUCCESS(rv, rv);
00937 
00938   // LV or LVT : no need to search for and replace jamo sequences 
00939   if ((*aLength == 2 && IS_LC((*aOutSeq)[0]) && IS_VO((*aOutSeq)[1])) || 
00940       (*aLength == 3 && IS_LC((*aOutSeq)[0]) && IS_VO((*aOutSeq)[1]) && 
00941       IS_TC((*aOutSeq)[2])))
00942     return NS_OK;
00943 
00944   // remove Lf in LfL sequence that may occur in an interim cluster during
00945   // a simple Xkb-based input. 
00946   if ((*aOutSeq)[0] == LFILL && *aLength > 1 && IS_LC((*aOutSeq)[1]))
00947   {
00948     memmove (*aOutSeq, *aOutSeq + 1, (*aLength - 1) * sizeof(PRUnichar)); 
00949     (*aLength)--;
00950   }
00951 
00952   if (*aLength > 1)
00953   {
00954     JamoSrchReplace (gJamoClustersGroup1,
00955         sizeof(gJamoClustersGroup1) / sizeof(gJamoClustersGroup1[0]), 
00956         *aOutSeq, aLength, LBASE);
00957     JamoSrchReplace (gJamoClustersGroup234,
00958         sizeof(gJamoClustersGroup234) / sizeof(gJamoClustersGroup234[0]), 
00959         *aOutSeq, aLength, LBASE);
00960   }
00961 
00962   // prepend a leading V with Lf 
00963   if (IS_VO((*aOutSeq)[0])) 
00964   {
00965      memmove(*aOutSeq + 1, *aOutSeq, *aLength * sizeof(PRUnichar));
00966     (*aOutSeq)[0] = LFILL;
00967     (*aLength)++;
00968   }
00969   /* prepend a leading T with LfVf */
00970   else if (IS_TC((*aOutSeq)[0])) 
00971   {
00972     memmove (*aOutSeq + 2, *aOutSeq, *aLength * sizeof(PRUnichar));
00973     (*aOutSeq)[0] = LFILL;
00974     (*aOutSeq)[1] = VFILL;
00975     *aLength += 2;
00976   }
00977   return NS_OK;
00978 }
00979 
00980 
00981 /*  JamosToExtJamos() :
00982  *  1. shift jamo sequences to three disjoint code blocks in
00983  *     PUA (0xF000 for LC, 0xF1000 for VO, 0xF200 for TC).
00984  *  2. replace a jamo sequence with a precomposed extended 
00985  *     cluster jamo code point in PUA
00986  *  3. this replacement is done 'in place' 
00987  */
00988 
00989 /* static */
00990 void JamosToExtJamos (PRUnichar* aInSeq,  PRInt32* aLength)
00991 {
00992   // translate jamo code points to temporary code points in PUA
00993   for (PRInt32 i = 0; i < *aLength; i++)
00994   {
00995     if (IS_LC(aInSeq[i]))
00996       aInSeq[i] += LC_OFFSET;
00997     else if (IS_VO(aInSeq[i]))
00998       aInSeq[i] += VO_OFFSET;
00999     else if (IS_TC(aInSeq[i]))
01000       aInSeq[i] += TC_OFFSET;
01001   }
01002 
01003   // LV or LVT : no need to search for and replace jamo sequences 
01004   if ((*aLength == 2 && IS_LC_EXT(aInSeq[0]) && IS_VO_EXT(aInSeq[1])) || 
01005       (*aLength == 3 && IS_LC_EXT(aInSeq[0]) && IS_VO_EXT(aInSeq[1]) && 
01006        IS_TC_EXT(aInSeq[2])))
01007     return;
01008 
01009   // replace a sequence of Jamos with the corresponding precomposed 
01010   // Jamo cluster in PUA 
01011     
01012   JamoSrchReplace (gExtLcClustersGroup1, 
01013       sizeof (gExtLcClustersGroup1) / sizeof (gExtLcClustersGroup1[0]), 
01014       aInSeq, aLength, LC_TMPPOS); 
01015   JamoSrchReplace (gExtLcClustersGroup2,
01016        sizeof (gExtLcClustersGroup2) / sizeof (gExtLcClustersGroup2[0]), 
01017        aInSeq, aLength, LC_TMPPOS);
01018   JamoSrchReplace (gExtVoClustersGroup1,
01019        sizeof (gExtVoClustersGroup1) / sizeof (gExtVoClustersGroup1[0]), 
01020        aInSeq, aLength, VO_TMPPOS);
01021   JamoSrchReplace (gExtVoClustersGroup2, 
01022        sizeof (gExtVoClustersGroup2) / sizeof (gExtVoClustersGroup2[0]), 
01023        aInSeq, aLength, VO_TMPPOS);
01024   JamoSrchReplace (gExtTcClustersGroup1, 
01025        sizeof (gExtTcClustersGroup1) / sizeof (gExtTcClustersGroup1[0]), 
01026        aInSeq, aLength, TC_TMPPOS);
01027   JamoSrchReplace (gExtTcClustersGroup2, 
01028        sizeof (gExtTcClustersGroup2) / sizeof (gExtTcClustersGroup2[0]), 
01029        aInSeq, aLength, TC_TMPPOS);
01030     return;
01031 }
01032