Back to index

lightning-sunbird  0.9+nobinonly
nsUnicodeToLangBoxArabic16.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is Mozilla Communicator client code.
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 2002
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *   Simon Montagu <smontagu@netscape.com>
00024  *
00025  * Alternatively, the contents of this file may be used under the terms of
00026  * either of the GNU General Public License Version 2 or later (the "GPL"),
00027  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00028  * in which case the provisions of the GPL or the LGPL are applicable instead
00029  * of those above. If you wish to allow use of your version of this file only
00030  * under the terms of either the GPL or the LGPL, and not to allow others to
00031  * use your version of this file under the terms of the MPL, indicate your
00032  * decision by deleting the provisions above and replace them with the notice
00033  * and other provisions required by the GPL or the LGPL. If you do not delete
00034  * the provisions above, a recipient may use your version of this file under
00035  * the terms of any one of the MPL, the GPL or the LGPL.
00036  *
00037  * ***** END LICENSE BLOCK ***** */
00038 
00039 
00040 #include "nsUCConstructors.h"
00041 #include "nsUnicodeToLangBoxArabic16.h"
00042 
00043 #include "nsISupports.h"
00044 
// Unicode -> Langbox ISO-8859-6.16 glyph codes for the Arabic Presentation
// Forms-B block. Entry N holds the font byte for code point U+FE70 + N, so
// the table spans U+FE70..U+FEFC (141 entries).
// Source: http://www.langbox.com/arabic/FontSet_ISO8859-6-16.html
// Code points that are not assigned in Unicode map to 0x3F ('?').
// Rows for letters with four shapes list them in code point order:
// isolated, final, initial, medial. Two-shape rows are isolated, final.
static const unsigned char uni2lbox [] =
{
  // U+FE70..U+FE7F: harakat, isolated form followed by the form on tatweel
  0x6B, 0x90,             // U+FE70-FE71 FATHATAN
  0x6C, 0x3F,             // U+FE72      DAMMATAN (U+FE73 not in Unicode)
  0x6D, 0x3F,             // U+FE74      KASRATAN (U+FE75 not in Unicode)
  0x6E, 0x93,             // U+FE76-FE77 FATHA
  0x6F, 0x94,             // U+FE78-FE79 DAMMA
  0x70, 0x95,             // U+FE7A-FE7B KASRA
  0x71, 0x97,             // U+FE7C-FE7D SHADDA
  0x72, 0x96,             // U+FE7E-FE7F SUKUN

  // U+FE80..U+FE8C: hamza and its carriers
  0x41,                   // U+FE80      HAMZA (isolated only)
  0x42, 0xA1,             // U+FE81-FE82 MADDA ON ALEF
  0x43, 0xA2,             // U+FE83-FE84 HAMZA ON ALEF
  0x44, 0xA3,             // U+FE85-FE86 HAMZA ON WAW
  0x45, 0xA4,             // U+FE87-FE88 HAMZA UNDER ALEF
  0x46, 0xF9, 0xF8, 0xA0, // U+FE89-FE8C HAMZA ON YEH

  // U+FE8D..U+FEF4: the basic letters
  0x47, 0xA5,             // U+FE8D-FE8E ALEF
  0x48, 0xAE, 0xAC, 0xAD, // U+FE8F-FE92 BAA
  0x49, 0xB1,             // U+FE93-FE94 TAA MARBUTA
  0x4A, 0xB4, 0xB2, 0xB3, // U+FE95-FE98 TAA
  0x4B, 0xB7, 0xB5, 0xB6, // U+FE99-FE9C THAA
  0x4C, 0xBA, 0xB8, 0xB9, // U+FE9D-FEA0 JEEM
  0x4D, 0xBD, 0xBB, 0xBC, // U+FEA1-FEA4 HAA
  0x4E, 0xC0, 0xBE, 0xBF, // U+FEA5-FEA8 KHAA
  0x4F, 0xA6,             // U+FEA9-FEAA DAL
  0x50, 0xA7,             // U+FEAB-FEAC THAL
  0x51, 0xA8,             // U+FEAD-FEAE RA
  0x52, 0xA9,             // U+FEAF-FEB0 ZAIN
  0x53, 0xC3, 0xC1, 0xC2, // U+FEB1-FEB4 SEEN
  0x54, 0xC6, 0xC4, 0xC5, // U+FEB5-FEB8 SHEEN
  0x55, 0xC9, 0xC7, 0xC8, // U+FEB9-FEBC SAD
  0x56, 0xCC, 0xCA, 0xCB, // U+FEBD-FEC0 DAD
  0x57, 0xCF, 0xCD, 0xCE, // U+FEC1-FEC4 TAH
  0x58, 0xD2, 0xD0, 0xD1, // U+FEC5-FEC8 ZAH
  0x59, 0xD5, 0xD3, 0xD4, // U+FEC9-FECC AIN
  0x5A, 0xD8, 0xD6, 0xD7, // U+FECD-FED0 GHAIN
  0x61, 0xDB, 0xD9, 0xDA, // U+FED1-FED4 FEH
  0x62, 0xDE, 0xDC, 0xDD, // U+FED5-FED8 QAF
  0x63, 0xE1, 0xDF, 0xE0, // U+FED9-FEDC KAF
  0x64, 0xE4, 0xE2, 0xE3, // U+FEDD-FEE0 LAM
  0x65, 0xE7, 0xE5, 0xE6, // U+FEE1-FEE4 MEEM
  0x66, 0xEA, 0xE8, 0xE9, // U+FEE5-FEE8 NOON
  0x67, 0xED, 0xEB, 0xEC, // U+FEE9-FEEC HEH
  0x68, 0xAA,             // U+FEED-FEEE WAW
  0x69, 0xAB,             // U+FEEF-FEF0 ALEF MAKSURA
  0x6A, 0xF0, 0xEE, 0xEF, // U+FEF1-FEF4 YEH

  // U+FEF5..U+FEFC: lam-alef ligatures
  0x76, 0xFA,             // U+FEF5-FEF6 MADDA ON LAM ALEF
  0x77, 0xFC,             // U+FEF7-FEF8 HAMZA ON LAM ALEF
  0x78, 0xFB,             // U+FEF9-FEFA HAMZA UNDER LAM ALEF
  0x79, 0xFD              // U+FEFB-FEFC LAM ALEF
};
00191 
00192 NS_IMETHODIMP nsUnicodeToLangBoxArabic16::Convert(
00193       const PRUnichar * aSrc, PRInt32 * aSrcLength,
00194       char * aDest, PRInt32 * aDestLength)
00195 {
00196    char* dest = aDest;
00197    PRInt32 inlen = 0;
00198 
00199    while (inlen < *aSrcLength) {
00200      PRUnichar aChar = aSrc[inlen];
00201      
00202      if (((aChar >= 0x0020) && (aChar <= 0x0027)) ||
00203           (aChar == 0x2A) ||
00204           (aChar == 0x2B) ||
00205          ((aChar >= 0x002D) && (aChar <= 0x002F)) ||
00206           (aChar == 0x003A) ||
00207          ((aChar >= 0x003C) && (aChar <= 0x003E)) ||
00208           (aChar == 0x40) ||
00209           (aChar == 0x5C) ||
00210           (aChar == 0x5E) ||
00211           (aChar == 0x5F) ||
00212           (aChar == 0x7C) ||
00213           (aChar == 0x7E)) {
00214        *dest++ = (char) aChar;
00215        // ISO-8859-6-16 swaps symmetric characters internally, but we have
00216        // already swapped them where necessary during Bidi reordering, so we
00217        // must swap them back here.
00218      } else if (0x0028 == aChar) {
00219        *dest++ = 0x29;
00220      } else if (0x0029 == aChar) {
00221        *dest++ = 0x28;
00222      } else if (0x005B == aChar) {
00223        *dest++ = 0x5D;
00224      } else if (0x005D == aChar) {
00225        *dest++ = 0x5B;
00226      } else if (0x007B == aChar) {
00227        *dest++ = 0x7D;
00228      } else if (0x007D == aChar) {
00229        *dest++ = 0x7B;
00230      } else if (0x060C == aChar) {
00231        // ARABIC COMMA
00232        *dest++ = 0x2C;
00233      } else if (0x061B == aChar) {
00234        // ARABIC SEMICOLON
00235        *dest++ = 0x3B;
00236      } else if (0x061F == aChar) {
00237        // ARABIC QUESTION MARK
00238        *dest++ = 0x3F;
00239      } else if (0x0640 == aChar) {
00240        // ARABIC TATWEEL
00241        *dest++ = 0x60;
00242      } else if ((aChar >= 0x0660) && (aChar <=0x0669)) {
00243        // ARABIC-INDIC DIGITS
00244        *dest++ = (char)(aChar - 0x0660 + 0x30);
00245      } else if ((aChar>=0xFE70) && (aChar <= 0xFEFC)) {
00246        // ARABIC PRESENTATION FORMS
00247        *dest++ = uni2lbox[aChar-0xFE70];
00248      } else {
00249        // do nothing
00250      }
00251      inlen++;
00252    }
00253 
00254     *aDestLength = dest - aDest;
00255     return NS_OK;
00256 }
00257 
00258 NS_IMETHODIMP nsUnicodeToLangBoxArabic16::GetMaxLength(
00259 const PRUnichar * aSrc, PRInt32 aSrcLength,
00260                            PRInt32 * aDestLength)
00261 {
00262   *aDestLength = aSrcLength;
00263   return NS_OK;
00264 }
00265 
00266 NS_IMETHODIMP nsUnicodeToLangBoxArabic16::Finish(
00267       char * aDest, PRInt32 * aDestLength)
00268 {
00269    *aDestLength=0;
00270    return NS_OK;
00271 }
00272 
00273 NS_IMETHODIMP nsUnicodeToLangBoxArabic16::Reset()
00274 {
00275    return NS_OK;
00276 }
00277 
00278 NS_IMETHODIMP nsUnicodeToLangBoxArabic16::SetOutputErrorBehavior(
00279       PRInt32 aBehavior,
00280       nsIUnicharEncoder * aEncoder, PRUnichar aChar)
00281 {
00282    return NS_ERROR_NOT_IMPLEMENTED;
00283 }
00284 
00285 
00286 NS_IMETHODIMP nsUnicodeToLangBoxArabic16::FillInfo(PRUint32* aInfo)
00287 {
00288    PRUnichar i;
00289 
00290    /* Start off by marking the whole ASCII range as unrepresentable. If we
00291     * don't do this we will try to use an ISO-8859-6.16 font for ASCII text
00292     * embedded in Arabic content, and it will not be rendered correctly.
00293     * See discussion in bug 172491
00294     */
00295    for(i=0x0000; i <= 0x007F; i++)
00296      CLEAR_REPRESENTABLE(aInfo, i);
00297 
00298    // Mark the few exceptions as representable.
00299    for(i=0x0020; i <= 0x002B; i++)
00300      SET_REPRESENTABLE(aInfo, i);
00301    for(i=0x002D; i <= 0x002F; i++)
00302      SET_REPRESENTABLE(aInfo, i);
00303    SET_REPRESENTABLE(aInfo, 0x003A);
00304    for(i=0x003C; i <= 0x003E; i++)
00305      SET_REPRESENTABLE(aInfo, i);
00306    SET_REPRESENTABLE(aInfo, 0x0040);
00307    for(i=0x005B; i <= 0x005F; i++)
00308      SET_REPRESENTABLE(aInfo, i);
00309    for(i=0x007B; i <= 0x007E;i++)
00310      SET_REPRESENTABLE(aInfo, i);
00311 
00312    // Arabic punctuation and numerals
00313    SET_REPRESENTABLE(aInfo, 0x060c);
00314    SET_REPRESENTABLE(aInfo, 0x061b);
00315    SET_REPRESENTABLE(aInfo, 0x061f);
00316    SET_REPRESENTABLE(aInfo, 0x0640);
00317    for(i=0x0660; i<=0x0669; i++)
00318       SET_REPRESENTABLE(aInfo, i);
00319 
00320    // Arabic Pres Form-B
00321    for(i=0xFE70; i <= 0xFE72; i++)
00322      SET_REPRESENTABLE(aInfo, i);
00323    SET_REPRESENTABLE(aInfo, 0xFE74);
00324    for(i=0xFE76; i <= 0xFEFC; i++)
00325      SET_REPRESENTABLE(aInfo, i);
00326 
00327    return NS_OK;
00328 }