Back to index

tetex-bin  3.0
UnicodeMap.cc
Go to the documentation of this file.
00001 //========================================================================
00002 //
00003 // UnicodeMap.cc
00004 //
00005 // Copyright 2001-2003 Glyph & Cog, LLC
00006 //
00007 //========================================================================
00008 
00009 #include <aconf.h>
00010 
00011 #ifdef USE_GCC_PRAGMAS
00012 #pragma implementation
00013 #endif
00014 
00015 #include <stdio.h>
00016 #include <string.h>
00017 #include "gmem.h"
00018 #include "gfile.h"
00019 #include "GString.h"
00020 #include "GList.h"
00021 #include "Error.h"
00022 #include "GlobalParams.h"
00023 #include "UnicodeMap.h"
00024 
00025 //------------------------------------------------------------------------
00026 
00027 #define maxExtCode 16
00028 
00029 struct UnicodeMapExt {
00030   Unicode u;                // Unicode char
00031   char code[maxExtCode];
00032   Guint nBytes;
00033 };
00034 
00035 //------------------------------------------------------------------------
00036 
00037 UnicodeMap *UnicodeMap::parse(GString *encodingNameA) {
00038   FILE *f;
00039   UnicodeMap *map;
00040   UnicodeMapRange *range;
00041   UnicodeMapExt *eMap;
00042   int size, eMapsSize;
00043   char buf[256];
00044   int line, nBytes, i, x;
00045   char *tok1, *tok2, *tok3;
00046 
00047   if (!(f = globalParams->getUnicodeMapFile(encodingNameA))) {
00048     error(-1, "Couldn't find unicodeMap file for the '%s' encoding",
00049          encodingNameA->getCString());
00050     return NULL;
00051   }
00052 
00053   map = new UnicodeMap(encodingNameA->copy());
00054 
00055   size = 8;
00056   map->ranges = (UnicodeMapRange *)gmalloc(size * sizeof(UnicodeMapRange));
00057   eMapsSize = 0;
00058 
00059   line = 1;
00060   while (getLine(buf, sizeof(buf), f)) {
00061     if ((tok1 = strtok(buf, " \t\r\n")) &&
00062        (tok2 = strtok(NULL, " \t\r\n"))) {
00063       if (!(tok3 = strtok(NULL, " \t\r\n"))) {
00064        tok3 = tok2;
00065        tok2 = tok1;
00066       }
00067       nBytes = strlen(tok3) / 2;
00068       if (nBytes <= 4) {
00069        if (map->len == size) {
00070          size *= 2;
00071          map->ranges = (UnicodeMapRange *)
00072            grealloc(map->ranges, size * sizeof(UnicodeMapRange));
00073        }
00074        range = &map->ranges[map->len];
00075        sscanf(tok1, "%x", &range->start);
00076        sscanf(tok2, "%x", &range->end);
00077        sscanf(tok3, "%x", &range->code);
00078        range->nBytes = nBytes;
00079        ++map->len;
00080       } else if (tok2 == tok1) {
00081        if (map->eMapsLen == eMapsSize) {
00082          eMapsSize += 16;
00083          map->eMaps = (UnicodeMapExt *)
00084            grealloc(map->eMaps, eMapsSize * sizeof(UnicodeMapExt));
00085        }
00086        eMap = &map->eMaps[map->eMapsLen];
00087        sscanf(tok1, "%x", &eMap->u);
00088        for (i = 0; i < nBytes; ++i) {
00089          sscanf(tok3 + i*2, "%2x", &x);
00090          eMap->code[i] = (char)x;
00091        }
00092        eMap->nBytes = nBytes;
00093        ++map->eMapsLen;
00094       } else {
00095        error(-1, "Bad line (%d) in unicodeMap file for the '%s' encoding",
00096              line, encodingNameA->getCString());
00097       }
00098     } else {
00099       error(-1, "Bad line (%d) in unicodeMap file for the '%s' encoding",
00100            line, encodingNameA->getCString());
00101     }
00102     ++line;
00103   }
00104 
00105   fclose(f);
00106 
00107   return map;
00108 }
00109 
00110 UnicodeMap::UnicodeMap(GString *encodingNameA) {
00111   encodingName = encodingNameA;
00112   unicodeOut = gFalse;
00113   kind = unicodeMapUser;
00114   ranges = NULL;
00115   len = 0;
00116   eMaps = NULL;
00117   eMapsLen = 0;
00118   refCnt = 1;
00119 #if MULTITHREADED
00120   gInitMutex(&mutex);
00121 #endif
00122 }
00123 
00124 UnicodeMap::UnicodeMap(char *encodingNameA, GBool unicodeOutA,
00125                      UnicodeMapRange *rangesA, int lenA) {
00126   encodingName = new GString(encodingNameA);
00127   unicodeOut = unicodeOutA;
00128   kind = unicodeMapResident;
00129   ranges = rangesA;
00130   len = lenA;
00131   eMaps = NULL;
00132   eMapsLen = 0;
00133   refCnt = 1;
00134 #if MULTITHREADED
00135   gInitMutex(&mutex);
00136 #endif
00137 }
00138 
00139 UnicodeMap::UnicodeMap(char *encodingNameA, GBool unicodeOutA,
00140                      UnicodeMapFunc funcA) {
00141   encodingName = new GString(encodingNameA);
00142   unicodeOut = unicodeOutA;
00143   kind = unicodeMapFunc;
00144   func = funcA;
00145   eMaps = NULL;
00146   eMapsLen = 0;
00147   refCnt = 1;
00148 #if MULTITHREADED
00149   gInitMutex(&mutex);
00150 #endif
00151 }
00152 
00153 UnicodeMap::~UnicodeMap() {
00154   delete encodingName;
00155   if (kind == unicodeMapUser && ranges) {
00156     gfree(ranges);
00157   }
00158   if (eMaps) {
00159     gfree(eMaps);
00160   }
00161 #if MULTITHREADED
00162   gDestroyMutex(&mutex);
00163 #endif
00164 }
00165 
00166 void UnicodeMap::incRefCnt() {
00167 #if MULTITHREADED
00168   gLockMutex(&mutex);
00169 #endif
00170   ++refCnt;
00171 #if MULTITHREADED
00172   gUnlockMutex(&mutex);
00173 #endif
00174 }
00175 
00176 void UnicodeMap::decRefCnt() {
00177   GBool done;
00178 
00179 #if MULTITHREADED
00180   gLockMutex(&mutex);
00181 #endif
00182   done = --refCnt == 0;
00183 #if MULTITHREADED
00184   gUnlockMutex(&mutex);
00185 #endif
00186   if (done) {
00187     delete this;
00188   }
00189 }
00190 
00191 GBool UnicodeMap::match(GString *encodingNameA) {
00192   return !encodingName->cmp(encodingNameA);
00193 }
00194 
00195 int UnicodeMap::mapUnicode(Unicode u, char *buf, int bufSize) {
00196   int a, b, m, n, i, j;
00197   Guint code;
00198 
00199   if (kind == unicodeMapFunc) {
00200     return (*func)(u, buf, bufSize);
00201   }
00202 
00203   a = 0;
00204   b = len;
00205   if (u >= ranges[a].start) {
00206     // invariant: ranges[a].start <= u < ranges[b].start
00207     while (b - a > 1) {
00208       m = (a + b) / 2;
00209       if (u >= ranges[m].start) {
00210        a = m;
00211       } else if (u < ranges[m].start) {
00212        b = m;
00213       }
00214     }
00215     if (u <= ranges[a].end) {
00216       n = ranges[a].nBytes;
00217       if (n > bufSize) {
00218        return 0;
00219       }
00220       code = ranges[a].code + (u - ranges[a].start);
00221       for (i = n - 1; i >= 0; --i) {
00222        buf[i] = (char)(code & 0xff);
00223        code >>= 8;
00224       }
00225       return n;
00226     }
00227   }
00228 
00229   for (i = 0; i < eMapsLen; ++i) {
00230     if (eMaps[i].u == u) {
00231       n = eMaps[i].nBytes;
00232       for (j = 0; j < n; ++j) {
00233        buf[j] = eMaps[i].code[j];
00234       }
00235       return n;
00236     }
00237   }
00238 
00239   return 0;
00240 }
00241 
00242 //------------------------------------------------------------------------
00243 
00244 UnicodeMapCache::UnicodeMapCache() {
00245   int i;
00246 
00247   for (i = 0; i < unicodeMapCacheSize; ++i) {
00248     cache[i] = NULL;
00249   }
00250 }
00251 
00252 UnicodeMapCache::~UnicodeMapCache() {
00253   int i;
00254 
00255   for (i = 0; i < unicodeMapCacheSize; ++i) {
00256     if (cache[i]) {
00257       cache[i]->decRefCnt();
00258     }
00259   }
00260 }
00261 
00262 UnicodeMap *UnicodeMapCache::getUnicodeMap(GString *encodingName) {
00263   UnicodeMap *map;
00264   int i, j;
00265 
00266   if (cache[0] && cache[0]->match(encodingName)) {
00267     cache[0]->incRefCnt();
00268     return cache[0];
00269   }
00270   for (i = 1; i < unicodeMapCacheSize; ++i) {
00271     if (cache[i] && cache[i]->match(encodingName)) {
00272       map = cache[i];
00273       for (j = i; j >= 1; --j) {
00274        cache[j] = cache[j - 1];
00275       }
00276       cache[0] = map;
00277       map->incRefCnt();
00278       return map;
00279     }
00280   }
00281   if ((map = UnicodeMap::parse(encodingName))) {
00282     if (cache[unicodeMapCacheSize - 1]) {
00283       cache[unicodeMapCacheSize - 1]->decRefCnt();
00284     }
00285     for (j = unicodeMapCacheSize - 1; j >= 1; --j) {
00286       cache[j] = cache[j - 1];
00287     }
00288     cache[0] = map;
00289     map->incRefCnt();
00290     return map;
00291   }
00292   return NULL;
00293 }