Back to index

texmacs  1.0.7.15
pdfencoding.c
Go to the documentation of this file.
00001 /*  $Header: /home/cvsroot/dvipdfmx/src/pdfencoding.c,v 1.12 2009/03/24 07:55:52 chofchof Exp $
00002     
00003     This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
00004 
00005     Copyright (C) 2008 by Jin-Hwan Cho, Matthias Franz, and Shunsaku Hirata,
00006     the dvipdfmx project team <dvipdfmx@project.ktug.or.kr>
00007     
00008     Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>
00009 
00010     This program is free software; you can redistribute it and/or modify
00011     it under the terms of the GNU General Public License as published by
00012     the Free Software Foundation; either version 2 of the License, or
00013     (at your option) any later version.
00014     
00015     This program is distributed in the hope that it will be useful,
00016     but WITHOUT ANY WARRANTY; without even the implied warranty of
00017     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018     GNU General Public License for more details.
00019     
00020     You should have received a copy of the GNU General Public License
00021     along with this program; if not, write to the Free Software
00022     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
00023 */
00024 
00025 #if HAVE_CONFIG_H
00026 #include "config.h"
00027 #endif
00028 
00029 #include <string.h>
00030 
00031 #include "system.h"
00032 #include "mem.h"
00033 #include "error.h"
00034 #include "dpxutil.h"
00035 
00036 #include "pdfparse.h"
00037 #include "pdfobj.h"
00038 
00039 #include "dpxfile.h"
00040 
00041 #include "pdfencoding.h"
00042 
00043 static int      is_similar_charset (char **encoding, const char **encoding2);
00044 static pdf_obj *make_encoding_differences (char **encoding, char **baseenc,
00045                                       const char *is_used);
00046 
/* Verbosity level for this module; 0 means quiet. */
static unsigned char verbose = 0;

/* Glyph-name tables for the predefined encodings (initialised further
 * down in this file).
 */
static const char *MacRomanEncoding[256];
static const char *MacExpertEncoding[256];
static const char *WinAnsiEncoding[256];
#if 0
static const char *StandardEncoding[256];
static const char *ISOLatin1Encoding[256];
#endif

/* Raise the verbosity level by one step per call. */
void
pdf_encoding_set_verbose (void)
{
  ++verbose;
}
00062 
00063 /*
00064  * ident:  File name, e.g., 8a.enc.
00065  * name:   Name of encoding, StandardEncoding, TeXBase1Encoding, ...
00066  * glyphs: List of encoded glyphs (name).
00067  * flags:
00068  *   IS_PREDEFINED:
00069  *     Encoding is one of the MacRomanEncoding, MacExpertEncoding, and
00070  *     WinAnsiEncoding.
00071  *   FLAG_USED_BY_TYPE3:
00072  *     Encoding is used by a Type 3 font.
00073  */
00074 #define FLAG_IS_PREDEFINED  (1 << 0)
00075 #define FLAG_USED_BY_TYPE3  (1 << 1)
00076 
00077 typedef struct pdf_encoding
00078 {
00079   char     *ident;
00080 
00081   char     *enc_name;
00082   int       flags;
00083   char     *glyphs[256];     /* ".notdef" must be represented as NULL */
00084   char      is_used[256];
00085 
00086   struct pdf_encoding *baseenc;
00087   pdf_obj  *tounicode;
00088 
00089   pdf_obj  *resource;
00090 } pdf_encoding;
00091 
00092 static int      pdf_encoding_new_encoding (const char *enc_name,
00093                                       const char *ident,
00094                                       const char **encoding_vec,
00095                                       char *baseenc_name,
00096                                       int flags);
00097 
00098 static void
00099 pdf_init_encoding_struct (pdf_encoding *encoding)
00100 {
00101   ASSERT(encoding);
00102 
00103   encoding->ident    = NULL;
00104 
00105   encoding->enc_name = NULL;
00106 
00107   memset(encoding->glyphs,  0, 256*sizeof(char *));
00108   memset(encoding->is_used, 0, 256);
00109 
00110   encoding->tounicode = NULL;
00111 
00112   encoding->baseenc   = NULL;
00113   encoding->resource  = NULL;
00114 
00115   encoding->flags     = 0;
00116 
00117   return;
00118 }
00119 
00120 /* Creates the PDF Encoding entry for the encoding.
00121  * If baseenc is non-null, it is used as BaseEncoding entry.
00122  */
00123 static pdf_obj *
00124 create_encoding_resource (pdf_encoding *encoding, pdf_encoding *baseenc)
00125 {
00126   pdf_obj *differences;
00127   ASSERT(encoding);
00128   ASSERT(!encoding->resource);
00129 
00130   differences = make_encoding_differences(encoding->glyphs,
00131                                      baseenc ? baseenc->glyphs : NULL,
00132                                      encoding->is_used);
00133   
00134   if (differences) {
00135     pdf_obj *resource = pdf_new_dict();
00136     if (baseenc)
00137       pdf_add_dict(resource, pdf_new_name("BaseEncoding"),
00138                  pdf_link_obj(baseenc->resource));
00139     pdf_add_dict(resource, pdf_new_name("Differences"),  differences);
00140     return resource; 
00141   } else {
00142     /* Fix a bug with the MinionPro package using MnSymbol fonts
00143      * in its virtual fonts:
00144      *
00145      * Some font may have font_id even if no character is used.
00146      * For example, suppose that a virtual file A.vf uses two
00147      * other fonts, B and C. Even if only characters of B are used
00148      * in a DVI document, C will have font_id too.
00149      * In this case, both baseenc and differences can be NULL.
00150      *
00151      * Actually these fonts will be ignored in pdffont.c.
00152      */
00153     return baseenc ? pdf_link_obj(baseenc->resource) : NULL;
00154   }
00155 }
00156 
00157 static void
00158 pdf_flush_encoding (pdf_encoding *encoding)
00159 {
00160   ASSERT(encoding);
00161 
00162   if (encoding->resource) {
00163     pdf_release_obj(encoding->resource);
00164     encoding->resource  = NULL;
00165   }
00166   if (encoding->tounicode) {
00167     pdf_release_obj(encoding->tounicode);
00168     encoding->tounicode = NULL;
00169   }
00170 
00171   return;
00172 }
00173 
00174 static void
00175 pdf_clean_encoding_struct (pdf_encoding *encoding)
00176 {
00177   int   code;
00178 
00179   ASSERT(encoding);
00180 
00181   if (encoding->resource)
00182     ERROR("Object not flushed.");
00183 
00184   if (encoding->tounicode)
00185     pdf_release_obj(encoding->tounicode);
00186   if (encoding->ident)
00187     RELEASE(encoding->ident);
00188   if (encoding->enc_name)
00189     RELEASE(encoding->enc_name);
00190 
00191   encoding->ident    = NULL;
00192   encoding->enc_name = NULL;
00193 
00194   for (code = 0; code < 256; code++) {
00195     if (encoding->glyphs[code])
00196       RELEASE(encoding->glyphs[code]);
00197     encoding->glyphs[code] = NULL;
00198   }
00199   encoding->ident    = NULL;
00200   encoding->enc_name = NULL;
00201 
00202   return;
00203 }
00204 
#if 0
/* Disabled comparison helper: the first argument is used as a string, the
 * second as a pointer to a string; returns their strcmp() ordering.
 */
static int CDECL
glycmp (const void *pv1, const void *pv2)
{
  return strcmp((char *) pv1, *((char **) pv2));
}
#endif
00217 
/* Heuristic test whether enc_vec is close to the reference vector enc_vec2.
 *
 * A character code counts as agreeing when enc_vec has no glyph for it
 * (NULL) or when both vectors name the same glyph.  Returns 1 as soon as
 * 64 agreeing codes have been seen, 0 if fewer than 64 exist.
 */
static int
is_similar_charset (char **enc_vec, const char **enc_vec2)
{
  int   code;
  int   agreeing = 0;

  for (code = 0; code < 256; code++) {
    char *glyph = enc_vec[code];

    /* A defined glyph with a different name does not count. */
    if (glyph && strcmp(glyph, enc_vec2[code]) != 0)
      continue;

    agreeing++;
    if (agreeing >= 64)   /* is 64 a good level? */
      return 1;
  }

  return 0;
}
00231 
00232 /* Creates a PDF Differences array for the encoding, based on the
00233  * base encoding baseenc (if not NULL). Only character codes which
00234  * are actually used in the document are considered.
00235  */
00236 static pdf_obj *
00237 make_encoding_differences (char **enc_vec, char **baseenc, const char *is_used)
00238 {
00239   pdf_obj *differences = NULL;
00240   int      code, count = 0;
00241   int      skipping = 1;
00242 
00243   ASSERT(enc_vec);
00244 
00245   /*
00246    *  Write all entries (except .notdef) if baseenc is unknown.
00247    *  If is_used is given, write only used entries.
00248    */
00249   differences = pdf_new_array();
00250   for (code = 0; code < 256; code++) {
00251     /* We skip NULL (= ".notdef"). Any character code mapped to ".notdef"
00252      * glyph should not be used in the document.
00253      */
00254     if ((is_used && !is_used[code]) || !enc_vec[code])
00255       skipping = 1;
00256     else if (!baseenc || !baseenc[code] ||
00257              strcmp(baseenc[code], enc_vec[code]) != 0) {
00258       /*
00259        * Difference found.
00260        */
00261       if (skipping)
00262         pdf_add_array(differences, pdf_new_number(code));
00263       pdf_add_array(differences,   pdf_new_name(enc_vec[code]));
00264       skipping = 0;
00265       count++;
00266     } else
00267       skipping = 1;
00268   }
00269 
00270   /*
00271    * No difference found. Some PDF viewers can't handle differences without
00272    * any differences. We return NULL.
00273    */
00274   if (count == 0) {
00275     pdf_release_obj(differences);
00276     differences = NULL;
00277   }
00278 
00279   return differences;
00280 }
00281 
00282 static int
00283 load_encoding_file (const char *filename)
00284 {
00285   FILE    *fp;
00286   pdf_obj *enc_name = NULL;
00287   pdf_obj *encoding_array = NULL;
00288   char    *wbuf, *p, *endptr;
00289   char    *enc_vec[256];
00290   int      code, fsize, enc_id;
00291 
00292   if (!filename)
00293     return -1;
00294 
00295   if (verbose) {
00296     MESG("(Encoding:%s", filename);
00297   }
00298 
00299   fp = DPXFOPEN(filename, DPX_RES_TYPE_ENC);
00300   if (!fp)
00301     return -1;
00302   /*
00303    * file_size do seek_end witout saving current position and
00304    * do rewind.
00305    */
00306   fsize = file_size(fp);
00307 
00308   wbuf = NEW(fsize + 1, char); 
00309   fread(wbuf, sizeof(char), fsize, fp);
00310   DPXFCLOSE(fp);
00311 
00312   p        = wbuf;
00313   endptr   = wbuf + fsize;
00314   p[fsize] = '\0';
00315 
00316   skip_white(&p, endptr);
00317 
00318   /*
00319    * Skip comment lines.
00320    */
00321   while (p < endptr && p[0] == '%') {
00322     skip_line (&p, endptr);
00323     skip_white(&p, endptr);
00324   }
00325   if (p[0] == '/')
00326     enc_name = parse_pdf_name(&p, endptr);
00327 
00328   skip_white(&p, endptr);
00329   encoding_array = parse_pdf_array(&p, endptr, NULL);
00330   RELEASE(wbuf);
00331   if (!encoding_array) {
00332     if (enc_name)
00333       pdf_release_obj(enc_name);
00334     return -1;
00335   }
00336 
00337   for (code = 0; code < 256; code++) {
00338     enc_vec[code] = pdf_name_value(pdf_get_array(encoding_array, code));
00339   }
00340   enc_id = pdf_encoding_new_encoding(enc_name ? pdf_name_value(enc_name) : NULL,
00341                                  filename, (const char **) enc_vec, NULL, 0);
00342 
00343   if (enc_name) {
00344     if (verbose > 1)
00345       MESG("[%s]", pdf_name_value(enc_name));
00346     pdf_release_obj(enc_name);
00347   }
00348   pdf_release_obj(encoding_array);
00349 
00350   if (verbose) MESG(")");
00351 
00352   return enc_id;
00353 }
00354 
00355 #define CHECK_ID(n) do { \
00356   if ((n) < 0 || (n) >= enc_cache.count) { \
00357      ERROR("Invalid encoding id: %d", (n)); \
00358   } \
00359 } while (0)
00360 
00361 #define CACHE_ALLOC_SIZE 16u
00362 
00363 struct {
00364   int           count;
00365   int           capacity;
00366   pdf_encoding *encodings;
00367 } enc_cache = {
00368   0, 0, NULL
00369 };
00370 
00371 void
00372 pdf_init_encodings (void)
00373 {
00374   enc_cache.count     = 0;
00375   enc_cache.capacity  = 3;
00376   enc_cache.encodings = NEW(enc_cache.capacity, pdf_encoding);
00377 
00378   /*
00379    * PDF Predefined Encodings
00380    */
00381   pdf_encoding_new_encoding("WinAnsiEncoding", "WinAnsiEncoding",
00382                          WinAnsiEncoding, NULL, FLAG_IS_PREDEFINED);
00383   pdf_encoding_new_encoding("MacRomanEncoding", "MacRomanEncoding",
00384                          MacRomanEncoding, NULL, FLAG_IS_PREDEFINED);
00385   pdf_encoding_new_encoding("MacExpertEncoding", "MacExpertEncoding",
00386                          MacExpertEncoding, NULL, FLAG_IS_PREDEFINED);
00387 
00388   return;
00389 }
00390 
00391 /*
00392  * The original dvipdfm describes as:
00393  *
00394  *  Some software doesn't like BaseEncoding key (e.g., FastLane) 
00395  *  so this code is commented out for the moment.  It may reemerge in the
00396  *  future
00397  *
00398  * and the line for BaseEncoding is commented out.
00399  *
00400  * I'm not sure why this happens. But maybe BaseEncoding key causes problems
00401  * when the font is Symbol font or TrueType font.
00402  */
00403 
00404 static int
00405 pdf_encoding_new_encoding (const char *enc_name, const char *ident,
00406                         const char **encoding_vec,
00407                         char *baseenc_name, int flags)
00408 {
00409   int      enc_id, code;
00410 
00411   pdf_encoding *encoding;
00412 
00413   enc_id   = enc_cache.count;
00414   if (enc_cache.count++ >= enc_cache.capacity) {
00415     enc_cache.capacity += 16;
00416     enc_cache.encodings = RENEW(enc_cache.encodings,
00417                                 enc_cache.capacity,  pdf_encoding);
00418   }
00419   encoding = &enc_cache.encodings[enc_id];
00420 
00421   pdf_init_encoding_struct(encoding);
00422 
00423   encoding->ident = NEW(strlen(ident)+1, char);
00424   strcpy(encoding->ident, ident);
00425   encoding->enc_name  = NEW(strlen(enc_name)+1, char);
00426   strcpy(encoding->enc_name, enc_name);
00427 
00428   encoding->flags = flags;
00429 
00430   for (code = 0; code < 256; code++)
00431     if (encoding_vec[code] && strcmp(encoding_vec[code], ".notdef")) {
00432       encoding->glyphs[code] = NEW(strlen(encoding_vec[code])+1, char);
00433       strcpy(encoding->glyphs[code], encoding_vec[code]);
00434     }
00435 
00436   if (!baseenc_name && !(flags & FLAG_IS_PREDEFINED)
00437       && is_similar_charset(encoding->glyphs, WinAnsiEncoding)) {
00438     /* Dvipdfmx default setting. */
00439     baseenc_name = "WinAnsiEncoding";
00440   }
00441 
00442   /* TODO: make base encoding configurable */
00443   if (baseenc_name) {
00444     int baseenc_id = pdf_encoding_findresource(baseenc_name);
00445     if (baseenc_id < 0 || !pdf_encoding_is_predefined(baseenc_id))
00446       ERROR("Illegal base encoding %s for encoding %s\n",
00447            baseenc_name, encoding->enc_name);
00448     encoding->baseenc = &enc_cache.encodings[baseenc_id];
00449   }
00450 
00451   if (flags & FLAG_IS_PREDEFINED)
00452     encoding->resource = pdf_new_name(encoding->enc_name);
00453 
00454   return enc_id;
00455 }
00456 
00457 /* Creates Encoding resource and ToUnicode CMap 
00458  * for all non-predefined encodings.
00459  */
00460 void pdf_encoding_complete ()
00461 {
00462   int  enc_id;
00463 
00464   for (enc_id = 0; enc_id < enc_cache.count; enc_id++) {
00465     if (!pdf_encoding_is_predefined(enc_id)) {
00466       pdf_encoding *encoding = &enc_cache.encodings[enc_id];
00467       /* Section 5.5.4 of the PDF 1.5 reference says that the encoding
00468        * of a Type 3 font must be completely described by a Differences
00469        * array, but implementation note 56 explains that this is rather
00470        * an incorrect implementation in Acrobat 4 and earlier. Hence,
00471        * we do use a base encodings for PDF versions >= 1.3.
00472        */
00473       int with_base = !(encoding->flags & FLAG_USED_BY_TYPE3)
00474                      || pdf_get_version() >= 4;
00475       ASSERT(!encoding->resource);
00476       encoding->resource = create_encoding_resource(encoding,
00477                                               with_base ? encoding->baseenc : NULL);
00478       ASSERT(!encoding->tounicode);
00479       encoding->tounicode = pdf_create_ToUnicode_CMap(encoding->enc_name,
00480                                                 encoding->glyphs,
00481                                                 encoding->is_used);
00482     }
00483   }
00484 }
00485 
00486 void
00487 pdf_close_encodings (void)
00488 {
00489   int  enc_id;
00490 
00491   if (enc_cache.encodings) {
00492     for (enc_id = 0; enc_id < enc_cache.count; enc_id++) {
00493       pdf_encoding *encoding;
00494 
00495       encoding = &enc_cache.encodings[enc_id];
00496       if (encoding) {
00497         pdf_flush_encoding(encoding);
00498         pdf_clean_encoding_struct(encoding);
00499       }
00500     }
00501     RELEASE(enc_cache.encodings);
00502   }
00503   enc_cache.encodings = NULL;
00504   enc_cache.count     = 0;
00505   enc_cache.capacity  = 0;
00506 }
00507 
00508 int
00509 pdf_encoding_findresource (char *enc_name)
00510 {
00511   int           enc_id;
00512   pdf_encoding *encoding;
00513 
00514   ASSERT(enc_name);
00515   for (enc_id = 0; enc_id < enc_cache.count; enc_id++) {
00516     encoding = &enc_cache.encodings[enc_id];
00517     if (encoding->ident &&
00518         !strcmp(enc_name, encoding->ident))
00519       return enc_id;
00520     else if (encoding->enc_name &&
00521              !strcmp(enc_name, encoding->enc_name))
00522       return enc_id;
00523   }
00524 
00525   return load_encoding_file(enc_name);
00526 }
00527 
00528 
00529 /*
00530  * Pointer will change if other encoding is loaded...
00531  */
00532 
00533 char **
00534 pdf_encoding_get_encoding (int enc_id)
00535 {
00536   pdf_encoding *encoding;
00537 
00538   CHECK_ID(enc_id);
00539 
00540   encoding = &enc_cache.encodings[enc_id];
00541 
00542   return encoding->glyphs;
00543 }
00544 
00545 pdf_obj *
00546 pdf_get_encoding_obj (int enc_id)
00547 {
00548   pdf_encoding *encoding;
00549 
00550   CHECK_ID(enc_id);
00551 
00552   encoding = &enc_cache.encodings[enc_id];
00553 
00554   return encoding->resource;
00555 }
00556 
00557 int
00558 pdf_encoding_is_predefined (int enc_id)
00559 {
00560   pdf_encoding *encoding;
00561 
00562   CHECK_ID(enc_id);
00563 
00564   encoding = &enc_cache.encodings[enc_id];
00565 
00566   return (encoding->flags & FLAG_IS_PREDEFINED) ? 1 : 0;
00567 }
00568 
00569 void
00570 pdf_encoding_used_by_type3 (int enc_id)
00571 {
00572   pdf_encoding *encoding;
00573 
00574   CHECK_ID(enc_id);
00575 
00576   encoding = &enc_cache.encodings[enc_id];
00577 
00578   encoding->flags |= FLAG_USED_BY_TYPE3;
00579 }
00580 
00581 
00582 char *
00583 pdf_encoding_get_name (int enc_id)
00584 {
00585   pdf_encoding *encoding;
00586 
00587   CHECK_ID(enc_id);
00588 
00589   encoding = &enc_cache.encodings[enc_id];
00590 
00591   return encoding->enc_name;
00592 }
00593 
00594 /* CSI_UNICODE */
00595 #include "cid.h"
00596 
00597 #include "cmap.h"
00598 #include "cmap_read.h"
00599 #include "cmap_write.h"
00600 
00601 #include "agl.h"
00602 
/* Scratch buffer used while building ToUnicode CMaps, plus the limits of
 * the one-byte code space 0x00..0xFF.
 */
#define WBUF_SIZE 1024
static unsigned char wbuf[WBUF_SIZE];
static unsigned char range_min[1] = { 0x00u };
static unsigned char range_max[1] = { 0xFFu };
00607 
00608 void
00609 pdf_encoding_add_usedchars (int encoding_id, const char *is_used)
00610 {
00611   pdf_encoding *encoding;
00612   int code;
00613 
00614   CHECK_ID(encoding_id);
00615 
00616   if (!is_used || pdf_encoding_is_predefined(encoding_id))
00617     return;
00618 
00619   encoding = &enc_cache.encodings[encoding_id];
00620 
00621   for (code = 0; code <= 0xff; code++)
00622     encoding->is_used[code] |= is_used[code];
00623 }
00624 
00625 pdf_obj *
00626 pdf_encoding_get_tounicode (int encoding_id)
00627 {
00628   CHECK_ID(encoding_id);
00629 
00630   return enc_cache.encodings[encoding_id].tounicode;
00631 }
00632 
00633 
00634 /* Creates a ToUnicode CMap. An empty CMap is replaced by NULL.
00635  *
00636  * For PDF <= 1.4 a complete CMap is created unless all character codes
00637  * are predefined in PDF. For PDF >= 1.5 only those character codes which
00638  * are not predefined appear in the CMap.
00639  *
00640  * Note: The PDF 1.4 reference is not consistent: Section 5.9 describes
00641  * the Unicode mapping of PDF 1.3 and Section 9.7.2 (in the context of
00642  * Tagged PDF) the one of PDF 1.5.
00643  */
00644 pdf_obj *
00645 pdf_create_ToUnicode_CMap (const char *enc_name,
00646                            char **enc_vec, const char *is_used)
00647 {
00648   pdf_obj  *stream;
00649   CMap     *cmap;
00650   int       code, all_predef;
00651   char     *cmap_name;
00652   unsigned char *p, *endptr;
00653 
00654   ASSERT(enc_name && enc_vec);
00655 
00656   cmap_name = NEW(strlen(enc_name)+strlen("-UTF16")+1, char);
00657   sprintf(cmap_name, "%s-UTF16", enc_name);
00658 
00659   cmap = CMap_new();
00660   CMap_set_name (cmap, cmap_name);
00661   CMap_set_type (cmap, CMAP_TYPE_TO_UNICODE);
00662   CMap_set_wmode(cmap, 0);
00663 
00664   CMap_set_CIDSysInfo(cmap, &CSI_UNICODE);
00665 
00666   CMap_add_codespacerange(cmap, range_min, range_max, 1);
00667 
00668   all_predef = 1;
00669   for (code = 0; code <= 0xff; code++) {
00670     if (is_used && !is_used[code])
00671       continue;
00672 
00673     if (enc_vec[code]) {
00674       long   len;
00675       int    fail_count = 0;
00676       agl_name *agln = agl_lookup_list(enc_vec[code]);
00677       /* Adobe glyph naming conventions are not used by viewers,
00678        * hence even ligatures (e.g, "f_i") must be explicitly defined
00679        */
00680       if (pdf_get_version() < 5 || !agln || !agln->is_predef) {
00681         wbuf[0] = (code & 0xff);
00682         p      = wbuf + 1;
00683         endptr = wbuf + WBUF_SIZE;
00684         len = agl_sput_UTF16BE(enc_vec[code], &p, endptr, &fail_count);
00685         if (len >= 1 && !fail_count) {
00686           CMap_add_bfchar(cmap, wbuf, 1, wbuf + 1, len);
00687          all_predef &= agln && agln->is_predef;
00688         }
00689       }
00690     }
00691   }
00692 
00693   stream = all_predef ? NULL : CMap_create_stream(cmap, 0);
00694 
00695   CMap_release(cmap);
00696   RELEASE(cmap_name);
00697 
00698   return stream;
00699 }
00700 
00701 
00702 pdf_obj *
00703 pdf_load_ToUnicode_stream (const char *ident)
00704 {
00705   pdf_obj *stream = NULL;
00706   CMap    *cmap;
00707   FILE    *fp;
00708 
00709   if (!ident)
00710     return NULL;
00711 
00712   fp = DPXFOPEN(ident, DPX_RES_TYPE_CMAP);
00713   if (!fp)
00714     return NULL;
00715   else if (CMap_parse_check_sig(fp) < 0) {
00716     DPXFCLOSE(fp);
00717     return NULL;
00718   }
00719 
00720   cmap = CMap_new();
00721   if (CMap_parse(cmap, fp) < 0) {
00722     WARN("Reading CMap file \"%s\" failed.", ident);
00723   } else {
00724     if (verbose) {
00725       MESG("(CMap:%s)", ident);
00726     }
00727     stream = CMap_create_stream(cmap, 0);
00728     if (!stream) {
00729       WARN("Failed to creat ToUnicode CMap stream for \"%s\".", ident);
00730     }
00731   }
00732   CMap_release(cmap);
00733   DPXFCLOSE(fp);
00734 
00735   return  stream;
00736 }
00737 
00738 
/* Glyph names of MacRomanEncoding, indexed by character code.
 * Each row holds four codes; the comment gives the code of its first entry.
 */
static const char *MacRomanEncoding[256] = {
  /* 0x00 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x04 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x08 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x0c */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x10 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x14 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x18 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x1c */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x20 */ "space", "exclam", "quotedbl", "numbersign",
  /* 0x24 */ "dollar", "percent", "ampersand", "quotesingle",
  /* 0x28 */ "parenleft", "parenright", "asterisk", "plus",
  /* 0x2c */ "comma", "hyphen", "period", "slash",
  /* 0x30 */ "zero", "one", "two", "three",
  /* 0x34 */ "four", "five", "six", "seven",
  /* 0x38 */ "eight", "nine", "colon", "semicolon",
  /* 0x3c */ "less", "equal", "greater", "question",
  /* 0x40 */ "at", "A", "B", "C",
  /* 0x44 */ "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K",
  /* 0x4c */ "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S",
  /* 0x54 */ "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "bracketleft",
  /* 0x5c */ "backslash", "bracketright", "asciicircum", "underscore",
  /* 0x60 */ "grave", "a", "b", "c",
  /* 0x64 */ "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k",
  /* 0x6c */ "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s",
  /* 0x74 */ "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "braceleft",
  /* 0x7c */ "bar", "braceright", "asciitilde", ".notdef",
  /* 0x80 */ "Adieresis", "Aring", "Ccedilla", "Eacute",
  /* 0x84 */ "Ntilde", "Odieresis", "Udieresis", "aacute",
  /* 0x88 */ "agrave", "acircumflex", "adieresis", "atilde",
  /* 0x8c */ "aring", "ccedilla", "eacute", "egrave",
  /* 0x90 */ "ecircumflex", "edieresis", "iacute", "igrave",
  /* 0x94 */ "icircumflex", "idieresis", "ntilde", "oacute",
  /* 0x98 */ "ograve", "ocircumflex", "odieresis", "otilde",
  /* 0x9c */ "uacute", "ugrave", "ucircumflex", "udieresis",
  /* 0xa0 */ "dagger", "degree", "cent", "sterling",
  /* 0xa4 */ "section", "bullet", "paragraph", "germandbls",
  /* 0xa8 */ "registered", "copyright", "trademark", "acute",
  /* 0xac */ "dieresis", "notequal", "AE", "Oslash",
  /* 0xb0 */ "infinity", "plusminus", "lessequal", "greaterequal",
  /* 0xb4 */ "yen", "mu", "partialdiff", "summation",
  /* 0xb8 */ "product", "pi", "integral", "ordfeminine",
  /* 0xbc */ "ordmasculine", "Omega", "ae", "oslash",
  /* 0xc0 */ "questiondown", "exclamdown", "logicalnot", "radical",
  /* 0xc4 */ "florin", "approxequal", "Delta", "guillemotleft",
  /* 0xc8 */ "guillemotright", "ellipsis", "space", "Agrave",
  /* 0xcc */ "Atilde", "Otilde", "OE", "oe",
  /* 0xd0 */ "endash", "emdash", "quotedblleft", "quotedblright",
  /* 0xd4 */ "quoteleft", "quoteright", "divide", "lozenge",
  /* 0xd8 */ "ydieresis", "Ydieresis", "fraction", "currency",
  /* 0xdc */ "guilsinglleft", "guilsinglright", "fi", "fl",
  /* 0xe0 */ "daggerdbl", "periodcentered", "quotesinglbase", "quotedblbase",
  /* 0xe4 */ "perthousand", "Acircumflex", "Ecircumflex", "Aacute",
  /* 0xe8 */ "Edieresis", "Egrave", "Iacute", "Icircumflex",
  /* 0xec */ "Idieresis", "Igrave", "Oacute", "Ocircumflex",
  /* 0xf0 */ "apple", "Ograve", "Uacute", "Ucircumflex",
  /* 0xf4 */ "Ugrave", "dotlessi", "circumflex", "tilde",
  /* 0xf8 */ "macron", "breve", "dotaccent", "ring",
  /* 0xfc */ "cedilla", "hungarumlaut", "ogonek", "caron"
};
00806 
/* Glyph names of MacExpertEncoding, indexed by character code.
 * Each row holds four codes; the comment gives the code of its first entry.
 */
static const char *MacExpertEncoding[256] = {
  /* 0x00 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x04 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x08 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x0c */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x10 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x14 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x18 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x1c */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x20 */ "space", "exclamsmall", "Hungarumlautsmall", "centoldstyle",
  /* 0x24 */ "dollaroldstyle", "dollarsuperior", "ampersandsmall", "Acutesmall",
  /* 0x28 */ "parenleftsuperior", "parenrightsuperior", "twodotenleader", "onedotenleader",
  /* 0x2c */ "comma", "hyphen", "period", "fraction",
  /* 0x30 */ "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
  /* 0x34 */ "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
  /* 0x38 */ "eightoldstyle", "nineoldstyle", "colon", "semicolon",
  /* 0x3c */ ".notdef", "threequartersemdash", ".notdef", "questionsmall",
  /* 0x40 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x44 */ "Ethsmall", ".notdef", ".notdef", "onequarter",
  /* 0x48 */ "onehalf", "threequarters", "oneeighth", "threeeighths",
  /* 0x4c */ "fiveeighths", "seveneighths", "onethird", "twothirds",
  /* 0x50 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x54 */ ".notdef", ".notdef", "ff", "fi",
  /* 0x58 */ "fl", "ffi", "ffl", "parenleftinferior",
  /* 0x5c */ ".notdef", "parenrightinferior", "Circumflexsmall", "hypheninferior",
  /* 0x60 */ "Gravesmall", "Asmall", "Bsmall", "Csmall",
  /* 0x64 */ "Dsmall", "Esmall", "Fsmall", "Gsmall",
  /* 0x68 */ "Hsmall", "Ismall", "Jsmall", "Ksmall",
  /* 0x6c */ "Lsmall", "Msmall", "Nsmall", "Osmall",
  /* 0x70 */ "Psmall", "Qsmall", "Rsmall", "Ssmall",
  /* 0x74 */ "Tsmall", "Usmall", "Vsmall", "Wsmall",
  /* 0x78 */ "Xsmall", "Ysmall", "Zsmall", "colonmonetary",
  /* 0x7c */ "onefitted", "rupiah", "Tildesmall", ".notdef",
  /* 0x80 */ ".notdef", "asuperior", "centsuperior", ".notdef",
  /* 0x84 */ ".notdef", ".notdef", ".notdef", "Aacutesmall",
  /* 0x88 */ "Agravesmall", "Acircumflexsmall", "Adieresissmall", "Atildesmall",
  /* 0x8c */ "Aringsmall", "Ccedillasmall", "Eacutesmall", "Egravesmall",
  /* 0x90 */ "Ecircumflexsmall", "Edieresissmall", "Iacutesmall", "Igravesmall",
  /* 0x94 */ "Icircumflexsmall", "Idieresissmall", "Ntildesmall", "Oacutesmall",
  /* 0x98 */ "Ogravesmall", "Ocircumflexsmall", "Odieresissmall", "Otildesmall",
  /* 0x9c */ "Uacutesmall", "Ugravesmall", "Ucircumflexsmall", "Udieresissmall",
  /* 0xa0 */ ".notdef", "eightsuperior", "fourinferior", "threeinferior",
  /* 0xa4 */ "sixinferior", "eightinferior", "seveninferior", "Scaronsmall",
  /* 0xa8 */ ".notdef", "centinferior", "twoinferior", ".notdef",
  /* 0xac */ "Dieresissmall", ".notdef", "Caronsmall", "osuperior",
  /* 0xb0 */ "fiveinferior", ".notdef", "commainferior", "periodinferior",
  /* 0xb4 */ "Yacutesmall", ".notdef", "dollarinferior", ".notdef",
  /* 0xb8 */ ".notdef", "Thornsmall", ".notdef", "nineinferior",
  /* 0xbc */ "zeroinferior", "Zcaronsmall", "AEsmall", "Oslashsmall",
  /* 0xc0 */ "questiondownsmall", "oneinferior", "Lslashsmall", ".notdef",
  /* 0xc4 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xc8 */ ".notdef", "Cedillasmall", ".notdef", ".notdef",
  /* 0xcc */ ".notdef", ".notdef", ".notdef", "OEsmall",
  /* 0xd0 */ "figuredash", "hyphensuperior", ".notdef", ".notdef",
  /* 0xd4 */ ".notdef", ".notdef", "exclamdownsmall", ".notdef",
  /* 0xd8 */ "Ydieresissmall", ".notdef", "onesuperior", "twosuperior",
  /* 0xdc */ "threesuperior", "foursuperior", "fivesuperior", "sixsuperior",
  /* 0xe0 */ "sevensuperior", "ninesuperior", "zerosuperior", ".notdef",
  /* 0xe4 */ "esuperior", "rsuperior", "tsuperior", ".notdef",
  /* 0xe8 */ ".notdef", "isuperior", "ssuperior", "dsuperior",
  /* 0xec */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0xf0 */ ".notdef", "lsuperior", "Ogoneksmall", "Brevesmall",
  /* 0xf4 */ "Macronsmall", "bsuperior", "nsuperior", "msuperior",
  /* 0xf8 */ "commasuperior", "periodsuperior", "Dotaccentsmall", "Ringsmall",
  /* 0xfc */ ".notdef", ".notdef", ".notdef", ".notdef"
};
00874 
/* Glyph names of WinAnsiEncoding, indexed by character code.
 * Each row holds four codes; the comment gives the code of its first entry.
 */
static const char *WinAnsiEncoding[256] = {
  /* 0x00 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x04 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x08 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x0c */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x10 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x14 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x18 */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x1c */ ".notdef", ".notdef", ".notdef", ".notdef",
  /* 0x20 */ "space", "exclam", "quotedbl", "numbersign",
  /* 0x24 */ "dollar", "percent", "ampersand", "quotesingle",
  /* 0x28 */ "parenleft", "parenright", "asterisk", "plus",
  /* 0x2c */ "comma", "hyphen", "period", "slash",
  /* 0x30 */ "zero", "one", "two", "three",
  /* 0x34 */ "four", "five", "six", "seven",
  /* 0x38 */ "eight", "nine", "colon", "semicolon",
  /* 0x3c */ "less", "equal", "greater", "question",
  /* 0x40 */ "at", "A", "B", "C",
  /* 0x44 */ "D", "E", "F", "G",
  /* 0x48 */ "H", "I", "J", "K",
  /* 0x4c */ "L", "M", "N", "O",
  /* 0x50 */ "P", "Q", "R", "S",
  /* 0x54 */ "T", "U", "V", "W",
  /* 0x58 */ "X", "Y", "Z", "bracketleft",
  /* 0x5c */ "backslash", "bracketright", "asciicircum", "underscore",
  /* 0x60 */ "grave", "a", "b", "c",
  /* 0x64 */ "d", "e", "f", "g",
  /* 0x68 */ "h", "i", "j", "k",
  /* 0x6c */ "l", "m", "n", "o",
  /* 0x70 */ "p", "q", "r", "s",
  /* 0x74 */ "t", "u", "v", "w",
  /* 0x78 */ "x", "y", "z", "braceleft",
  /* 0x7c */ "bar", "braceright", "asciitilde", "bullet",
  /* 0x80 */ "Euro", "bullet", "quotesinglbase", "florin",
  /* 0x84 */ "quotedblbase", "ellipsis", "dagger", "daggerdbl",
  /* 0x88 */ "circumflex", "perthousand", "Scaron", "guilsinglleft",
  /* 0x8c */ "OE", "bullet", "Zcaron", "bullet",
  /* 0x90 */ "bullet", "quoteleft", "quoteright", "quotedblleft",
  /* 0x94 */ "quotedblright", "bullet", "endash", "emdash",
  /* 0x98 */ "tilde", "trademark", "scaron", "guilsinglright",
  /* 0x9c */ "oe", "bullet", "zcaron", "Ydieresis",
  /* 0xa0 */ "space", "exclamdown", "cent", "sterling",
  /* 0xa4 */ "currency", "yen", "brokenbar", "section",
  /* 0xa8 */ "dieresis", "copyright", "ordfeminine", "guillemotleft",
  /* 0xac */ "logicalnot", "hyphen", "registered", "macron",
  /* 0xb0 */ "degree", "plusminus", "twosuperior", "threesuperior",
  /* 0xb4 */ "acute", "mu", "paragraph", "periodcentered",
  /* 0xb8 */ "cedilla", "onesuperior", "ordmasculine", "guillemotright",
  /* 0xbc */ "onequarter", "onehalf", "threequarters", "questiondown",
  /* 0xc0 */ "Agrave", "Aacute", "Acircumflex", "Atilde",
  /* 0xc4 */ "Adieresis", "Aring", "AE", "Ccedilla",
  /* 0xc8 */ "Egrave", "Eacute", "Ecircumflex", "Edieresis",
  /* 0xcc */ "Igrave", "Iacute", "Icircumflex", "Idieresis",
  /* 0xd0 */ "Eth", "Ntilde", "Ograve", "Oacute",
  /* 0xd4 */ "Ocircumflex", "Otilde", "Odieresis", "multiply",
  /* 0xd8 */ "Oslash", "Ugrave", "Uacute", "Ucircumflex",
  /* 0xdc */ "Udieresis", "Yacute", "Thorn", "germandbls",
  /* 0xe0 */ "agrave", "aacute", "acircumflex", "atilde",
  /* 0xe4 */ "adieresis", "aring", "ae", "ccedilla",
  /* 0xe8 */ "egrave", "eacute", "ecircumflex", "edieresis",
  /* 0xec */ "igrave", "iacute", "icircumflex", "idieresis",
  /* 0xf0 */ "eth", "ntilde", "ograve", "oacute",
  /* 0xf4 */ "ocircumflex", "otilde", "odieresis", "divide",
  /* 0xf8 */ "oslash", "ugrave", "uacute", "ucircumflex",
  /* 0xfc */ "udieresis", "yacute", "thorn", "ydieresis"
};
00942 
00943 #if 0
/*
 * StandardEncoding: table of 256 glyph-name strings, one per array index
 * (0x00-0xFF), written four entries per row.  The trailing offset comments
 * give the index of the first entry of every fourth row.
 *
 * This definition sits inside the "#if 0" region that opens just before
 * it, so it is not compiled.
 *
 * Layout visible in the table:
 *   0x00-0x1F  every slot is ".notdef"
 *   0x20-0x7E  "space" through "asciitilde"; 0x27 is "quoteright" and
 *              0x60 is "quoteleft"
 *   0x7F-0xA0  every slot is ".notdef"
 *   0xA1-0xFF  sparsely populated; the remaining slots are ".notdef"
 */
00944 static const char *
00945 StandardEncoding[256] = {
00946   ".notdef", ".notdef", ".notdef", ".notdef",  /* 0x00 */
00947   ".notdef", ".notdef", ".notdef", ".notdef",
00948   ".notdef", ".notdef", ".notdef", ".notdef",
00949   ".notdef", ".notdef", ".notdef", ".notdef",
00950   ".notdef", ".notdef", ".notdef", ".notdef",  /* 0x10 */
00951   ".notdef", ".notdef", ".notdef", ".notdef",
00952   ".notdef", ".notdef", ".notdef", ".notdef",
00953   ".notdef", ".notdef", ".notdef", ".notdef",
00954   "space", "exclam", "quotedbl", "numbersign",  /* 0x20 */
00955   "dollar", "percent", "ampersand", "quoteright",
00956   "parenleft", "parenright", "asterisk", "plus",
00957   "comma", "hyphen", "period", "slash",
00958   "zero", "one", "two", "three",  /* 0x30 */
00959   "four", "five", "six", "seven",
00960   "eight", "nine", "colon", "semicolon",
00961   "less", "equal", "greater", "question",
00962   "at", "A", "B", "C",  /* 0x40 */
00963   "D", "E", "F", "G",
00964   "H", "I", "J", "K",
00965   "L", "M", "N", "O",
00966   "P", "Q", "R", "S",  /* 0x50 */
00967   "T", "U", "V", "W",
00968   "X", "Y", "Z", "bracketleft",
00969   "backslash", "bracketright", "asciicircum", "underscore",
00970   "quoteleft", "a", "b", "c",  /* 0x60 */
00971   "d", "e", "f", "g",
00972   "h", "i", "j", "k",
00973   "l", "m", "n", "o",
00974   "p", "q", "r", "s",  /* 0x70 */
00975   "t", "u", "v", "w",
00976   "x", "y", "z", "braceleft",
00977   "bar", "braceright", "asciitilde", ".notdef",
00978   ".notdef", ".notdef", ".notdef", ".notdef",  /* 0x80 */
00979   ".notdef", ".notdef", ".notdef", ".notdef",
00980   ".notdef", ".notdef", ".notdef", ".notdef",
00981   ".notdef", ".notdef", ".notdef", ".notdef",
00982   ".notdef", ".notdef", ".notdef", ".notdef",  /* 0x90 */
00983   ".notdef", ".notdef", ".notdef", ".notdef",
00984   ".notdef", ".notdef", ".notdef", ".notdef",
00985   ".notdef", ".notdef", ".notdef", ".notdef",
00986   ".notdef", "exclamdown", "cent", "sterling",  /* 0xA0 */
00987   "fraction", "yen", "florin", "section",
00988   "currency", "quotesingle", "quotedblleft", "guillemotleft",
00989   "guilsinglleft", "guilsinglright", "fi", "fl",
00990   ".notdef", "endash", "dagger", "daggerdbl",  /* 0xB0 */
00991   "periodcentered", ".notdef", "paragraph", "bullet",
00992   "quotesinglbase", "quotedblbase", "quotedblright", "guillemotright",
00993   "ellipsis", "perthousand", ".notdef", "questiondown",
00994   ".notdef", "grave", "acute", "circumflex",  /* 0xC0 */
00995   "tilde", "macron", "breve", "dotaccent",
00996   "dieresis", ".notdef", "ring", "cedilla",
00997   ".notdef", "hungarumlaut", "ogonek", "caron",
00998   "emdash", ".notdef", ".notdef", ".notdef",  /* 0xD0 */
00999   ".notdef", ".notdef", ".notdef", ".notdef",
01000   ".notdef", ".notdef", ".notdef", ".notdef",
01001   ".notdef", ".notdef", ".notdef", ".notdef",
01002   ".notdef", "AE", ".notdef", "ordfeminine",  /* 0xE0 */
01003   ".notdef", ".notdef", ".notdef", ".notdef",
01004   "Lslash", "Oslash", "OE", "ordmasculine",
01005   ".notdef", ".notdef", ".notdef", ".notdef",
01006   ".notdef", "ae", ".notdef", ".notdef",  /* 0xF0 */
01007   ".notdef", "dotlessi", ".notdef", ".notdef",
01008   "lslash", "oslash", "oe", "germandbls",
01009   ".notdef", ".notdef", ".notdef", ".notdef"
01010 };
01011 
/*
 * ISOLatin1Encoding: table of 256 glyph-name strings, one per array index
 * (0x00-0xFF), written four entries per row.  The trailing offset comments
 * give the index of the first entry of every fourth row.
 *
 * This definition lies between "#if 0" and the "#endif" that follows it,
 * so it is not compiled.
 *
 * Layout visible in the table:
 *   0x00-0x1F  every slot is ".notdef"
 *   0x20-0x7E  "space" through "asciitilde"
 *   0x7F-0x8F  every slot is ".notdef"
 *   0x90-0x9F  "dotlessi", the two curly quotes and accent glyphs, with
 *              ".notdef" at 0x99 and 0x9C
 *   0xA0-0xFF  "space" through "ydieresis"; 0xA0 repeats "space" and
 *              0xAD repeats "hyphen"
 *
 * NOTE(review): the entries have not been checked against a reference
 * ISOLatin1 glyph list here -- verify before enabling this table.
 */
01012 static const char *
01013 ISOLatin1Encoding[256] = {
01014   ".notdef", ".notdef", ".notdef", ".notdef",  /* 0x00 */
01015   ".notdef", ".notdef", ".notdef", ".notdef",
01016   ".notdef", ".notdef", ".notdef", ".notdef",
01017   ".notdef", ".notdef", ".notdef", ".notdef",
01018   ".notdef", ".notdef", ".notdef", ".notdef",  /* 0x10 */
01019   ".notdef", ".notdef", ".notdef", ".notdef",
01020   ".notdef", ".notdef", ".notdef", ".notdef",
01021   ".notdef", ".notdef", ".notdef", ".notdef",
01022   "space", "exclam", "quotedbl", "numbersign",  /* 0x20 */
01023   "dollar", "percent", "ampersand", "quotesingle",
01024   "parenleft", "parenright", "asterisk", "plus",
01025   "comma", "hyphen", "period", "slash",
01026   "zero", "one", "two", "three",  /* 0x30 */
01027   "four", "five", "six", "seven",
01028   "eight", "nine", "colon", "semicolon",
01029   "less", "equal", "greater", "question",
01030   "at", "A", "B", "C",  /* 0x40 */
01031   "D", "E", "F", "G",
01032   "H", "I", "J", "K",
01033   "L", "M", "N", "O",
01034   "P", "Q", "R", "S",  /* 0x50 */
01035   "T", "U", "V", "W",
01036   "X", "Y", "Z", "bracketleft",
01037   "backslash", "bracketright", "asciicircum", "underscore",
01038   "grave", "a", "b", "c",  /* 0x60 */
01039   "d", "e", "f", "g",
01040   "h", "i", "j", "k",
01041   "l", "m", "n", "o",
01042   "p", "q", "r", "s",  /* 0x70 */
01043   "t", "u", "v", "w",
01044   "x", "y", "z", "braceleft",
01045   "bar", "braceright", "asciitilde", ".notdef",
01046   ".notdef", ".notdef", ".notdef", ".notdef",  /* 0x80 */
01047   ".notdef", ".notdef", ".notdef", ".notdef",
01048   ".notdef", ".notdef", ".notdef", ".notdef",
01049   ".notdef", ".notdef", ".notdef", ".notdef",
01050   "dotlessi", "quoteleft", "quoteright", "circumflex",  /* 0x90 */
01051   "tilde", "macron", "breve", "dotaccent",
01052   "dieresis", ".notdef", "ring", "cedilla",
01053   ".notdef", "hungarumlaut", "ogonek", "caron",
01054   "space", "exclamdown", "cent", "sterling",  /* 0xA0 */
01055   "currency", "yen", "brokenbar", "section",
01056   "dieresis", "copyright", "ordfeminine", "guillemotleft",
01057   "logicalnot", "hyphen", "registered", "macron",
01058   "degree", "plusminus", "twosuperior", "threesuperior",  /* 0xB0 */
01059   "acute", "mu", "paragraph", "periodcentered",
01060   "cedilla", "onesuperior", "ordmasculine", "guillemotright",
01061   "onequarter", "onehalf", "threequarters", "questiondown",
01062   "Agrave", "Aacute", "Acircumflex", "Atilde",  /* 0xC0 */
01063   "Adieresis", "Aring", "AE", "Ccedilla",
01064   "Egrave", "Eacute", "Ecircumflex", "Edieresis",
01065   "Igrave", "Iacute", "Icircumflex", "Idieresis",
01066   "Eth", "Ntilde", "Ograve", "Oacute",  /* 0xD0 */
01067   "Ocircumflex", "Otilde", "Odieresis", "multiply",
01068   "Oslash", "Ugrave", "Uacute", "Ucircumflex",
01069   "Udieresis", "Yacute", "Thorn", "germandbls",
01070   "agrave", "aacute", "acircumflex", "atilde",  /* 0xE0 */
01071   "adieresis", "aring", "ae", "ccedilla",
01072   "egrave", "eacute", "ecircumflex", "edieresis",
01073   "igrave", "iacute", "icircumflex", "idieresis",
01074   "eth", "ntilde", "ograve", "oacute",  /* 0xF0 */
01075   "ocircumflex", "otilde", "odieresis", "divide",
01076   "oslash", "ugrave", "uacute", "ucircumflex",
01077   "udieresis", "yacute", "thorn", "ydieresis"
01078 };
01079 #endif