Back to index

texmacs  1.0.7.15
Classes | Defines | Typedefs | Functions | Variables
pdfencoding.c File Reference
#include <string.h>
#include "system.h"
#include "mem.h"
#include "error.h"
#include "dpxutil.h"
#include "pdfparse.h"
#include "pdfobj.h"
#include "dpxfile.h"
#include "pdfencoding.h"
#include "cid.h"
#include "cmap.h"
#include "cmap_read.h"
#include "cmap_write.h"
#include "agl.h"

Go to the source code of this file.

Classes

struct  pdf_encoding

Defines

#define FLAG_IS_PREDEFINED   (1 << 0)
#define FLAG_USED_BY_TYPE3   (1 << 1)
#define CHECK_ID(n)
#define CACHE_ALLOC_SIZE   16u
#define WBUF_SIZE   1024

Typedefs

typedef struct pdf_encoding pdf_encoding

Functions

static int is_similar_charset (char **encoding, const char **encoding2)
static pdf_obj * make_encoding_differences (char **encoding, char **baseenc, const char *is_used)
void pdf_encoding_set_verbose (void)
static int pdf_encoding_new_encoding (const char *enc_name, const char *ident, const char **encoding_vec, char *baseenc_name, int flags)
static void pdf_init_encoding_struct (pdf_encoding *encoding)
static pdf_obj * create_encoding_resource (pdf_encoding *encoding, pdf_encoding *baseenc)
static void pdf_flush_encoding (pdf_encoding *encoding)
static void pdf_clean_encoding_struct (pdf_encoding *encoding)
static int load_encoding_file (const char *filename)
void pdf_init_encodings (void)
void pdf_encoding_complete ()
void pdf_close_encodings (void)
int pdf_encoding_findresource (char *enc_name)
char ** pdf_encoding_get_encoding (int enc_id)
pdf_obj * pdf_get_encoding_obj (int enc_id)
int pdf_encoding_is_predefined (int enc_id)
void pdf_encoding_used_by_type3 (int enc_id)
char * pdf_encoding_get_name (int enc_id)
void pdf_encoding_add_usedchars (int encoding_id, const char *is_used)
pdf_obj * pdf_encoding_get_tounicode (int encoding_id)
pdf_obj * pdf_create_ToUnicode_CMap (const char *enc_name, char **enc_vec, const char *is_used)
pdf_obj * pdf_load_ToUnicode_stream (const char *ident)

Variables

static unsigned char verbose = 0
static const char * MacRomanEncoding [256]
static const char * MacExpertEncoding [256]
static const char * WinAnsiEncoding [256]
struct {
int count
int capacity
pdf_encoding * encodings
} enc_cache
static unsigned char wbuf [WBUF_SIZE]
static unsigned char range_min [1] = {0x00u}
static unsigned char range_max [1] = {0xFFu}

Class Documentation

struct pdf_encoding

Definition at line 77 of file pdfencoding.c.

Collaboration diagram for pdf_encoding:
Class Members
struct pdf_encoding * baseenc
char * enc_name
int flags
char * glyphs [256]
char * ident
char is_used [256]
pdf_obj * resource
pdf_obj * tounicode

Define Documentation

#define CACHE_ALLOC_SIZE   16u

Definition at line 361 of file pdfencoding.c.

#define CHECK_ID (   n)
Value:
do { \
  if ((n) < 0 || (n) >= enc_cache.count) { \
     ERROR("Invalid encoding id: %d", (n)); \
  } \
} while (0)

Definition at line 355 of file pdfencoding.c.

#define FLAG_IS_PREDEFINED   (1 << 0)

Definition at line 74 of file pdfencoding.c.

#define FLAG_USED_BY_TYPE3   (1 << 1)

Definition at line 75 of file pdfencoding.c.

#define WBUF_SIZE   1024

Definition at line 603 of file pdfencoding.c.


Typedef Documentation

typedef struct pdf_encoding pdf_encoding

Function Documentation

static pdf_obj* create_encoding_resource ( pdf_encoding *  encoding,
pdf_encoding *  baseenc 
) [static]

Definition at line 124 of file pdfencoding.c.

{
  /*
   * Build the encoding resource object for ENCODING, optionally relative
   * to the base encoding BASEENC (which may be NULL).
   *
   * Returns a new dictionary holding "Differences" (and "BaseEncoding"
   * when BASEENC is given) if at least one difference exists. Otherwise
   * returns a new link to BASEENC's resource, or NULL when there is no
   * base encoding either.
   */
  pdf_obj *diff_array;
  pdf_obj *dict;

  ASSERT(encoding);
  ASSERT(!encoding->resource);

  diff_array = make_encoding_differences(encoding->glyphs,
                                         baseenc ? baseenc->glyphs : NULL,
                                         encoding->is_used);

  if (!diff_array) {
    /*
     * Workaround for the MinionPro package, whose virtual fonts use
     * MnSymbol fonts:
     *
     * A font can be assigned a font_id although none of its characters
     * is used. E.g. if virtual font A.vf refers to fonts B and C and the
     * DVI document only uses characters from B, C still gets a font_id.
     * Then both the base encoding and the differences may be NULL.
     *
     * Such fonts end up being ignored in pdffont.c.
     */
    if (!baseenc)
      return NULL;
    return pdf_link_obj(baseenc->resource);
  }

  dict = pdf_new_dict();
  if (baseenc) {
    pdf_add_dict(dict, pdf_new_name("BaseEncoding"),
                 pdf_link_obj(baseenc->resource));
  }
  pdf_add_dict(dict, pdf_new_name("Differences"), diff_array);

  return dict;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int is_similar_charset ( char **  enc_vec,
const char **  enc_vec2 
) [static]

Definition at line 219 of file pdfencoding.c.

{
  /*
   * Heuristic comparison of two 256-entry glyph name vectors.
   *
   * A slot counts as a match when enc_vec has no name there (NULL) or
   * when both vectors hold the same name. Returns 1 as soon as 64 slots
   * have matched, 0 if fewer than 64 match overall.
   * (The threshold of 64 is an open question in the original code.)
   */
  int  matches = 0;
  int  i;

  for (i = 0; i < 256; i++) {
    if (enc_vec[i] && strcmp(enc_vec[i], enc_vec2[i]) != 0)
      continue;       /* named slot whose names differ: not a match */

    matches++;
    if (matches >= 64)
      return 1;
  }

  return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int load_encoding_file ( const char *  filename) [static]

Definition at line 283 of file pdfencoding.c.

{
  /*
   * Load an encoding file and register its contents in the encoding cache.
   *
   * The file is expected to contain optional '%' comment lines, an
   * optional PDF name ("/Name") and then a PDF array of glyph names.
   *
   * Returns the id handed out by pdf_encoding_new_encoding(), or -1 when
   * filename is NULL, the file cannot be opened or sized, or no array
   * could be parsed.
   */
  FILE    *fp;
  pdf_obj *enc_name = NULL;
  pdf_obj *encoding_array = NULL;
  char    *filebuf, *p, *endptr;
  char    *enc_vec[256];
  int      code, fsize, enc_id;

  if (!filename)
    return -1;

  if (verbose) {
    MESG("(Encoding:%s", filename);
  }

  fp = DPXFOPEN(filename, DPX_RES_TYPE_ENC);
  if (!fp)
    return -1;
  /*
   * file_size does seek_end without saving the current position and
   * then rewinds.
   */
  fsize = file_size(fp);
  if (fsize < 0) {
    /* Size unknown: do not allocate or read with a bogus length. */
    DPXFCLOSE(fp);
    return -1;
  }

  filebuf = NEW(fsize + 1, char);
  /*
   * Use the number of bytes actually read as the buffer length. A short
   * read would otherwise leave uninitialised bytes between the data and
   * the terminator, and the parser would scan them.
   */
  fsize = (int) fread(filebuf, sizeof(char), fsize, fp);
  DPXFCLOSE(fp);

  p        = filebuf;
  endptr   = filebuf + fsize;
  p[fsize] = '\0';

  skip_white(&p, endptr);

  /*
   * Skip comment lines.
   */
  while (p < endptr && p[0] == '%') {
    skip_line (&p, endptr);
    skip_white(&p, endptr);
  }
  /* p[0] is readable even at endptr thanks to the terminator above. */
  if (p[0] == '/')
    enc_name = parse_pdf_name(&p, endptr);

  skip_white(&p, endptr);
  encoding_array = parse_pdf_array(&p, endptr, NULL);
  RELEASE(filebuf);
  if (!encoding_array) {
    if (enc_name)
      pdf_release_obj(enc_name);
    return -1;
  }

  for (code = 0; code < 256; code++) {
    enc_vec[code] = pdf_name_value(pdf_get_array(encoding_array, code));
  }
  /*
   * pdf_encoding_new_encoding() copies the name with strlen()/strcpy(),
   * so it must not receive NULL. When the file carries no "/Name",
   * fall back to the file name as the encoding name.
   */
  enc_id = pdf_encoding_new_encoding(enc_name ? pdf_name_value(enc_name) : filename,
                                     filename, (const char **) enc_vec, NULL, 0);

  if (enc_name) {
    if (verbose > 1)
      MESG("[%s]", pdf_name_value(enc_name));
    pdf_release_obj(enc_name);
  }
  pdf_release_obj(encoding_array);

  if (verbose) MESG(")");

  return enc_id;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static pdf_obj * make_encoding_differences ( char **  enc_vec,
char **  baseenc,
const char *  is_used 
) [static]

Definition at line 237 of file pdfencoding.c.

{
  /*
   * Build a "Differences" array describing enc_vec relative to baseenc.
   *
   * - With baseenc == NULL every named entry is written.
   * - With is_used != NULL only codes flagged as used are written.
   * - Entries without a name (NULL, i.e. ".notdef") are never written;
   *   codes mapped to ".notdef" should not be used in the document.
   *
   * Each run of consecutive written codes is introduced by its starting
   * code number. Returns NULL when nothing was written, because some PDF
   * viewers cannot handle a Differences array without any differences.
   */
  pdf_obj *diffs;
  int      n_written = 0;
  int      in_run    = 0;   /* non-zero while the previous code was written */
  int      c;

  ASSERT(enc_vec);

  diffs = pdf_new_array();
  for (c = 0; c < 256; c++) {
    if ((is_used && !is_used[c]) || !enc_vec[c]) {
      /* unused code or no glyph name: breaks the current run */
      in_run = 0;
      continue;
    }
    if (baseenc && baseenc[c] && strcmp(baseenc[c], enc_vec[c]) == 0) {
      /* identical to the base encoding: nothing to record */
      in_run = 0;
      continue;
    }

    /* Difference found. */
    if (!in_run)
      pdf_add_array(diffs, pdf_new_number(c));
    pdf_add_array(diffs, pdf_new_name(enc_vec[c]));
    in_run = 1;
    n_written++;
  }

  if (n_written == 0) {
    pdf_release_obj(diffs);
    return NULL;
  }

  return diffs;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void pdf_clean_encoding_struct ( pdf_encoding *  encoding) [static]

Definition at line 175 of file pdfencoding.c.

{
  /*
   * Release the memory owned by an encoding entry: the ToUnicode object,
   * the ident and enc_name strings and all 256 glyph name strings.
   *
   * The resource object must already have been flushed (see
   * pdf_flush_encoding); a remaining resource is reported through ERROR.
   * Every released pointer is reset to NULL so the entry holds no
   * dangling references afterwards.
   */
  int   code;

  ASSERT(encoding);

  if (encoding->resource)
    ERROR("Object not flushed.");

  if (encoding->tounicode) {
    pdf_release_obj(encoding->tounicode);
    encoding->tounicode = NULL;
  }
  if (encoding->ident) {
    RELEASE(encoding->ident);
    encoding->ident = NULL;
  }
  if (encoding->enc_name) {
    RELEASE(encoding->enc_name);
    encoding->enc_name = NULL;
  }

  for (code = 0; code < 256; code++) {
    if (encoding->glyphs[code])
      RELEASE(encoding->glyphs[code]);
    encoding->glyphs[code] = NULL;
  }

  return;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void pdf_close_encodings ( void  )

Definition at line 487 of file pdfencoding.c.

{
  /*
   * Flush and free every entry of the encoding cache, then release the
   * cache array itself and reset the cache to its empty state.
   */
  int  enc_id;

  if (enc_cache.encodings) {
    for (enc_id = 0; enc_id < enc_cache.count; enc_id++) {
      /*
       * The address of an array element is never NULL, so no NULL test
       * is needed here.
       */
      pdf_encoding *encoding = &enc_cache.encodings[enc_id];

      pdf_flush_encoding(encoding);
      pdf_clean_encoding_struct(encoding);
    }
    RELEASE(enc_cache.encodings);
  }
  enc_cache.encodings = NULL;
  enc_cache.count     = 0;
  enc_cache.capacity  = 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

pdf_obj* pdf_create_ToUnicode_CMap ( const char *  enc_name,
char **  enc_vec,
const char *  is_used 
)

Definition at line 645 of file pdfencoding.c.

{
  /*
   * Create a ToUnicode CMap stream for a 256-entry glyph name vector.
   *
   * enc_name: encoding name; the CMap is named "<enc_name>-UTF16".
   * enc_vec : glyph names indexed by character code (NULL = no glyph).
   * is_used : optional per-code usage flags; when given, unused codes
   *           are skipped.
   *
   * Returns the new stream, or NULL when no entry was added for a glyph
   * that is not flagged is_predef in the glyph list.
   */
  pdf_obj  *tounicode;
  CMap     *cmap;
  char     *name;
  int       only_predef = 1;
  int       c;

  ASSERT(enc_name && enc_vec);

  name = NEW(strlen(enc_name)+strlen("-UTF16")+1, char);
  sprintf(name, "%s-UTF16", enc_name);

  cmap = CMap_new();
  CMap_set_name (cmap, name);
  CMap_set_type (cmap, CMAP_TYPE_TO_UNICODE);
  CMap_set_wmode(cmap, 0);

  CMap_set_CIDSysInfo(cmap, &CSI_UNICODE);

  CMap_add_codespacerange(cmap, range_min, range_max, 1);

  for (c = 0; c <= 0xff; c++) {
    agl_name      *agln;
    unsigned char *dst, *limit;
    long           nbytes;
    int            failures = 0;

    if (is_used && !is_used[c])
      continue;
    if (!enc_vec[c])
      continue;

    agln = agl_lookup_list(enc_vec[c]);
    /*
     * Viewers do not apply the Adobe glyph naming conventions, so even
     * ligatures (e.g. "f_i") need an explicit entry. Only glyphs flagged
     * is_predef are left out, and only when pdf_get_version() >= 5.
     */
    if (pdf_get_version() >= 5 && agln && agln->is_predef)
      continue;

    /* wbuf[0] holds the source code, the UTF-16BE text follows it. */
    wbuf[0] = (c & 0xff);
    dst     = wbuf + 1;
    limit   = wbuf + WBUF_SIZE;
    nbytes  = agl_sput_UTF16BE(enc_vec[c], &dst, limit, &failures);
    if (nbytes >= 1 && !failures) {
      CMap_add_bfchar(cmap, wbuf, 1, wbuf + 1, nbytes);
      only_predef &= agln && agln->is_predef;
    }
  }

  if (only_predef)
    tounicode = NULL;
  else
    tounicode = CMap_create_stream(cmap, 0);

  CMap_release(cmap);
  RELEASE(name);

  return tounicode;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void pdf_encoding_add_usedchars ( int  encoding_id,
const char *  is_used 
)

Definition at line 609 of file pdfencoding.c.

{
  /*
   * Merge the per-code usage flags in is_used into the usage table of
   * the encoding with the given id. Nothing happens when is_used is
   * NULL or the encoding is a predefined one.
   */
  pdf_encoding *target;
  int           i;

  CHECK_ID(encoding_id);

  if (is_used == NULL)
    return;
  if (pdf_encoding_is_predefined(encoding_id))
    return;

  target = &enc_cache.encodings[encoding_id];

  for (i = 0; i < 256; i++) {
    target->is_used[i] |= is_used[i];
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void pdf_encoding_complete ( void  )

Definition at line 460 of file pdfencoding.c.

{
  /*
   * Create the resource object and the ToUnicode CMap for every cached
   * encoding that is not predefined.
   */
  int  id;

  for (id = 0; id < enc_cache.count; id++) {
    pdf_encoding *enc;
    pdf_encoding *base;

    if (pdf_encoding_is_predefined(id))
      continue;

    enc = &enc_cache.encodings[id];

    /*
     * Section 5.5.4 of the PDF 1.5 reference requires the encoding of a
     * Type 3 font to be described completely by a Differences array.
     * Implementation note 56 explains that this only reflects an
     * incorrect implementation in Acrobat 4 and earlier. The base
     * encoding is therefore dropped only for encodings used by Type 3
     * fonts when pdf_get_version() is below 4.
     */
    if (!(enc->flags & FLAG_USED_BY_TYPE3) || pdf_get_version() >= 4)
      base = enc->baseenc;
    else
      base = NULL;

    ASSERT(!enc->resource);
    enc->resource = create_encoding_resource(enc, base);

    ASSERT(!enc->tounicode);
    enc->tounicode = pdf_create_ToUnicode_CMap(enc->enc_name,
                                               enc->glyphs,
                                               enc->is_used);
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

int pdf_encoding_findresource ( char *  enc_name)

Definition at line 509 of file pdfencoding.c.

{
  /*
   * Look up an encoding by name.
   *
   * The cache is searched in id order; an entry matches when enc_name
   * equals its ident or its enc_name. If nothing matches, enc_name is
   * treated as a file name and handed to load_encoding_file(), whose
   * result (an id, or -1 on failure) is returned.
   */
  int  i;

  ASSERT(enc_name);

  for (i = 0; i < enc_cache.count; i++) {
    const pdf_encoding *cand = &enc_cache.encodings[i];

    if (cand->ident && strcmp(enc_name, cand->ident) == 0)
      return i;
    if (cand->enc_name && strcmp(enc_name, cand->enc_name) == 0)
      return i;
  }

  return load_encoding_file(enc_name);
}

Here is the call graph for this function:

Here is the caller graph for this function:

char** pdf_encoding_get_encoding ( int  enc_id)

Definition at line 534 of file pdfencoding.c.

{
  /*
   * Return the glyph name vector of the encoding with id enc_id.
   * The id is validated with CHECK_ID first.
   */
  CHECK_ID(enc_id);

  return enc_cache.encodings[enc_id].glyphs;
}

Here is the caller graph for this function:

char* pdf_encoding_get_name ( int  enc_id)

Definition at line 583 of file pdfencoding.c.

{
  /*
   * Return the enc_name string stored for the encoding with id enc_id.
   * The id is validated with CHECK_ID first.
   */
  CHECK_ID(enc_id);

  return enc_cache.encodings[enc_id].enc_name;
}

Here is the caller graph for this function:

pdf_obj* pdf_encoding_get_tounicode ( int  encoding_id)

Definition at line 626 of file pdfencoding.c.

{
  /*
   * Return the ToUnicode object stored for the given encoding id
   * (NULL if none has been created). The id is validated with CHECK_ID.
   */
  pdf_encoding *entry;

  CHECK_ID(encoding_id);

  entry = &enc_cache.encodings[encoding_id];

  return entry->tounicode;
}

Here is the caller graph for this function:

int pdf_encoding_is_predefined ( int  enc_id)

Definition at line 558 of file pdfencoding.c.

{
  /*
   * Return 1 when the encoding with id enc_id carries
   * FLAG_IS_PREDEFINED, 0 otherwise. The id is validated with CHECK_ID.
   */
  CHECK_ID(enc_id);

  if (enc_cache.encodings[enc_id].flags & FLAG_IS_PREDEFINED)
    return 1;

  return 0;
}

Here is the caller graph for this function:

static int pdf_encoding_new_encoding ( const char *  enc_name,
const char *  ident,
const char **  encoding_vec,
char *  baseenc_name,
int  flags 
) [static]

Definition at line 405 of file pdfencoding.c.

{
  /*
   * Append a new entry to the encoding cache and return its id.
   *
   * enc_name    : encoding name; when NULL, ident is used as the name
   *               (load_encoding_file can pass NULL for files that
   *               carry no "/Name").
   * ident       : identifier used for lookups; must not be NULL.
   * encoding_vec: 256 glyph names; NULL and ".notdef" entries are left
   *               unset in the new entry.
   * baseenc_name: name of the base encoding, or NULL. When NULL and the
   *               encoding is not predefined, "WinAnsiEncoding" is
   *               chosen if is_similar_charset() accepts it.
   * flags       : FLAG_* bits stored in the entry.
   *
   * All strings are copied; the caller keeps ownership of its arguments.
   */
  int      enc_id, code;

  pdf_encoding *encoding;

  ASSERT(ident && encoding_vec);

  if (!enc_name)
    enc_name = ident;   /* strlen()/strcpy() below must not see NULL */

  enc_id   = enc_cache.count;
  if (enc_cache.count++ >= enc_cache.capacity) {
    /*
     * Entries refer to their base encoding through a pointer into the
     * cache array. Growing the array may move it, so remember each
     * base encoding as an index first and rebuild the pointers after.
     */
    int *base_ids = NULL;
    int  i;

    if (enc_id > 0) {
      base_ids = NEW(enc_id, int);
      for (i = 0; i < enc_id; i++) {
        pdf_encoding *base = enc_cache.encodings[i].baseenc;
        base_ids[i] = base ? (int) (base - enc_cache.encodings) : -1;
      }
    }

    enc_cache.capacity += (int) CACHE_ALLOC_SIZE;
    enc_cache.encodings = RENEW(enc_cache.encodings,
                                enc_cache.capacity,  pdf_encoding);

    for (i = 0; i < enc_id; i++) {
      if (base_ids[i] >= 0)
        enc_cache.encodings[i].baseenc = &enc_cache.encodings[base_ids[i]];
    }
    if (base_ids)
      RELEASE(base_ids);
  }
  encoding = &enc_cache.encodings[enc_id];

  pdf_init_encoding_struct(encoding);

  encoding->ident = NEW(strlen(ident)+1, char);
  strcpy(encoding->ident, ident);
  encoding->enc_name  = NEW(strlen(enc_name)+1, char);
  strcpy(encoding->enc_name, enc_name);

  encoding->flags = flags;

  for (code = 0; code < 256; code++)
    if (encoding_vec[code] && strcmp(encoding_vec[code], ".notdef")) {
      encoding->glyphs[code] = NEW(strlen(encoding_vec[code])+1, char);
      strcpy(encoding->glyphs[code], encoding_vec[code]);
    }

  if (!baseenc_name && !(flags & FLAG_IS_PREDEFINED)
      && is_similar_charset(encoding->glyphs, WinAnsiEncoding)) {
    /* Dvipdfmx default setting. */
    baseenc_name = "WinAnsiEncoding";
  }

  /* TODO: make base encoding configurable */
  if (baseenc_name) {
    int baseenc_id = pdf_encoding_findresource(baseenc_name);
    /*
     * The lookup may load a file and thereby grow (and move) the cache,
     * so fetch the pointer to our own entry again before using it.
     */
    encoding = &enc_cache.encodings[enc_id];
    if (baseenc_id < 0 || !pdf_encoding_is_predefined(baseenc_id))
      ERROR("Illegal base encoding %s for encoding %s\n",
           baseenc_name, encoding->enc_name);
    encoding->baseenc = &enc_cache.encodings[baseenc_id];
  }

  if (flags & FLAG_IS_PREDEFINED)
    encoding->resource = pdf_new_name(encoding->enc_name);

  return enc_id;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void pdf_encoding_set_verbose ( void  )

Definition at line 58 of file pdfencoding.c.

{
  /* Raise this module's verbosity level by one step. */
  verbose += 1;
}

Here is the caller graph for this function:

void pdf_encoding_used_by_type3 ( int  enc_id)

Definition at line 570 of file pdfencoding.c.

{
  /*
   * Mark the encoding with id enc_id as used by a Type 3 font by setting
   * FLAG_USED_BY_TYPE3. The id is validated with CHECK_ID first.
   */
  CHECK_ID(enc_id);

  enc_cache.encodings[enc_id].flags |= FLAG_USED_BY_TYPE3;
}

Here is the caller graph for this function:

static void pdf_flush_encoding ( pdf_encoding *  encoding) [static]

Definition at line 158 of file pdfencoding.c.

{
  /*
   * Release the resource and ToUnicode objects of an encoding entry
   * (when present) and reset both pointers to NULL.
   */
  ASSERT(encoding);

  if (encoding->resource != NULL)
    pdf_release_obj(encoding->resource);
  encoding->resource = NULL;

  if (encoding->tounicode != NULL)
    pdf_release_obj(encoding->tounicode);
  encoding->tounicode = NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

pdf_obj* pdf_get_encoding_obj ( int  enc_id)

Definition at line 546 of file pdfencoding.c.

{
  /*
   * Return the resource object stored for the encoding with id enc_id
   * (may be NULL). The id is validated with CHECK_ID first.
   */
  CHECK_ID(enc_id);

  return enc_cache.encodings[enc_id].resource;
}

Here is the caller graph for this function:

static void pdf_init_encoding_struct ( pdf_encoding *  encoding) [static]

Definition at line 99 of file pdfencoding.c.

{
  /*
   * Put an encoding entry into its empty state: no names, no objects,
   * no base encoding, no flags, and all 256 glyph slots and usage flags
   * cleared.
   */
  ASSERT(encoding);

  /* scalar members */
  encoding->flags     = 0;
  encoding->baseenc   = NULL;
  encoding->resource  = NULL;
  encoding->tounicode = NULL;
  encoding->enc_name  = NULL;
  encoding->ident     = NULL;

  /* per-code tables */
  memset(encoding->is_used, 0, 256);
  memset(encoding->glyphs,  0, 256*sizeof(char *));
}

Here is the caller graph for this function:

void pdf_init_encodings ( void  )

Definition at line 372 of file pdfencoding.c.

{
  /*
   * Set up the encoding cache and register the three PDF predefined
   * encodings. They are registered in the order WinAnsi, MacRoman,
   * MacExpert, each with its name also serving as ident.
   */
  static const struct {
    const char  *name;
    const char **glyphs;
  } predefined[3] = {
    { "WinAnsiEncoding",   WinAnsiEncoding   },
    { "MacRomanEncoding",  MacRomanEncoding  },
    { "MacExpertEncoding", MacExpertEncoding }
  };
  int  i;

  enc_cache.count     = 0;
  enc_cache.capacity  = 3;
  enc_cache.encodings = NEW(enc_cache.capacity, pdf_encoding);

  for (i = 0; i < 3; i++) {
    pdf_encoding_new_encoding(predefined[i].name, predefined[i].name,
                              predefined[i].glyphs, NULL, FLAG_IS_PREDEFINED);
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

pdf_obj* pdf_load_ToUnicode_stream ( const char *  ident)

Definition at line 703 of file pdfencoding.c.

{
  /*
   * Load the CMap resource named ident and convert it into a stream
   * object.
   *
   * Returns NULL when ident is NULL, the file cannot be opened, its
   * signature check fails, parsing fails, or no stream could be created.
   * Parse and stream-creation failures are reported through WARN.
   */
  pdf_obj *result = NULL;
  CMap    *parsed;
  FILE    *in;

  if (ident == NULL)
    return NULL;

  in = DPXFOPEN(ident, DPX_RES_TYPE_CMAP);
  if (in == NULL)
    return NULL;

  if (CMap_parse_check_sig(in) < 0) {
    DPXFCLOSE(in);
    return NULL;
  }

  parsed = CMap_new();
  if (CMap_parse(parsed, in) >= 0) {
    if (verbose) {
      MESG("(CMap:%s)", ident);
    }
    result = CMap_create_stream(parsed, 0);
    if (result == NULL) {
      WARN("Failed to creat ToUnicode CMap stream for \"%s\".", ident);
    }
  } else {
    WARN("Reading CMap file \"%s\" failed.", ident);
  }

  /* The CMap and the file are released on both paths. */
  CMap_release(parsed);
  DPXFCLOSE(in);

  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

struct { ... } enc_cache
static const char * MacExpertEncoding [static]

Definition at line 50 of file pdfencoding.c.

static const char * MacRomanEncoding [static]

Definition at line 49 of file pdfencoding.c.

unsigned char range_max[1] = {0xFFu} [static]

Definition at line 606 of file pdfencoding.c.

unsigned char range_min[1] = {0x00u} [static]

Definition at line 605 of file pdfencoding.c.

unsigned char verbose = 0 [static]

Definition at line 47 of file pdfencoding.c.

unsigned char wbuf[WBUF_SIZE] [static]

Definition at line 604 of file pdfencoding.c.

static const char * WinAnsiEncoding [static]

Definition at line 51 of file pdfencoding.c.