Back to index

texmacs  1.0.7.15
pdfobj.c
Go to the documentation of this file.
00001 /*  $Header: /home/cvsroot/dvipdfmx/src/pdfobj.c,v 1.74 2010/05/29 20:56:42 matthias Exp $
00002 
00003     This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
00004 
00005     Copyright (C) 2007 by Jin-Hwan Cho and Shunsaku Hirata,
00006     the dvipdfmx project team <dvipdfmx@project.ktug.or.kr>
00007     
00008     Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>
00009 
00010     This program is free software; you can redistribute it and/or modify
00011     it under the terms of the GNU General Public License as published by
00012     the Free Software Foundation; either version 2 of the License, or
00013     (at your option) any later version.
00014     
00015     This program is distributed in the hope that it will be useful,
00016     but WITHOUT ANY WARRANTY; without even the implied warranty of
00017     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018     GNU General Public License for more details.
00019     
00020     You should have received a copy of the GNU General Public License
00021     along with this program; if not, write to the Free Software
00022     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
00023 */
00024 
00025 #if HAVE_CONFIG_H
00026 #include "config.h"
00027 #endif
00028 
00029 #include <ctype.h>
00030 #include <string.h>
00031 
00032 #include "system.h"
00033 #include "mem.h"
00034 #include "error.h"
00035 #include "mfileio.h"
00036 #include "dpxutil.h"
00037 #include "pdflimits.h"
00038 #include "pdfencrypt.h"
00039 #include "pdfparse.h"
00040 
00041 #ifdef HAVE_ZLIB
00042 #include <zlib.h>
00043 #endif /* HAVE_ZLIB */
00044 
00045 #include "pdfobj.h"
00046 #include "pdfdev.h"
00047 
00048 #define STREAM_ALLOC_SIZE      4096u
00049 #define ARRAY_ALLOC_SIZE       256
00050 #define IND_OBJECTS_ALLOC_SIZE 512
00051 
00052 #define OBJ_NO_OBJSTM   (1 << 0)
00053 /* Objects with this flag will not be put into an object stream.
00054    For instance, all stream objects have this flag set.          */
00055 #define OBJ_NO_ENCRYPT  (1 << 1)
00056 /* Objects with this flag will not be encrypted.
00057    This implies OBJ_NO_OBJSTM if encryption is turned on.        */
00058 
00059 /* Any of these types can be represented as follows */
00060 struct pdf_obj 
00061 {
00062   int type;
00063 
00064   unsigned long  label;  /* Only used for indirect objects
00065                          all other "label" to zero */
00066   unsigned short generation;  /* Only used if "label" is used */
00067   unsigned refcount;  /* Number of links to this object */
00068   int      flags;
00069   void    *data;
00070 };
00071 
00072 struct pdf_boolean
00073 {
00074   char  value;
00075 };
00076 
00077 struct pdf_number
00078 {
00079   double value;
00080 };
00081 
00082 struct pdf_string
00083 {
00084   unsigned char *string;
00085   unsigned short length;
00086 };
00087 
00088 struct pdf_name
00089 {
00090   char *name;
00091 };
00092 
00093 struct pdf_array
00094 {
00095   unsigned long max;
00096   unsigned long size;
00097   struct pdf_obj **values;
00098 };
00099 
00100 struct pdf_dict
00101 {
00102   struct pdf_obj  *key;
00103   struct pdf_obj  *value;
00104   struct pdf_dict *next;
00105 };
00106 
00107 struct pdf_stream
00108 {
00109   struct pdf_obj *dict;
00110   unsigned char  *stream;
00111   long           *objstm_data;    /* used for object streams */
00112   unsigned long   stream_length;
00113   unsigned long   max_length;
00114   unsigned char   _flags;
00115 };
00116 
00117 struct pdf_indirect
00118 {
00119   pdf_file      *pf;
00120   unsigned long label;
00121   unsigned short generation;
00122 };
00123 
00124 typedef void                pdf_null;
00125 typedef struct pdf_boolean  pdf_boolean;
00126 typedef struct pdf_number   pdf_number;
00127 typedef struct pdf_string   pdf_string;
00128 typedef struct pdf_name     pdf_name;
00129 typedef struct pdf_array    pdf_array;
00130 typedef struct pdf_dict     pdf_dict;
00131 typedef struct pdf_stream   pdf_stream;
00132 typedef struct pdf_indirect pdf_indirect;
00133 
00134 static FILE *pdf_output_file = NULL;
00135 
00136 static long pdf_output_file_position = 0;
00137 static long pdf_output_line_position = 0;
00138 static long compression_saved        = 0;
00139 
00140 #define FORMAT_BUF_SIZE 4096
00141 static char format_buffer[FORMAT_BUF_SIZE];
00142 
/*
 * One entry of a cross-reference table.  dump_xref_table() accepts only
 * type 0 (written as 'f') and type 1 (written as 'n'); dump_xref_stream()
 * writes the type byte, field2 and field3 verbatim.
 */
typedef struct xref_entry
{
  unsigned char  type;       /* object storage type              */
  unsigned long  field2;     /* offset in file or object stream  */
  unsigned short field3;     /* generation or index              */
  pdf_obj       *direct;     /* used for imported objects        */
  pdf_obj       *indirect;   /* used for imported objects        */
} xref_entry;
00151 
00152 static xref_entry *output_xref;
00153 
00154 static unsigned long pdf_max_ind_objects;
00155 static unsigned long next_label;
00156 
00157 static unsigned long startxref;
00158 
/*
 * Per-file state referenced by pdf_indirect.pf.
 * NOTE(review): presumably describes an imported (input) PDF file --
 * the code using these fields is outside this part of the source; confirm.
 */
struct pdf_file
{
  FILE       *file;
  pdf_obj    *trailer;
  xref_entry *xref_table;
  pdf_obj    *catalog;
  long        num_obj;
  long        file_size;
  int         version;
};
00169 
00170 static pdf_obj *output_stream;
00171 
00172 #define OBJSTM_MAX_OBJS  200
00173 /* the limit is only 100 for linearized PDF */
00174 
00175 static int enc_mode;
00176 static int doc_enc_mode;
00177 
00178 static pdf_obj *trailer_dict;
00179 static pdf_obj *xref_stream;
00180 
00181 /* Internal static routines */
00182 
00183 static int check_for_pdf_version (FILE *file);
00184 
00185 static void pdf_flush_obj (pdf_obj *object, FILE *file);
00186 static void pdf_label_obj (pdf_obj *object);
00187 static void pdf_write_obj (pdf_obj *object, FILE *file);
00188 
00189 static void  set_objstm_data (pdf_obj *objstm, long *data);
00190 static long *get_objstm_data (pdf_obj *objstm);
00191 static void  release_objstm  (pdf_obj *objstm);
00192 
00193 static void pdf_out_char (FILE *file, char c);
00194 static void pdf_out      (FILE *file, const void *buffer, long length);
00195 
00196 static void release_indirect (pdf_indirect *data);
00197 static void write_indirect   (pdf_indirect *indirect, FILE *file);
00198 
00199 static void release_boolean (pdf_obj *data);
00200 static void write_boolean   (pdf_boolean *data, FILE *file);
00201 
00202 static void release_null (pdf_null *data);
00203 static void write_null   (pdf_null *data, FILE *file);
00204 
00205 static void release_number (pdf_number *number);
00206 static void write_number   (pdf_number *number, FILE *file);
00207 
00208 static void write_string   (pdf_string *str, FILE *file);
00209 static void release_string (pdf_string *str);
00210 
00211 static void write_name   (pdf_name *name, FILE *file);
00212 static void release_name (pdf_name *name);
00213 
00214 static void write_array   (pdf_array *array, FILE *file);
00215 static void release_array (pdf_array *array);
00216 
00217 static void write_dict   (pdf_dict *dict, FILE *file);
00218 static void release_dict (pdf_dict *dict);
00219 
00220 static void write_stream   (pdf_stream *stream, FILE *file);
00221 static void release_stream (pdf_stream *stream);
00222 
00223 static int  verbose = 0;
00224 static char compression_level = 9;
00225 
/*
 * Store the requested compression level (0..9) in compression_level.
 * Without zlib support this only reports an error.  When zlib lacks
 * compress2(), a warning is issued for any non-zero level, but the
 * value is still validated and stored.
 */
void
pdf_set_compression (int level)
{
#ifdef HAVE_ZLIB
#  ifndef HAVE_ZLIB_COMPRESS2
  if (level != 0)
    WARN("Unable to set compression level -- your zlib doesn't have compress2().");
#  endif
  if (level < 0 || level > 9) {
    ERROR("set_compression: invalid compression level: %d", level);
  } else {
    compression_level = level;
  }
#else  /* !HAVE_ZLIB */
  ERROR("You don't have compression compiled in. Possibly libz wasn't found by configure.");
#endif /* HAVE_ZLIB */
}
00245 
00246 static unsigned pdf_version = PDF_VERSION_DEFAULT;
00247 
00248 void
00249 pdf_set_version (unsigned version)
00250 {
00251   /* Don't forget to update CIDFont_stdcc_def[] in cid.c too! */
00252   if (version >= PDF_VERSION_MIN && version <= PDF_VERSION_MAX) {
00253     pdf_version = version;
00254   }
00255 }
00256 
00257 unsigned
00258 pdf_get_version (void)
00259 {
00260   return pdf_version;
00261 }
00262 
00263 int
00264 pdf_obj_get_verbose(void)
00265 {
00266   return verbose;
00267 }
00268 
00269 void
00270 pdf_obj_set_verbose(void)
00271 {
00272   verbose++;
00273 }
00274 
00275 static pdf_obj *current_objstm = NULL;
00276 static int do_objstm;
00277 
00278 static void
00279 add_xref_entry (unsigned long label, unsigned char type, unsigned long field2, unsigned short field3)
00280 {
00281   if (label >= pdf_max_ind_objects) {
00282     pdf_max_ind_objects = (label/IND_OBJECTS_ALLOC_SIZE+1)*IND_OBJECTS_ALLOC_SIZE;
00283     output_xref = RENEW(output_xref, pdf_max_ind_objects, xref_entry);
00284   }
00285 
00286   output_xref[label].type   = type;
00287   output_xref[label].field2 = field2;
00288   output_xref[label].field3 = field3;
00289   output_xref[label].direct   = NULL;
00290   output_xref[label].indirect = NULL;
00291 }
00292 
00293 #define BINARY_MARKER "%\344\360\355\370\n"
00294 void
00295 pdf_out_init (const char *filename, int do_encryption)
00296 {
00297   char v;
00298 
00299   output_xref = NULL;
00300   pdf_max_ind_objects = 0;
00301   add_xref_entry(0, 0, 0, 0xffff);
00302   next_label = 1;
00303 
00304   if (pdf_version >= 5) {
00305     xref_stream = pdf_new_stream(STREAM_COMPRESS);
00306     xref_stream->flags |= OBJ_NO_ENCRYPT;
00307     trailer_dict = pdf_stream_dict(xref_stream);
00308     pdf_add_dict(trailer_dict, pdf_new_name("Type"), pdf_new_name("XRef"));
00309     do_objstm = 1;
00310   } else {
00311     xref_stream = NULL;
00312     trailer_dict = pdf_new_dict();
00313     do_objstm = 0;
00314   }
00315 
00316   output_stream = NULL;
00317 
00318   pdf_output_file = MFOPEN(filename, FOPEN_WBIN_MODE);
00319   if (!pdf_output_file) {
00320     if (strlen(filename) < 128)
00321       ERROR("Unable to open \"%s\".", filename);
00322     else
00323       ERROR("Unable to open file.");
00324   }
00325   pdf_out(pdf_output_file, "%PDF-1.", strlen("%PDF-1."));
00326   v = '0' + pdf_version;
00327   pdf_out(pdf_output_file, &v, 1);
00328   pdf_out(pdf_output_file, "\n", 1);
00329   pdf_out(pdf_output_file, BINARY_MARKER, strlen(BINARY_MARKER));
00330 
00331   enc_mode = 0;
00332   doc_enc_mode = do_encryption;
00333 }
00334 
00335 static void
00336 dump_xref_table (void)
00337 {
00338   long length;
00339   unsigned long i;
00340 
00341   pdf_out(pdf_output_file, "xref\n", 5);
00342 
00343   length = sprintf(format_buffer, "%d %lu\n", 0, next_label);
00344   pdf_out(pdf_output_file, format_buffer, length);
00345 
00346   /*
00347    * Every space counts.  The space after the 'f' and 'n' is * *essential*.
00348    * The PDF spec says the lines must be 20 characters long including the
00349    * end of line character.
00350    */
00351   for (i = 0; i < next_label; i++) {
00352     unsigned char type = output_xref[i].type;
00353     if (type > 1)
00354       ERROR("object type %hu not allowed in xref table", type);
00355     length = sprintf(format_buffer, "%010lu %05hu %c \n",
00356                    output_xref[i].field2, output_xref[i].field3,
00357                    type ? 'n' : 'f');
00358     pdf_out(pdf_output_file, format_buffer, length);
00359   }
00360 }
00361 
00362 static void
00363 dump_trailer_dict (void)
00364 {
00365   pdf_out(pdf_output_file, "trailer\n", 8);
00366   enc_mode = 0;
00367   write_dict(trailer_dict->data, pdf_output_file);
00368   pdf_release_obj(trailer_dict);
00369   pdf_out_char(pdf_output_file, '\n');
00370 }
00371 
00372 /*
00373  * output a PDF 1.5 cross-reference stream;
00374  * contributed by Matthias Franz (March 21, 2007)
00375  */
00376 static void
00377 dump_xref_stream (void)
00378 {
00379   unsigned long pos, i;
00380   unsigned poslen;
00381   unsigned char buf[7] = {0, 0, 0, 0, 0};
00382 
00383   pdf_obj *w;
00384 
00385   /* determine the necessary size of the offset field */
00386   pos = startxref; /* maximal offset value */
00387   poslen = 1;
00388   while (pos >>= 8)
00389     poslen++;
00390 
00391   w = pdf_new_array();
00392   pdf_add_array(w, pdf_new_number(1));      /* type                */
00393   pdf_add_array(w, pdf_new_number(poslen)); /* offset (big-endian) */
00394   pdf_add_array(w, pdf_new_number(2));      /* generation          */
00395   pdf_add_dict(trailer_dict, pdf_new_name("W"), w);
00396 
00397   /* We need the xref entry for the xref stream right now */
00398   add_xref_entry(next_label-1, 1, startxref, 0);
00399 
00400   for (i = 0; i < next_label; i++) {
00401     unsigned j;
00402     unsigned short f3;
00403     buf[0] = output_xref[i].type;
00404     pos = output_xref[i].field2;
00405     for (j = poslen; j--; ) {
00406       buf[1+j] = (unsigned char) pos;
00407       pos >>= 8;
00408     }
00409     f3 = output_xref[i].field3;
00410     buf[poslen+1] = (unsigned char) (f3 >> 8);
00411     buf[poslen+2] = (unsigned char) (f3);
00412     pdf_add_stream(xref_stream, &buf, poslen+3);
00413   }
00414 
00415   pdf_release_obj(xref_stream);
00416 }
00417 
00418 void
00419 pdf_out_flush (void)
00420 {
00421   if (pdf_output_file) {
00422     long length;
00423 
00424     /* Flush current object stream */
00425     if (current_objstm) {
00426       release_objstm(current_objstm);
00427       current_objstm =NULL;
00428     }
00429 
00430     /*
00431      * Label xref stream - we need the number of correct objects
00432      * for the xref stream dictionary (= trailer).
00433      * Labelling it in pdf_out_init (with 1)  does not work (why?).
00434      */
00435     if (xref_stream)
00436       pdf_label_obj(xref_stream);
00437 
00438     /* Record where this xref is for trailer */
00439     startxref = pdf_output_file_position;
00440 
00441     pdf_add_dict(trailer_dict, pdf_new_name("Size"),
00442                pdf_new_number(next_label));
00443 
00444     if (xref_stream)
00445       dump_xref_stream();
00446     else {
00447       dump_xref_table();
00448       dump_trailer_dict();
00449     }
00450 
00451     /* Done with xref table */
00452     RELEASE(output_xref);
00453 
00454     pdf_out(pdf_output_file, "startxref\n", 10);
00455     length = sprintf(format_buffer, "%lu\n", startxref);
00456     pdf_out(pdf_output_file, format_buffer, length);
00457     pdf_out(pdf_output_file, "%%EOF\n", 6);
00458 
00459     MESG("\n");
00460     if (verbose) {
00461       if (compression_level > 0) {
00462        MESG("Compression saved %ld bytes%s\n", compression_saved,
00463             pdf_version < 5 ? ". Try \"-V 5\" for better compression" : "");
00464       }
00465     }
00466     MESG("%ld bytes written", pdf_output_file_position);
00467 
00468     MFCLOSE(pdf_output_file);
00469   }
00470 }
00471 
00472 void
00473 pdf_error_cleanup (void)
00474 {
00475   /*
00476    * This routine is the cleanup required for an abnormal exit.
00477    * For now, simply close the file.
00478    */
00479   if (pdf_output_file)
00480     MFCLOSE(pdf_output_file);
00481 }
00482 
00483 
00484 void
00485 pdf_set_root (pdf_obj *object)
00486 {
00487   if (pdf_add_dict(trailer_dict, pdf_new_name("Root"), pdf_ref_obj(object))) {
00488     ERROR("Root object already set!");
00489   }
00490   /* Adobe Readers don't like a document catalog inside an encrypted
00491    * object stream, although the PDF v1.5 spec seems to allow this.
00492    * Note that we don't set OBJ_NO_ENCRYPT since the name dictionary in
00493    * a document catalog may contain strings, which should be encrypted.
00494    */
00495   if (doc_enc_mode)
00496     object->flags |= OBJ_NO_OBJSTM;
00497 }
00498 
00499 void
00500 pdf_set_info (pdf_obj *object)
00501 {
00502   if (pdf_add_dict(trailer_dict, pdf_new_name("Info"), pdf_ref_obj(object))) {
00503     ERROR ("Info object already set!");
00504   }
00505 }
00506 
00507 void
00508 pdf_set_id (pdf_obj *id)
00509 {
00510   if (pdf_add_dict(trailer_dict, pdf_new_name("ID"), id)) {
00511     ERROR ("ID already set!");
00512   }
00513 }
00514 
00515 void
00516 pdf_set_encrypt (pdf_obj *encrypt)
00517 {
00518   if (pdf_add_dict(trailer_dict, pdf_new_name("Encrypt"), pdf_ref_obj(encrypt))) {
00519     ERROR("Encrypt object already set!");
00520   }
00521   encrypt->flags |= OBJ_NO_ENCRYPT;
00522 }
00523 
00524 static
00525 void pdf_out_char (FILE *file, char c)
00526 {
00527   if (output_stream && file ==  pdf_output_file)
00528     pdf_add_stream(output_stream, &c, 1);
00529   else {
00530     fputc(c, file);
00531     /* Keep tallys for xref table *only* if writing a pdf file. */
00532     if (file == pdf_output_file) {
00533       pdf_output_file_position += 1;
00534       if (c == '\n')
00535         pdf_output_line_position  = 0;
00536       else
00537         pdf_output_line_position += 1;
00538     }
00539   }
00540 }
00541 
/*
 * Write the byte "c" to "f" as two lowercase hexadecimal digits.
 * The argument is evaluated once and reduced to an unsigned byte first,
 * so a negative char is never right-shifted; the temporary's name avoids
 * the identifiers reserved for the implementation (leading "__").
 */
#define pdf_out_xchar(f,c) do {\
  unsigned int pdf_xchar_byte_ = (unsigned char) (c);\
  pdf_out_char((f), "0123456789abcdef"[(pdf_xchar_byte_ >> 4) & 0x0f]);\
  pdf_out_char((f), "0123456789abcdef"[pdf_xchar_byte_ & 0x0f]);\
} while (0)
00549 
00550 static
00551 void pdf_out (FILE *file, const void *buffer, long length)
00552 {
00553   if (output_stream && file ==  pdf_output_file)
00554     pdf_add_stream(output_stream, buffer, length);
00555   else {
00556     fwrite(buffer, 1, length, file);
00557     /* Keep tallys for xref table *only* if writing a pdf file */
00558     if (file == pdf_output_file) {
00559       pdf_output_file_position += length;
00560       pdf_output_line_position += length;
00561       /* "foo\nbar\n "... */
00562       if (length > 0 &&
00563        ((char *)buffer)[length-1] == '\n')
00564         pdf_output_line_position = 0;
00565     }
00566   }
00567 }
00568 
00569 /*  returns 1 if a white-space character is necessary to separate
00570     an object of type1 followed by an object of type2              */
00571 static
00572 int pdf_need_white (int type1, int type2)
00573 {
00574   return !(type1 == PDF_STRING || type1 == PDF_ARRAY || type1 == PDF_DICT ||
00575           type2 == PDF_STRING || type2 == PDF_NAME ||
00576           type2 == PDF_ARRAY || type2 == PDF_DICT);
00577 }
00578 
00579 static
00580 void pdf_out_white (FILE *file)
00581 {
00582   if (file == pdf_output_file && pdf_output_line_position >= 80) {
00583     pdf_out_char(file, '\n');
00584   } else {
00585     pdf_out_char(file, ' ');
00586   }
00587 }
00588 
/*
 * Report an error unless "o" is non-NULL and has type "t".  Wrapped in
 * do { } while (0) so the macro behaves as a single statement and cannot
 * capture a following "else".  Use it as a statement: TYPECHECK(o, t);
 */
#define TYPECHECK(o,t) do {\
  if (!(o) || (o)->type != (t)) {\
    ERROR("typecheck: Invalid object type: %d %d (line %d)", (o) ? (o)->type : -1, (t), __LINE__);\
  }\
} while (0)
00592 
/*
 * True when "o" is NULL or its type lies outside 1 .. PDF_UNDEFINED.
 * The argument is evaluated more than once, so it must be free of
 * side effects.
 */
#define INVALIDOBJ(o)  ((o) == NULL || (o)->type <= 0 || (o)->type > PDF_UNDEFINED)
00594 
00595 static pdf_obj *
00596 pdf_new_obj(int type)
00597 {
00598   pdf_obj *result;
00599 
00600   result = NEW(1, pdf_obj);
00601   result->type  = type;
00602   result->data  = NULL;
00603   result->label      = 0;
00604   result->generation = 0;
00605   result->refcount   = 1;
00606   result->flags      = 0;
00607 
00608   if (INVALIDOBJ(result))
00609     ERROR("Invalid object type: %d", type);
00610 
00611   return result;
00612 }
00613 
00614 int
00615 pdf_obj_typeof (pdf_obj *object)
00616 {
00617   if (INVALIDOBJ(object))
00618     return PDF_OBJ_INVALID;
00619 
00620   return object->type;
00621 }
00622 
00623 static void
00624 pdf_label_obj (pdf_obj *object)
00625 {
00626   if (INVALIDOBJ(object))
00627     ERROR("pdf_label_obj(): passed invalid object.");
00628 
00629   /*
00630    * Don't change label on an already labeled object. Ignore such calls.
00631    */
00632   if (object->label == 0) {
00633     object->label      = next_label++;
00634     object->generation = 0;
00635   }
00636 }
00637 
00638 /*
00639  * Transfer the label assigned to the object src to the object dst.
00640  * The object dst must not yet have been labeled.
00641  */
00642 void
00643 pdf_transfer_label (pdf_obj *dst, pdf_obj *src)
00644 {
00645   ASSERT(dst && !dst->label && src);
00646 
00647   dst->label      = src->label;
00648   dst->generation = src->generation;
00649   src->label      = 0;
00650   src->generation = 0;
00651 }
00652 
00653 /*
00654  * This doesn't really copy the object, but allows  it to be used without
00655  * fear that somebody else will free it.
00656  */
00657 pdf_obj *
00658 pdf_link_obj (pdf_obj *object)
00659 {
00660   if (INVALIDOBJ(object))
00661     ERROR("pdf_link_obj(): passed invalid object.");
00662 
00663   object->refcount += 1;
00664 
00665   return object;
00666 }
00667 
00668 
00669 pdf_obj *
00670 pdf_ref_obj (pdf_obj *object)
00671 {
00672   if (INVALIDOBJ(object))
00673     ERROR("pdf_ref_obj(): passed invalid object.");
00674   
00675   if (object->refcount == 0) {
00676     MESG("\nTrying to refer already released object!!!\n");
00677     pdf_write_obj(object, stderr);
00678     ERROR("Cannot continue...");
00679   }
00680 
00681   if (PDF_OBJ_INDIRECTTYPE(object)) {
00682     return pdf_link_obj(object);
00683   } else {
00684     if (object->label == 0) {
00685       pdf_label_obj(object);
00686     }
00687     return pdf_new_indirect(NULL, object->label, object->generation);
00688   }
00689 }
00690 
00691 static void
00692 release_indirect (pdf_indirect *data)
00693 {
00694   RELEASE(data);
00695 }
00696 
00697 static void
00698 write_indirect (pdf_indirect *indirect, FILE *file)
00699 {
00700   long length;
00701 
00702   ASSERT(!indirect->pf);
00703 
00704   length = sprintf(format_buffer, "%lu %hu R", indirect->label, indirect->generation);
00705   pdf_out(file, format_buffer, length);
00706 }
00707 
00708 /* The undefined object is used as a placeholder in pdfnames.c
00709  * for objects which are referenced before they are defined.
00710  */
00711 pdf_obj *
00712 pdf_new_undefined (void)
00713 {
00714   pdf_obj *result;
00715 
00716   result = pdf_new_obj(PDF_UNDEFINED);
00717   result->data = NULL;
00718 
00719   return result;
00720 }
00721 
00722 pdf_obj *
00723 pdf_new_null (void)
00724 {
00725   pdf_obj *result;
00726 
00727   result = pdf_new_obj(PDF_NULL);
00728   result->data = NULL;
00729 
00730   return result;
00731 }
00732 
00733 static void
00734 release_null (pdf_null *obj)
00735 {
00736   return;
00737 }
00738 
00739 static void
00740 write_null (pdf_null *obj, FILE *file)
00741 {
00742   pdf_out(file, "null", 4);
00743 }
00744 
00745 pdf_obj *
00746 pdf_new_boolean (char value)
00747 {
00748   pdf_obj     *result;
00749   pdf_boolean *data;
00750 
00751   result = pdf_new_obj(PDF_BOOLEAN);
00752   data   = NEW(1, pdf_boolean);
00753   data->value  = value;
00754   result->data = data;
00755 
00756   return result;
00757 }
00758 
00759 static void
00760 release_boolean (pdf_obj *data)
00761 {
00762   RELEASE (data);
00763 }
00764 
00765 static void
00766 write_boolean (pdf_boolean *data, FILE *file)
00767 {
00768   if (data->value) {
00769     pdf_out(file, "true", 4);
00770   } else {
00771     pdf_out(file, "false", 5);
00772   }
00773 }
00774 
/* Disabled (compiled out): setter that overwrites the value of a boolean object. */
#if 0
void
pdf_set_boolean (pdf_obj *object, char value)
{
  pdf_boolean *data;

  TYPECHECK(object, PDF_BOOLEAN);

  data = object->data;
  data->value = value;
}
#endif
00787 
00788 char
00789 pdf_boolean_value (pdf_obj *object)
00790 {
00791   pdf_boolean *data;
00792 
00793   TYPECHECK(object, PDF_BOOLEAN);
00794 
00795   data = object->data;
00796 
00797   return data->value;
00798 }
00799 
00800 pdf_obj *
00801 pdf_new_number (double value)
00802 {
00803   pdf_obj    *result;
00804   pdf_number *data;
00805 
00806   result = pdf_new_obj(PDF_NUMBER);
00807   data   = NEW(1, pdf_number);
00808   data->value  = value;
00809   result->data = data;
00810 
00811   return result;
00812 }
00813 
00814 static void
00815 release_number (pdf_number *data)
00816 {
00817   RELEASE (data);
00818 }
00819 
00820 static void
00821 write_number (pdf_number *number, FILE *file)
00822 {
00823   int count;
00824 
00825   count = pdf_sprint_number(format_buffer, number->value);
00826 
00827   pdf_out(file, format_buffer, count);
00828 }
00829 
00830 
00831 void
00832 pdf_set_number (pdf_obj *object, double value)
00833 {
00834   pdf_number *data;
00835 
00836   TYPECHECK(object, PDF_NUMBER);
00837 
00838   data = object->data;
00839   data->value = value;
00840 }
00841 
00842 double
00843 pdf_number_value (pdf_obj *object)
00844 {
00845   pdf_number *data;
00846 
00847   TYPECHECK(object, PDF_NUMBER);
00848 
00849   data = object->data;
00850 
00851   return data->value;
00852 }
00853 
00854 pdf_obj *
00855 pdf_new_string (const void *str, unsigned length)
00856 {
00857   pdf_obj    *result;
00858   pdf_string *data;
00859 
00860   ASSERT(str);
00861 
00862   result = pdf_new_obj(PDF_STRING);
00863   data   = NEW(1, pdf_string);
00864   result->data = data;
00865 
00866   data->length = length;
00867   data->string = NEW(length+1, unsigned char);
00868   memcpy(data->string, str, length);
00869   /* Shouldn't assume NULL terminated. */
00870   data->string[length] = '\0';
00871 
00872   return result;
00873 }
00874 
00875 void *
00876 pdf_string_value (pdf_obj *object)
00877 {
00878   pdf_string *data;
00879 
00880   TYPECHECK(object, PDF_STRING);
00881 
00882   data = object->data;
00883 
00884   return data->string;
00885 }
00886 
00887 unsigned
00888 pdf_string_length (pdf_obj *object)
00889 {
00890   pdf_string *data;
00891 
00892   TYPECHECK(object, PDF_STRING);
00893 
00894   data = object->data;
00895 
00896   return (unsigned) (data->length);
00897 }
00898 
00899 /*
00900  * This routine escapes non printable characters and control
00901  * characters in an output string.
00902  */
/*
 * Escape "len" bytes from "s" into "buffer" for use inside a PDF
 * literal string and return the number of bytes written.
 *
 *  - bytes below 32 or above 126 become a backslash followed by exactly
 *    three octal digits;
 *  - '(', ')' and '\\' are preceded by a backslash;
 *  - every other byte is copied unchanged.
 *
 * The output is NOT NUL-terminated.  Before each input byte the routine
 * checks that four bytes (the worst case) are still free in "buffer"
 * and reports "Buffer overflow" through ERROR otherwise.
 *
 * The octal digits are produced by hand instead of with sprintf():
 * sprintf() appends a terminating NUL, which landed one byte beyond the
 * space guaranteed by the check when the buffer was exactly full.
 */
int
pdfobj_escape_str (char *buffer, int bufsize, const unsigned char *s, int len)
{
  int result = 0;
  int i;

  for (i = 0; i < len; i++) {
    unsigned char ch = s[i];

    if (result > bufsize - 4)
      ERROR("pdfobj_escape_str: Buffer overflow");

    if (ch < 32 || ch > 126) {
      /*
       * We always write three octal digits. Optimization only gives few
       * Kb smaller size for most documents when zlib compressed.
       */
      buffer[result++] = '\\';
      buffer[result++] = (char) ('0' + ((ch >> 6) & 07));
      buffer[result++] = (char) ('0' + ((ch >> 3) & 07));
      buffer[result++] = (char) ('0' + (ch & 07));
    } else if (ch == '(' || ch == ')' || ch == '\\') {
      buffer[result++] = '\\';
      buffer[result++] = (char) ch;
    } else {
      buffer[result++] = (char) ch;
    }
  }

  return result;
}
00952 
00953 static void
00954 write_string (pdf_string *str, FILE *file)
00955 {
00956   unsigned char *s;
00957   char wbuf[FORMAT_BUF_SIZE]; /* Shouldn't use format_buffer[]. */
00958   int  nescc = 0, i, count;
00959 
00960   s = str->string;
00961 
00962   if (enc_mode)
00963     pdf_encrypt_data(s, str->length);
00964 
00965   /*
00966    * Count all ASCII non-printable characters.
00967    */
00968   for (i = 0; i < str->length; i++) {
00969     if (!isprint(s[i]))
00970       nescc++;
00971   }
00972   /*
00973    * If the string contains much escaped chars, then we write it as
00974    * ASCII hex string.
00975    */
00976   if (nescc > str->length / 3) {
00977     pdf_out_char(file, '<');
00978     for (i = 0; i < str->length; i++) {
00979       pdf_out_xchar(file, s[i]);
00980     }
00981     pdf_out_char(file, '>');
00982   } else {
00983     pdf_out_char(file, '(');
00984     /*
00985      * This section of code probably isn't speed critical.  Escaping the
00986      * characters in the string one at a time may seem slow, but it's
00987      * safe if the formatted string length exceeds FORMAT_BUF_SIZE.
00988      * Occasionally you see some long strings in PDF.  pdfobj_escape_str
00989      * is also used for strings of text with no kerning.  These must be
00990      * handled as quickly as possible since there are so many of them.
00991      */ 
00992     for (i = 0; i < str->length; i++) {
00993       count = pdfobj_escape_str(wbuf, FORMAT_BUF_SIZE, &(s[i]), 1);
00994       pdf_out(file, wbuf, count);
00995     }
00996     pdf_out_char(file, ')');
00997   }
00998 }
00999 
01000 static void
01001 release_string (pdf_string *data)
01002 {
01003   if (data->string != NULL) {
01004     RELEASE(data->string);
01005     data->string = NULL;
01006   }
01007   RELEASE(data);
01008 }
01009 
01010 void
01011 pdf_set_string (pdf_obj *object, unsigned char *str, unsigned length)
01012 {
01013   pdf_string *data;
01014 
01015   TYPECHECK(object, PDF_STRING);
01016 
01017   data = object->data;
01018   if (data->string != 0) {
01019     RELEASE(data->string);
01020   }
01021   if (length != 0) {
01022     data->length = length;
01023     data->string = NEW(length + 1, unsigned char);
01024     memcpy(data->string, str, length);
01025     data->string[length] = '\0';
01026   } else {
01027     data->length = 0;
01028     data->string = NULL;
01029   }
01030 }
01031 
01032 /* Name does *not* include the /. */ 
01033 pdf_obj *
01034 pdf_new_name (const char *name)
01035 {
01036   pdf_obj  *result;
01037   unsigned  length;
01038   pdf_name *data;
01039 
01040   result = pdf_new_obj(PDF_NAME);
01041   data   = NEW (1, pdf_name);
01042   result->data = data;
01043   length = strlen(name);
01044   if (length != 0) {
01045     data->name = NEW(length+1, char);
01046     memcpy(data->name, name, length);
01047     data->name[length] = '\0';
01048   } else {
01049     data->name = NULL;
01050   }
01051 
01052   return result;
01053 }
01054 
01055 static void
01056 write_name (pdf_name *name, FILE *file)
01057 {
01058   char *s;
01059   int i, length;
01060 
01061   s      = name->name;
01062   length = name->name ? strlen(name->name) : 0;
01063   /*
01064    * From PDF Reference, 3rd ed., p.33:
01065    *
01066    *  Beginning with PDF 1.2, any character except null (character code 0)
01067    *  may be included in a name by writing its 2-digit hexadecimal code,
01068    *  preceded bythe number sign character (#); see implementation notes 3
01069    *  and 4 in Appendix H. This syntax is required in order to represent
01070    *  any of the delimiter or white-space characters or the number sign
01071    *  character itself; it is recommended but not required for characters
01072    *  whose codes are outside the range 33 (!) to 126 (~).
01073    */
01074 #ifndef is_delim
01075   /* Avoid '{' and '}' for PostScript compatibility? */
01076 #define is_delim(c) ((c) == '(' || (c) == '/' || \
01077                      (c) == '<' || (c) == '>' || \
01078                      (c) == '[' || (c) == ']' || \
01079                      (c) == '{' || (c) == '}' || \
01080                      (c) == '%')
01081 #endif
01082   pdf_out_char(file, '/');
01083   for (i = 0; i < length; i++) {
01084     if (s[i] < '!' || s[i] > '~' || s[i] == '#' || is_delim(s[i])) {
01085       /*     ^ "space" is here. */
01086       pdf_out_char (file, '#');
01087       pdf_out_xchar(file, s[i]);
01088     } else {
01089       pdf_out_char (file, s[i]);
01090     }
01091   }
01092 }
01093 
01094 static void
01095 release_name (pdf_name *data)
01096 {
01097   if (data->name != NULL) {
01098     RELEASE(data->name);
01099     data->name = NULL;
01100   }
01101   RELEASE(data);
01102 }
01103 
#if 0
/*
 * Disabled code: replace the string stored in an existing name object
 * with a private copy of NAME (an empty NAME is stored as NULL).
 */
void
pdf_set_name (pdf_obj *object, const char *name)
{
  pdf_name *data;
  unsigned  length;

  TYPECHECK(object, PDF_NAME);

  length = strlen(name);
  data   = object->data;
  if (data->name != NULL)
    RELEASE(data->name);

  if (length == 0) {
    data->name = NULL;
    return;
  }
  data->name = NEW(length+1, char);
  memcpy(data->name, name, length);
  data->name[length] = 0;
}
#endif
01127 
01128 char *
01129 pdf_name_value (pdf_obj *object)
01130 {
01131   pdf_name *data;
01132 
01133   TYPECHECK(object, PDF_NAME);
01134 
01135   data = object->data;
01136 
01137   return data->name;
01138 }
01139 
01140 /*
01141  * We do not have pdf_name_length() since '\0' is not allowed
01142  * in PDF name object.
01143  */
01144 
01145 pdf_obj *
01146 pdf_new_array (void)
01147 {
01148   pdf_obj   *result;
01149   pdf_array *data;
01150 
01151   result = pdf_new_obj(PDF_ARRAY);
01152   data   = NEW(1, pdf_array);
01153   data->values = NULL;
01154   data->max    = 0;
01155   data->size   = 0;
01156   result->data = data;
01157 
01158   return result;
01159 }
01160 
01161 static void
01162 write_array (pdf_array *array, FILE *file)
01163 {
01164   pdf_out_char(file, '[');
01165   if (array->size > 0) {
01166     unsigned long i;
01167     int type1 = PDF_UNDEFINED, type2;
01168     
01169     for (i = 0; i < array->size; i++) {
01170       if (array->values[i]) {
01171        type2 = array->values[i]->type;
01172        if (type1 != PDF_UNDEFINED && pdf_need_white(type1, type2))
01173          pdf_out_white(file);
01174        type1 = type2;
01175        pdf_write_obj(array->values[i], file);
01176       } else
01177        WARN("PDF array element #ld undefined.", i);
01178     }
01179   }
01180   pdf_out_char(file, ']');
01181 }
01182 
01183 pdf_obj *
01184 pdf_get_array (pdf_obj *array, long idx)
01185 {
01186   pdf_obj   *result = NULL;
01187   pdf_array *data;
01188 
01189   TYPECHECK(array, PDF_ARRAY);
01190 
01191   data = array->data;
01192   if (idx < 0)
01193     result = data->values[idx + data->size];
01194   else if (idx < data->size) {
01195     result = data->values[idx];
01196   }
01197 
01198   return result;
01199 }
01200 
01201 unsigned int
01202 pdf_array_length (pdf_obj *array)
01203 {
01204   pdf_array *data;
01205 
01206   TYPECHECK(array, PDF_ARRAY);
01207 
01208   data = (pdf_array *) array->data;
01209 
01210   return (unsigned int) data->size;
01211 }
01212 
01213 static void
01214 release_array (pdf_array *data)
01215 {
01216   unsigned long i;
01217 
01218   if (data->values) {
01219     for (i = 0; i < data->size; i++) {
01220       pdf_release_obj(data->values[i]);
01221       data->values[i] = NULL;
01222     }
01223     RELEASE(data->values);
01224     data->values = NULL;
01225   }
01226   RELEASE(data);
01227 }
01228 
01229 /*
01230  * The name pdf_add_array is misleading. It behaves differently than
01231  * pdf_add_dict(). This should be pdf_push_array().
01232  */
01233 void
01234 pdf_add_array (pdf_obj *array, pdf_obj *object)
01235 {
01236   pdf_array *data;
01237 
01238   TYPECHECK(array, PDF_ARRAY);
01239 
01240   data = array->data;
01241   if (data->size >= data->max) {
01242     data->max   += ARRAY_ALLOC_SIZE;
01243     data->values = RENEW(data->values, data->max, pdf_obj *);
01244   }
01245   data->values[data->size] = object;
01246   data->size++;
01247 
01248   return;
01249 }
01250 
#if 0
/*
 * Disabled code: store OBJECT at slot IDX, growing the array as needed.
 * Slots between the old end and IDX are filled with null objects; an
 * object already stored at IDX is released first.
 */
void
pdf_put_array (pdf_obj *array, unsigned idx, pdf_obj *object)
{
  pdf_array *data;
  long       i;

  TYPECHECK(array, PDF_ARRAY);

  data = array->data;
  if (idx + 1 > data->max) {
    /*
     * A single chunk is not enough when idx is far beyond the current
     * capacity; keep adding chunks until slot idx exists.
     */
    while (idx + 1 > data->max)
      data->max += ARRAY_ALLOC_SIZE;
    data->values = RENEW(data->values, data->max, pdf_obj *);
  }
  /*
   * Rangecheck error in PostScript interpreters if
   * idx > data->size - 1. But pdf_new_array() doesn't set
   * array size, pdf_add_array() dynamically increases size
   * of array. This might be confusing...
   */
  if (idx + 1 > data->size) {
    for (i = data->size; i < idx; i++)
      data->values[i] = pdf_new_null(); /* release_array() won't work without this */
    data->values[idx] = object;
    data->size = idx + 1;
  } else {
    if (data->values[idx])
      pdf_release_obj(data->values[idx]);
    data->values[idx] = object;
  }
}

/*
 * Disabled code: remove and return the first element (NULL when empty).
 * The returned object is no longer referenced by the array, so it is
 * easy to leak.
 */
pdf_obj *
pdf_shift_array (pdf_obj *array)
{
  pdf_obj   *result = NULL;
  pdf_array *data;

  TYPECHECK(array, PDF_ARRAY);

  data = array->data;
  if (data->size > 0) {
    int i;

    result = data->values[0];
    for (i = 1; i < data->size; i++)
      data->values[i-1] = data->values[i];
    data->size--;
  }

  return result;
}
#endif
01305 
01306 /* Prepend an object to an array */
01307 void
01308 pdf_unshift_array (pdf_obj *array, pdf_obj *object)
01309 {
01310   pdf_array *data;
01311   int        i;
01312 
01313   TYPECHECK(array, PDF_ARRAY);
01314 
01315   data = array->data;
01316   if (data->size >= data->max) {
01317     data->max   += ARRAY_ALLOC_SIZE;
01318     data->values = RENEW(data->values, data->max, pdf_obj *);
01319   }
01320   for (i = 0; i < data->size; i++)
01321     data->values[i+1] = data->values[i];
01322   data->values[0] = object;
01323   data->size++;
01324 }
01325 
#if 0
/*
 * Disabled code: remove and return the last element of an array, or
 * NULL when the array is empty.
 */
pdf_obj *
pdf_pop_array (pdf_obj *array)
{
  pdf_obj   *last = NULL;
  pdf_array *arr;

  TYPECHECK(array, PDF_ARRAY);

  arr = array->data;
  if (arr->size > 0) {
    last = arr->values[arr->size - 1];
    arr->size--;
  }

  return last;
}
#endif
01346 
01347 
01348 static void
01349 write_dict (pdf_dict *dict, FILE *file)
01350 {
01351 #if 0
01352   pdf_out (file, "<<\n", 3); /* dropping \n saves few kb. */
01353 #else
01354   pdf_out (file, "<<", 2);
01355 #endif
01356   while (dict->key != NULL) {
01357     pdf_write_obj(dict->key, file);
01358     if (pdf_need_white(PDF_NAME, (dict->value)->type)) {
01359       pdf_out_white(file);
01360     }
01361     pdf_write_obj(dict->value, file);
01362 #if 0
01363     pdf_out_char (file, '\n'); /* removing this saves few kb. */
01364 #endif
01365     dict = dict->next;
01366   }
01367   pdf_out(file, ">>", 2);
01368 }
01369 
01370 pdf_obj *
01371 pdf_new_dict (void)
01372 {
01373   pdf_obj  *result;
01374   pdf_dict *data;
01375 
01376   result = pdf_new_obj(PDF_DICT);
01377   data   = NEW(1, pdf_dict);
01378   data->key    = NULL;
01379   data->value  = NULL;
01380   data->next   = NULL;
01381   result->data = data;
01382 
01383   return result;
01384 }
01385 
01386 static void
01387 release_dict (pdf_dict *data)
01388 {
01389   pdf_dict *next;
01390 
01391   while (data != NULL && data->key != NULL) {
01392     pdf_release_obj(data->key);
01393     pdf_release_obj(data->value);
01394     data->key   = NULL;
01395     data->value = NULL;
01396     next = data->next;
01397     RELEASE(data);
01398     data = next;
01399   }
01400   RELEASE(data);
01401 }
01402 
01403 /* Array is ended by a node with NULL this pointer */
01404 /* pdf_add_dict returns 0 if the key is new and non-zero otherwise */
01405 int
01406 pdf_add_dict (pdf_obj *dict, pdf_obj *key, pdf_obj *value)
01407 {
01408   pdf_dict *data, *new_node;
01409 
01410   TYPECHECK(dict, PDF_DICT);
01411   TYPECHECK(key,  PDF_NAME);
01412 
01413   /* It seems that NULL is sometimes used for null object... */
01414   if (value != NULL && INVALIDOBJ(value))
01415     ERROR("pdf_add_dict(): Passed invalid value");
01416 
01417   /* If this key already exists, simply replace the value */
01418   for (data = dict->data; data->key != NULL; data = data->next) {
01419     if (!strcmp(pdf_name_value(key), pdf_name_value(data->key))) {
01420       /* Release the old value */
01421       pdf_release_obj(data->value);
01422       /* Release the new key (we don't need it) */
01423       pdf_release_obj(key);
01424       data->value = value;
01425       return 1;
01426     }
01427   }
01428   /*
01429    * We didn't find the key. We build a new "end" node and add
01430    * the new key just before the end
01431    */
01432   new_node = NEW (1, pdf_dict);
01433   new_node->key = NULL;
01434   new_node->value = NULL;
01435   new_node->next = NULL;
01436   data->next  = new_node;
01437   data->key   = key;
01438   data->value = value;
01439   return 0;
01440 }
01441 
#if 0
/*
 * Disabled code: like pdf_add_dict() but takes the key as a C string.
 * An existing value is released and replaced; otherwise a new name
 * object is created for the key and the entry is appended.
 */
void
pdf_put_dict (pdf_obj *dict, const char *key, pdf_obj *value)
{
  pdf_dict *data;

  TYPECHECK(dict, PDF_DICT);

  if (!key) {
    ERROR("pdf_put_dict(): Passed invalid key.");
  }
  /* It seems that NULL is sometimes used for null object... */
  if (value != NULL && INVALIDOBJ(value)) {
    /* Report the function that actually detected the problem. */
    ERROR("pdf_put_dict(): Passed invalid value.");
  }

  data = dict->data;

  while (data->key != NULL) {
    if (!strcmp(key, pdf_name_value(data->key))) {
      pdf_release_obj(data->value);
      data->value = value;
      break;
    }
    data = data->next;
  }

  /*
   * If we didn't find the key, build a new "end" node and add
   * the new key just before the end
   */
  if (data->key == NULL) {
    pdf_dict *new_node;

    new_node = NEW (1, pdf_dict);
    new_node->key   = NULL;
    new_node->value = NULL;
    new_node->next  = NULL;
    data->next  = new_node;
    data->key   = pdf_new_name(key);
    data->value = value;
  }
}
#endif
01486 
01487 /* pdf_merge_dict makes a link for each item in dict2 before stealing it */
01488 void
01489 pdf_merge_dict (pdf_obj *dict1, pdf_obj *dict2)
01490 {
01491   pdf_dict *data;
01492 
01493   TYPECHECK(dict1, PDF_DICT);
01494   TYPECHECK(dict2, PDF_DICT);
01495 
01496   data = dict2->data;
01497   while (data->key != NULL) {
01498     pdf_add_dict(dict1, pdf_link_obj(data->key), pdf_link_obj(data->value));
01499     data = data->next;
01500   }
01501 }
01502 
01503 int
01504 pdf_foreach_dict (pdf_obj *dict,
01505                 int (*proc) (pdf_obj *, pdf_obj *, void *), void *pdata)
01506 {
01507   int       error = 0;
01508   pdf_dict *data;
01509 
01510   ASSERT(proc);
01511 
01512   TYPECHECK(dict, PDF_DICT);
01513 
01514   data = dict->data;
01515   while (!error &&
01516         data->key != NULL) {
01517     error = proc(data->key, data->value, pdata);
01518     data = data->next;
01519   }
01520 
01521   return error;
01522 }
01523 
01524 #define pdf_match_name(o,s) ((o) && (s) && !strcmp(((pdf_name *)(o)->data)->name, (s)))
01525 pdf_obj *
01526 pdf_lookup_dict (pdf_obj *dict, const char *name)
01527 {
01528   pdf_dict *data;
01529 
01530   ASSERT(name);
01531 
01532   TYPECHECK(dict, PDF_DICT);
01533 
01534   data = dict->data;
01535   while (data->key != NULL) {
01536     if (!strcmp(name, pdf_name_value(data->key))) {
01537       return data->value;
01538     }
01539     data = data->next;
01540   }
01541 
01542   return NULL;
01543 }
01544 
01545 /* Returns array of dictionary keys */
01546 pdf_obj *
01547 pdf_dict_keys (pdf_obj *dict)
01548 {
01549   pdf_obj  *keys;
01550   pdf_dict *data;
01551 
01552   TYPECHECK(dict, PDF_DICT);
01553 
01554   keys = pdf_new_array();
01555   for (data = dict->data; (data &&
01556                         data->key != NULL); data = data->next) {
01557     /* We duplicate name object rather than linking keys.
01558      * If we forget to free keys, broken PDF is generated.
01559      */
01560     pdf_add_array(keys, pdf_new_name(pdf_name_value(data->key)));
01561   }
01562 
01563   return keys;
01564 }
01565 
01566 void
01567 pdf_remove_dict (pdf_obj *dict, const char *name)
01568 {
01569   pdf_dict *data, **data_p;
01570 
01571   TYPECHECK(dict, PDF_DICT);
01572 
01573   data   = dict->data;
01574   data_p = (pdf_dict **) (void *) &(dict->data);
01575   while (data->key != NULL) {
01576     if (pdf_match_name(data->key, name)) {
01577       pdf_release_obj(data->key);
01578       pdf_release_obj(data->value);
01579       *data_p = data->next;
01580       RELEASE(data);
01581       break;
01582     }
01583     data_p = &(data->next);
01584     data   = data->next;
01585   }
01586 }
01587 
01588 pdf_obj *
01589 pdf_new_stream (int flags)
01590 {
01591   pdf_obj    *result;
01592   pdf_stream *data;
01593 
01594   result = pdf_new_obj(PDF_STREAM);
01595   data   = NEW(1, pdf_stream);
01596   /*
01597    * Although we are using an arbitrary pdf_object here, it must have
01598    * type=PDF_DICT and cannot be an indirect reference.  This will be
01599    * checked by the output routine.
01600    */
01601   data->dict   = pdf_new_dict();
01602   data->_flags = flags;
01603   data->stream = NULL;
01604   data->stream_length = 0;
01605   data->max_length    = 0;
01606   data->objstm_data = NULL;
01607 
01608   result->data = data;
01609   result->flags |= OBJ_NO_OBJSTM;
01610 
01611   return result;
01612 }
01613 
01614 static void
01615 write_stream (pdf_stream *stream, FILE *file)
01616 {
01617   unsigned char *filtered;
01618   unsigned long  filtered_length;
01619   unsigned long  buffer_length;
01620   unsigned char *buffer;
01621 
01622   /*
01623    * Always work from a copy of the stream. All filters read from
01624    * "filtered" and leave their result in "filtered".
01625    */
01626 #if 0
01627   filtered = NEW(stream->stream_length + 1, unsigned char);
01628 #endif
01629   filtered = NEW(stream->stream_length, unsigned char);
01630   memcpy(filtered, stream->stream, stream->stream_length);
01631   filtered_length = stream->stream_length;
01632 
01633 #if 0
01634   if (stream->stream_length < 10)
01635     stream->_flags &= ^STREAM_COMPRESS;
01636 #endif
01637 
01638 #ifdef HAVE_ZLIB
01639   /* Apply compression filter if requested */
01640   if (stream->stream_length > 0 &&
01641       (stream->_flags & STREAM_COMPRESS) &&
01642       compression_level > 0) {
01643 
01644     pdf_obj *filters = pdf_lookup_dict(stream->dict, "Filter");
01645 
01646     buffer_length = filtered_length + filtered_length/1000 + 14;
01647     buffer = NEW(buffer_length, unsigned char);
01648     {
01649       pdf_obj *filter_name = pdf_new_name("FlateDecode");
01650 
01651       if (filters)
01652         /*
01653          * FlateDecode is the first filter to be applied to the stream.
01654          */
01655         pdf_unshift_array(filters, filter_name);
01656       else
01657         /*
01658          * Adding the filter as a name instead of a one-element array
01659          * is crucial because otherwise Adobe Reader cannot read the
01660          * cross-reference stream any more, cf. the PDF v1.5 Errata.
01661          */
01662         pdf_add_dict(stream->dict, pdf_new_name("Filter"), filter_name);
01663     }
01664 #ifdef HAVE_ZLIB_COMPRESS2    
01665     if (compress2(buffer, &buffer_length, filtered,
01666                 filtered_length, compression_level)) {
01667       ERROR("Zlib error");
01668     }
01669 #else 
01670     if (compress(buffer, &buffer_length, filtered,
01671                filtered_length)) {
01672       ERROR ("Zlib error");
01673     }
01674 #endif /* HAVE_ZLIB_COMPRESS2 */
01675     RELEASE(filtered);
01676     compression_saved += filtered_length - buffer_length
01677       - (filters ? strlen("/FlateDecode "): strlen("/Filter/FlateDecode\n"));
01678 
01679     filtered        = buffer;
01680     filtered_length = buffer_length;
01681   }
01682 #endif /* HAVE_ZLIB */
01683 
01684 #if 0
01685   /*
01686    * An optional end-of-line marker preceding the "endstream" is
01687    * not part of stream data. See, PDF Reference 4th ed., p. 38.
01688    */
01689   /* Add a '\n' if the last character wasn't one */
01690   if (filtered_length > 0 &&
01691       filtered[filtered_length-1] != '\n') {
01692     filtered[filtered_length] = '\n';
01693     filtered_length++;
01694   }
01695 #endif
01696   pdf_add_dict(stream->dict,
01697               pdf_new_name("Length"), pdf_new_number(filtered_length));
01698 
01699   pdf_write_obj(stream->dict, file);
01700 
01701   pdf_out(file, "\nstream\n", 8);
01702 
01703   if (enc_mode)
01704     pdf_encrypt_data(filtered, filtered_length);
01705 
01706   if (filtered_length > 0) {
01707     pdf_out(file, filtered, filtered_length);
01708   }
01709   RELEASE(filtered);
01710 
01711   /*
01712    * This stream length "object" gets reset every time write_stream is
01713    * called for the stream object.
01714    * If this stream gets written more than once with different
01715    * filters, this could be a problem.
01716    */
01717 
01718   pdf_out(file, "\n", 1);
01719   pdf_out(file, "endstream", 9);
01720 }
01721 
01722 static void
01723 release_stream (pdf_stream *stream)
01724 {
01725   pdf_release_obj(stream->dict);
01726   stream->dict = NULL;
01727 
01728   if (stream->stream) {
01729     RELEASE(stream->stream);
01730     stream->stream = NULL;
01731   }
01732 
01733   if (stream->objstm_data) {
01734     RELEASE(stream->objstm_data);
01735     stream->objstm_data = NULL;
01736   }
01737 
01738   RELEASE(stream);
01739 }
01740 
01741 pdf_obj *
01742 pdf_stream_dict (pdf_obj *stream)
01743 {
01744   pdf_stream *data;
01745 
01746   TYPECHECK(stream, PDF_STREAM);
01747 
01748   data = stream->data;
01749 
01750   return data->dict;
01751 }
01752 
01753 const void *
01754 pdf_stream_dataptr (pdf_obj *stream)
01755 {
01756   pdf_stream *data;
01757 
01758   TYPECHECK(stream, PDF_STREAM);
01759 
01760   data = stream->data;
01761 
01762   return (const void *) data->stream;
01763 }
01764 
01765 long
01766 pdf_stream_length (pdf_obj *stream)
01767 {
01768   pdf_stream *data;
01769 
01770   TYPECHECK(stream, PDF_STREAM);
01771 
01772   data = stream->data;
01773 
01774   return (long) data->stream_length;
01775 }
01776 
01777 static void
01778 set_objstm_data (pdf_obj *objstm, long *data) {
01779   TYPECHECK(objstm, PDF_STREAM);
01780 
01781   ((pdf_stream *) objstm->data)->objstm_data = data;
01782 }
01783 
01784 static long *
01785 get_objstm_data (pdf_obj *objstm) {
01786   TYPECHECK(objstm, PDF_STREAM);
01787 
01788   return ((pdf_stream *) objstm->data)->objstm_data;
01789 }
01790 
01791 void
01792 pdf_add_stream (pdf_obj *stream, const void *stream_data, long length)
01793 {
01794   pdf_stream *data;
01795 
01796   TYPECHECK(stream, PDF_STREAM);
01797 
01798   if (length < 1)
01799     return;
01800   data = stream->data;
01801   if (data->stream_length + length > data->max_length) {
01802     data->max_length += length + STREAM_ALLOC_SIZE;
01803     data->stream      = RENEW(data->stream, data->max_length, unsigned char);
01804   }
01805   memcpy(data->stream + data->stream_length, stream_data, length);
01806   data->stream_length += length;
01807 }
01808 
#if HAVE_ZLIB
#define WBUF_SIZE 4096
/*
 * Inflate LEN bytes of zlib-compressed DATA and append the decoded
 * bytes to stream DST.  Output is collected in a fixed buffer that is
 * flushed to DST whenever it fills up.
 * Returns 0 on success and -1 when zlib reports an error.
 */
int
pdf_add_stream_flate (pdf_obj *dst, const void *data, long len)
{
  z_stream z;
  Bytef    wbuf[WBUF_SIZE];
  int      status;

  z.zalloc = Z_NULL;
  z.zfree  = Z_NULL;
  z.opaque = Z_NULL;

  z.next_in   = (Bytef *) data;
  z.avail_in  = len;
  z.next_out  = (Bytef *) wbuf;
  z.avail_out = WBUF_SIZE;

  if (inflateInit(&z) != Z_OK) {
    WARN("inflateInit() failed.");
    return -1;
  }

  while ((status = inflate(&z, Z_NO_FLUSH)) != Z_STREAM_END) {
    if (status != Z_OK) {
      WARN("inflate() failed. Broken PDF file?");
      inflateEnd(&z);
      return -1;
    }

    /* Output buffer full: hand it over and start again at its front. */
    if (z.avail_out == 0) {
      pdf_add_stream(dst, wbuf, WBUF_SIZE);
      z.next_out  = wbuf;
      z.avail_out = WBUF_SIZE;
    }
  }

  /* Flush whatever is left in the buffer after the end of the data. */
  if (WBUF_SIZE - z.avail_out > 0)
    pdf_add_stream(dst, wbuf, WBUF_SIZE - z.avail_out);

  if (inflateEnd(&z) != Z_OK)
    return -1;
  return 0;
}
#endif
01851 
01852 
01853 int
01854 pdf_concat_stream (pdf_obj *dst, pdf_obj *src)
01855 {
01856   const char *stream_data;
01857   long        stream_length;
01858   pdf_obj    *stream_dict;
01859   pdf_obj    *filter;
01860 
01861   if (!PDF_OBJ_STREAMTYPE(dst) || !PDF_OBJ_STREAMTYPE(src))
01862     ERROR("Invalid type.");
01863 
01864   stream_data   = pdf_stream_dataptr(src);
01865   stream_length = pdf_stream_length (src);
01866   stream_dict   = pdf_stream_dict   (src);
01867 
01868   if (pdf_lookup_dict(stream_dict, "DecodeParms")) {
01869     WARN("Streams with DecodeParams not supported.");
01870     return -1;
01871   }
01872 
01873   filter = pdf_lookup_dict(stream_dict, "Filter");
01874   if (!filter) {
01875     pdf_add_stream(dst, stream_data, stream_length);
01876     return 0;
01877 #if HAVE_ZLIB
01878   } else {
01879     char *filter_name;
01880     if (PDF_OBJ_NAMETYPE(filter)) {
01881       filter_name = pdf_name_value(filter);
01882       if (filter_name && !strcmp(filter_name, "FlateDecode"))
01883        return pdf_add_stream_flate(dst, stream_data, stream_length);
01884       else {
01885        WARN("DecodeFilter \"%s\" not supported.", filter_name);
01886        return -1;
01887       }
01888     } else if (PDF_OBJ_ARRAYTYPE(filter)) {
01889       if (pdf_array_length(filter) > 1) {
01890        WARN("Multiple DecodeFilter not supported.");
01891        return -1;
01892       } else {
01893        filter_name = pdf_name_value(pdf_get_array(filter, 0));
01894        if (filter_name && !strcmp(filter_name, "FlateDecode"))
01895          return pdf_add_stream_flate(dst, stream_data, stream_length);
01896        else {
01897          WARN("DecodeFilter \"%s\" not supported.", filter_name);
01898          return -1;
01899        }
01900       }
01901     } else
01902       ERROR("Broken PDF file?");
01903 #endif /* HAVE_ZLIB */
01904   }
01905 
01906   return -1;
01907 }
01908 
01909 static pdf_obj *
01910 pdf_stream_uncompress (pdf_obj *src) {
01911   pdf_obj *dst = pdf_new_stream(0);
01912 
01913   TYPECHECK(src, PDF_STREAM);
01914 
01915   pdf_merge_dict(pdf_stream_dict(dst), pdf_stream_dict(src));
01916   pdf_remove_dict(pdf_stream_dict(dst), "Length");
01917   pdf_concat_stream(dst, src);
01918 
01919   return dst;
01920 }
01921 
#if 0
/* Disabled code: overwrite the flags of a stream object. */
void
pdf_stream_set_flags (pdf_obj *stream, int flags)
{
  TYPECHECK(stream, PDF_STREAM);

  ((pdf_stream *) stream->data)->_flags = flags;
}

/* Disabled code: return the flags of a stream object. */
int
pdf_stream_get_flags (pdf_obj *stream)
{
  TYPECHECK(stream, PDF_STREAM);

  return ((pdf_stream *) stream->data)->_flags;
}
#endif
01946 
01947 static void
01948 pdf_write_obj (pdf_obj *object, FILE *file)
01949 {
01950   if (object == NULL) {
01951     write_null(NULL, file);
01952     return;
01953   }
01954 
01955   if (INVALIDOBJ(object) || PDF_OBJ_UNDEFINED(object))
01956     ERROR("pdf_write_obj: Invalid object, type = %d\n", object->type);
01957 
01958   if (file == stderr)
01959     fprintf(stderr, "{%d}", object->refcount);
01960 
01961   switch (object->type) {
01962   case PDF_BOOLEAN:
01963     write_boolean(object->data, file);
01964     break;
01965   case PDF_NUMBER:
01966     write_number (object->data, file);
01967     break;
01968   case PDF_STRING:
01969     write_string (object->data, file);
01970     break;
01971   case PDF_NAME:
01972     write_name(object->data, file);
01973     break;
01974   case PDF_ARRAY:
01975     write_array(object->data, file);
01976     break;
01977   case PDF_DICT:
01978     write_dict (object->data, file);
01979     break;
01980   case PDF_STREAM:
01981     write_stream(object->data, file);
01982     break;
01983   case PDF_NULL:
01984     write_null(NULL, file);
01985     break;
01986   case PDF_INDIRECT:
01987     write_indirect(object->data, file);
01988     break;
01989   }
01990 }
01991 
01992 /* Write the object to the file */ 
01993 static void
01994 pdf_flush_obj (pdf_obj *object, FILE *file)
01995 {
01996   long length;
01997 
01998   /*
01999    * Record file position
02000    */
02001   add_xref_entry(object->label, 1,
02002                pdf_output_file_position, object->generation);
02003   length = sprintf(format_buffer, "%lu %hu obj\n", object->label, object->generation);
02004   enc_mode = doc_enc_mode && !(object->flags & OBJ_NO_ENCRYPT);
02005   pdf_enc_set_label(object->label);
02006   pdf_enc_set_generation(object->generation);
02007   pdf_out(file, format_buffer, length);
02008   pdf_write_obj(object, file);
02009   pdf_out(file, "\nendobj\n", 8);
02010 }
02011 
02012 static long
02013 pdf_add_objstm (pdf_obj *objstm, pdf_obj *object)
02014 {
02015   long *data, pos;
02016 
02017   TYPECHECK(objstm, PDF_STREAM);
02018 
02019   data = get_objstm_data(objstm);
02020   pos = ++data[0];
02021 
02022   data[2*pos]   = object->label;
02023   data[2*pos+1] = pdf_stream_length(objstm);
02024 
02025   add_xref_entry(object->label, 2, objstm->label, pos-1);
02026  
02027   /* redirect output into objstm */
02028   output_stream = objstm;
02029   enc_mode = 0;
02030   pdf_write_obj(object, pdf_output_file);
02031   pdf_out_char(pdf_output_file, '\n');
02032   output_stream = NULL;
02033 
02034   return pos;
02035 }
02036 
02037 static void
02038 release_objstm (pdf_obj *objstm)
02039 {
02040   long *data = get_objstm_data(objstm);
02041   long pos = data[0];
02042   pdf_obj *dict;
02043   pdf_stream *stream;
02044   unsigned char *old_buf;
02045   unsigned long old_length;
02046   stream = (pdf_stream *) objstm->data;
02047 
02048   /* Precede stream data by offset table */
02049   old_buf = stream->stream;
02050   old_length = stream->stream_length;
02051   /* Reserve 22 bytes for each entry (two 10 digit numbers plus two spaces) */
02052   stream->stream = NEW(old_length + 22*pos, unsigned char);
02053   stream->stream_length = 0;
02054 
02055   {
02056     long i = 2*pos, *val = data+2;
02057     while (i--) {
02058       long length = sprintf(format_buffer, "%ld ", *(val++));
02059       pdf_add_stream(objstm, format_buffer, length);
02060     }
02061   }
02062 
02063   dict = pdf_stream_dict(objstm);
02064   pdf_add_dict(dict, pdf_new_name("Type"), pdf_new_name("ObjStm"));
02065   pdf_add_dict(dict, pdf_new_name("N"), pdf_new_number(pos));
02066   pdf_add_dict(dict, pdf_new_name("First"), pdf_new_number(stream->stream_length));
02067   
02068   pdf_add_stream(objstm, old_buf, old_length);
02069   RELEASE(old_buf);
02070   pdf_release_obj(objstm);
02071 }
02072 
02073 void
02074 pdf_release_obj (pdf_obj *object)
02075 {
02076   if (object == NULL)
02077     return;
02078   if (INVALIDOBJ(object) || object->refcount <= 0) {
02079     MESG("\npdf_release_obj: object=%p, type=%d, refcount=%d\n",
02080         object, object->type, object->refcount);
02081     pdf_write_obj(object, stderr);
02082     ERROR("pdf_release_obj:  Called with invalid object.");
02083   }
02084   object->refcount -= 1;
02085   if (object->refcount == 0) {
02086     /*
02087      * Nothing is using this object so it's okay to remove it.
02088      * Nonzero "label" means object needs to be written before it's destroyed.
02089      */
02090     if (object->label && pdf_output_file != NULL) {
02091       if (!do_objstm || object->flags & OBJ_NO_OBJSTM
02092          || (doc_enc_mode && object->flags & OBJ_NO_ENCRYPT)
02093          || object->generation)
02094        pdf_flush_obj(object, pdf_output_file);
02095       else {
02096         if (!current_objstm) {
02097          long *data = NEW(2*OBJSTM_MAX_OBJS+2, long);
02098          data[0] = data[1] = 0;
02099          current_objstm = pdf_new_stream(STREAM_COMPRESS);
02100          set_objstm_data(current_objstm, data);
02101          pdf_label_obj(current_objstm);
02102        }
02103        if (pdf_add_objstm(current_objstm, object) == OBJSTM_MAX_OBJS) {
02104          release_objstm(current_objstm);
02105          current_objstm = NULL;
02106        }
02107       }
02108     }
02109     switch (object->type) {
02110     case PDF_BOOLEAN:
02111       release_boolean(object->data);
02112       break;
02113     case PDF_NULL:
02114       release_null(object->data);
02115       break;
02116     case PDF_NUMBER:
02117       release_number(object->data);
02118       break;
02119     case PDF_STRING:
02120       release_string(object->data);
02121       break;
02122     case PDF_NAME:
02123       release_name(object->data);
02124       break;
02125     case PDF_ARRAY:
02126       release_array(object->data);
02127       break;
02128     case PDF_DICT:
02129       release_dict(object->data);
02130       break;
02131     case PDF_STREAM:
02132       release_stream(object->data);
02133       break;
02134     case PDF_INDIRECT:
02135       release_indirect(object->data);
02136       break;
02137     }
02138     /* This might help detect freeing already freed objects */
02139     object->type = -1;
02140     object->data = NULL;
02141     RELEASE(object);
02142   }
02143 }
02144 
/*
 * Move the file position backwards until an end-of-line character has
 * just been read, or until the start of the file is reached.
 * Returns 1 when at least one character was read while backing up and
 * the last read did not fail; returns 0 otherwise.
 */
static int
backup_line (FILE *pdf_input_file)
{
  int ch = -1;

  /*
   * Note: this code should work even if \r\n is eol. It could fail on a
   * machine where \n is eol and there is a \r in the stream --- Highly
   * unlikely in the last few bytes where this is likely to be used.
   */
  if (tell_position(pdf_input_file) > 1) {
    for (;;) {
      /* Step over the character just read plus the one before it. */
      seek_relative (pdf_input_file, -2);
      if (tell_position(pdf_input_file) <= 0)
        break;
      ch = fgetc(pdf_input_file);
      if (ch < 0 || ch == '\n' || ch == '\r')
        break;
    }
  }

  return (ch < 0) ? 0 : 1;
}
02167 
02168 static long
02169 find_xref (FILE *pdf_input_file)
02170 {
02171   long xref_pos;
02172   int  tries = 10;
02173 
02174   do {
02175     long currentpos;
02176 
02177     if (!backup_line(pdf_input_file)) {
02178       tries = 0;
02179       break;
02180     }
02181     currentpos = tell_position(pdf_input_file);
02182     fread(work_buffer, sizeof(char), strlen("startxref"), pdf_input_file);
02183     seek_absolute(pdf_input_file, currentpos);
02184     tries--;
02185   } while (tries > 0 &&
02186           strncmp(work_buffer, "startxref", strlen("startxref")));
02187   if (tries <= 0)
02188     return 0;
02189 
02190   /* Skip rest of this line */
02191   mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file);
02192   /* Next line of input file should contain actual xref location */
02193   mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file);
02194 
02195   {
02196     char *start, *end, *number;
02197 
02198     start = work_buffer;
02199     end   = start + strlen(work_buffer);
02200     skip_white(&start, end);
02201     number   = parse_number(&start, end);
02202     xref_pos = (long) atof(number);
02203     RELEASE(number);
02204   }
02205 
02206   return xref_pos;
02207 }
02208 
02209 /*
02210  * This routine must be called with the file pointer located
02211  * at the start of the trailer.
02212  */
02213 static pdf_obj *
02214 parse_trailer (pdf_file *pf)
02215 {
02216   pdf_obj *result;
02217   /*
02218    * Fill work_buffer and hope trailer fits. This should
02219    * be made a bit more robust sometime.
02220    */
02221   if (fread(work_buffer, sizeof(char),
02222            WORK_BUFFER_SIZE, pf->file) == 0 ||
02223       strncmp(work_buffer, "trailer", strlen("trailer"))) {
02224     WARN("No trailer.  Are you sure this is a PDF file?");
02225     WARN("buffer:\n->%s<-\n", work_buffer);
02226     result = NULL;
02227   } else {
02228     char *p = work_buffer + strlen("trailer");
02229     skip_white(&p, work_buffer + WORK_BUFFER_SIZE);
02230     result = parse_pdf_dict(&p, work_buffer + WORK_BUFFER_SIZE, pf);
02231   }
02232 
02233   return result;
02234 }
02235 
02236 /*
02237  * This routine tries to estimate an upper bound for character position
02238  * of the end of the object, so it knows how big the buffer must be.
02239  * The parsing routines require that the entire object be read into
02240  * memory. It would be a major pain to rewrite them.  The worst case
02241  * is that an object before an xref table will grab the whole table
02242  * :-(
02243  */
02244 static long
02245 next_object_offset (pdf_file *pf, unsigned long obj_num)
02246 {
02247   long  next = pf->file_size;  /* Worst case */
02248   long  i, curr;
02249 
02250   curr = pf->xref_table[obj_num].field2;
02251   /* Check all other type 1 objects to find next one */
02252   for (i = 0; i < pf->num_obj; i++) {
02253     if (pf->xref_table[i].type == 1 &&
02254         pf->xref_table[i].field2 > curr &&
02255         pf->xref_table[i].field2 < next)
02256       next = pf->xref_table[i].field2;
02257   }
02258 
02259   return  next;
02260 }
02261 
/*
 * Nonzero when object number n lies strictly between 0 and the table
 * size of pf and its xref entry is either of type 1 with field3 equal
 * to g, or of type 2 with g equal to zero. Note that n is evaluated
 * several times.
 */
#define checklabel(pf, n, g) \
  ((n) > 0 && (n) < (pf)->num_obj && \
   ((pf)->xref_table[(n)].type == 1 \
      ? ((pf)->xref_table[(n)].field3 == (g)) \
      : ((pf)->xref_table[(n)].type == 2 && !(g))))
02265 
02266 pdf_obj *
02267 pdf_new_indirect (pdf_file *pf, unsigned long obj_num, unsigned short obj_gen)
02268 {
02269   pdf_obj      *result;
02270   pdf_indirect *indirect;
02271 
02272   indirect = NEW(1, pdf_indirect);
02273   indirect->pf         = pf;
02274   indirect->label      = obj_num;
02275   indirect->generation = obj_gen;
02276 
02277   result   = pdf_new_obj(PDF_INDIRECT);
02278   result->data = indirect;
02279 
02280   return result;
02281 }
02282 
02283 static pdf_obj *
02284 pdf_read_object (unsigned long obj_num, unsigned short obj_gen,
02285               pdf_file *pf, long offset, long limit)
02286 {
02287   long     length;
02288   char    *buffer, *p, *endptr;
02289   pdf_obj *result;
02290 
02291   length = limit - offset;
02292 
02293   if (length <= 0)
02294     return NULL;
02295 
02296   buffer = NEW(length + 1, char);
02297 
02298   seek_absolute(pf->file, offset);
02299   fread(buffer, sizeof(char), length, pf->file);
02300 
02301   p      = buffer;
02302   endptr = p + length;
02303 
02304   /* Check for obj_num and obj_gen */
02305   {
02306     char         *q = p; /* <== p */
02307     char         *sp;
02308     unsigned long n, g;
02309 
02310     skip_white(&q, endptr);
02311     sp = parse_unsigned(&q, endptr);
02312     if (!sp) {
02313       RELEASE(buffer);
02314       return NULL;
02315     }
02316     n = strtoul(sp, NULL, 10);
02317     RELEASE(sp);
02318 
02319     skip_white(&q, endptr);
02320     sp = parse_unsigned(&q, endptr);
02321     if (!sp) {
02322       RELEASE(buffer);
02323       return NULL;
02324     }
02325     g = strtoul(sp, NULL, 10);
02326     RELEASE(sp);
02327 
02328     if (obj_num && (n != obj_num || g != obj_gen)) {
02329       RELEASE(buffer);
02330       return NULL;
02331     }
02332 
02333     p = q; /* ==> p */
02334   }
02335 
02336 
02337   skip_white(&p, endptr);
02338   if (memcmp(p, "obj", strlen("obj"))) {
02339     WARN("Didn't find \"obj\".");
02340     RELEASE(buffer);
02341     return NULL;
02342   }
02343   p += strlen("obj");
02344 
02345   result = parse_pdf_object(&p, endptr, pf);
02346 
02347   skip_white(&p, endptr);
02348   if (memcmp(p, "endobj", strlen("endobj"))) {
02349     WARN("Didn't find \"endobj\".");
02350     if (result)
02351       pdf_release_obj(result);
02352     result = NULL;
02353   }
02354   RELEASE(buffer);
02355 
02356   return result;
02357 }
02358 
02359 static pdf_obj *
02360 read_objstm (pdf_file *pf, unsigned long num)
02361 {
02362   unsigned long offset = pf->xref_table[num].field2;
02363   unsigned short gen = pf->xref_table[num].field3;
02364   long limit = next_object_offset(pf, num), n, first, *header = NULL;
02365   char *data, data1, *p, *q;
02366   int i;
02367 
02368   pdf_obj *objstm, *dict, *type, *n_obj, *first_obj;
02369 
02370   objstm = pdf_read_object(num, gen, pf, offset, limit);
02371 
02372   if (!PDF_OBJ_STREAMTYPE(objstm))
02373     goto error;
02374 
02375   {
02376     pdf_obj *tmp = pdf_stream_uncompress(objstm);
02377     if (!tmp)
02378       goto error;
02379     pdf_release_obj(objstm);
02380     objstm = tmp;
02381   }
02382 
02383   dict = pdf_stream_dict(objstm);
02384 
02385   type = pdf_lookup_dict(dict, "Type");
02386   if (!PDF_OBJ_NAMETYPE(type) ||
02387       strcmp(pdf_name_value(type), "ObjStm"))
02388     goto error;
02389 
02390   n_obj = pdf_lookup_dict(dict, "N");
02391   if (!PDF_OBJ_NUMBERTYPE(n_obj))
02392     goto error;
02393   n = (long) pdf_number_value(n_obj);
02394 
02395   first_obj = pdf_lookup_dict(dict, "First");
02396   if (!PDF_OBJ_NUMBERTYPE(first_obj))
02397     goto error;
02398   first = (long) pdf_number_value(first_obj);
02399   /* reject object streams without object data */
02400   if (first >= pdf_stream_length(objstm))
02401     goto error;
02402 
02403   header = NEW(2*(n+1), long);
02404   set_objstm_data(objstm, header);
02405   *(header++) = n;
02406   *(header++) = first;
02407 
02408   data = (char *) pdf_stream_dataptr(objstm);
02409 
02410   /* hack to avoid parsing beyond offset table */
02411   data1 = data[first];
02412   data[first] = 0;
02413 
02414   p = data;
02415   i = 2*n;
02416   while (i--) {
02417     *(header++) = strtoul(p, &q, 10);
02418     if (q == p)
02419       goto error;
02420     p = q;
02421   }
02422   data[first] = data1;
02423 
02424   /* Any garbage after last entry? */
02425   skip_white(&p, data+first);
02426   if (p != data+first)
02427     goto error;
02428   
02429   return pf->xref_table[num].direct = objstm;
02430 
02431  error:
02432   WARN("Cannot parse object stream.");
02433   if (objstm)
02434     pdf_release_obj(objstm);
02435   return NULL;
02436 }
02437 
02438 /* Label without corresponding object definition are replaced by the
02439  * null object, as required by the PDF spec. This is important to parse
02440  * several cross-reference sections.
02441  */ 
02442 static pdf_obj *
02443 pdf_get_object (pdf_file *pf, unsigned long obj_num, unsigned short obj_gen)
02444 {
02445   pdf_obj *result;
02446 
02447   if (!checklabel(pf, obj_num, obj_gen)) {
02448     WARN("Trying to read nonexistent or deleted object: %lu %u",
02449          obj_num, obj_gen);
02450     return pdf_new_null();
02451   }
02452 
02453   if ((result = pf->xref_table[obj_num].direct)) {
02454     return pdf_link_obj(result);
02455   }
02456 
02457   if (pf->xref_table[obj_num].type == 1) {
02458     /* type == 1 */
02459     unsigned long offset;
02460     long limit;
02461     offset = pf->xref_table[obj_num].field2;
02462     limit  = next_object_offset(pf, obj_num);
02463     result = pdf_read_object(obj_num, obj_gen, pf, offset, limit);
02464   } else {
02465     /* type == 2 */
02466     unsigned long  objstm_num = pf->xref_table[obj_num].field2;
02467     unsigned short index = pf->xref_table[obj_num].field3;
02468     pdf_obj *objstm;
02469     long *data, n, first, length;
02470     char *p, *q;
02471 
02472     if (objstm_num >= pf->num_obj ||
02473        pf->xref_table[objstm_num].type != 1 ||
02474        !((objstm = pf->xref_table[objstm_num].direct) ||
02475          (objstm = read_objstm(pf, objstm_num))))
02476       goto error;
02477 
02478     data = get_objstm_data(objstm);
02479     n = *(data++);
02480     first = *(data++);
02481 
02482     if (index >= n || data[2*index] != obj_num)
02483       goto error;
02484 
02485     length = pdf_stream_length(objstm);
02486     p = (char *) pdf_stream_dataptr(objstm) + first + data[2*index+1];
02487     q = p + (index == n-1 ? length : first+data[2*index+3]);
02488     result = parse_pdf_object(&p, q, pf);
02489     if (!result)
02490       goto error;
02491   }
02492 
02493   /* Make sure the caller doesn't free this object */
02494   pf->xref_table[obj_num].direct = pdf_link_obj(result);
02495 
02496   return result;
02497 
02498  error:
02499   WARN("Could not read object from object stream.");
02500   return pdf_new_null();
02501 }
02502 
/* Accessors for the pdf_indirect payload of an indirect object o:  */
/* the file it belongs to, its object number and its generation.    */
#define OBJ_FILE(o) (((pdf_indirect *)((o)->data))->pf)
#define OBJ_NUM(o)  (((pdf_indirect *)((o)->data))->label)
#define OBJ_GEN(o)  (((pdf_indirect *)((o)->data))->generation)
02506 
02507 /* pdf_deref_obj always returns a link instead of the original   */
02508 /* It never return the null object, but the NULL pointer instead */
02509 pdf_obj *
02510 pdf_deref_obj (pdf_obj *obj)
02511 {
02512   int count = PDF_OBJ_MAX_DEPTH;
02513 
02514   if (obj)
02515     obj = pdf_link_obj(obj);
02516 
02517   while (PDF_OBJ_INDIRECTTYPE(obj) && --count) {
02518     pdf_file *pf = OBJ_FILE(obj);
02519     unsigned long  obj_num = OBJ_NUM(obj);
02520     unsigned short obj_gen = OBJ_GEN(obj);
02521     ASSERT(pf);
02522     pdf_release_obj(obj);
02523     obj = pdf_get_object(pf, obj_num, obj_gen);
02524   }
02525 
02526   if (!count)
02527     ERROR("Loop in object hierarchy detected. Broken PDF file?");
02528 
02529   if (PDF_OBJ_NULLTYPE(obj)) {
02530     pdf_release_obj(obj);
02531     return NULL;
02532   } else
02533     return obj;
02534 }
02535 
02536 static void
02537 extend_xref (pdf_file *pf, long new_size) 
02538 {
02539   unsigned long i;
02540 
02541   pf->xref_table = RENEW(pf->xref_table, new_size, xref_entry);
02542   for (i = pf->num_obj; i < new_size; i++) {
02543     pf->xref_table[i].direct   = NULL;
02544     pf->xref_table[i].indirect = NULL;
02545     pf->xref_table[i].type     = 0;
02546     pf->xref_table[i].field3 = 0;
02547     pf->xref_table[i].field2 = 0L;
02548   }
02549   pf->num_obj = new_size;
02550 }
02551 
02552 static int
02553 parse_xref_table (pdf_file *pf, long xref_pos)
02554 {
02555   FILE         *pdf_input_file = pf->file;
02556   unsigned long first, size;
02557   unsigned long i, offset;
02558   unsigned int  obj_gen;
02559   char          flag;
02560   int           r;
02561 
02562   /*
02563    * This routine reads one xref segment. It may be called multiple times
02564    * on the same file.  xref tables sometimes come in pieces.
02565    */
02566 
02567   seek_absolute(pf->file, xref_pos);
02568 
02569   mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file);
02570   if (memcmp(work_buffer, "xref", strlen("xref"))) {
02571     /* Might be an xref stream and not an xref table */
02572     return 0;
02573   }
02574   /* Next line in file has first item and size of table */
02575   for (;;) {
02576     unsigned long current_pos;
02577 
02578     current_pos = tell_position(pdf_input_file);
02579     if (mfgets(work_buffer, WORK_BUFFER_SIZE, pdf_input_file) == NULL) {
02580       WARN("Premature end of PDF file while parsing xref table.");
02581       return -1;
02582     }
02583     if (!strncmp(work_buffer, "trailer", strlen ("trailer"))) {
02584       /*
02585        * Backup... This is ugly, but it seems like the safest thing to
02586        * do.  It is possible the trailer dictionary starts on the same
02587        * logical line as the word trailer.  In that case, the mfgets
02588        * call might have started to read the trailer dictionary and
02589        * parse_trailer would fail.
02590        */
02591       seek_absolute(pdf_input_file, current_pos);
02592       break;
02593     }
02594     sscanf(work_buffer, "%lu %lu", &first, &size);
02595     if (pf->num_obj < first + size) {
02596       extend_xref(pf, first + size);
02597     }
02598 
02599     for (i = first; i < first + size; i++) {
02600       fread(work_buffer, sizeof(char), 20, pdf_input_file);
02601       /*
02602        * Don't overwrite positions that have already been set by a
02603        * modified xref table.  We are working our way backwards
02604        * through the reference table, so we only set "position" 
02605        * if it hasn't been set yet.
02606        */
02607       work_buffer[19] = 0;
02608       offset = 0UL; obj_gen = 0; flag = 0;
02609       r = sscanf(work_buffer, "%010lu %05u %c", &offset, &obj_gen, &flag);
02610       if ( r != 3 ||
02611           ((flag != 'n' && flag != 'f') ||
02612            (flag == 'n' &&
02613            (offset >= pf->file_size || (offset > 0 && offset < 4))))) {
02614         WARN("Invalid xref table entry [%lu]. PDF file is corrupt...", i);
02615         return -1;
02616       }
02617       if (!pf->xref_table[i].field2) {
02618        pf->xref_table[i].type   = (flag == 'n');
02619        pf->xref_table[i].field2 = offset;
02620        pf->xref_table[i].field3 = obj_gen;       
02621       }
02622     }
02623   }
02624 
02625   return  1;
02626 }
02627 
/*
 * Decode one big-endian field of "length" bytes from *p and advance *p
 * past it.
 *
 * A field of width zero is absent and yields the default value def
 * without consuming any input. Negative widths, which can only result
 * from a broken /W array, are treated the same way instead of running
 * off the end of the data.
 */
static unsigned long
parse_xrefstm_field (const char **p, int length, unsigned long def)
{
  unsigned long val = 0;

  if (length <= 0)
    return def;

  while (length--) {
    val <<= 8;
    val |= (unsigned char) *((*p)++);
  }

  return val;
}
02643 
02644 static int
02645 parse_xrefstm_subsec (pdf_file *pf,
02646                     const char **p, long *length,
02647                     int *W, int wsum,
02648                     long first, long size) {
02649   xref_entry *e;
02650 
02651   if ((*length -= wsum*size) < 0)
02652     return -1;
02653 
02654   if (pf->num_obj < first+size)
02655     extend_xref(pf, first+size);  /* TODO: change! why? */
02656 
02657   e = pf->xref_table + first;
02658   while (size--) {
02659     unsigned char  type;
02660     unsigned long  field2;
02661     unsigned short field3;
02662 
02663     type = (unsigned char) parse_xrefstm_field(p, W[0], 1);
02664     if (type > 2)
02665       WARN("Unknown cross-reference stream entry type.");
02666     else if (!W[1] || (type != 1 && !W[2]))
02667       return -1;
02668 
02669     field2 = (unsigned long)  parse_xrefstm_field(p, W[1], 0);
02670     field3 = (unsigned short) parse_xrefstm_field(p, W[2], 0);
02671 
02672     if (!e->field2) {
02673       e->type   = type;
02674       e->field2 = field2;
02675       e->field3 = field3;   
02676       }
02677     e++;
02678   }
02679 
02680   return 0;
02681 }
02682 
02683 static int
02684 parse_xref_stream (pdf_file *pf, long xref_pos, pdf_obj **trailer)
02685 {
02686   pdf_obj *xrefstm, *size_obj, *W_obj, *index;
02687   unsigned long size;
02688   long length;
02689   int W[3], i, wsum = 0;
02690   const char *p;
02691 
02692   xrefstm = pdf_read_object(0, 0, pf, xref_pos, pf->file_size);
02693   if (!PDF_OBJ_STREAMTYPE(xrefstm))
02694     goto error;
02695 
02696   {
02697     pdf_obj *tmp = pdf_stream_uncompress(xrefstm);
02698     if (!tmp)
02699       goto error;
02700     pdf_release_obj(xrefstm);
02701     xrefstm = tmp;
02702   }
02703 
02704   *trailer = pdf_link_obj(pdf_stream_dict(xrefstm));
02705 
02706   size_obj = pdf_lookup_dict(*trailer, "Size");
02707   if (!PDF_OBJ_NUMBERTYPE(size_obj))
02708     goto error;
02709   size = (unsigned long) pdf_number_value(size_obj);
02710 
02711   length = pdf_stream_length(xrefstm);
02712 
02713   W_obj = pdf_lookup_dict(*trailer, "W");
02714   if (!PDF_OBJ_ARRAYTYPE(W_obj) || pdf_array_length(W_obj) != 3)
02715     goto error;
02716 
02717   for (i = 0; i < 3; i++) {
02718     pdf_obj *tmp = pdf_get_array(W_obj, i);
02719     if (!PDF_OBJ_NUMBERTYPE(tmp))
02720       goto error;
02721     wsum += (W[i] = (int) pdf_number_value(tmp));
02722   }
02723 
02724   p = pdf_stream_dataptr(xrefstm);
02725 
02726   index = pdf_lookup_dict(*trailer, "Index");
02727   if (index) {
02728     unsigned int index_len;
02729     if (!PDF_OBJ_ARRAYTYPE(index) ||
02730        ((index_len = pdf_array_length(index)) % 2 ))
02731       goto error;
02732 
02733     i = 0;
02734     while (i < index_len) {
02735       pdf_obj *first = pdf_get_array(index, i++);
02736       pdf_obj *size  = pdf_get_array(index, i++);
02737       if (!PDF_OBJ_NUMBERTYPE(first) ||
02738          !PDF_OBJ_NUMBERTYPE(size) ||
02739          parse_xrefstm_subsec(pf, &p, &length, W, wsum,
02740                             (long) pdf_number_value(first),
02741                             (long) pdf_number_value(size)))
02742        goto error;
02743     }
02744   } else if (parse_xrefstm_subsec(pf, &p, &length, W, wsum, 0, size))
02745       goto error;
02746 
02747   if (length)
02748     WARN("Garbage in xref stream.");
02749 
02750   pdf_release_obj(xrefstm);
02751 
02752   return 1;
02753 
02754  error:
02755   WARN("Cannot parse cross-reference stream.");
02756   if (xrefstm)
02757     pdf_release_obj(xrefstm);
02758   if (*trailer) {
02759     pdf_release_obj(*trailer);
02760     *trailer = NULL;
02761   }
02762   return 0;
02763 }
02764 
02765 static pdf_obj *
02766 read_xref (pdf_file *pf)
02767 {
02768   pdf_obj *trailer = NULL, *main_trailer = NULL;
02769   long     xref_pos;
02770 
02771   if (!(xref_pos = find_xref(pf->file)))
02772     goto error;
02773 
02774   while (xref_pos) {
02775     pdf_obj *prev;
02776 
02777     int res = parse_xref_table(pf, xref_pos);
02778     if (res > 0) {
02779       /* cross-reference table */
02780       pdf_obj *xrefstm;
02781 
02782       if (!(trailer = parse_trailer(pf)))
02783        goto error;
02784 
02785       if (!main_trailer)
02786        main_trailer = pdf_link_obj(trailer);
02787 
02788       if ((xrefstm = pdf_lookup_dict(trailer, "XRefStm"))) {
02789        pdf_obj *new_trailer = NULL;
02790        if (PDF_OBJ_NUMBERTYPE(xrefstm) &&
02791            parse_xref_stream(pf, (long) pdf_number_value(xrefstm),
02792                            &new_trailer))
02793          pdf_release_obj(new_trailer);
02794        else
02795          WARN("Skipping hybrid reference section.");
02796        /* Many PDF 1.5 xref streams use DecodeParms, which we cannot
02797           parse. This way we can use at least xref tables in hybrid
02798           documents. Or should we better stop parsing the file?
02799        */
02800       }
02801 
02802     } else if (!res && parse_xref_stream(pf, xref_pos, &trailer)) {
02803       /* cross-reference stream */
02804       if (!main_trailer)
02805        main_trailer = pdf_link_obj(trailer);
02806     } else
02807       goto error;
02808 
02809     if ((prev = pdf_lookup_dict(trailer, "Prev"))) {
02810       if (PDF_OBJ_NUMBERTYPE(prev))
02811        xref_pos = (long) pdf_number_value(prev);
02812       else
02813        goto error;
02814     } else
02815       xref_pos = 0;
02816 
02817     pdf_release_obj(trailer);
02818   }
02819 
02820 #if 0
02821   if (!pdf_lookup_dict(main_trailer, "Root")) {
02822       WARN("Trailer doesn't have catalog. Is this a correct PDF file?");
02823       goto error;
02824     }
02825 #endif
02826 
02827   return main_trailer;
02828 
02829  error:
02830   WARN("Error while parsing PDF file.");
02831   if (trailer)
02832     pdf_release_obj(trailer);
02833   if (main_trailer)
02834     pdf_release_obj(main_trailer);
02835   return NULL;
02836 }
02837 
02838 static struct ht_table *pdf_files = NULL;
02839 
02840 pdf_file *
02841 pdf_file_new (FILE *file)
02842 {
02843   pdf_file *pf;
02844   ASSERT(file);
02845   pf = NEW(1, pdf_file);
02846   pf->file    = file;
02847   pf->trailer = NULL;
02848   pf->xref_table = NULL;
02849   pf->catalog = NULL;
02850   pf->num_obj = 0;
02851   pf->version = 0;
02852 
02853   seek_end(file);
02854   pf->file_size = tell_position(file);
02855 
02856   return pf;
02857 }
02858 
02859 static void
02860 pdf_file_free (pdf_file *pf)
02861 {
02862   unsigned long i;
02863 
02864   if (!pf) {
02865     return;
02866   }
02867 
02868   for (i = 0; i < pf->num_obj; i++) {
02869     if (pf->xref_table[i].direct)
02870       pdf_release_obj(pf->xref_table[i].direct);
02871     if (pf->xref_table[i].indirect)
02872       pdf_release_obj(pf->xref_table[i].indirect);
02873   }
02874 
02875   RELEASE(pf->xref_table);
02876   if (pf->trailer) {
02877     pdf_release_obj(pf->trailer);
02878     pf->trailer = NULL;
02879   }
02880   if (pf->catalog) {
02881     pdf_release_obj(pf->catalog);
02882     pf->catalog = NULL;
02883   }
02884 
02885   RELEASE(pf);  
02886 }
02887 
02888 void
02889 pdf_files_init (void)
02890 {
02891   pdf_files = NEW(1, struct ht_table);
02892   ht_init_table(pdf_files, (void (*)(void *)) pdf_file_free);
02893 }
02894 
02895 int
02896 pdf_file_get_version (pdf_file *pf)
02897 {
02898   ASSERT(pf);
02899   return pf->version;
02900 }
02901 
#if 0
/* Currently compiled out: return the trailer stored in pf. */
pdf_obj *
pdf_file_get_trailer (pdf_file *pf)
{
  pdf_obj *trailer;

  ASSERT(pf);
  trailer = pf->trailer;

  return trailer;
}
#endif
02910 
02911 pdf_obj *
02912 pdf_file_get_catalog (pdf_file *pf)
02913 {
02914   ASSERT(pf);
02915   return pf->catalog;
02916 }
02917 
02918 pdf_file *
02919 pdf_open (const char *ident, FILE *file)
02920 {
02921   pdf_file *pf;
02922 
02923   ASSERT(pdf_files);
02924 
02925   pf = (pdf_file *) ht_lookup_table(pdf_files, ident, strlen(ident));
02926 
02927   if (pf) {
02928     pf->file = file;
02929   } else {
02930     int version;
02931     pdf_obj *new_version;
02932 
02933     version = check_for_pdf_version(file);
02934     if (version < 0) {
02935       WARN("Not a PDF file.");
02936       return NULL;
02937     }
02938 
02939     pf = pdf_file_new(file);
02940     pf->version = version;
02941 
02942     if (!(pf->trailer = read_xref(pf)))
02943       goto error;
02944 
02945     if (pdf_lookup_dict(pf->trailer, "Encrypt")) {
02946       WARN("PDF document is encrypted.");
02947       goto error;
02948     }
02949 
02950     pf->catalog = pdf_deref_obj(pdf_lookup_dict(pf->trailer, "Root"));
02951     if (!PDF_OBJ_DICTTYPE(pf->catalog)) {
02952       WARN("Cannot read PDF document catalog. Broken PDF file?");
02953       goto error;
02954     }
02955 
02956     new_version = pdf_deref_obj(pdf_lookup_dict(pf->catalog, "Version"));
02957     if (new_version) {
02958       unsigned short minor;
02959 
02960       if (!PDF_OBJ_NAMETYPE(new_version) ||
02961          sscanf(pdf_name_value(new_version), "1.%hu", &minor) != 1) {
02962        pdf_release_obj(new_version);
02963        WARN("Illegal Version entry in document catalog. Broken PDF file?");
02964        goto error;
02965       }
02966 
02967       if (pf->version < minor)
02968        pf->version = minor;
02969 
02970       pdf_release_obj(new_version);
02971     }
02972 
02973     ht_append_table(pdf_files, ident, strlen(ident), pf);
02974   }
02975 
02976   return pf;
02977 
02978  error:
02979   pdf_file_free(pf);
02980   return NULL;
02981 }
02982 
02983 void
02984 pdf_close (pdf_file *pf)
02985 {
02986   if (pf)
02987     pf->file = NULL;
02988 }
02989 
02990 void
02991 pdf_files_close (void)
02992 {
02993   ASSERT(pdf_files);
02994   ht_clear_table(pdf_files);
02995   RELEASE(pdf_files);
02996 }
02997 
/*
 * Rewind "file" and look for a "%PDF-1.<minor>" header at its start.
 * Returns the minor version number, or -1 when the header is absent.
 */
static int
check_for_pdf_version (FILE *file) 
{
  unsigned short minor;
  int            lead;

  rewind(file);

  /* Peek at the first character without consuming it */
  lead = fgetc(file);
  if (ungetc(lead, file) != '%')
    return -1;

  if (fscanf(file, "%%PDF-1.%hu", &minor) != 1)
    return -1;

  return minor;
}
03008 
/* Return 1 when "file" starts with a PDF header that             */
/* check_for_pdf_version() accepts, and 0 otherwise.               */
int
check_for_pdf (FILE *file) 
{
  int version = check_for_pdf_version(file);

  return (version >= 0);
}
03014 
03015 static int CDECL
03016 import_dict (pdf_obj *key, pdf_obj *value, void *pdata)
03017 {
03018   pdf_obj *copy;
03019   pdf_obj *tmp;
03020 
03021   copy = (pdf_obj *) pdata;
03022 
03023   tmp  = pdf_import_object(value);
03024   if (!tmp) {
03025     return -1;
03026   }
03027   pdf_add_dict(copy, pdf_link_obj(key), tmp);
03028 
03029   return 0;
03030 }
03031 
03032 static pdf_obj loop_marker = { PDF_OBJ_INVALID, 0, 0, 0, 0, NULL };
03033 
03034 static pdf_obj *
03035 pdf_import_indirect (pdf_obj *object)
03036 {
03037   pdf_file *pf = OBJ_FILE(object);
03038   unsigned long obj_num = OBJ_NUM(object);
03039   unsigned short obj_gen = OBJ_GEN(object);
03040 
03041   pdf_obj *ref;
03042 
03043   ASSERT(pf);
03044 
03045   if (!checklabel(pf, obj_num, obj_gen)) {
03046     WARN("Can't resolve object: %lu %u", obj_num, obj_gen);
03047     return pdf_new_null();
03048   }
03049 
03050   if ((ref = pf->xref_table[obj_num].indirect)) {
03051     if (ref == &loop_marker)
03052       ERROR("Loop in object hierarchy detected. Broken PDF file?");
03053     return  pdf_link_obj(ref);
03054   } else {
03055     pdf_obj *obj, *tmp;
03056 
03057     obj = pdf_get_object(pf, obj_num, obj_gen);
03058 
03059     /* We mark the reference to be able to detect loops */
03060     pf->xref_table[obj_num].indirect = &loop_marker;
03061 
03062     tmp = pdf_import_object(obj);
03063     
03064     pf->xref_table[obj_num].indirect = ref = pdf_ref_obj(tmp);
03065     
03066     pdf_release_obj(tmp);
03067     pdf_release_obj(obj);
03068     
03069     return  pdf_link_obj(ref);
03070   }
03071 }
03072 
03073 /*
03074  * pdf_import_object recursively copies the object and those
03075  * referenced by it and changes the indirect references so that
03076  * they refer to the current output file. New indirect references
03077  * are remembered, which avoids duplicating objects when they
03078  * are imported several times.
03079  */
03080 pdf_obj *
03081 pdf_import_object (pdf_obj *object)
03082 {
03083   pdf_obj  *imported;
03084   pdf_obj  *tmp;
03085   int       i;
03086 
03087   switch (pdf_obj_typeof(object)) {
03088 
03089   case PDF_INDIRECT:
03090     if (OBJ_FILE(object)) {
03091       imported = pdf_import_indirect(object);
03092     } else {
03093       imported = pdf_link_obj(object);
03094     }
03095     break;
03096 
03097   case PDF_STREAM:
03098     {
03099       pdf_obj *stream_dict;
03100 
03101       tmp = pdf_import_object(pdf_stream_dict(object));
03102       if (!tmp)
03103        return NULL;
03104 
03105       imported    = pdf_new_stream(0);
03106       stream_dict = pdf_stream_dict(imported);
03107       pdf_merge_dict(stream_dict, tmp);
03108       pdf_release_obj(tmp);
03109       pdf_add_stream(imported,
03110                    pdf_stream_dataptr(object),
03111                    pdf_stream_length(object));
03112     }
03113     break;
03114 
03115   case PDF_DICT:
03116 
03117     imported = pdf_new_dict();
03118     if (pdf_foreach_dict(object, import_dict, imported) < 0) {
03119       pdf_release_obj(imported);
03120       return NULL;
03121     }
03122 
03123     break;
03124 
03125   case PDF_ARRAY:
03126 
03127     imported = pdf_new_array();
03128     for (i = 0; i < pdf_array_length(object); i++) {
03129       tmp = pdf_import_object(pdf_get_array(object, i));
03130       if (!tmp) {
03131        pdf_release_obj(imported);
03132        return NULL;
03133       }
03134       pdf_add_array(imported, tmp);
03135     }
03136     break;
03137 
03138   default:
03139     imported = pdf_link_obj(object);
03140   }
03141 
03142   return imported;
03143 }
03144 
03145 
03146 /* returns 0 if indirect references point to the same object */
03147 int
03148 pdf_compare_reference (pdf_obj *ref1, pdf_obj *ref2)
03149 {
03150   pdf_indirect *data1, *data2;
03151 
03152   ASSERT(PDF_OBJ_INDIRECTTYPE(ref1) && PDF_OBJ_INDIRECTTYPE(ref2));
03153 
03154   data1 = (pdf_indirect *) ref1->data;
03155   data2 = (pdf_indirect *) ref2->data;
03156 
03157   return data1->pf != data2->pf || data1->label != data2->label
03158     || data1->generation != data2->generation;
03159 }