Back to index

tetex-bin  3.0
pdfobj.c
Go to the documentation of this file.
00001 /*  $Header$
00002 
00003     This is dvipdfm, a DVI to PDF translator.
00004     Copyright (C) 1998, 1999 by Mark A. Wicks
00005 
00006     This program is free software; you can redistribute it and/or modify
00007     it under the terms of the GNU General Public License as published by
00008     the Free Software Foundation; either version 2 of the License, or
00009     (at your option) any later version.
00010 
00011     This program is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014     GNU General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License
00017     along with this program; if not, write to the Free Software
00018     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019     
00020     The author may be contacted via the e-mail address
00021 
00022        mwicks@kettering.edu
00023 */
00024 
00025        
00026 #include <ctype.h>
00027 #include <string.h>
00028 #include "system.h"
00029 #include "config.h"
00030 #include "mem.h"
00031 #include "error.h"
00032 #include "mfileio.h"
00033 #include "pdflimits.h"
00034 #include "pdfobj.h"
00035 #include "pdfspecial.h"
00036 #include "pdfparse.h"
00037 #include "twiddle.h"
00038 
00039 #ifdef HAVE_ZLIB
00040 #include <zlib.h>
00041 #endif /* HAVE_ZLIB */
00042 
/* Stream the PDF is written to; opened by pdf_out_init. */
FILE *pdf_output_file = NULL;
/* Input PDF stream that dirty indirect references are checked against. */
FILE *pdf_input_file = NULL;
/* Number of bytes written to pdf_output_file so far (xref offsets). */
unsigned long pdf_output_file_position = 0;
/* Byte count reported by pdf_out_flush as eliminated by compression. */
unsigned long compression_saved = 0;
/* Number of bytes written to pdf_output_file since the last newline. */
int pdf_output_line_position = 0;
/* Scratch buffer shared by the formatting routines of this file. */
#define FORMAT_BUF_SIZE 4096
char format_buffer[FORMAT_BUF_SIZE];
00049 
00050 static struct xref_entry 
00051 {
00052   unsigned long file_position;
00053   pdf_obj *pdf_obj;
00054 } *output_xref = NULL;
00055 static unsigned long pdf_max_ind_objects = 0;
00056 static unsigned long next_label = 1;
00057 
00058 static unsigned long startxref;
00059 
00060 static unsigned pdf_root_obj = 0, pdf_info_obj = 0;
00061 
00062 /* Internal static routines */
00063 
00064 static void pdf_flush_obj (FILE *file, const pdf_obj *object);
00065 static void pdf_label_obj (pdf_obj *object);
00066 
00067 static void pdf_out_char (FILE *file, char c);
00068 static void pdf_out (FILE *file, void *buffer, int length);
00069 
00070 static void release_indirect (pdf_indirect *data);
00071 static void write_indirect (FILE *file, const pdf_indirect *indirect);
00072 
00073 static void release_boolean (pdf_obj *data);
00074 static void write_boolean (FILE *file, const pdf_boolean *data);
00075 
00076 static void write_null (FILE *file);
00077 static void release_null (void *data);
00078 
00079 static void release_number (pdf_number *data);
00080 static void write_number (FILE *file, const pdf_number
00081                        *number);
00082 
00083 static void write_string (FILE *file, const pdf_string *string);
00084 static void release_string (pdf_string *data);
00085 
00086 static void write_name (FILE *file, const pdf_name *name);
00087 static void release_name (pdf_name *data);
00088 
00089 static void write_array (FILE *file, const pdf_array *array);
00090 static void release_array (pdf_array *data);
00091 
00092 static void write_dict (FILE *file, const pdf_dict *dict);
00093 static void release_dict (pdf_dict *data);
00094 
00095 static void write_stream (FILE *file, pdf_stream *stream);
00096 static void release_stream (pdf_stream *stream);
00097 static int pdf_match_name (const pdf_obj *name_obj, const char *name);  /* Name does not include the / */
00098 
/* Diagnostic switches, raised by pdf_obj_set_debug / pdf_obj_set_verbose. */
static unsigned char debug = 0;
static unsigned char verbose = 0;
/* Compression level for streams; a value of 0 disables compression. */
static char compression_level = 9;
00101 
00102 void pdf_obj_set_compression (int level)
00103 {
00104   if (level >= 0 && level <= 9) 
00105     compression_level = level;
00106   else {
00107     ERROR("set_compression: compression level");
00108   }
00109 #ifndef HAVE_ZLIB_COMPRESS2
00110   if (level != 0) 
00111     fprintf (stderr, "Unable to set compression level--your zlib doesn't have compress2()\n");
00112 #endif
00113   return;
00114 }
00115 
/* Minor version digit emitted after "%PDF-1." by pdf_out_init. */
static unsigned pdf_version = 3;

/* Choose the PDF minor version.  Only 2 and 3 are accepted; any other
   request is silently ignored and the current value is kept. */
void pdf_set_version (unsigned version)
{
  if (version < 2 || version > 3)
    return;
  pdf_version = version;
}
00123 
00124 void pdf_obj_set_debug(void)
00125 {
00126   debug = 1;
00127 }
00128 
00129 void pdf_obj_set_verbose(void)
00130 {
00131   if (verbose < 255)
00132     verbose += 1;
00133 }
00134 
00135 
00136 void pdf_out_init (const char *filename)
00137 {
00138   char v;
00139 #ifdef MEM_DEBUG
00140 MEM_START
00141 #endif
00142   if (!(pdf_output_file = MFOPEN (filename, FOPEN_WBIN_MODE))) {
00143     if (strlen(filename) < 128) {
00144       sprintf (format_buffer, "Unable to open %s\n", filename);
00145     } else
00146       sprintf (format_buffer, "Unable to open file");
00147     ERROR (format_buffer);
00148   }
00149   pdf_out (pdf_output_file, "%PDF-1.", strlen("%PDF-1."));
00150   v = '0'+pdf_version;
00151   pdf_out (pdf_output_file, &v, 1);
00152   pdf_out (pdf_output_file, "\n", 1);
00153 #ifdef MEM_DEBUG
00154 MEM_END
00155 #endif
00156 }
00157 
00158 static void dump_xref(void)
00159 {
00160   int length;
00161   unsigned long i;
00162   startxref = pdf_output_file_position;   /* Record where this xref is for
00163                                trailer */
00164   pdf_out (pdf_output_file, "xref\n", 5);
00165   sprintf (format_buffer, "%d %lu\n", 0, next_label);
00166   length = strlen (format_buffer);
00167   pdf_out (pdf_output_file, format_buffer, length);
00168   sprintf (format_buffer, "%010ld %05ld f \n", 0L, 65535L);
00169   length = strlen (format_buffer);
00170   /* Every space counts.  The space after the 'f' and 'n' is
00171    *essential*.  The PDF spec says the lines must be 20 characters
00172    long including the end of line character. */
00173   pdf_out (pdf_output_file, format_buffer, length);
00174   for (i=1; i<next_label; i++){
00175     sprintf (format_buffer, "%010ld %05ld n \n",
00176             output_xref[i-1].file_position, 0L);
00177     length = strlen (format_buffer);
00178     pdf_out (pdf_output_file, format_buffer, length);
00179   }
00180   /* Done with xref table */
00181   RELEASE (output_xref);
00182 }
00183 
00184 static void dump_trailer(void)
00185 {
00186   int length;
00187   unsigned long starttrailer;
00188   starttrailer = pdf_output_file_position;
00189   pdf_out (pdf_output_file, "trailer\n", 8);
00190   pdf_out (pdf_output_file, "<<\n", 3);
00191   sprintf (format_buffer, "/Size %lu\n",
00192           next_label);
00193   length = strlen (format_buffer);
00194   pdf_out (pdf_output_file, format_buffer, length);
00195   if (pdf_root_obj == 0) 
00196     ERROR ("dump_trailer:  Invalid root object");
00197   sprintf (format_buffer, "/Root %u %u R\n", pdf_root_obj, 0);
00198   length = strlen (format_buffer);
00199   pdf_out (pdf_output_file, format_buffer, length);
00200   if (pdf_info_obj != 0) {
00201     sprintf (format_buffer, "/Info %u %u R\n", pdf_info_obj, 0);
00202     length = strlen (format_buffer);
00203     pdf_out (pdf_output_file, format_buffer, length);
00204   }
00205   pdf_out (pdf_output_file, ">>\n", 3);
00206   pdf_out (pdf_output_file, "startxref\n", 10);
00207   sprintf (format_buffer, "%lu\n", startxref);
00208   length = strlen (format_buffer);
00209   pdf_out (pdf_output_file, format_buffer, length);
00210   pdf_out (pdf_output_file, "%%EOF\n", 6);
00211 }
00212 
00213 void pdf_out_flush (void)
00214 {
00215   if (pdf_output_file) {
00216     if (debug) fprintf (stderr, "pdf_obj_out_flush:  dumping xref\n");
00217     dump_xref();
00218     if (debug) fprintf (stderr, "pdf_obj_out_flush:  dumping trailer\n");
00219     dump_trailer();
00220     if (verbose) {
00221       if (compression_level>0) {
00222        fprintf (stderr, "\nCompression eliminated approximately %lu bytes",
00223                compression_saved);
00224       }
00225     }
00226     fprintf (stderr, "\n%lu bytes written",
00227             pdf_output_file_position);
00228     MFCLOSE (pdf_output_file);
00229   }
00230 }
00231 
00232 void pdf_error_cleanup (void)
00233 {
00234   /* This routine is the cleanup required for an abnormal exit.
00235      For now, simply close the file. */
00236   if (pdf_output_file)
00237     MFCLOSE (pdf_output_file);
00238 }
00239 
00240 
00241 void pdf_set_root (pdf_obj *object)
00242 {
00243   if (pdf_root_obj != 0) {
00244     ERROR ("pdf_set_root:  root object already set");
00245   }
00246   if (object -> label == 0) {  /* Make sure this object has a label */
00247     pdf_label_obj (object);
00248   }
00249   pdf_root_obj = object -> label;
00250 }
00251 
00252 void pdf_set_info (pdf_obj *object)
00253 {
00254   if (pdf_info_obj != 0) {
00255     ERROR ("pdf_set_info:  info object already set");
00256   }
00257   if (object -> label == 0) {  /* Make sure this object has a label */
00258     pdf_label_obj (object);
00259   }
00260   pdf_info_obj = object -> label;
00261 }
00262 
00263 static void pdf_out_char (FILE *file, char c)
00264 {
00265   fputc (c, file);
00266   /* Keep tallys for xref table *only* if writing a pdf file */
00267   if (file == pdf_output_file) {
00268     pdf_output_file_position += 1;
00269     pdf_output_line_position += 1;
00270   }
00271   if (file == pdf_output_file && c == '\n')
00272     pdf_output_line_position = 0;
00273 }
00274 
00275 static void pdf_out (FILE *file, void *buffer, int length)
00276 {
00277   fwrite (buffer, 1, length, file);
00278   /* Keep tallys for xref table *only* if writing a pdf file */
00279   if (file == pdf_output_file) {
00280     pdf_output_file_position += length;
00281     pdf_output_line_position += length;
00282     if (length > 0 && ((char *)buffer)[length-1] == '\n')
00283       pdf_output_line_position = 0;
00284   }
00285 }
00286 
00287 static void pdf_out_white (FILE *file)
00288 {
00289   if (file == pdf_output_file && pdf_output_line_position >= 80) {
00290     pdf_out_char (file, '\n');
00291   } else {
00292     pdf_out_char (file, ' ');
00293   }
00294   return;
00295 }
00296 
00297 pdf_obj *pdf_new_obj(pdf_obj_type type)
00298 {
00299   pdf_obj *result;
00300   result = NEW (1, pdf_obj);
00301   result -> type = type;
00302   result -> data = NULL;
00303   result -> label = 0;
00304   result -> generation = 0;
00305   result -> refcount = 1;
00306   return result;
00307 }
00308 
00309 static void pdf_label_obj (pdf_obj *object)
00310 {
00311   if (object == NULL)
00312     return;
00313   if (next_label > pdf_max_ind_objects) {
00314     pdf_max_ind_objects += IND_OBJECTS_ALLOC_SIZE;
00315     output_xref = RENEW (output_xref, pdf_max_ind_objects,
00316                       struct xref_entry);
00317   }
00318   if (object -> label == 0) {  /* Don't change label on an already labeled
00319                               object.  Ignore such calls */
00320     /* Save so we can lookup this object by its number */
00321     output_xref[next_label-1].pdf_obj = object;
00322     object -> label = next_label++;
00323     object -> generation = 0;
00324   }
00325 }
00326 
00327 /* This doesn't really copy the object, but allows 
00328    it to be used without fear that somebody else will free it */
00329 
00330 pdf_obj *pdf_link_obj (pdf_obj *object)
00331 {
00332   if (object == NULL)
00333     ERROR ("pdf_link_obj passed null pointer");
00334   object -> refcount += 1;
00335   return object;
00336 }
00337 
00338 
00339 pdf_obj *pdf_ref_obj(pdf_obj *object)
00340 {
00341   pdf_obj *result;
00342   pdf_indirect *indirect;
00343   
00344   if (object == NULL)
00345     ERROR ("pdf_ref_obj passed null pointer");
00346   
00347   if (object -> refcount == 0) {
00348     fprintf (stderr, "\npdf_ref_obj:  Called with already released object");
00349     pdf_write_obj (stderr, object);
00350     ERROR ("Fatal Error\n");
00351   }
00352   result = pdf_new_obj (PDF_INDIRECT);
00353   indirect = NEW (1, pdf_indirect);
00354   result -> data = indirect;
00355   if (object -> type == PDF_INDIRECT) { /* If an object is already an indirect reference,
00356                                       reference the original
00357                                       object, not the indirect
00358                                       one */
00359     indirect -> label = ((pdf_indirect *) (object -> data)) -> label;
00360     indirect -> generation = ((pdf_indirect *) (object -> data)) -> generation;
00361     indirect -> dirty = ((pdf_indirect *) (object -> data)) -> dirty;
00362     indirect -> dirty_file = ((pdf_indirect *) (object -> data)) -> dirty_file;
00363   } else {
00364     if (object -> label == 0) {
00365       pdf_label_obj (object);
00366     }
00367     indirect -> label = object -> label;
00368     indirect -> generation = object -> generation;
00369     indirect -> dirty = 0;
00370     indirect -> dirty_file = NULL;
00371   }
00372   return result;
00373 }
00374 
00375 static void release_indirect (pdf_indirect *data)
00376 {
00377   RELEASE (data);
00378 }
00379 
00380 static void write_indirect (FILE *file, const pdf_indirect *indirect)
00381 {
00382   int length;
00383 #ifdef MEM_DEBUG
00384 MEM_START
00385 #endif
00386   if (indirect -> dirty) {
00387     if (file == stderr) {
00388       pdf_out (file, "{d}", 3);
00389       sprintf (format_buffer, "%d %d R", indirect -> label,
00390               indirect -> generation);
00391       length = strlen (format_buffer);
00392       pdf_out (file, format_buffer, length);
00393     }
00394     else {
00395       pdf_obj *clean;
00396       if (indirect -> dirty_file != pdf_input_file) {
00397        fprintf (stderr, "\nwrite_indirect, label=%d, from_file=%p, current_file=%p\n", indirect -> label, indirect->dirty_file, pdf_input_file);
00398        ERROR ("write_indirect:  input PDF file doesn't match object");
00399       }
00400       clean = pdf_ref_file_obj (indirect -> label);
00401       pdf_write_obj (file, clean);
00402       pdf_release_obj (clean);
00403     }
00404   } else {
00405     sprintf (format_buffer, "%d %d R", indirect -> label,
00406             indirect -> generation);
00407     length = strlen (format_buffer);
00408     pdf_out (file, format_buffer, length);
00409   }
00410 #ifdef MEM_DEBUG
00411 MEM_END
00412 #endif
00413 }
00414 
00415 pdf_obj *pdf_new_null (void)
00416 {
00417   pdf_obj *result;
00418   result = pdf_new_obj (PDF_NULL);
00419   result -> data = NULL;
00420   return result;
00421 }
00422 
/* Nothing to free for a null object; DATA is ignored. */
static void release_null (void *data)
{
  (void) data;
}
00427 
/* Write the PDF keyword "null" to FILE. */
static void write_null (FILE *file)
{
  pdf_out (file, "null", 4);
}
00432 
00433 pdf_obj *pdf_new_boolean (char value)
00434 {
00435   pdf_obj *result;
00436   pdf_boolean *data;
00437   result = pdf_new_obj (PDF_BOOLEAN);
00438   data = NEW (1, pdf_boolean);
00439   result -> data = data;
00440   data -> value = value;
00441   return result;
00442 }
00443 
00444 static void release_boolean (pdf_obj *data)
00445 {
00446   RELEASE (data);
00447 }
00448 
00449 static void write_boolean (FILE *file, const pdf_boolean *data)
00450 {
00451   if (data -> value) {
00452     pdf_out (file, "true", 4);
00453   }
00454   else {
00455     pdf_out (file, "false", 5);
00456   }
00457 }
00458 
00459 void pdf_set_boolean (pdf_obj *object, char value)
00460 {
00461    if (object == NULL || object -> type != PDF_BOOLEAN) {
00462      ERROR ("pdf_set_boolean:  Passed non-boolean object");
00463    }
00464    ((pdf_boolean *) (object -> data)) -> value = value;
00465 }
00466 
00467 pdf_obj *pdf_new_number (double value)
00468 {
00469   pdf_obj *result;
00470   pdf_number *data;
00471   result = pdf_new_obj (PDF_NUMBER);
00472   data = NEW (1, pdf_number);
00473   result -> data = data;
00474   data -> value = value;
00475   return result;
00476 }
00477 
00478 static void release_number (pdf_number *data)
00479 {
00480   RELEASE (data);
00481 }
00482 
00483 static void write_number (FILE *file, const pdf_number *number)
00484 {
00485   int count;
00486   sprintf (format_buffer, "%.10g", number -> value);
00487   count = strlen (format_buffer);
00488   pdf_out (file, format_buffer, count);
00489 }
00490 
00491 
00492 void pdf_set_number (pdf_obj *object, double value)
00493 {
00494    if (object == NULL || object -> type != PDF_NUMBER) {
00495      ERROR ("pdf_set_number:  Passed non-number object");
00496    }
00497    ((pdf_number *) (object -> data)) -> value = value;
00498 }
00499 
00500 double pdf_number_value (pdf_obj *object)
00501 {
00502   if (object == NULL || object -> type != PDF_NUMBER) {
00503     ERROR ("pdf_obj_number_value:  Passed non-number object");
00504   }
00505   return ((pdf_number *)(object -> data)) -> value;
00506 }
00507 
00508 pdf_obj *pdf_new_string (const void *string, unsigned length)
00509 {
00510   pdf_obj *result;
00511   pdf_string *data;
00512   result = pdf_new_obj (PDF_STRING);
00513   data = NEW (1, pdf_string);
00514   result -> data = data;
00515   if (length != 0) {
00516     data -> length = length;
00517     data -> string = NEW (length+1, unsigned char);
00518     memcpy (data -> string, string, length);
00519     data -> string[length] = 0;
00520   } else {
00521     data -> length = 0;
00522     data -> string = NULL;
00523   }
00524   return result;
00525 }
00526 
00527 void *pdf_string_value (pdf_obj *a_pdf_string)
00528 {
00529   pdf_string *data;
00530   data = a_pdf_string -> data;
00531   return data -> string;
00532 }
00533 
00534 unsigned int pdf_string_length (pdf_obj *a_pdf_string)
00535 {
00536   pdf_string *data;
00537   data = a_pdf_string -> data;
00538   return (data -> length);
00539 }
00540 
/* This routine escapes non printable characters and control
   characters in an output string.  It optionally remaps
   the problem characters in the encoding.

   buffer, bufsize: destination and its capacity in bytes.
   s, len:          source bytes and their count.
   remap:           when non-zero each byte is passed through twiddle()
                    before being escaped.

   Bytes outside 32..126 become a backslash followed by three octal
   digits; '(' , ')' and '\\' are preceded by a backslash; everything
   else is copied unchanged.  Returns the number of bytes stored in
   BUFFER.  The output is NOT zero terminated.  Running out of room is
   reported through ERROR. */

int pdfobj_escape_str (char *buffer, int bufsize, unsigned char *s,
		       int len, int remap)
{
  int result = 0, i;
  unsigned char ch;
  for (i=0; i<len; i++) {
    ch = remap? twiddle(s[i]): s[i];
    /* The longest expansion of one input byte is the 4-byte octal
       escape, so 4 free bytes are always enough. */
    if (result+4 > bufsize)
      ERROR ("pdfobj_escape_str: Buffer overflow");
    if (ch < 32 || ch > 126) {
      /* Emit the octal digits by hand: sprintf("%03o") would append a
	 terminating zero as a fifth byte, one past the space checked
	 above. */
      buffer[result++] = '\\';
      buffer[result++] = (char) ('0' + ((ch >> 6) & 7));
      buffer[result++] = (char) ('0' + ((ch >> 3) & 7));
      buffer[result++] = (char) ('0' + (ch & 7));
      continue;
    }
    switch (ch) {
    case '(':
      buffer[result++] = '\\';
      buffer[result++] = '(';
      break;
    case ')':
      buffer[result++] = '\\';
      buffer[result++] = ')';
      break;
    case '\\':
      buffer[result++] = '\\';
      buffer[result++] = '\\';
      break;
    default:
      buffer[result++] = ch;
      break;
    }
  }
  return result;
}
00581 
00582 
00583 static void write_string (FILE *file, const pdf_string *string)
00584 {
00585   unsigned char *s = string -> string;
00586   int count, i;
00587   pdf_out_char (file, '(');
00588     /* This section of code probably isn't speed critical.  Escaping the
00589      characters in the string one at a time may seem slow, but it's
00590      safe if the formatted string length exceeds FORMAT_BUF_SIZE.
00591      Occasionally you see some long strings in PDF.  pdfobj_escape_str
00592      is also used for strings of text with no kerning.  These must be
00593      handled as quickly as possible since there are so many of them.  */ 
00594   for (i=0; i<string->length; i++) {
00595     count = pdfobj_escape_str (format_buffer, FORMAT_BUF_SIZE, s+i,
00596                             1, 0);
00597     pdf_out (file, format_buffer, count);
00598   }
00599   pdf_out_char (file, ')');
00600 }
00601 
00602 static void release_string (pdf_string *data)
00603 {
00604   if (data -> string != NULL)
00605     RELEASE (data -> string);
00606   RELEASE (data);
00607 }
00608 
00609 void pdf_set_string (pdf_obj *object, unsigned char *string, unsigned length)
00610 {
00611   pdf_string *data;
00612   if (object == NULL || object -> type != PDF_STRING) {
00613      ERROR ("pdf_set_string:  Passed non-string object");
00614   }
00615   data = object -> data;
00616   if (data -> length != 0) {
00617     RELEASE (data -> string);
00618   }
00619   if (length != 0) {
00620     data -> length = length;
00621     data -> string = NEW (length+1, unsigned char);
00622     memcpy (data -> string, string, length);
00623     (data->string)[length] = 0;
00624   } else {
00625     data -> length = 0;
00626     data -> string = NULL;
00627   }
00628   return;
00629 }
00630 
/* Return 1 when every character of NAME belongs to the set accepted
   for PDF names, 0 otherwise.  The empty string is accepted.  Blanks
   and characters such as '#', '%', '/', '(' and ')' are not in the
   set. */
int pdf_check_name(const char *name)
{
  static char *valid_chars =
    "!\"$&'*+,-.0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\^_`abcdefghijklmnopqrstuvwxyz|~";
  /* strspn stops at the first character outside the set; the name is
     valid exactly when that stop is the terminating zero. */
  return name[strspn (name, valid_chars)] == '\0' ? 1 : 0;
}
00640 
00641 pdf_obj *pdf_new_name (const char *name)  /* name does *not* include the / */ 
00642 {
00643   pdf_obj *result;
00644   unsigned length = strlen (name);
00645   pdf_name *data;
00646   if (!pdf_check_name (name)) {
00647     fprintf (stderr, "Invalid PDF name \"%s\"\n", name);
00648     ERROR ("pdf_new_name:  invalid PDF name");
00649   }
00650   result = pdf_new_obj (PDF_NAME);
00651   data = NEW (1, pdf_name);
00652   result -> data = data;
00653   if (length != 0) {
00654     data -> name = NEW (length+1, char);
00655     memcpy (data -> name, name, length);
00656     (data->name)[length] = 0;
00657   } else 
00658     data -> name = NULL;
00659   return result;
00660 }
00661 
00662 static void write_name (FILE *file, const pdf_name *name)
00663 {
00664   char *s = name -> name;
00665   int i, length;
00666   pdf_out_char (file, '/');
00667   if (name -> name == NULL)
00668     length = 0;
00669   else
00670     length = strlen (name -> name);
00671   for (i=0; i < length; i++) {
00672     if (isprint (s[i]) &&
00673        s[i] != '/' &&
00674        s[i] != '%' &&
00675        s[i] != '(' &&
00676        s[i] != ')' &&
00677        s[i] != '[' && 
00678        s[i] != ']' && 
00679        s[i] != '#')
00680       pdf_out_char (file, s[i]);
00681   }
00682 }
00683 
00684 
00685 static void release_name (pdf_name *data)
00686 {
00687   if (data -> name != NULL)
00688     RELEASE (data -> name);
00689   RELEASE (data);
00690 }
00691 
00692 void pdf_set_name (pdf_obj *object, char *name)
00693 {
00694   pdf_name *data;
00695   unsigned length = strlen (name);
00696   if (object == NULL || object -> type != PDF_NAME) {
00697      ERROR ("pdf_set_name:  Passed non-name object");
00698   }
00699   data = object -> data;
00700   if (data -> name != NULL) {
00701     RELEASE (data -> name);
00702   }
00703   if (length != 0) {
00704     data -> name = NEW (length+1, char);
00705     memcpy (data -> name, name, length);
00706     (data->name)[length] = 0;
00707   } else {
00708     data -> name = NULL;
00709   }
00710 }
00711 
00712 char *pdf_name_value (pdf_obj *object)
00713 {
00714   pdf_name *data;
00715   if (object == NULL || object -> type != PDF_NAME) {
00716      ERROR ("pdf_name_value:  Passed non-name object");
00717   }
00718   data = object -> data;
00719   if (data -> name == NULL)
00720     return NULL;
00721   return data -> name;
00722 }
00723 
00724 
00725 pdf_obj *pdf_new_array (void)
00726 {
00727   pdf_obj *result;
00728   pdf_array *data;
00729   result = pdf_new_obj (PDF_ARRAY);
00730   data = NEW (1, pdf_array);
00731   data -> values = NULL;
00732   data -> max = 0;
00733   data -> size = 0;
00734   result -> data = data;
00735   return result;
00736 }
00737 
00738 static void write_array (FILE *file, const pdf_array *array)
00739 {
00740   if (array -> size > 0) {
00741     unsigned long i;
00742     pdf_out_char (file, '[');
00743     for (i=0; i<array->size; i++) {
00744       if (i != 0 &&
00745          ((array->values)[i]->type) != PDF_STRING &&
00746          ((array->values)[i]->type) != PDF_NAME &&
00747          ((array->values)[i]->type) != PDF_ARRAY)
00748        pdf_out_white (file);
00749       pdf_write_obj (file, (array->values)[i]);
00750     }
00751     pdf_out_char (file, ']');
00752   } else {
00753     write_null (file);
00754   }
00755 }
00756 
00757 pdf_obj *pdf_get_array (pdf_obj *array, unsigned long index)
00758 {
00759   pdf_array *data;
00760   pdf_obj *result = NULL;
00761   if (array == NULL) {
00762     ERROR ("pdf_get_array: passed NULL object");
00763   }
00764   if (array -> type != PDF_ARRAY) {
00765     ERROR ("pdf_get_array: passed non array object");
00766   }
00767   data = array -> data;
00768   if (index < data -> size) {
00769     result = (data->values)[index];
00770   }
00771   return result;
00772 }
00773 
00774 static void release_array (pdf_array *data)
00775 {
00776   unsigned long i;
00777   for (i=0; i<data->size; i++) {
00778     pdf_release_obj ((data ->values)[i]);
00779   }
00780   if (data->size > 0)
00781     RELEASE (data->values);
00782   RELEASE (data);
00783 }
00784 
00785 void pdf_add_array (pdf_obj *array, pdf_obj *object) /* Array is ended
00786                                                  by a node with NULL
00787                                                  this pointer */
00788 {
00789   pdf_array *data;
00790   if (array == NULL || array -> type != PDF_ARRAY) {
00791      ERROR ("pdf_add_array:  Passed non-array object");
00792   }
00793   data = array -> data;
00794   if (data -> size >= data -> max) {
00795     data->max += ARRAY_ALLOC_SIZE;
00796     data->values = RENEW (data->values, data->max, pdf_obj *);
00797   }
00798   (data->values)[data->size++] = object;
00799   return;
00800 }
00801 
00802 
00803 static void write_dict (FILE *file, const pdf_dict *dict)
00804 {
00805   pdf_out (file, "<<\n", 3);
00806   while (dict -> key != NULL) {
00807     pdf_write_obj (file, dict -> key);
00808     if (((dict -> value) -> type) == PDF_BOOLEAN ||
00809        ((dict -> value) -> type) == PDF_NUMBER ||
00810        ((dict -> value) -> type) == PDF_INDIRECT ||
00811        ((dict -> value) -> type) == PDF_NULL)
00812        pdf_out_white (file);
00813     pdf_write_obj (file, dict -> value);
00814     dict = dict -> next;
00815     pdf_out_char (file, '\n');
00816   }
00817   pdf_out (file, ">>", 2);
00818 }
00819 
00820 pdf_obj *pdf_new_dict (void)
00821 {
00822   pdf_obj *result;
00823   pdf_dict *data;
00824   result = pdf_new_obj (PDF_DICT);
00825   data = NEW (1, pdf_dict);
00826   data -> key = NULL;
00827   data -> value = NULL;
00828   data -> next = NULL;
00829   result -> data = data;
00830   return result;
00831 }
00832 
00833 static void release_dict (pdf_dict *data)
00834 {
00835   pdf_dict *next;
00836   while (data != NULL && data -> key != NULL) {
00837     pdf_release_obj (data -> key);
00838     pdf_release_obj (data -> value);
00839     next = data -> next;
00840     RELEASE (data);
00841     data = next;
00842   }
00843   RELEASE (data);
00844 }
00845 
00846 void pdf_add_dict (pdf_obj *dict, pdf_obj *key, pdf_obj *value) /* Array is ended
00847                                                            by a node with NULL
00848                                                            this pointer */
00849 {
00850   pdf_dict *data;
00851   pdf_dict *new_node;
00852   if (key == NULL || key -> type != PDF_NAME ) {
00853     ERROR ("pdf_add_dict: Passed invalid key");
00854   }
00855   if (value != NULL &&
00856       (value -> type == 0 || value -> type > PDF_INDIRECT )) {
00857     ERROR ("pdf_add_dict: Passed invalid value");
00858   }
00859   if (dict == NULL || dict -> type != PDF_DICT) {
00860     fprintf (stderr, "key:");
00861     pdf_write_obj (stderr, key);
00862     fprintf (stderr, "value:");
00863     pdf_write_obj (stderr, value);
00864     ERROR ("pdf_add_dict:  Passed non-dict object");
00865   }
00866   data = dict -> data;
00867   /* If this key already exists, simply replace the value */
00868   while (data -> key != NULL) {
00869     if (!strcmp (pdf_name_value(key), pdf_name_value(data->key))) {
00870       /* Release the old value */
00871       pdf_release_obj (data->value);
00872       /* Release the new key (we don't need it) */
00873       pdf_release_obj (key);
00874       data->value = value;
00875       break;
00876     }
00877     data = data -> next;
00878   }
00879   /* If we didn't find the key, build a new "end" node and add
00880      the new key just before the end */
00881   if (data -> key == NULL) {
00882     new_node = NEW (1, pdf_dict);
00883     new_node -> key = NULL;
00884     new_node -> value = NULL;
00885     new_node -> next = NULL;
00886     data -> next = new_node;
00887     data -> key = key;
00888     data -> value = value;
00889   }
00890   return;
00891 }
00892 
00893 /* pdf_merge_dict makes a link for each item in dict2 before
00894    stealing it */
00895 void pdf_merge_dict (pdf_obj *dict1, pdf_obj *dict2)
00896 {
00897   pdf_dict *data;
00898   if (dict1 == NULL || dict1 -> type != PDF_DICT) 
00899     ERROR ("pdf_merge_dict:  Passed invalid first dictionary");
00900   if (dict2 == NULL || dict2 -> type != PDF_DICT)
00901     ERROR ("pdf_merge_dict:  Passed invalid second dictionary");
00902   data = dict2 -> data;
00903   while (data -> key != NULL) {
00904     pdf_add_dict (dict1, pdf_link_obj(data -> key),
00905                 pdf_link_obj (data -> value));
00906     data = data -> next;
00907   }
00908 }
00909 
00910 static int pdf_match_name (const pdf_obj *name_obj, const char *name_string)
00911 {
00912   pdf_name *data;
00913   data = name_obj -> data;
00914   return (!strcmp (data -> name, name_string));
00915 }
00916 
00917 pdf_obj *pdf_lookup_dict (const pdf_obj *dict, const char *name)
00918 {
00919   pdf_dict *data;
00920   if (dict == NULL || dict ->type != PDF_DICT) 
00921     ERROR ("pdf_lookup_dict:  Passed invalid dictionary");
00922   data = dict -> data;
00923   while (data -> key != NULL) {
00924     if (pdf_match_name (data -> key, name))
00925       return data -> value;
00926     data = data -> next;
00927   }
00928   return NULL;
00929 }
00930 
00931 void pdf_remove_dict (pdf_obj *dict, const char *name)
00932 {
00933   pdf_dict *data, **data_p;
00934   if (dict == NULL || dict->type != PDF_DICT)
00935     ERROR ("pdf_remove_dict:  Passed invalid dictionary");
00936   data = (dict -> data);
00937   data_p = (pdf_dict **) &(dict->data);
00938   while (data->key != NULL) {
00939     if (pdf_match_name (data -> key, name)) {
00940       pdf_release_obj (data -> key);
00941       pdf_release_obj (data -> value);
00942       *data_p = data -> next;
00943       RELEASE (data);
00944       break;
00945     }
00946     data_p = &(data->next);
00947     data = data -> next;
00948   }
00949   return;
00950 }
00951 
00952 char *pdf_get_dict (const pdf_obj *dict, int index)
00953 {
00954   pdf_dict *data;
00955   char *result;
00956   if (dict == NULL) {
00957     ERROR ("pdf_get_dict: passed NULL object");
00958   }
00959   if (dict -> type != PDF_DICT) {
00960     ERROR ("pdf_get_dict: passed non array object");
00961   }
00962   data = dict -> data;
00963   while (index-- > 0 && data -> next != NULL)
00964     data = data -> next;
00965   if (data -> next == NULL)
00966     return NULL;
00967   result = pdf_name_value (data -> key);
00968   return result;
00969 }
00970 
00971 pdf_obj *pdf_new_stream (int flags)
00972 {
00973   pdf_obj *result;
00974 #ifdef HAVE_ZLIB  
00975   pdf_obj *filters = NULL;
00976 #endif /* HAVE_ZLIB */
00977   pdf_stream *data;
00978   result = pdf_new_obj (PDF_STREAM);
00979   data = NEW (1, pdf_stream);
00980   result -> data = data;
00981   data -> dict = pdf_new_dict ();  /* Although we are using an arbitrary
00982                                   pdf_object here, it must have
00983                                   type=PDF_DICT and cannot be an
00984                                   indirect reference.  This will
00985                                   be checked by the output routine 
00986                                */
00987   data -> _flags = flags;
00988 #ifdef HAVE_ZLIB
00989   if ((flags & STREAM_COMPRESS) && compression_level > 0) {
00990     if (!filters) {
00991       filters = pdf_new_array();
00992       pdf_add_dict (data -> dict, pdf_new_name ("Filter"), filters);
00993     }
00994     pdf_add_array (filters, pdf_new_name ("FlateDecode"));
00995   }
00996 #endif /* HAVE_ZLIB */  
00997 
00998   data -> stream_length = 0;
00999   data -> max_length = 0;
01000   data -> stream = NULL;
01001   return result;
01002 }
01003 
01004 
01005 static void write_stream (FILE *file, pdf_stream *stream)
01006 {
01007 #define COMPRESS_LEVEL 9
01008 #define THRESHOLD 100
01009   unsigned char *filtered;
01010   unsigned long filtered_length;
01011 #ifdef HAVE_ZLIB
01012   unsigned long buffer_length;
01013   unsigned char *buffer;
01014 #endif /* HAVE_ZLIB */
01015   /* Always work from a copy of the stream */
01016   /* All filters read from "filtered" and leave their result in
01017      "filtered" */
01018   filtered = NEW (stream->stream_length+1, unsigned char);
01019   memcpy (filtered, stream->stream, stream->stream_length);
01020   filtered_length = stream->stream_length;
01021 
01022 #ifdef HAVE_ZLIB
01023   /* Apply compression filter if requested */
01024   if ((stream -> _flags & STREAM_COMPRESS) && compression_level > 0) {
01025     buffer_length = filtered_length + filtered_length/1000 + 14;
01026     buffer = NEW (buffer_length, unsigned char);
01027 #ifdef HAVE_ZLIB_COMPRESS2    
01028     if (compress2 (buffer, &buffer_length, filtered,
01029                  filtered_length, compression_level))
01030       ERROR ("Zlib error");
01031 #else 
01032     if (compress (buffer, &buffer_length, filtered,
01033                 filtered_length))
01034       ERROR ("Zlib error");
01035 #endif /* HAVE_ZLIB_COMPRESS2 */
01036     RELEASE (filtered);
01037     filtered = buffer;
01038     compression_saved += (filtered_length-buffer_length)-strlen("/Filter [/FlateDecode]\n");
01039     filtered_length = buffer_length;
01040   }
01041 #endif /* HAVE_ZLIB */
01042   /* Add a '\n' if the last character wasn't one */
01043   if (filtered_length > 0 && filtered[filtered_length-1] != '\n') {
01044     filtered[filtered_length++] = '\n';
01045   }
01046   pdf_add_dict (stream->dict,
01047               pdf_new_name ("Length"),
01048               pdf_new_number(filtered_length));
01049   pdf_write_obj (file, stream -> dict);
01050   pdf_out (file, "\nstream\n", 8);
01051   
01052   if (filtered_length > 0) {
01053     pdf_out (file, filtered, filtered_length);
01054   }
01055 
01056   RELEASE (filtered);
01057   /* This stream length "object" gets reset every time write_stream is
01058      called for the stream object */
01059   /* If this stream gets written more than once with different
01060      filters, this could be a problem */
01061   pdf_out (file, "endstream", 9);
01062   return;
01063 }
01064 
01065 static void release_stream (pdf_stream *stream)
01066 {
01067   pdf_release_obj (stream -> dict);
01068   if (stream -> stream_length > 0)
01069     RELEASE (stream -> stream);
01070   RELEASE (stream);
01071 }
01072 
01073 pdf_obj *pdf_stream_dict (pdf_obj *stream)
01074 {
01075   pdf_stream *data;
01076   if (stream == NULL || stream -> type != PDF_STREAM) {
01077      ERROR ("pdf_stream_dict:  Passed non-stream object");
01078   }
01079   data = stream -> data;
01080   return data -> dict;
01081 }
01082 
01083 void pdf_add_stream (pdf_obj *stream, char *stream_data, unsigned length)
01084 {
01085   pdf_stream *data;
01086   if (stream == NULL || stream -> type != PDF_STREAM) {
01087      ERROR ("pdf_add_stream:  Passed non-stream object");
01088   }
01089   if (length == 0)
01090     return;
01091   data = stream -> data;
01092   if (data -> stream_length + length > data -> max_length) {
01093     data -> max_length += length + STREAM_ALLOC_SIZE;
01094     data -> stream = RENEW (data -> stream, data->max_length, char);
01095   }
01096   memcpy ((data->stream)+(data->stream_length), stream_data,
01097          length);
01098   data->stream_length += length;
01099 }
01100 
01101 void pdf_write_obj (FILE *file, const pdf_obj *object)
01102 {
01103   if (object == NULL) {
01104     write_null(file);
01105     return;
01106   }
01107   if (object -> type > PDF_INDIRECT) {
01108     fprintf (stderr, "Object type = %d\n", object -> type);
01109     ERROR ("pdf_write_obj:  Called with invalid object");
01110   }
01111   if (file == stderr)
01112     fprintf (stderr, "{%d}", object -> refcount);
01113   switch (object -> type) {
01114   case PDF_BOOLEAN:
01115     write_boolean (file, object -> data);
01116     break;
01117   case PDF_NUMBER:
01118     write_number (file, object -> data);
01119     break;
01120   case PDF_STRING:
01121     write_string (file, object -> data);
01122     break;
01123   case PDF_NAME:
01124     write_name (file, object -> data);
01125     break;
01126   case PDF_ARRAY:
01127     write_array (file, object -> data);
01128     break;
01129   case PDF_DICT:
01130     write_dict (file, object -> data);
01131     break;
01132   case PDF_STREAM:
01133     write_stream (file, object -> data);
01134     break;
01135   case PDF_NULL:
01136     write_null (file);
01137     break;
01138   case PDF_INDIRECT:
01139     write_indirect (file, object -> data);
01140     break;
01141   }
01142 }
01143 
01144 static void pdf_flush_obj (FILE *file, const pdf_obj *object) 
01145      /* Write the object to the file */ 
01146 {
01147   int length;
01148   /* Record file position.  No object is numbered 0, so subtract 1
01149      when using as an array index */
01150   output_xref[object->label-1].file_position = pdf_output_file_position;
01151   sprintf (format_buffer, "%lu %d obj\n", object -> label ,
01152           object -> generation);
01153   length = strlen (format_buffer);
01154   pdf_out (file, format_buffer, length);
01155   pdf_write_obj (file, object);
01156   pdf_out (file, "\nendobj\n", 8);
01157 }
01158 
01159 
01160 void pdf_release_obj (pdf_obj *object)
01161 {
01162   if (object == NULL)
01163     return;
01164   if (object -> type > PDF_INDIRECT ||
01165       object -> refcount <= 0) {
01166     fprintf (stderr, "pdf_release_obj: object = %p, type = %d\n", object, object ->
01167             type);
01168     pdf_write_obj (stderr, object);
01169     ERROR ("pdf_release_obj:  Called with invalid object");
01170   }
01171   object -> refcount -= 1;
01172     if (object -> refcount == 0) { /* Nothing is using this object so it's okay to
01173                                 remove it */
01174     /* Nonzero "label" means object needs to be written before it's destroyed*/
01175     if (object -> label && pdf_output_file != NULL) { 
01176       pdf_flush_obj (pdf_output_file, object);
01177     }
01178     switch (object -> type) {
01179     case PDF_BOOLEAN:
01180       release_boolean (object -> data);
01181       break;
01182     case PDF_NULL:
01183       release_null (object -> data);
01184       break;
01185     case PDF_NUMBER:
01186       release_number (object -> data);
01187       break;
01188     case PDF_STRING:
01189       release_string (object -> data);
01190       break;
01191     case PDF_NAME:
01192       release_name (object -> data);
01193       break;
01194     case PDF_ARRAY:
01195       release_array (object -> data);
01196       break;
01197     case PDF_DICT:
01198       release_dict (object -> data);
01199       break;
01200     case PDF_STREAM:
01201       release_stream (object -> data);
01202       break;
01203     case PDF_INDIRECT:
01204       release_indirect (object -> data);
01205       break;
01206     }
01207   /* This might help detect freeing already freed objects */
01208     /*  object -> type = -1;*/
01209     RELEASE (object);
01210   }
01211 }
01212 
01213 static int backup_line (void)
01214 {
01215   int ch;
01216   ch = -1;
01217   if (debug) {
01218     fprintf (stderr, "\nbackup_line:\n");
01219   }
01220   /* Note: this code should work even if \r\n is eol.
01221      It could fail on a machine where \n is eol and
01222      there is a \r in the stream---Highly unlikely
01223      in the last few bytes where this is likely to be used.
01224   */
01225   if (tell_position (pdf_input_file) > 1)
01226     do {
01227       seek_relative (pdf_input_file, -2);
01228       if (debug)
01229        fprintf (stderr, "%c", ch);
01230     } while (tell_position (pdf_input_file) > 0 &&
01231             (ch = fgetc (pdf_input_file)) >= 0 &&
01232             (ch != '\n' && ch != '\r' ));
01233   if (debug)
01234     fprintf (stderr, "<-\n");
01235   if (ch < 0) {
01236     return 0;
01237   }
01238   return 1;
01239 }
01240 
01241 static unsigned long pdf_file_size = 0;
01242 
01243 static long find_xref(void)
01244 {
01245   long currentpos, xref_pos;
01246   int tries = 10;
01247   char *start, *end, *number;
01248   if (debug)
01249     fprintf (stderr, "(find_xref");
01250   seek_end (pdf_input_file);
01251   pdf_file_size = tell_position (pdf_input_file);
01252   do {
01253     if (!backup_line()) {
01254       tries = 0;
01255       break;
01256     }
01257     currentpos = tell_position(pdf_input_file);
01258     fread (work_buffer, sizeof(char), strlen("startxref"),
01259           pdf_input_file);
01260     if (debug) {
01261       work_buffer[strlen("startxref")] = 0;
01262       fprintf (stderr, "[%s]\n", work_buffer);
01263     }
01264     seek_absolute(pdf_input_file, currentpos);
01265     tries--;
01266   } while (tries > 0 && strncmp (work_buffer, "startxref", strlen ("startxref")));
01267   if (tries <= 0)
01268     return 0;
01269   /* Skip rest of this line */
01270   mfgets (work_buffer, WORK_BUFFER_SIZE, pdf_input_file);
01271   /* Next line of input file should contain actual xref location */
01272   mfgets (work_buffer, WORK_BUFFER_SIZE, pdf_input_file);
01273   if (debug) {
01274     fprintf (stderr, "\n->[%s]<-\n", work_buffer);
01275   }
01276   start = work_buffer;
01277   end = start+strlen(work_buffer);
01278   skip_white(&start, end);
01279   xref_pos = (long) atof (number = parse_number (&start, end));
01280   RELEASE (number);
01281   if (debug) {
01282     fprintf (stderr, ")\n");
01283     fprintf (stderr, "xref @ %lu\n", xref_pos);
01284   }
01285   return xref_pos;
01286 }
01287 
01288 pdf_obj *parse_trailer (void)
01289 {
01290   char *start;
01291 #ifdef MEM_DEBUG
01292 MEM_START
01293 #endif
01294   /* This routine must be called with the file pointer located at
01295      the start of the trailer */
01296   /* Fill work_buffer and hope trailer fits.  This should
01297      be made a bit more robust sometime */
01298   if (fread (work_buffer, sizeof(char), WORK_BUFFER_SIZE,
01299             pdf_input_file) == 0 ||
01300       strncmp (work_buffer, "trailer", strlen("trailer"))) {
01301     fprintf (stderr, "No trailer.  Are you sure this is a PDF file?\n");
01302     fprintf (stderr, "\nbuffer:\n->%s<-\n", work_buffer);
01303     return NULL;
01304   }
01305   start = work_buffer + strlen("trailer");
01306   skip_white(&start, work_buffer+WORK_BUFFER_SIZE);
01307 #ifdef MEM_DEBUG
01308 MEM_END
01309 #endif
01310   return (parse_pdf_dict (&start, work_buffer+WORK_BUFFER_SIZE));
01311 }
01312 
01313 struct object 
01314 {
01315   unsigned long position;
01316   unsigned generation;
01317   /* Object numbers in original file and new file must have different
01318      object numbers.
01319      new_ref provides a reference for the object in the new file
01320      object space.  When it is first set, an object in the old file
01321      is copied to the new file with a new number.  new_ref remains set
01322      until the file is closed so that future references can access the
01323      object via new_ref instead of copying the object again */
01324   pdf_obj *direct;
01325   pdf_obj *indirect;
01326   int used;
01327 } *xref_table = NULL;
01328 long num_input_objects;
01329 
01330 long next_object (unsigned long obj)
01331 {
01332   /* routine tries to estimate an upper bound for character position
01333      of the end of the object, so it knows how big the buffer must be.
01334      The parsing routines require that the entire object be read into
01335      memory. It would be a major pain to rewrite them.  The worst case
01336      is that an object before an xref table will grab the whole table
01337      :-( */
01338   unsigned long i;
01339   long this_position, result = pdf_file_size;  /* Worst case */
01340   this_position = xref_table[obj].position;
01341   /* Check all other objects to find next one */
01342   for (i=0; i<num_input_objects; i++) {
01343     if ((xref_table[i].used) &&
01344        xref_table[i].position > this_position &&
01345        xref_table[i].position < result)
01346       result = xref_table[i].position;
01347   }
01348   return result;
01349 }
01350 
01351 /* The following routine returns a reference to an object existing
01352    only in the input file.  It does this as follows.  If the object
01353    has never been referenced before, it reads the object
01354    in and creates a reference to it.  Then it writes
01355    the object out, keeping the existing reference. If the
01356    object has been read in (and written out) before, it simply
01357    returns the retained existing reference to that object */
01358 
01359 pdf_obj *pdf_ref_file_obj (unsigned long obj_no)
01360 {
01361   pdf_obj *direct, *indirect;
01362 #ifdef MEM_DEBUG
01363 MEM_START
01364 #endif
01365   if (obj_no >= num_input_objects) {
01366     fprintf (stderr, "\n\npdf_ref_file_obj: nonexistent object\n");
01367     return NULL;
01368   }
01369   if (xref_table[obj_no].indirect != NULL) {
01370     return pdf_link_obj(xref_table[obj_no].indirect);
01371   }
01372   if ((direct = pdf_read_object (obj_no)) == NULL) {
01373     fprintf (stderr, "\npdf_ref_file_obj: Could not read object\n");
01374     return NULL;
01375   }
01376   indirect = pdf_ref_obj (direct);
01377   xref_table[obj_no].indirect = indirect;
01378   xref_table[obj_no].direct = direct;
01379   /* Make sure the caller can't doesn't free this object */
01380 #ifdef MEM_DEBUG
01381 MEM_END
01382 #endif
01383   return pdf_link_obj(indirect);
01384 }
01385 
01386 
01387 pdf_obj *pdf_new_ref (unsigned long label, int generation) 
01388 {
01389   pdf_obj *result;
01390   pdf_indirect *indirect;
01391   if (label >= num_input_objects || label < 0) {
01392     fprintf (stderr, "pdf_new_ref: Object doesn't exist\n");
01393     return NULL;
01394   }
01395   result = pdf_new_obj (PDF_INDIRECT);
01396   indirect = NEW (1, pdf_indirect);
01397   result -> data = indirect;
01398   indirect -> label = label;
01399   indirect -> generation = generation;
01400   indirect -> dirty = 1;
01401   indirect -> dirty_file = pdf_input_file;
01402   return result;
01403 }
01404 
01405 pdf_obj *pdf_read_object (unsigned long obj_no) 
01406 {
01407   long start_pos, end_pos;
01408   char *buffer, *number, *parse_pointer, *end;
01409   pdf_obj *result;
01410 #ifdef MEM_DEBUG
01411 MEM_START
01412 #endif
01413   if (debug) {
01414     fprintf (stderr, "\nread_object: obj=%lu\n", obj_no);
01415   }
01416   if (obj_no < 0 || obj_no >= num_input_objects) {
01417     fprintf (stderr, "\nTrying to read nonexistent object\n");
01418     return NULL;
01419   }
01420   if (!xref_table[obj_no].used) {
01421     fprintf (stderr, "\nTrying to read deleted object\n");
01422     return NULL;
01423   }
01424   if (debug) {
01425     fprintf (stderr, "\nobj@%lu\n", xref_table[obj_no].position);
01426   }
01427   seek_absolute (pdf_input_file, start_pos =
01428                xref_table[obj_no].position);
01429   end_pos = next_object (obj_no);
01430   if (debug) {
01431     fprintf (stderr, "\nendobj@%lu\n", end_pos);
01432   }
01433   buffer = NEW (end_pos - start_pos+1, char);
01434   fread (buffer, sizeof(char), end_pos-start_pos, pdf_input_file);
01435   buffer[end_pos-start_pos] = 0;
01436   if (debug) {
01437     fprintf (stderr, "\nobject:\n%s", buffer);
01438   }
01439   parse_pointer = buffer;
01440   end = buffer+(end_pos-start_pos);
01441   skip_white (&parse_pointer, end);
01442   number = parse_number (&parse_pointer, end);
01443   if ((int) atof(number) != obj_no) {
01444     fprintf (stderr, "Object number doesn't match\n");
01445     RELEASE (buffer);
01446     return NULL;
01447   }
01448   if (number != NULL)
01449     RELEASE(number);
01450   skip_white (&parse_pointer, end);
01451   number = parse_number (&parse_pointer, end);
01452   if (number != NULL)
01453     RELEASE(number);
01454   skip_white(&parse_pointer, end);
01455   if (strncmp(parse_pointer, "obj", strlen("obj"))) {
01456     fprintf (stderr, "Didn't find \"obj\"\n");
01457     RELEASE (buffer);
01458     return (NULL);
01459   }
01460   parse_pointer += strlen("obj");
01461   result = parse_pdf_object (&parse_pointer, end);
01462   skip_white (&parse_pointer, end);
01463   if (strncmp(parse_pointer, "endobj", strlen("endobj"))) {
01464     fprintf (stderr, "Didn't find \"endobj\"\n");
01465     if (result != NULL)
01466       pdf_release_obj (result);
01467     result = NULL;
01468   }
01469   RELEASE (buffer);
01470   return (result);
01471 #ifdef MEM_DEBUG
01472 MEM_END
01473 #endif
01474 }
01475 /* pdf_deref_obj always returns a link instead of the original */ 
01476 pdf_obj *pdf_deref_obj (pdf_obj *obj)
01477 {
01478   pdf_obj *result, *tmp;
01479   pdf_indirect *indirect;
01480   if (obj == NULL)
01481     return NULL;
01482   if (obj -> type != PDF_INDIRECT) {
01483     return pdf_link_obj (obj);
01484   }
01485   indirect = obj -> data;
01486   if (!(indirect -> dirty)) {
01487     ERROR ("Tried to deref a non-file object");
01488   }
01489   result = pdf_read_object (indirect -> label);
01490 
01491   if (debug){
01492     fprintf (stderr, "\npdf_deref_obj: read_object returned\n");
01493     pdf_write_obj (stderr, result);
01494   }
01495   
01496   while (result && result -> type == PDF_INDIRECT) {
01497     tmp = pdf_read_object (result -> label);
01498     pdf_release_obj (result);
01499     result = tmp;
01500   }
01501   return result;
01502 }
01503 
01504 /* extends the xref table if we get another segment
01505    with higher object numbers than the current object */
01506 static void extend_xref (long new_size) 
01507 {
01508   unsigned long i;
01509   xref_table = RENEW (xref_table, new_size,
01510                     struct object);
01511   for (i=num_input_objects; i<new_size; i++) {
01512     xref_table[i].direct = NULL;
01513     xref_table[i].indirect = NULL;
01514     xref_table[i].used = 0;
01515     xref_table[i].position = 0;
01516   }
01517   num_input_objects = new_size;
01518 }
01519 
01520 
01521 
01522 static int parse_xref (void)
01523 {
01524   unsigned long first_obj, num_table_objects;
01525   unsigned long i;
01526   /* This routine reads one xref segment.  It must be called
01527      positioned at the beginning of an xref table.  It may be called
01528      multiple times on the same file.  xref tables sometimes come in
01529      pieces */
01530   mfgets (work_buffer, WORK_BUFFER_SIZE, pdf_input_file);
01531   if (strncmp (work_buffer, "xref", strlen("xref"))) {
01532     fprintf (stderr, "No xref.  Are you sure this is a PDF file?\n");
01533     return 0;
01534   }
01535   /* Next line in file has first item and size of table */
01536   for (;;) {
01537     unsigned long current_pos;
01538     current_pos = tell_position (pdf_input_file);
01539     if (mfgets (work_buffer, WORK_BUFFER_SIZE, pdf_input_file) ==
01540        NULL)
01541       ERROR ("parse_xref: premature end of PDF file while parsing xref");
01542     if (!strncmp (work_buffer, "trailer", strlen ("trailer"))) {
01543       /* Backup... This is ugly, but it seems like the safest thing to
01544         do.  It is possible the trailer dictionary starts on the same
01545         logical line as the word trailer.  In that case, the mfgets
01546         call might have started to read the trailer dictionary and
01547         parse_trailer would fail */
01548       seek_absolute (pdf_input_file, current_pos);
01549       break;
01550     }
01551     sscanf (work_buffer, "%lu %lu", &first_obj, &num_table_objects);
01552     if (num_input_objects < first_obj+num_table_objects) {
01553       extend_xref (first_obj+num_table_objects);
01554     }
01555     if (debug) {
01556       fprintf (stderr, "\nfirstobj=%lu,number=%lu\n",
01557               first_obj,num_table_objects);
01558     }
01559     for (i=first_obj; i<first_obj+num_table_objects; i++) {
01560       fread (work_buffer, sizeof(char), 20, pdf_input_file);
01561       /* Don't overwrite positions that have already been set by a
01562         modified xref table.  We are working our way backwards
01563         through the reference table, so we only set "position" 
01564         if it hasn't been set yet. */
01565       if (xref_table[i].position == 0) {
01566        work_buffer[19] = 0;
01567        sscanf (work_buffer, "%lu %u", &(xref_table[i].position), 
01568               &(xref_table[i].generation));
01569       }
01570       if (debug) {
01571        fprintf (stderr, "pos: %lu gen: %u\n", xref_table[i].position,
01572                xref_table[i].generation);
01573       }
01574       if (work_buffer[17] != 'n' && work_buffer[17] != 'f') {
01575        fprintf (stderr, "PDF file is corrupt\n");
01576        fprintf (stderr, "[%s]\n", work_buffer);
01577        return 0;
01578       }
01579       if (work_buffer[17] == 'n')
01580        xref_table[i].used = 1;
01581       else
01582        xref_table[i].used = 0;
01583       xref_table[i].direct = NULL;
01584       xref_table[i].indirect = NULL;
01585     }
01586   }
01587   return 1;
01588 }
01589 
01590 pdf_obj *read_xref (void)
01591 {
01592   pdf_obj *main_trailer, *prev_trailer, *prev_xref, *xref_size;
01593   long xref_pos;
01594 #ifdef MEM_DEBUG
01595 MEM_START
01596 #endif  
01597   if ((xref_pos = find_xref()) == 0) {
01598     fprintf (stderr, "Can't find xref table.\n");
01599     return NULL;
01600   }
01601   if (debug) {
01602     fprintf(stderr, "xref@%lu\n", xref_pos);
01603   }
01604   /* Read primary xref table */
01605   seek_absolute (pdf_input_file, xref_pos);
01606   if (!parse_xref()) {
01607     fprintf (stderr,
01608             "\nCouldn't read xref table.  Is this a correct PDF file?\n");
01609     return NULL;
01610   }
01611   if ((main_trailer = parse_trailer()) == NULL) {
01612     fprintf (stderr,
01613             "\nCouldn't read xref trailer.  Is this a correct PDF file?\n");
01614     return NULL;
01615   }
01616   if (pdf_lookup_dict (main_trailer, "Root") == NULL ||
01617       (xref_size = pdf_lookup_dict (main_trailer, "Size")) == NULL) {
01618     fprintf (stderr,
01619             "\nTrailer doesn't have catalog or a size.  Is this a correct PDF file?\n");
01620     return NULL;
01621   }
01622   if (num_input_objects < pdf_number_value (xref_size)) {
01623     extend_xref (pdf_number_value (xref_size));
01624   }
01625   /* Read any additional xref tables */
01626   prev_trailer = pdf_link_obj (main_trailer);
01627   while ((prev_xref = pdf_lookup_dict (prev_trailer, "Prev")) != NULL) {
01628     xref_pos = pdf_number_value (prev_xref);
01629     seek_absolute (pdf_input_file, xref_pos);
01630     pdf_release_obj (prev_trailer);
01631     if (!parse_xref()) {
01632       fprintf (stderr,
01633               "\nCouldn't read xref table.  Is this a correct PDF file?\n");
01634       return NULL;
01635     }
01636     if ((prev_trailer = parse_trailer()) == NULL) {
01637       fprintf (stderr,
01638               "\nCouldn't read xref trailer.  Is this a correct PDF file?\n");
01639       return NULL;
01640     }
01641     if (debug) {
01642       fprintf (stderr, "\nprev_trailer:\n");
01643       pdf_write_obj (stderr, prev_trailer);
01644     }
01645   }
01646 #ifdef MEM_DEBUG
01647 MEM_END
01648 #endif  
01649   pdf_release_obj (prev_trailer);
01650   return main_trailer;
01651 }
01652 
01653 static char any_open = 0;
01654 
01655 pdf_obj *pdf_open (FILE *file)
01656 {
01657   pdf_obj *trailer;
01658 #ifdef MEM_DEBUG
01659 MEM_START
01660 #endif
01661   if (any_open) {
01662     fprintf (stderr, "\nOnly one PDF file may be open at one time.\n");
01663     any_open = 1;
01664     exit(1);
01665   }
01666   pdf_input_file = file;
01667   if (!check_for_pdf (pdf_input_file)) {
01668     fprintf (stderr, "pdf_open: Not a PDF 1.[1-3] file\n");
01669     return NULL;
01670   }
01671   if ((trailer = read_xref()) == NULL) {
01672     fprintf (stderr, "No trailer.\n");
01673     pdf_close ();
01674     return NULL;
01675   }
01676   if (debug) {
01677     fprintf (stderr, "\nDone with xref:\n");
01678   }
01679 #ifdef MEM_DEBUG
01680   MEM_END
01681 #endif
01682   return trailer;
01683 }
01684 
01685 void pdf_close (void)
01686 {
01687   /* Following loop must be iterated because each write could trigger
01688      an additional indirect reference of an object with a lower
01689      number! */
01690   unsigned long i;
01691   int done;
01692   if (debug) {
01693     fprintf (stderr, "\npdf_close:\n");
01694     fprintf (stderr, "pdf_input_file=%p\n", pdf_input_file);
01695   }
01696   do {
01697     done = 1;
01698     for (i=0; i<num_input_objects; i++) {
01699       if (xref_table[i].direct != NULL) {
01700        pdf_release_obj (xref_table[i].direct);
01701        xref_table[i].direct = NULL;
01702        done = 0;
01703       }
01704     }
01705   } while (!done);
01706   /* Now take care of the indirect objects
01707      They had to be left around until all the direct
01708      objects were flushed */
01709   for (i=0; i<num_input_objects; i++) {
01710     if (xref_table[i].indirect != NULL) {
01711       pdf_release_obj (xref_table[i].indirect);
01712     }
01713   }
01714   RELEASE (xref_table);
01715   xref_table = NULL;
01716   num_input_objects = 0;
01717   any_open = 0;
01718   pdf_input_file = NULL;
01719   if (debug) {
01720     fprintf (stderr, "\nexiting pdf_close:\n");
01721   }
01722 }
01723 
01724 int check_for_pdf (FILE *file) 
01725 {
01726   int result = 0;
01727   rewind (file);
01728   if (fread (work_buffer, sizeof(char), strlen("%PDF-1.x"), file) ==
01729       strlen("%PDF-1.x") &&
01730       !strncmp(work_buffer, "%PDF-1.", strlen("%PDF-1."))) {
01731     if (work_buffer[7] >= '0' && work_buffer[7] <= '0'+pdf_version)
01732       result = 1;
01733     else {
01734       fprintf (stderr, "\nVersion of PDF file (1.%c) is newer than version limit specification.\n", work_buffer[7]);
01735     }
01736   }
01737   return result;
01738 }