Back to index

tetex-bin  3.0
Parser.cc
Go to the documentation of this file.
00001 //========================================================================
00002 //
00003 // Parser.cc
00004 //
00005 // Copyright 1996-2003 Glyph & Cog, LLC
00006 //
00007 //========================================================================
00008 
00009 #include <aconf.h>
00010 
00011 #ifdef USE_GCC_PRAGMAS
00012 #pragma implementation
00013 #endif
00014 
00015 #include <stddef.h>
00016 #include "Object.h"
00017 #include "Array.h"
00018 #include "Dict.h"
00019 #include "Parser.h"
00020 #include "XRef.h"
00021 #include "Error.h"
00022 #ifndef NO_DECRYPTION
00023 #include "Decrypt.h"
00024 #endif
00025 
00026 Parser::Parser(XRef *xrefA, Lexer *lexerA) {
00027   xref = xrefA;
00028   lexer = lexerA;
00029   inlineImg = 0;
00030   lexer->getObj(&buf1);
00031   lexer->getObj(&buf2);
00032 }
00033 
00034 Parser::~Parser() {
00035   buf1.free();
00036   buf2.free();
00037   delete lexer;
00038 }
00039 
00040 #ifndef NO_DECRYPTION
00041 Object *Parser::getObj(Object *obj,
00042                      Guchar *fileKey, int keyLength,
00043                      int objNum, int objGen) {
00044 #else
00045 Object *Parser::getObj(Object *obj) {
00046 #endif
00047   char *key;
00048   Stream *str;
00049   Object obj2;
00050   int num;
00051 #ifndef NO_DECRYPTION
00052   Decrypt *decrypt;
00053   GString *s;
00054   char *p;
00055   int i;
00056 #endif
00057 
00058   // refill buffer after inline image data
00059   if (inlineImg == 2) {
00060     buf1.free();
00061     buf2.free();
00062     lexer->getObj(&buf1);
00063     lexer->getObj(&buf2);
00064     inlineImg = 0;
00065   }
00066 
00067   // array
00068   if (buf1.isCmd("[")) {
00069     shift();
00070     obj->initArray(xref);
00071     while (!buf1.isCmd("]") && !buf1.isEOF())
00072 #ifndef NO_DECRYPTION
00073       obj->arrayAdd(getObj(&obj2, fileKey, keyLength, objNum, objGen));
00074 #else
00075       obj->arrayAdd(getObj(&obj2));
00076 #endif
00077     if (buf1.isEOF())
00078       error(getPos(), "End of file inside array");
00079     shift();
00080 
00081   // dictionary or stream
00082   } else if (buf1.isCmd("<<")) {
00083     shift();
00084     obj->initDict(xref);
00085     while (!buf1.isCmd(">>") && !buf1.isEOF()) {
00086       if (!buf1.isName()) {
00087        error(getPos(), "Dictionary key must be a name object");
00088        shift();
00089       } else {
00090        key = copyString(buf1.getName());
00091        shift();
00092        if (buf1.isEOF() || buf1.isError()) {
00093          gfree(key);
00094          break;
00095        }
00096 #ifndef NO_DECRYPTION
00097        obj->dictAdd(key, getObj(&obj2, fileKey, keyLength, objNum, objGen));
00098 #else
00099        obj->dictAdd(key, getObj(&obj2));
00100 #endif
00101       }
00102     }
00103     if (buf1.isEOF())
00104       error(getPos(), "End of file inside dictionary");
00105     if (buf2.isCmd("stream")) {
00106       if ((str = makeStream(obj))) {
00107        obj->initStream(str);
00108 #ifndef NO_DECRYPTION
00109        if (fileKey) {
00110          str->getBaseStream()->doDecryption(fileKey, keyLength,
00111                                         objNum, objGen);
00112        }
00113 #endif
00114       } else {
00115        obj->free();
00116        obj->initError();
00117       }
00118     } else {
00119       shift();
00120     }
00121 
00122   // indirect reference or integer
00123   } else if (buf1.isInt()) {
00124     num = buf1.getInt();
00125     shift();
00126     if (buf1.isInt() && buf2.isCmd("R")) {
00127       obj->initRef(num, buf1.getInt());
00128       shift();
00129       shift();
00130     } else {
00131       obj->initInt(num);
00132     }
00133 
00134 #ifndef NO_DECRYPTION
00135   // string
00136   } else if (buf1.isString() && fileKey) {
00137     buf1.copy(obj);
00138     s = obj->getString();
00139     decrypt = new Decrypt(fileKey, keyLength, objNum, objGen);
00140     for (i = 0, p = obj->getString()->getCString();
00141         i < s->getLength();
00142         ++i, ++p) {
00143       *p = decrypt->decryptByte(*p);
00144     }
00145     delete decrypt;
00146     shift();
00147 #endif
00148 
00149   // simple object
00150   } else {
00151     buf1.copy(obj);
00152     shift();
00153   }
00154 
00155   return obj;
00156 }
00157 
00158 Stream *Parser::makeStream(Object *dict) {
00159   Object obj;
00160   Stream *str;
00161   Guint pos, endPos, length;
00162 
00163   // get stream start position
00164   lexer->skipToNextLine();
00165   pos = lexer->getPos();
00166 
00167   // get length
00168   dict->dictLookup("Length", &obj);
00169   if (obj.isInt()) {
00170     length = (Guint)obj.getInt();
00171     obj.free();
00172   } else {
00173     error(getPos(), "Bad 'Length' attribute in stream");
00174     obj.free();
00175     return NULL;
00176   }
00177 
00178   // check for length in damaged file
00179   if (xref && xref->getStreamEnd(pos, &endPos)) {
00180     length = endPos - pos;
00181   }
00182 
00183   // in badly damaged PDF files, we can run off the end of the input
00184   // stream immediately after the "stream" token
00185   if (!lexer->getStream()) {
00186     return NULL;
00187   }
00188 
00189   // make base stream
00190   str = lexer->getStream()->getBaseStream()->makeSubStream(pos, gTrue,
00191                                                     length, dict);
00192 
00193   // get filters
00194   str = str->addFilters(dict);
00195 
00196   // skip over stream data
00197   lexer->setPos(pos + length);
00198 
00199   // refill token buffers and check for 'endstream'
00200   shift();  // kill '>>'
00201   shift();  // kill 'stream'
00202   if (buf1.isCmd("endstream")) {
00203     shift();
00204   } else {
00205     error(getPos(), "Missing 'endstream'");
00206     str->ignoreLength();
00207   }
00208 
00209   return str;
00210 }
00211 
00212 void Parser::shift() {
00213   if (inlineImg > 0) {
00214     if (inlineImg < 2) {
00215       ++inlineImg;
00216     } else {
00217       // in a damaged content stream, if 'ID' shows up in the middle
00218       // of a dictionary, we need to reset
00219       inlineImg = 0;
00220     }
00221   } else if (buf2.isCmd("ID")) {
00222     lexer->skipChar();             // skip char after 'ID' command
00223     inlineImg = 1;
00224   }
00225   buf1.free();
00226   buf1 = buf2;
00227   if (inlineImg > 0)        // don't buffer inline image data
00228     buf2.initNull();
00229   else
00230     lexer->getObj(&buf2);
00231 }