Back to index

tetex-bin  3.0
PSTokenizer.cc
Go to the documentation of this file.
00001 //========================================================================
00002 //
00003 // PSTokenizer.cc
00004 //
00005 // Copyright 2002-2003 Glyph & Cog, LLC
00006 //
00007 //========================================================================
00008 
00009 #include <aconf.h>
00010 
00011 #ifdef USE_GCC_PRAGMAS
00012 #pragma implementation
00013 #endif
00014 
00015 #include <stdio.h>
00016 #include <stdlib.h>
00017 #include "PSTokenizer.h"
00018 
00019 //------------------------------------------------------------------------
00020 
// Character classification table, indexed by character code (0-255):
//   0 - ordinary character
//   1 - white space (NUL, TAB, LF, FF, CR, space)
//   2 - delimiter: % ( ) / < > [ ] { }
// Any nonzero entry ends a name or command.  The table is only ever
// read, so it is declared const.
static const char specialChars[256] = {
  1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,   // 0x: NUL TAB LF FF CR
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 1x
  1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,   // 2x: space % ( ) /
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,   // 3x: < >
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 4x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 5x: [ ]
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 6x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 7x: { }
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 8x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 9x
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ax
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // bx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // cx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // dx
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ex
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    // fx
};
00041 
00042 //------------------------------------------------------------------------
00043 
00044 PSTokenizer::PSTokenizer(int (*getCharFuncA)(void *), void *dataA) {
00045   getCharFunc = getCharFuncA;
00046   data = dataA;
00047   charBuf = -1;
00048 }
00049 
00050 PSTokenizer::~PSTokenizer() {
00051 }
00052 
00053 GBool PSTokenizer::getToken(char *buf, int size, int *length) {
00054   GBool comment, backslash;
00055   int c;
00056   int i;
00057 
00058   // skip whitespace and comments
00059   comment = gFalse;
00060   while (1) {
00061     if ((c = getChar()) == EOF) {
00062       buf[0] = '\0';
00063       *length = 0;
00064       return gFalse;
00065     }
00066     if (comment) {
00067       if (c == '\x0a' || c == '\x0d') {
00068        comment = gFalse;
00069       }
00070     } else if (c == '%') {
00071       comment = gTrue;
00072     } else if (specialChars[c] != 1) {
00073       break;
00074     }
00075   }
00076 
00077   // read a token
00078   i = 0;
00079   buf[i++] = c;
00080   if (c == '(') {
00081     backslash = gFalse;
00082     while ((c = lookChar()) != EOF) {
00083       if (i < size - 1) {
00084        buf[i++] = c;
00085       }
00086       getChar();
00087       if (c == '\\') {
00088        backslash = gTrue;
00089       } else if (!backslash && c == ')') {
00090        break;
00091       } else {
00092        backslash = gFalse;
00093       }
00094     }
00095   } else if (c == '<') {
00096     while ((c = lookChar()) != EOF) {
00097       getChar();
00098       if (i < size - 1) {
00099        buf[i++] = c;
00100       }
00101       if (c == '>') {
00102        break;
00103       }
00104     }
00105   } else if (c != '[' && c != ']') {
00106     while ((c = lookChar()) != EOF && !specialChars[c]) {
00107       getChar();
00108       if (i < size - 1) {
00109        buf[i++] = c;
00110       }
00111     }
00112   }
00113   buf[i] = '\0';
00114   *length = i;
00115 
00116   return gTrue;
00117 }
00118 
00119 int PSTokenizer::lookChar() {
00120   if (charBuf < 0) {
00121     charBuf = (*getCharFunc)(data);
00122   }
00123   return charBuf;
00124 }
00125 
00126 int PSTokenizer::getChar() {
00127   int c;
00128 
00129   if (charBuf < 0) {
00130     charBuf = (*getCharFunc)(data);
00131   }
00132   c = charBuf;
00133   charBuf = -1;
00134   return c;
00135 }