Back to index

tetex-bin  3.0
token.c
Go to the documentation of this file.
00001 /* $XConsortium: token.c,v 1.2 91/10/10 11:19:55 rws Exp $ */
00002 /* Copyright International Business Machines,Corp. 1991
00003  * All Rights Reserved
00004  *
00005  * License to use, copy, modify, and distribute this software
00006  * and its documentation for any purpose and without fee is
00007  * hereby granted, provided that the above copyright notice
00008  * appear in all copies and that both that copyright notice and
00009  * this permission notice appear in supporting documentation,
00010  * and that the name of IBM not be used in advertising or
00011  * publicity pertaining to distribution of the software without
00012  * specific, written prior permission.
00013  *
00014  * IBM PROVIDES THIS SOFTWARE "AS IS", WITHOUT ANY WARRANTIES
00015  * OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT
00016  * LIMITED TO ANY IMPLIED WARRANTIES OF MERCHANTABILITY,
00017  * FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT OF
00018  * THIRD PARTY RIGHTS.  THE ENTIRE RISK AS TO THE QUALITY AND
00019  * PERFORMANCE OF THE SOFTWARE, INCLUDING ANY DUTY TO SUPPORT
00020  * OR MAINTAIN, BELONGS TO THE LICENSEE.  SHOULD ANY PORTION OF
00021  * THE SOFTWARE PROVE DEFECTIVE, THE LICENSEE (NOT IBM) ASSUMES
00022  * THE ENTIRE COST OF ALL SERVICING, REPAIR AND CORRECTION.  IN
00023  * NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR
00024  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
00025  * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
00026  * CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
00027  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
00028  * SOFTWARE.
00029  */
00030 /* Authors: Sig Nin & Carol Thompson IBM Almaden Research Laboratory */
00031 #include "types.h"
00032 #include "t1stdio.h"
00033 #include "util.h"
00034 #include "digit.h"
00035 #include "token.h"
00036 #include "tokst.h"
00037 #include "hdigit.h"
00038  
00039 extern int T1Getc(struct F_FILE *f);
00040 extern int T1Ungetc(int c,struct F_FILE *f);
00041 /*
00042  * -------------------------------------------------------------------
00043  * Globals
00044  * -------------------------------------------------------------------
00045  */
00046  
00047 /* These variables are set by the caller */
00048 char           *tokenStartP;   /* Pointer to token buffer in VM */
00049 char           *tokenMaxP;     /* Pointer to last byte in buffer + 1 */
00050  
00051 /* These variables are set by TOKEN */
00052 int             tokenLength;   /* Characters in token */
00053 boolean         tokenTooLong;  /* Token too long for buffer */
00054 int             tokenType;     /* Type of token identified */
00055 psvalue         tokenValue;    /* Token value */
00056  
00057 /*
00058  * -------------------------------------------------------------------
00059  * Private variables
00060  * -------------------------------------------------------------------
00061  */
00062  
/* File the scanner is currently reading; set on entry to scan_token() */
static FILE    *inputFileP;

/* Write cursor: where the next token character will be stored */
static char    *tokenCharP;
00068  
00069 /*
00070  * -------------------------------------------------------------------
00071  * Private routines for manipulating numbers
00072  * -------------------------------------------------------------------
00073  */
00074  
/* Power of ten: table lookup for -64..63 (with 0 answered directly),
 * computed by P10() for anything outside that range.
 * NOTE: the argument is evaluated more than once.
 */
#define Exp10(e) \
((-64 <= (e) && (e) <= 63)\
 ? ((e) != 0\
    ? Exp10T[(e)+64]\
    : (DOUBLE)(1.0)\
   )\
 : P10(e)\
)
00083  
00084 static DOUBLE Exp10T[128] = {
00085   1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57,
00086   1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49,
00087   1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41,
00088   1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33,
00089   1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25,
00090   1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
00091   1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,
00092   1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
00093   1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
00094   1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
00095   1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23,
00096   1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31,
00097   1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
00098   1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47,
00099   1e48, 1e49, 1e50, 1e51, 1e52, 1e53, 1e54, 1e55,
00100   1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63
00101 };
00102  
00103 static DOUBLE P10(exponent)
00104   LONG exponent;
00105 {
00106   DOUBLE value, power;
00107  
00108   if (exponent < 0) {
00109     power = 0.1;
00110     value = (exponent & 1 ? power : 1.0);
00111     exponent = -(++exponent >> 1); /* portable C for -(exponent/2) */
00112   }
00113   else {
00114     power = 10.0;
00115     value = (exponent & 1 ? power : 1.0);
00116     exponent = exponent >> 1;
00117   }
00118  
00119   while(exponent > 0) {
00120     power *= power;
00121     if (exponent & 1)
00122       value *= power;
00123     exponent >>= 1;
00124   }
00125  
00126   return(value);
00127 }
00128  
00129 /*
00130  * -------------------------------------------------------------------
00131  * Private routines and macros for manipulating the input
00132  * -------------------------------------------------------------------
00133  */
00134  
/* Read the next character from the current input file. */
#define next_ch()    (getc(inputFileP))

/* Return a character to the input.
 *
 * Pushing back EOF fails, which is harmless: the next read reports
 * EOF again.
 *
 * NOTE:  The value of this macro (the character pushed, or EOF if
 * nothing was pushed) is an accident of the implementation; callers
 * should not rely on it.
 */
#define back_ch(ch)   (ungetc(ch, inputFileP))

/* Return a character to the input unless it is white space.
 *
 * White space is dropped.  After a carriage return (\r) the following
 * character is read as well: a linefeed is dropped together with the
 * carriage return, anything else is pushed back.
 *
 * NOTE:  ch must be an lvalue; it is overwritten in the \r case.
 */
#define back_ch_not_white(ch) \
(\
!isWHITE_SPACE(ch)\
 ? back_ch(ch)\
 : (((ch) != '\r')\
   ? EOF\
   : ((((ch) = next_ch()) != '\n')\
     ? back_ch(ch)\
     : EOF\
     )\
   )\
)
00169  
00170 /*
00171  * -------------------------------------------------------------------
00172  * Private routines and macros for manipulating the token buffer
00173  * -------------------------------------------------------------------
00174  */
00175  
/* Store a character at the token cursor and advance the cursor.
 * No bounds check is made -- use ONLY where the character is KNOWN
 * to fit in the token buffer.
 */
#define save_unsafe_ch(ch) (*tokenCharP++ = (ch))

/* Store a character if the cursor is still below tokenMaxP;
 * otherwise drop it and flag the token as too long.
 */
#define save_ch(ch) \
(!(tokenCharP < tokenMaxP)\
 ? (tokenTooLong = TRUE)\
 : save_unsafe_ch(ch)\
)
00188  
00189 /*
00190  * -------------------------------------------------------------------
00191  * Action Routines
00192  *
00193  *  These routines all
00194  *    -- take int ch as a parameter
00195  *    -- return int ch if no token was recognized, DONE otherwise
00196  *    -- leave the next character in the input, if returning DONE
00197  * -------------------------------------------------------------------
00198  */
00199  
#define DONE 256   /* action routine result: a complete token was recognized */
00201  
/* Action: drop the current character and read the one after it.
 * Returns the character read.
 */
static int next_char(ch)
  int ch;
{
  ch = next_ch();
  return ch;
}
00208  
/* Action: append the current character to the token (bounds checked)
 * and read the one after it.  Returns the character read.
 */
static int add_char(ch)
  int ch;
{
  int following;

  save_ch(ch);
  following = next_ch();
  return following;
}
00216  
00217  
00218 /* -------------------------------------------------------------------
00219  * Skip white space and comments
00220  */
00221  
/* Action: discard the current character and any white space after it.
 * Returns the first character read that is not white space.
 */
static int skip_space(ch)
  int ch;
{
  for (;;) {
    ch = next_ch();
    if (!isWHITE_SPACE(ch))
      return ch;
  }
}
00231  
/* Action: discard the current character and every following character
 * that isCOMMENT() accepts.  Returns the first character it rejects.
 */
static int skip_comment(ch)
  int ch;
{
  for (;;) {
    ch = next_ch();
    if (!isCOMMENT(ch))
      return ch;
  }
}
00241  
00242 /* -------------------------------------------------------------------
00243  * Collect value elements for a number
00244  */
00245  
00246 /* decimal integer or real number mantissa */
00247 static int m_sign;
00248 static LONG m_value;
00249 static LONG m_scale;
00250  
00251 /* real number exponent */
00252 static int e_sign;
00253 static LONG e_value;
00254 static LONG e_scale;
00255  
00256 /* radix number */
00257 static LONG r_base;
00258 static LONG r_value;
00259 static LONG r_scale;
00260  
00261 static int add_sign(ch)
00262   int ch;
00263 {
00264   m_sign = ch;
00265   save_unsafe_ch(ch);
00266   return(next_ch());
00267 }
00268  
00269 static int add_1st_digits(ch)
00270   int ch;
00271 {
00272   m_sign = '+';
00273   return(add_digits(ch));
00274 }
00275  
00276 static int add_digits(ch)
00277   int ch;
00278 {
00279   LONG value, p_value, scale;
00280   int digit;
00281  
00282   /* On entry, expect m_sign to be set to '+' or '-';
00283    *  ch is a decimal digit.
00284    * Expect at most one character saved at this point,
00285    *  a sign.  This routine will save up to 10 more
00286    *  characters without checking the buffer boundary.
00287    */
00288  
00289   value = ch - '0';
00290   save_unsafe_ch(ch);
00291   ch = next_ch();
00292  
00293   while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
00294     value = (value << 3) + (value << 1) + (ch - '0');
00295     save_unsafe_ch(ch);
00296     ch = next_ch();
00297   }
00298  
00299   /* Quick exit for small integers --
00300    *    |x| <= 10*((MAX_INTEGER/10)-1)+9
00301    *    |x| <= 2,147,483,639 for 32 bit integers
00302    */
00303   if (isNUMBER_ENDER(ch)) {
00304     back_ch_not_white(ch);
00305     tokenValue.integer = (m_sign == '-' ? -value : value);
00306     tokenType = TOKEN_INTEGER;
00307     return(DONE);
00308   }
00309  
00310   /* Handle additional digits.  Beyond the boundary case,
00311    *   10*(MAX_INTEGER/10) <= |number| <= MAX_INTEGER
00312    * just count the digits: the number is too large to
00313    * represent as an integer and will be returned as a real.
00314    * The mantissa of a real holds fewer bits than an integer.
00315    */
00316   p_value = value;
00317   value = (m_sign == '-' ? -value : value);
00318   scale = 0;
00319  
00320   if (isDECIMAL_DIGIT(ch)) {
00321  
00322     /* Handle the boundary case */
00323     if (p_value == (MAX_INTEGER/10)) {
00324       digit = ch - '0';
00325  
00326       /* Must handle positive and negative values separately  */
00327       /* for 2's complement arithmetic */
00328       if (value > 0) {
00329         if (digit <= MAX_INTEGER%10)
00330           value = (value << 3) + (value << 1) + digit;
00331         else
00332           ++scale;  /* Too big, just count it */
00333       }
00334       else {
00335         /* Use positive % operands for portability */
00336         if (digit <= -(MIN_INTEGER+10)%10)
00337           value = (value << 3) + (value << 1) - digit;
00338         else
00339           ++scale;  /* Too big, just count it */
00340       }
00341     }
00342     else
00343       ++scale;  /* Not boundary case, just count digit */
00344  
00345     save_unsafe_ch(ch);
00346     ch = next_ch();
00347  
00348     /* Continue scanning digits, but can't store them */
00349     while(isDECIMAL_DIGIT(ch)) {
00350       ++scale;
00351       save_ch(ch);
00352       ch = next_ch();
00353     }
00354   }
00355  
00356   /* Continue from here scanning radix integer or real */
00357   m_value = value;
00358   m_scale = scale;
00359  
00360   /* Initialize for possible real */
00361   e_sign = '+';
00362   e_value = 0;
00363   e_scale = 0;
00364  
00365   return(ch);
00366 }
00367  
00368 static int add_1st_decpt(ch)
00369   int ch;
00370 {
00371   m_sign = '+';
00372   return(add_decpt(ch));
00373 }
00374  
00375 static int add_decpt(ch)
00376   int ch;
00377 {
00378   /* On entry, expect m_sign to be set to '+' or '-' */
00379   m_value = 0;
00380   m_scale = 0;
00381   save_unsafe_ch(ch);
00382   return(next_ch());
00383 }
00384  
00385 static int add_fraction(ch)
00386   int ch;
00387 {
00388   LONG value, scale;
00389   int digit;
00390  
00391   /* On entry, expect m_value and m_scale to be initialized,
00392    * and m_sign to be set to '+' or '-'.  Expect m_value and m_sign
00393    * to be consistent (this is not checked).
00394    */
00395   value = m_value;
00396   scale = m_scale;
00397  
00398   /* Scan leading zeroes */
00399   if (value == 0) {
00400     while(ch == '0') {
00401       --scale;
00402       save_ch(ch);
00403       ch = next_ch();
00404     }
00405  
00406     /* Scan first significant digit */
00407     if (isDECIMAL_DIGIT(ch)) {
00408       --scale;
00409       value = ch - '0';
00410       value = (m_sign == '-' ? -value : value);
00411       save_ch(ch);
00412       ch = next_ch();
00413     }
00414     else
00415       /* no significant digits -- number is zero */
00416       scale = 0;
00417   }
00418   /* value != 0 || value == 0 && !isDECIMAL_DIGIT(ch) */
00419  
00420   /* Scan additional significant digits */
00421   if (isDECIMAL_DIGIT(ch)) {
00422     if (value > 0) {
00423       while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
00424         --scale;
00425         value = (value << 3) + (value << 1) + (ch - '0');
00426         save_ch(ch);
00427         ch = next_ch();
00428       }
00429       /* Check boundary case */
00430       if (isDECIMAL_DIGIT(ch) && value == (MAX_INTEGER/10)) {
00431         digit = ch - '0';
00432         if (digit <= MAX_INTEGER%10) {
00433           --scale;
00434           value = (value << 3) + (value << 1) + digit;
00435           save_ch(ch);
00436           ch = next_ch();
00437         }
00438       }
00439     }
00440     else {
00441       /* value < 0 */
00442       while(isDECIMAL_DIGIT(ch) && value > -(-(MIN_INTEGER+10)/10+1)) {
00443         /* Use positive / operands for portability */
00444         --scale;
00445         value = (value << 3) + (value << 1) - (ch - '0');
00446         save_ch(ch);
00447         ch = next_ch();
00448       }
00449       /* Check boundary case */
00450       if (isDECIMAL_DIGIT(ch)
00451           && value == -(-(MIN_INTEGER+10)/10+1)) {
00452         digit = ch - '0';
00453         if (digit <= -(MIN_INTEGER+10)%10) {
00454         /* Use positive % operands for portability */
00455           --scale;
00456           value = (value << 3) + (value << 1) - digit;
00457           save_ch(ch);
00458           ch = next_ch();
00459         }
00460       }
00461     }
00462  
00463     /* Additional digits can be discarded */
00464     while(isDECIMAL_DIGIT(ch)) {
00465       save_ch(ch);
00466       ch = next_ch();
00467     }
00468   }
00469  
00470   /* Store results */
00471   m_value = value;
00472   m_scale = scale;
00473  
00474   /* Initialize for possible real */
00475   e_sign = '+';
00476   e_value = 0;
00477   e_scale = 0;
00478  
00479   return(ch);
00480 }
00481  
00482 static int add_e_sign(ch)
00483   int ch;
00484 {
00485   e_sign = ch;
00486   save_ch(ch);
00487   return(next_ch());
00488 }
00489  
00490 static int add_exponent(ch)
00491   int ch;
00492 {
00493   LONG value, p_value;
00494   LONG scale = 0;
00495   int digit;
00496  
00497   /* On entry, expect e_sign to be set to '+' or '-' */
00498  
00499   value = ch - '0';
00500   save_ch(ch);
00501   ch = next_ch();
00502  
00503   while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
00504     value = (value << 3) + (value << 1) + (ch - '0');
00505     save_ch(ch);
00506     ch = next_ch();
00507   }
00508  
00509   p_value = value;
00510   value = (e_sign == '-' ? -value : value);
00511  
00512   /* Handle additional digits.  Beyond the boundary case,
00513    *   10*(MAX_INTEGER/10) <= |number| <= MAX_INTEGER
00514    * just count the digits: the number is too large to
00515    * represent as an integer.
00516    */
00517   if (isDECIMAL_DIGIT(ch)) {
00518  
00519     /* Examine boundary case */
00520     if (p_value == (MAX_INTEGER/10)) {
00521       digit = ch - '0';
00522  
00523       /* Must handle positive and negative values separately */
00524       /*  for 2's complement arithmetic */
00525       if (value > 0) {
00526         if (digit <= MAX_INTEGER%10)
00527           value = (value << 3) + (value << 1) + digit;
00528         else
00529           ++scale; /* Too big, just count it */
00530       }
00531       else {
00532         /* Use positive % operands for portability */
00533         if (digit <= -(MIN_INTEGER+10)%10)
00534           value = (value << 3) + (value << 1) - digit;
00535         else
00536           ++scale; /* Too big, just count it */
00537       }
00538     }
00539     else
00540       ++scale;  /* Not boundary case, just count digit */
00541  
00542     save_ch(ch);
00543     ch = next_ch();
00544  
00545     /* Continue scanning digits, but can't store any more */
00546     while(isDECIMAL_DIGIT(ch)) {
00547       ++scale;
00548       save_ch(ch);
00549       ch = next_ch();
00550     }
00551   }
00552  
00553   /* Store results */
00554   e_value = value;
00555   e_scale = scale;
00556  
00557   return(ch);
00558 }
00559  
00560 static int add_radix(ch)
00561   int ch;
00562 {
00563   if (2 <= m_value && m_value <= 36 && m_scale == 0) {
00564     r_base = m_value;
00565     save_ch(ch);
00566     return(next_ch());
00567   }
00568   else {
00569     /* Radix invalid, complete a name token */
00570     return(AAH_NAME(ch));
00571   }
00572 }
00573  
00574 static int add_r_digits(ch)
00575   int ch;
00576 {
00577   ULONG value;
00578   LONG radix, scale;
00579   int digit;
00580  
00581   /* NOTE:  The syntax of a radix number allows only for
00582    * values of zero or more.  The value will be stored as
00583    * a 32 bit integer, which PostScript then interprets
00584    * as signed.  This means, for example, that the numbers:
00585    *
00586    *     8#37777777777
00587    *    10#4294967295
00588    *    16#FFFFFFFF
00589    *    36#1Z141Z3
00590    *
00591    * are all interpreted as -1.  This routine implements this
00592    * idea explicitly:  it accumulates the number's value
00593    * as unsigned, then casts it to signed when done.
00594    */
00595  
00596   /* Expect r_base to be initialized */
00597   radix = r_base;
00598   value = 0;
00599   scale = 0;
00600  
00601   /* Scan leading zeroes */
00602   while(ch == '0') {
00603     save_ch(ch);
00604     ch = next_ch();
00605   }
00606  
00607   /* Handle first non-zero digit */
00608   if ((digit=digit_value[ch]) < radix) {
00609     value = digit;
00610     save_ch(ch);
00611     ch = next_ch();
00612  
00613     /* Add digits until boundary case reached */
00614     while((digit=digit_value[ch]) < radix
00615             && value < (MAX_ULONG / radix)) {
00616       value = value * radix + digit;
00617       save_ch(ch);
00618       ch = next_ch();
00619     };
00620  
00621     /* Scan remaining digits */
00622     if ((digit=digit_value[ch]) < radix) {
00623  
00624       /* Examine boundary case ---
00625        *   radix*(MAX_ULONG/radix) <= number <= MAX_ULONG
00626        */
00627       if (value == (MAX_ULONG/radix) && digit <= MAX_ULONG%radix)
00628         value = value * radix + digit;
00629       else
00630         ++scale;
00631  
00632       /* Continue scanning digits, but can't store them */
00633       save_ch(ch);
00634       ch = next_ch();
00635       while(digit_value[ch] < radix) {
00636         ++scale;
00637         save_ch(ch);
00638         ch = next_ch();
00639       }
00640     }
00641   }
00642  
00643   /* Store result */
00644   r_value = (LONG) value; /* result is signed */
00645   r_scale = scale;
00646  
00647   return(ch);
00648 }
00649  
00650 /* -------------------------------------------------------------------
00651  * Complete a number; set token type and done flag.
00652  * Put current input character back, if it is not white space.
00653  */
00654  
00655 /* Done: Radix Number */
00656 static int RADIX_NUMBER(ch)
00657   int ch;
00658 {
00659   back_ch_not_white(ch);
00660   if (r_scale == 0) {
00661     tokenValue.integer = r_value;
00662     tokenType = TOKEN_INTEGER;
00663   }
00664   else {
00665     tokenType = TOKEN_NAME;
00666   }
00667   return(DONE);
00668 }
00669  
00670 /* Done: Integer */
00671 static int INTEGER(ch)
00672   int ch;
00673 {
00674   back_ch_not_white(ch);
00675   if (m_scale == 0) {
00676     tokenValue.integer = m_value;
00677     tokenType = TOKEN_INTEGER;
00678   }
00679   else {
00680     tokenValue.real = (DOUBLE)(m_value) * Exp10(m_scale);
00681     tokenType = TOKEN_REAL;
00682   }
00683   return(DONE);
00684 }
00685  
00686 /* Done: Real */
00687 static int REAL(ch)
00688   int ch;
00689 {
00690   DOUBLE temp;
00691  
00692   back_ch_not_white(ch);
00693  
00694   /* NOTE: ignore e_scale, since e_value alone will cause
00695    *   exponent overflow if e_scale > 0.
00696    */
00697  
00698   /* HAZARD: exponent overflow of intermediate result
00699    * (e.g., in 370 floating point); this should not be a problem
00700    * with IEEE floating point.  Reduce exponent overflow hazard by
00701    * combining m_scale and e_value first, if they have different signs,
00702    * or multiplying m_value and one of the other factors, if both
00703    * m_scale and e_value are negative.
00704    */
00705   if ((m_scale >= 0 && e_value <= 0)
00706       || (m_scale <= 0 && e_value >= 0)) {
00707     tokenValue.real = (DOUBLE)(m_value) * Exp10(m_scale + e_value);
00708   }
00709   else {
00710     temp = (DOUBLE)(m_value) * Exp10(m_scale);
00711     tokenValue.real = temp * Exp10(e_value);
00712   }
00713  
00714   tokenType = TOKEN_REAL;
00715   return(DONE);
00716 }
00717  
00718  
00719 /* -------------------------------------------------------------------
00720  * Assemble a hex string; set token type and done flag.
00721  */
00722  
00723 /* Done: Hex String */
00724 static int HEX_STRING(ch)
00725   int ch;
00726 {
00727   int value;
00728  
00729   while(TRUE) {
00730  
00731     /* Process odd digit */
00732     ch = next_ch();
00733     if (!isHEX_DIGIT(ch)) {
00734  
00735       /* Skip white space */
00736       while(isWHITE_SPACE(ch))
00737         ch = next_ch();
00738  
00739       /* Check for terminator */
00740       if (!isHEX_DIGIT(ch)) {
00741         break;
00742       }
00743     }
00744     value = digit_value[ch] << 4;
00745  
00746     /* Process even digit */
00747     ch = next_ch();
00748     if (!isHEX_DIGIT(ch)) {
00749  
00750       /* Skip white space */
00751       while(isWHITE_SPACE(ch))
00752         ch = next_ch();
00753  
00754       /* Check for terminator */
00755       if (!isHEX_DIGIT(ch)) {
00756         save_ch(value);
00757         break;
00758       }
00759     }
00760     save_ch(value + digit_value[ch]);
00761   }
00762  
00763   /* Classify result, based on why loop ended */
00764   if (ch == '>')
00765     tokenType = TOKEN_HEX_STRING;
00766   else {
00767     /* save the invalid character for error reporting */
00768     save_ch(ch);
00769     tokenType = TOKEN_INVALID;
00770   }
00771  
00772   return(DONE);
00773 }
00774  
00775 /* -------------------------------------------------------------------
00776  * Assemble a string; set token type and done flag
00777  */
00778  
00779 /* Save a backslash-coded character in a string --
00780  *
00781  *   Store the proper character for special cases
00782  *   "\b", "\f", "\n", "\r", and "\t".
00783  *
00784  *   Decode and store octal-coded character, up to
00785  *   three octal digits, "\o", "\oo", and "\ooo".
00786  *
00787  *   The sequence "<newline>" is a line continuation,
00788  *   so consume both without storing anything.
00789  *
00790  *   The sequence "<EOF>" is an error; exit without
00791  *   storing anything and let the caller handle it.
00792  *
00793  *   For other characters, including the sequences
00794  *   "\\", "\(", and "\)", simply store the second
00795  *   character.
00796  */
00797 static void save_digraph(ch)
00798   int ch;
00799 {
00800   int value;
00801  
00802   switch (ch) {
00803  
00804     case 'b':   /* backspace */
00805       ch = '\b';
00806       break;
00807  
00808     case 'f':   /* formfeed */
00809       ch = '\f';
00810       break;
00811  
00812     case 'n':   /* newline */
00813       ch = '\n';
00814       break;
00815  
00816     case 'r':   /* carriage return */
00817       ch = '\r';
00818       break;
00819  
00820     case 't':   /* horizontal tab */
00821       ch = '\t';
00822       break;
00823  
00824     case '\n':  /* line continuation -- consume it */
00825       return;
00826  
00827     case '\r':  /* carriage return   -- consume it */
00828       ch = next_ch();   /* look at next character, is it \n?  */
00829       if (ch == '\n')  return;
00830       back_ch(ch);      /* if not a line feed, then return it */
00831       return;
00832  
00833     case EOF:   /* end of file -- forget it */
00834       return;
00835  
00836   default:
00837     /* scan up to three octal digits to get value */
00838     if (isOCTAL_DIGIT(ch)) {
00839       value = digit_value[ch];
00840       ch = next_ch();
00841       if (isOCTAL_DIGIT(ch)) {
00842         value = (value << 3) + digit_value[ch];
00843         ch = next_ch();
00844         if (isOCTAL_DIGIT(ch))
00845           value = (value << 3) + digit_value[ch];
00846         else
00847           back_ch(ch);
00848       }
00849       else
00850         back_ch(ch);
00851       ch = value;
00852     }
00853   }
00854  
00855   /* Found a character to save */
00856   save_ch(ch);
00857 }
00858  
00859 /* Done: String */
00860 static int STRING(ch)
00861   int ch;
00862 {
00863   int nest_level = 1;
00864  
00865   tokenType = TOKEN_STRING;
00866  
00867   do {
00868  
00869     ch = next_ch();
00870     while(!isSTRING_SPECIAL(ch)) {
00871       save_ch(ch);
00872       ch = next_ch();
00873     };
00874  
00875     switch (ch) {
00876  
00877      case '(':
00878        ++nest_level;
00879        save_ch(ch);
00880        break;
00881  
00882      case ')':
00883        if (--nest_level > 0)
00884          save_ch(ch);
00885        break;
00886  
00887      case '\\':
00888           save_digraph(next_ch());
00889         break;
00890  
00891      case '\r':
00892         /* All carriage returns (\r) are turned into linefeeds (\n)*/
00893           ch = next_ch();       /* get the next one, is it \n? */
00894           if (ch != '\n') {     /* if not, then put it back.   */
00895             back_ch(ch);
00896           }
00897           save_ch('\n');        /* in either case, save a linefeed */
00898         break;
00899  
00900  
00901      case EOF:
00902        tokenType = TOKEN_INVALID;  /* Unterminated string */
00903        nest_level = 0;
00904        break;
00905     }
00906  
00907   } while(nest_level > 0);
00908  
00909   return(DONE);
00910 }
00911  
00912  
00913 /* -------------------------------------------------------------------
00914  * Assemble a name; set token type and done flag.
00915  * Put current input character back, if it is not white space.
00916  */
00917  
00918 /* Done: Name
00919  *  (Safe version used to complete name tokens that
00920  *   start out looking like something else).
00921  */
00922  
00923 static int AAH_NAME(ch)
00924   int ch;
00925 {
00926   do {
00927     save_ch(ch);
00928     ch = next_ch();
00929   } while(isNAME(ch));
00930  
00931   back_ch_not_white(ch);
00932   tokenType = TOKEN_NAME;
00933   return(DONE);
00934 }
00935  
00936 /* Done: Name */
00937 static int NAME(ch)
00938   int ch;
00939 {
00940   save_unsafe_ch(ch);
00941   ch = next_ch();
00942   if (isNAME(ch)) {
00943     save_unsafe_ch(ch);
00944     ch = next_ch();
00945     if (isNAME(ch)) {
00946       save_unsafe_ch(ch);
00947       ch = next_ch();
00948       if (isNAME(ch)) {
00949         save_unsafe_ch(ch);
00950         ch = next_ch();
00951         if (isNAME(ch)) {
00952           save_unsafe_ch(ch);
00953           ch = next_ch();
00954           if (isNAME(ch)) {
00955             save_unsafe_ch(ch);
00956             ch = next_ch();
00957             if (isNAME(ch)) {
00958               save_unsafe_ch(ch);
00959               ch = next_ch();
00960               while(isNAME(ch)) {
00961                 save_ch(ch);
00962                 ch = next_ch();
00963               }
00964             }
00965           }
00966         }
00967       }
00968     }
00969   }
00970  
00971   back_ch_not_white(ch);
00972   tokenType = TOKEN_NAME;
00973   return(DONE);
00974 }
00975  
00976 /* Done: Literal Name */
00977 static int LITERAL_NAME(ch)
00978   int ch;
00979 {
00980   if (isNAME(ch)) {
00981     save_unsafe_ch(ch);
00982     ch = next_ch();
00983     if (isNAME(ch)) {
00984       save_unsafe_ch(ch);
00985       ch = next_ch();
00986       if (isNAME(ch)) {
00987         save_unsafe_ch(ch);
00988         ch = next_ch();
00989         if (isNAME(ch)) {
00990           save_unsafe_ch(ch);
00991           ch = next_ch();
00992           if (isNAME(ch)) {
00993             save_unsafe_ch(ch);
00994             ch = next_ch();
00995             if (isNAME(ch)) {
00996               save_unsafe_ch(ch);
00997               ch = next_ch();
00998               while(isNAME(ch)) {
00999                 save_ch(ch);
01000                 ch = next_ch();
01001               }
01002             }
01003           }
01004         }
01005       }
01006     }
01007   }
01008  
01009   back_ch_not_white(ch);
01010   tokenType = TOKEN_LITERAL_NAME;
01011   return(DONE);
01012 }
01013  
01014 /* Done: immediate Name */
01015 static int IMMED_NAME(ch)
01016   int ch;
01017 {
01018   ch = next_ch();
01019   if (isNAME(ch)) {
01020     save_unsafe_ch(ch);
01021     ch = next_ch();
01022     if (isNAME(ch)) {
01023       save_unsafe_ch(ch);
01024       ch = next_ch();
01025       if (isNAME(ch)) {
01026         save_unsafe_ch(ch);
01027         ch = next_ch();
01028         if (isNAME(ch)) {
01029           save_unsafe_ch(ch);
01030           ch = next_ch();
01031           if (isNAME(ch)) {
01032             save_unsafe_ch(ch);
01033             ch = next_ch();
01034             if (isNAME(ch)) {
01035               save_unsafe_ch(ch);
01036               ch = next_ch();
01037               while(isNAME(ch)) {
01038                 save_ch(ch);
01039                 ch = next_ch();
01040               }
01041             }
01042           }
01043         }
01044       }
01045     }
01046   }
01047  
01048   back_ch_not_white(ch);
01049   tokenType = TOKEN_IMMED_NAME;
01050   return(DONE);
01051 }
01052  
01053 /* Done: Name found while looking for something else */
01054 static int OOPS_NAME(ch)
01055   int ch;
01056 {
01057   back_ch_not_white(ch);
01058   tokenType = TOKEN_NAME;
01059   return(DONE);
01060 }
01061  
01062  
01063 /* -------------------------------------------------------------------
01064  * Complete a miscellaneous token; set token type and done flag.
01065  */
01066  
01067 /* Done: Unmatched Right Angle-Bracket */
01068 static int RIGHT_ANGLE(ch)
01069   int ch;
01070 {
01071   tokenType = TOKEN_RIGHT_ANGLE;
01072   return(DONE);
01073 }
01074  
01075 /* Done: Unmatched Right Parenthesis */
01076 static int RIGHT_PAREN(ch)
01077   int ch;
01078 {
01079   tokenType = TOKEN_RIGHT_PAREN;
01080   return(DONE);
01081 }
01082  
01083 /* Done: Left Brace */
01084 static int LEFT_BRACE(ch)
01085   int ch;
01086 {
01087   tokenType = TOKEN_LEFT_BRACE;
01088   return(DONE);
01089 }
01090  
01091 /* Done: Right Brace */
01092 static int RIGHT_BRACE(ch)
01093   int ch;
01094 {
01095   tokenType = TOKEN_RIGHT_BRACE;
01096   return(DONE);
01097 }
01098  
01099 /* Done: Left Bracket */
01100 static int LEFT_BRACKET(ch)
01101   int ch;
01102 {
01103   save_unsafe_ch(ch);
01104   tokenType = TOKEN_LEFT_BRACKET;
01105   return(DONE);
01106 }
01107  
01108 /* Done: Right Bracket */
01109 static int RIGHT_BRACKET(ch)
01110   int ch;
01111 {
01112   save_unsafe_ch(ch);
01113   tokenType = TOKEN_RIGHT_BRACKET;
01114   return(DONE);
01115 }
01116  
01117 /* Done: Break */
01118 static int BREAK_SIGNAL(ch)
01119   int ch;
01120 {
01121   tokenType = TOKEN_BREAK;
01122   return(DONE);
01123 }
01124  
01125 /* Done: No Token Found */
01126 static int NO_TOKEN(ch)
01127   int ch;
01128 {
01129   tokenType = TOKEN_EOF;
01130   return(DONE);
01131 }
01132  
01133  
01134 /*
01135  * -------------------------------------------------------------------
01136  *  scan_token -- scan one token from the input.  It uses a simple
01137  *    finite state machine to recognize token classes.
01138  *
01139  *  The input is from a file.
01140  *
01141  *  On entry --
01142  *
01143  *    inputP -> input PostScript object, a file.
01144  *    tokenStartP -> buffer in VM for accumulating the token.
01145  *    tokenMaxP -> last character in the token buffer
01146  *
01147  *  On exit --
01148  *
01149  *    tokenLength = number of characters in the token
01150  *    tokenTooLong = TRUE if the token did not fit in the buffer
01151  *    tokenType = code for the type of token parsed.
01152  *    tokenValue = converted value of a numeric token.
01153  *
01154  *
01155  * -------------------------------------------------------------------
01156  */
01157 void scan_token(inputP)
01158   psobj *inputP;
01159 {
01160   int ch;
01161   unsigned char *stateP = s0;
01162   unsigned char entry;
01163   int (*actionP)();
01164  
01165   /* Define input source */
01166   inputFileP = inputP->data.fileP;
01167   if (inputFileP == NULL)  {
01168     tokenType = TOKEN_EOF;
01169     return;
01170   }
01171  
01172   /* Ensure enough space for most cases
01173    * (so we don't have to keep checking)
01174    * The length needs to cover the maximum number
01175    * of save_unsafe_ch() calls that might be executed.
01176    * That number is 11 (a sign and 10 decimal digits, e.g.,
01177    * when scanning -2147483648), but use MAX_NAME_LEN
01178    * in case someone changes that without checking.
01179    */
01180   if (vm_free_bytes() < (MAX_NAME_LEN)) {
01181      if (!(vm_init())) {
01182         tokenLength = 0;
01183         tokenTooLong = TRUE;
01184         tokenType = TOKEN_NONE;
01185         tokenValue.integer = 0;
01186         return;
01187      }
01188   }
01189  
01190   tokenStartP = vm_next_byte();
01191  
01192   /* Reset token */
01193   tokenCharP = tokenStartP;
01194   tokenTooLong = FALSE;
01195  
01196   /* Scan one token */
01197   ch = next_ch();
01198   do {
01199     entry = stateP[ch];
01200     stateP = classActionTable[entry].nextStateP;
01201     actionP = classActionTable[entry].actionRoutineP;
01202     ch = (*actionP)(ch);
01203   } while(ch != DONE);
01204  
01205  
01206   /* Return results */
01207   tokenLength = tokenCharP - tokenStartP;
01208 }