Back to index

cell-binutils  2.17cvs20070401
safe-ctype.c
Go to the documentation of this file.
00001 /* <ctype.h> replacement macros.
00002 
00003    Copyright (C) 2000, 2001, 2002, 2003, 2004,
00004    2005 Free Software Foundation, Inc.
00005    Contributed by Zack Weinberg <zackw@stanford.edu>.
00006 
00007 This file is part of the libiberty library.
00008 Libiberty is free software; you can redistribute it and/or
00009 modify it under the terms of the GNU Library General Public
00010 License as published by the Free Software Foundation; either
00011 version 2 of the License, or (at your option) any later version.
00012 
00013 Libiberty is distributed in the hope that it will be useful,
00014 but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016 Library General Public License for more details.
00017 
00018 You should have received a copy of the GNU Library General Public
00019 License along with libiberty; see the file COPYING.LIB.  If
00020 not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
00021 Boston, MA 02110-1301, USA.  */
00022 
00023 /*
00024 
00025 @defvr Extension HOST_CHARSET
00026 This macro indicates the basic character set and encoding used by the
00027 host: more precisely, the encoding used for character constants in
00028 preprocessor @samp{#if} statements (the C "execution character set").
00029 It is defined by @file{safe-ctype.h}, and will be an integer constant
00030 with one of the following values:
00031 
00032 @ftable @code
00033 @item HOST_CHARSET_UNKNOWN
00034 The host character set is unknown - that is, not one of the next two
00035 possibilities.
00036 
00037 @item HOST_CHARSET_ASCII
00038 The host character set is ASCII.
00039 
00040 @item HOST_CHARSET_EBCDIC
00041 The host character set is some variant of EBCDIC.  (Only one of the
00042 nineteen EBCDIC varying characters is tested; exercise caution.)
00043 @end ftable
00044 @end defvr
00045 
00046 @deffn  Extension ISALPHA  (@var{c})
00047 @deffnx Extension ISALNUM  (@var{c})
00048 @deffnx Extension ISBLANK  (@var{c})
00049 @deffnx Extension ISCNTRL  (@var{c})
00050 @deffnx Extension ISDIGIT  (@var{c})
00051 @deffnx Extension ISGRAPH  (@var{c})
00052 @deffnx Extension ISLOWER  (@var{c})
00053 @deffnx Extension ISPRINT  (@var{c})
00054 @deffnx Extension ISPUNCT  (@var{c})
00055 @deffnx Extension ISSPACE  (@var{c})
00056 @deffnx Extension ISUPPER  (@var{c})
00057 @deffnx Extension ISXDIGIT (@var{c})
00058 
00059 These twelve macros are defined by @file{safe-ctype.h}.  Each has the
00060 same meaning as the corresponding macro (with name in lowercase)
00061 defined by the standard header @file{ctype.h}.  For example,
00062 @code{ISALPHA} returns true for alphabetic characters and false for
00063 others.  However, there are two differences between these macros and
00064 those provided by @file{ctype.h}:
00065 
00066 @itemize @bullet
00067 @item These macros are guaranteed to have well-defined behavior for all 
00068 values representable by @code{signed char} and @code{unsigned char}, and
00069 for @code{EOF}.
00070 
00071 @item These macros ignore the current locale; they are true for these
00072 fixed sets of characters:
00073 @multitable {@code{XDIGIT}} {yada yada yada yada yada yada yada yada}
00074 @item @code{ALPHA}  @tab @kbd{A-Za-z}
00075 @item @code{ALNUM}  @tab @kbd{A-Za-z0-9}
00076 @item @code{BLANK}  @tab @kbd{space tab}
00077 @item @code{CNTRL}  @tab @code{!PRINT}
00078 @item @code{DIGIT}  @tab @kbd{0-9}
00079 @item @code{GRAPH}  @tab @code{ALNUM || PUNCT}
00080 @item @code{LOWER}  @tab @kbd{a-z}
00081 @item @code{PRINT}  @tab @code{GRAPH ||} @kbd{space}
00082 @item @code{PUNCT}  @tab @kbd{`~!@@#$%^&*()_-=+[@{]@}\|;:'",<.>/?}
00083 @item @code{SPACE}  @tab @kbd{space tab \n \r \f \v}
00084 @item @code{UPPER}  @tab @kbd{A-Z}
00085 @item @code{XDIGIT} @tab @kbd{0-9A-Fa-f}
00086 @end multitable
00087 
00088 Note that, if the host character set is ASCII or a superset thereof,
00089 all these macros will return false for all values of @code{char} outside
00090 the range of 7-bit ASCII.  In particular, both ISPRINT and ISCNTRL return
00091 false for characters with numeric values from 128 to 255.
00092 @end itemize
00093 @end deffn
00094 
00095 @deffn  Extension ISIDNUM         (@var{c})
00096 @deffnx Extension ISIDST          (@var{c})
00097 @deffnx Extension IS_VSPACE       (@var{c})
00098 @deffnx Extension IS_NVSPACE      (@var{c})
00099 @deffnx Extension IS_SPACE_OR_NUL (@var{c})
00100 @deffnx Extension IS_ISOBASIC     (@var{c})
00101 These six macros are defined by @file{safe-ctype.h} and provide
00102 additional character classes which are useful when doing lexical
00103 analysis of C or similar languages.  They are true for the following
00104 sets of characters:
00105 
00106 @multitable {@code{SPACE_OR_NUL}} {yada yada yada yada yada yada yada yada}
00107 @item @code{IDNUM}        @tab @kbd{A-Za-z0-9_}
00108 @item @code{IDST}         @tab @kbd{A-Za-z_}
00109 @item @code{VSPACE}       @tab @kbd{\r \n}
00110 @item @code{NVSPACE}      @tab @kbd{space tab \f \v \0}
00111 @item @code{SPACE_OR_NUL} @tab @code{VSPACE || NVSPACE}
00112 @item @code{ISOBASIC}     @tab @code{VSPACE || NVSPACE || PRINT}
00113 @end multitable
00114 @end deffn
00115 
00116 */
00117 
00118 #include "ansidecl.h"
00119 #include <safe-ctype.h>
00120 #include <stdio.h>  /* for EOF */
00121 
/* Refuse to build unless EOF is -1, which (per the diagnostic below)
   <safe-ctype.h> requires.  */
#if !(EOF == -1)
 #error "<safe-ctype.h> requires EOF == -1"
#endif
00125 
/* Two-letter aliases for the _sch_is* names, so that the mask
   definitions that OR them together stay short.  */

/* Letters, digits and identifier characters.  */
#define lo _sch_islower
#define up _sch_isupper
#define di _sch_isdigit
#define xd _sch_isxdigit
#define is _sch_isidst

/* Printable / punctuation / control.  */
#define pr _sch_isprint
#define pn _sch_ispunct
#define cn _sch_iscntrl

/* Whitespace variants.  */
#define sp _sch_isspace
#define bl _sch_isblank
#define vs _sch_isvsp
#define nv _sch_isnvsp
00139 
/* Per-character class masks.  Each one ORs together the shorthand
   class names that apply to a single kind of character.  */

/* Printable characters.  */
#define D  (const unsigned short) (pr | di | xd)        /* decimal digit */
#define U  (const unsigned short) (pr | up | is)        /* upper case letter */
#define XU (const unsigned short) (pr | up | is | xd)   /* uppercase hex digit */
#define L  (const unsigned short) (pr | lo | is)        /* lower case letter */
#define XL (const unsigned short) (pr | lo | is | xd)   /* lowercase hex digit */
#define P  (const unsigned short) (pr | pn)             /* punctuation */
#define _  (const unsigned short) (pr | pn | is)        /* underscore */
#define S  (const unsigned short) (pr | sp | bl | nv)   /* space */

/* Non-printable characters.  */
#define C  (const unsigned short) (cn)                  /* control character */
#define Z  (const unsigned short) (cn | nv)             /* NUL */
#define T  (const unsigned short) (cn | sp | bl | nv)   /* tab */
#define M  (const unsigned short) (cn | sp | nv)        /* cursor movement: \f \v */
#define V  (const unsigned short) (cn | sp | vs)        /* vertical space: \r \n */
00155 
00156 /* Are we ASCII? */
00157 #if HOST_CHARSET == HOST_CHARSET_ASCII
00158 
00159 const unsigned short _sch_istable[256] =
00160 {
00161   Z,  C,  C,  C,   C,  C,  C,  C,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
00162   C,  T,  V,  M,   M,  V,  C,  C,   /* BS  HT  LF  VT   FF  CR  SO  SI  */
00163   C,  C,  C,  C,   C,  C,  C,  C,   /* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */
00164   C,  C,  C,  C,   C,  C,  C,  C,   /* CAN EM  SUB ESC  FS  GS  RS  US  */
00165   S,  P,  P,  P,   P,  P,  P,  P,   /* SP  !   "   #    $   %   &   '   */
00166   P,  P,  P,  P,   P,  P,  P,  P,   /* (   )   *   +    ,   -   .   /   */
00167   D,  D,  D,  D,   D,  D,  D,  D,   /* 0   1   2   3    4   5   6   7   */
00168   D,  D,  P,  P,   P,  P,  P,  P,   /* 8   9   :   ;    <   =   >   ?   */
00169   P, XU, XU, XU,  XU, XU, XU,  U,   /* @   A   B   C    D   E   F   G   */
00170   U,  U,  U,  U,   U,  U,  U,  U,   /* H   I   J   K    L   M   N   O   */
00171   U,  U,  U,  U,   U,  U,  U,  U,   /* P   Q   R   S    T   U   V   W   */
00172   U,  U,  U,  P,   P,  P,  P,  _,   /* X   Y   Z   [    \   ]   ^   _   */
00173   P, XL, XL, XL,  XL, XL, XL,  L,   /* `   a   b   c    d   e   f   g   */
00174   L,  L,  L,  L,   L,  L,  L,  L,   /* h   i   j   k    l   m   n   o   */
00175   L,  L,  L,  L,   L,  L,  L,  L,   /* p   q   r   s    t   u   v   w   */
00176   L,  L,  L,  P,   P,  P,  P,  C,   /* x   y   z   {    |   }   ~   DEL */
00177 
00178   /* high half of unsigned char is locale-specific, so all tests are
00179      false in "C" locale */
00180   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
00181   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
00182   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
00183   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
00184 
00185   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
00186   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
00187   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
00188   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
00189 };
00190 
/* Lowercase mapping for the ASCII host character set, indexed by
   character value: 'A'-'Z' map to 'a'-'z', every other value maps to
   itself.  Laid out sixteen entries per row.  */
const unsigned char _sch_tolower[256] =
{
  /* 0x00 */   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
  /* 0x10 */  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
  /* 0x20 */  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
  /* 0x30 */  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,

  /* Upper case letters are folded to lower case.  */
  /* 0x40 */  64, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x50 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  91,  92,  93,  94,  95,

  /* Lower case letters map to themselves.  */
  /* 0x60 */  96, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 123, 124, 125, 126, 127,

  /* The high half is passed through unchanged.  */
  /* 0x80 */ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
  /* 0x90 */ 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
  /* 0xA0 */ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
  /* 0xB0 */ 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
  /* 0xC0 */ 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
  /* 0xD0 */ 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
  /* 0xE0 */ 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
  /* 0xF0 */ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
};
00219 
/* Uppercase mapping for the ASCII host character set, indexed by
   character value: 'a'-'z' map to 'A'-'Z', every other value maps to
   itself.  Laid out sixteen entries per row.  */
const unsigned char _sch_toupper[256] =
{
  /* 0x00 */   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
  /* 0x10 */  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
  /* 0x20 */  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
  /* 0x30 */  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,

  /* Upper case letters map to themselves.  */
  /* 0x40 */  64, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
  /* 0x50 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',  91,  92,  93,  94,  95,

  /* Lower case letters are folded to upper case.  */
  /* 0x60 */  96, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
  /* 0x70 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 123, 124, 125, 126, 127,

  /* The high half is passed through unchanged.  */
  /* 0x80 */ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
  /* 0x90 */ 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
  /* 0xA0 */ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
  /* 0xB0 */ 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
  /* 0xC0 */ 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
  /* 0xD0 */ 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
  /* 0xE0 */ 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
  /* 0xF0 */ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
};
00248 
00249 #else
00250 # if HOST_CHARSET == HOST_CHARSET_EBCDIC
00251   #error "FIXME: write tables for EBCDIC"
00252 # else
00253   #error "Unrecognized host character set"
00254 # endif
00255 #endif