Back to index

plt-scheme  4.2.1
char.c
Go to the documentation of this file.
00001 /*
00002   MzScheme
00003   Copyright (c) 2004-2009 PLT Scheme Inc.
00004   Copyright (c) 1995-2001 Matthew Flatt
00005 
00006     This library is free software; you can redistribute it and/or
00007     modify it under the terms of the GNU Library General Public
00008     License as published by the Free Software Foundation; either
00009     version 2 of the License, or (at your option) any later version.
00010 
00011     This library is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014     Library General Public License for more details.
00015 
00016     You should have received a copy of the GNU Library General Public
00017     License along with this library; if not, write to the Free
00018     Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019     Boston, MA 02110-1301 USA.
00020 
00021   libscheme
00022   Copyright (c) 1994 Brent Benson
00023   All rights reserved.
00024 */
00025 
00026 #include "schpriv.h"
00027 #include <ctype.h>
00028 
00029 /* globals */
00030 #include "schuchar.inc"
00031 Scheme_Object **scheme_char_constants;
00032 
00033 /* locals */
00034 static Scheme_Object *char_p (int argc, Scheme_Object *argv[]);
00035 static Scheme_Object *char_eq (int argc, Scheme_Object *argv[]);
00036 static Scheme_Object *char_lt (int argc, Scheme_Object *argv[]);
00037 static Scheme_Object *char_gt (int argc, Scheme_Object *argv[]);
00038 static Scheme_Object *char_lt_eq (int argc, Scheme_Object *argv[]);
00039 static Scheme_Object *char_gt_eq (int argc, Scheme_Object *argv[]);
00040 static Scheme_Object *char_eq_ci (int argc, Scheme_Object *argv[]);
00041 static Scheme_Object *char_lt_ci (int argc, Scheme_Object *argv[]);
00042 static Scheme_Object *char_gt_ci (int argc, Scheme_Object *argv[]);
00043 static Scheme_Object *char_lt_eq_ci (int argc, Scheme_Object *argv[]);
00044 static Scheme_Object *char_gt_eq_ci (int argc, Scheme_Object *argv[]);
00045 static Scheme_Object *char_alphabetic (int argc, Scheme_Object *argv[]);
00046 static Scheme_Object *char_numeric (int argc, Scheme_Object *argv[]);
00047 static Scheme_Object *char_whitespace (int argc, Scheme_Object *argv[]);
00048 static Scheme_Object *char_symbolic (int argc, Scheme_Object *argv[]);
00049 static Scheme_Object *char_graphic (int argc, Scheme_Object *argv[]);
00050 static Scheme_Object *char_blank (int argc, Scheme_Object *argv[]);
00051 static Scheme_Object *char_control (int argc, Scheme_Object *argv[]);
00052 static Scheme_Object *char_punctuation (int argc, Scheme_Object *argv[]);
00053 static Scheme_Object *char_upper_case (int argc, Scheme_Object *argv[]);
00054 static Scheme_Object *char_lower_case (int argc, Scheme_Object *argv[]);
00055 static Scheme_Object *char_title_case (int argc, Scheme_Object *argv[]);
00056 static Scheme_Object *char_to_integer (int argc, Scheme_Object *argv[]);
00057 static Scheme_Object *integer_to_char (int argc, Scheme_Object *argv[]);
00058 static Scheme_Object *char_upcase (int argc, Scheme_Object *argv[]);
00059 static Scheme_Object *char_downcase (int argc, Scheme_Object *argv[]);
00060 static Scheme_Object *char_titlecase (int argc, Scheme_Object *argv[]);
00061 static Scheme_Object *char_foldcase (int argc, Scheme_Object *argv[]);
00062 static Scheme_Object *char_general_category (int argc, Scheme_Object *argv[]);
00063 static Scheme_Object *char_utf8_length (int argc, Scheme_Object *argv[]);
00064 static Scheme_Object *char_map_list (int argc, Scheme_Object *argv[]);
00065 
00066 static Scheme_Object *general_category_symbols[NUM_GENERAL_CATEGORIES];
00067 
00068 void scheme_init_portable_case(void)
00069 {
00070   init_uchar_table();
00071 }
00072 
00073 void scheme_init_char (Scheme_Env *env)
00074 {
00075   Scheme_Object *p;
00076   int i;
00077 
00078   REGISTER_SO(scheme_char_constants);
00079   REGISTER_SO(general_category_symbols);
00080 
00081   scheme_char_constants = 
00082     (Scheme_Object **)scheme_malloc_eternal(256 * sizeof(Scheme_Object*));
00083     
00084   for (i = 0; i < 256; i++) {
00085     Scheme_Object *sc;
00086     sc = scheme_alloc_eternal_small_object();
00087     sc->type = scheme_char_type;
00088     SCHEME_CHAR_VAL(sc) = i;
00089     
00090     scheme_char_constants[i] = sc;
00091   }
00092 
00093   p = scheme_make_folding_prim(char_p, "char?", 1, 1, 1);
00094   SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED;
00095   scheme_add_global_constant("char?", p, env);
00096 
00097   p = scheme_make_folding_prim(char_eq, "char=?", 2, -1, 1);
00098   SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED;
00099   scheme_add_global_constant("char=?", p, env);
00100 
00101   scheme_add_global_constant("char<?", 
00102                           scheme_make_folding_prim(char_lt, 
00103                                                 "char<?", 
00104                                                 2, -1, 1), 
00105                           env);
00106   scheme_add_global_constant("char>?", 
00107                           scheme_make_folding_prim(char_gt, 
00108                                                 "char>?", 
00109                                                 2, -1, 1), 
00110                           env);
00111   scheme_add_global_constant("char<=?", 
00112                           scheme_make_folding_prim(char_lt_eq, 
00113                                                 "char<=?", 
00114                                                 2, -1, 1), 
00115                           env);
00116   scheme_add_global_constant("char>=?", 
00117                           scheme_make_folding_prim(char_gt_eq, 
00118                                                 "char>=?", 
00119                                                 2, -1, 1), 
00120                           env);
00121   scheme_add_global_constant("char-ci=?", 
00122                           scheme_make_folding_prim(char_eq_ci, 
00123                                                 "char-ci=?", 
00124                                                 2, -1, 1), 
00125                           env);
00126   scheme_add_global_constant("char-ci<?", 
00127                           scheme_make_folding_prim(char_lt_ci, 
00128                                                 "char-ci<?", 
00129                                                 2, -1, 1), 
00130                           env);
00131   scheme_add_global_constant("char-ci>?", 
00132                           scheme_make_folding_prim(char_gt_ci, 
00133                                                 "char-ci>?", 
00134                                                 2, -1, 1), 
00135                           env);
00136   scheme_add_global_constant("char-ci<=?", 
00137                           scheme_make_folding_prim(char_lt_eq_ci, 
00138                                                 "char-ci<=?", 
00139                                                 2, -1, 1), 
00140                           env);
00141   scheme_add_global_constant("char-ci>=?", 
00142                           scheme_make_folding_prim(char_gt_eq_ci, 
00143                                                 "char-ci>=?", 
00144                                                 2, -1, 1), 
00145                           env);
00146   scheme_add_global_constant("char-alphabetic?", 
00147                           scheme_make_folding_prim(char_alphabetic, 
00148                                                 "char-alphabetic?", 
00149                                                 1, 1, 1), 
00150                           env);
00151   scheme_add_global_constant("char-numeric?", 
00152                           scheme_make_folding_prim(char_numeric, 
00153                                                 "char-numeric?", 
00154                                                 1, 1, 1), 
00155                           env);
00156   scheme_add_global_constant("char-symbolic?", 
00157                           scheme_make_folding_prim(char_symbolic, 
00158                                                 "char-symbolic?", 
00159                                                 1, 1, 1), 
00160                           env);
00161   scheme_add_global_constant("char-graphic?", 
00162                           scheme_make_folding_prim(char_graphic, 
00163                                                 "char-graphic?", 
00164                                                 1, 1, 1), 
00165                           env);
00166   scheme_add_global_constant("char-whitespace?", 
00167                           scheme_make_folding_prim(char_whitespace, 
00168                                                 "char-whitespace?", 
00169                                                 1, 1, 1), 
00170                           env);
00171   scheme_add_global_constant("char-blank?", 
00172                           scheme_make_folding_prim(char_blank, 
00173                                                 "char-blank?", 
00174                                                 1, 1, 1), 
00175                           env);
00176   scheme_add_global_constant("char-iso-control?", 
00177                           scheme_make_folding_prim(char_control, 
00178                                                 "char-iso-control?", 
00179                                                 1, 1, 1), 
00180                           env);
00181   scheme_add_global_constant("char-punctuation?", 
00182                           scheme_make_folding_prim(char_punctuation, 
00183                                                 "char-punctuation?", 
00184                                                 1, 1, 1), 
00185                           env);
00186   scheme_add_global_constant("char-upper-case?", 
00187                           scheme_make_folding_prim(char_upper_case, 
00188                                                 "char-upper-case?", 
00189                                                 1, 1, 1),
00190                           env);
00191   scheme_add_global_constant("char-title-case?", 
00192                           scheme_make_folding_prim(char_title_case, 
00193                                                 "char-title-case?", 
00194                                                 1, 1, 1),
00195                           env);
00196   scheme_add_global_constant("char-lower-case?", 
00197                           scheme_make_folding_prim(char_lower_case, 
00198                                                 "char-lower-case?", 
00199                                                 1, 1, 1), 
00200                           env);
00201   scheme_add_global_constant("char-title-case?", 
00202                           scheme_make_folding_prim(char_title_case, 
00203                                                 "char-title-case?", 
00204                                                 1, 1, 1), 
00205                           env);
00206   scheme_add_global_constant("char->integer", 
00207                           scheme_make_folding_prim(char_to_integer, 
00208                                                 "char->integer", 
00209                                                 1, 1, 1),
00210                           env);
00211   scheme_add_global_constant("integer->char",
00212                           scheme_make_folding_prim(integer_to_char, 
00213                                                 "integer->char",
00214                                                 1, 1, 1), 
00215                           env);
00216   scheme_add_global_constant("char-upcase", 
00217                           scheme_make_folding_prim(char_upcase, 
00218                                                 "char-upcase", 
00219                                                 1, 1, 1), 
00220                           env);
00221   scheme_add_global_constant("char-downcase", 
00222                           scheme_make_folding_prim(char_downcase, 
00223                                                 "char-downcase", 
00224                                                 1, 1, 1),
00225                           env);
00226   scheme_add_global_constant("char-titlecase", 
00227                           scheme_make_folding_prim(char_titlecase, 
00228                                                 "char-titlecase", 
00229                                                 1, 1, 1),
00230                           env);
00231   scheme_add_global_constant("char-foldcase", 
00232                           scheme_make_folding_prim(char_foldcase, 
00233                                                 "char-foldcase", 
00234                                                 1, 1, 1),
00235                           env);
00236   scheme_add_global_constant("char-general-category", 
00237                           scheme_make_folding_prim(char_general_category, 
00238                                                 "char-general-category", 
00239                                                 1, 1, 1),
00240                           env);
00241 
00242   scheme_add_global_constant("char-utf-8-length", 
00243                           scheme_make_folding_prim(char_utf8_length, 
00244                                                 "char-utf-8-length", 
00245                                                 1, 1, 1),
00246                           env);
00247 
00248   scheme_add_global_constant("make-known-char-range-list", 
00249                           scheme_make_immed_prim(char_map_list, 
00250                                               "make-known-char-range-list", 
00251                                               0, 0),
00252                           env);
00253 }
00254 
00255 Scheme_Object *scheme_make_char(mzchar ch)
00256 {
00257   Scheme_Object *o;
00258 
00259   if (ch < 256)
00260     return scheme_char_constants[ch];
00261   
00262   o = scheme_malloc_small_atomic_tagged(sizeof(Scheme_Small_Object));
00263   CLEAR_KEY_FIELD(o);
00264   o->type = scheme_char_type;
00265   SCHEME_CHAR_VAL(o) = ch;
00266 
00267   return o;
00268 }
00269 
00270 Scheme_Object *scheme_make_char_or_nul(mzchar v)
00271 {
00272   if ((v >= 0) 
00273       && (v <= 0x10FFFF)
00274       && ((v < 0xD800) || (v > 0xDFFF)))
00275     return scheme_make_char(v);
00276 
00277   return scheme_char_constants[0];
00278 }
00279 
00280 /* locals */
00281 
00282 static Scheme_Object *
00283 char_p (int argc, Scheme_Object *argv[])
00284 {
00285   return (SCHEME_CHARP(argv[0]) ? scheme_true : scheme_false);
00286 }
00287 
00288 #define charSTD_FOLDCASE(nl) nl;
00289 #define charNO_FOLDCASE(nl) /* empty */
00290 
00291 #define GEN_CHAR_COMP(func_name, scheme_name, comp, FOLDCASE) \
00292  static Scheme_Object *func_name(int argc, Scheme_Object *argv[])     \
00293  { int c, prev, i; Scheme_Object *rv = scheme_true; \
00294    if (!SCHEME_CHARP(argv[0]))      \
00295      scheme_wrong_type(#scheme_name, "character", 0, argc, argv);     \
00296    prev = SCHEME_CHAR_VAL(argv[0]);     \
00297    FOLDCASE(prev = scheme_tofold(prev)) \
00298    for (i = 1; i < argc; i++) {     \
00299      if (!SCHEME_CHARP(argv[i]))      \
00300        scheme_wrong_type(#scheme_name, "character", i, argc, argv);     \
00301      c = SCHEME_CHAR_VAL(argv[i]);     \
00302      FOLDCASE(c = scheme_tofold(c)) \
00303      if (!(prev comp c)) rv = scheme_false;   \
00304      prev = c;     \
00305    }     \
00306    return rv;     \
00307  }
00308 
00309 GEN_CHAR_COMP(char_eq, char=?, ==, charNO_FOLDCASE)
00310 GEN_CHAR_COMP(char_lt, char<?, <, charNO_FOLDCASE)
00311 GEN_CHAR_COMP(char_gt, char>?, >, charNO_FOLDCASE)
00312 GEN_CHAR_COMP(char_lt_eq, char<=?, <=, charNO_FOLDCASE)
00313 GEN_CHAR_COMP(char_gt_eq, char>=?, >=, charNO_FOLDCASE)
00314 
00315 GEN_CHAR_COMP(char_eq_ci, char-ci=?, ==, charSTD_FOLDCASE)
00316 GEN_CHAR_COMP(char_lt_ci, char-ci<?, <, charSTD_FOLDCASE)
00317 GEN_CHAR_COMP(char_gt_ci, char-ci>?, >, charSTD_FOLDCASE)
00318 GEN_CHAR_COMP(char_lt_eq_ci, char-ci<=?, <=, charSTD_FOLDCASE)
00319 GEN_CHAR_COMP(char_gt_eq_ci, char-ci>=?, >=, charSTD_FOLDCASE)
00320 
00321 #define GEN_CHAR_TEST(func_name, scheme_name, pred) \
00322 static Scheme_Object *func_name (int argc, Scheme_Object *argv[]) \
00323 { \
00324   mzchar c;    \
00325   if (!SCHEME_CHARP(argv[0]))  \
00326     scheme_wrong_type(scheme_name, "character", 0, argc, argv); \
00327   c = SCHEME_CHAR_VAL(argv[0]);                    \
00328   return (pred(c) ? scheme_true : scheme_false);   \
00329 }
00330      
00331 GEN_CHAR_TEST(char_numeric, "char-numeric?", scheme_isdigit)
00332 GEN_CHAR_TEST(char_alphabetic, "char-alphabetic?", scheme_isalpha)
00333 GEN_CHAR_TEST(char_whitespace, "char-whitespace?", scheme_isspace)
00334 GEN_CHAR_TEST(char_blank, "char-blank?", scheme_isblank)
00335 GEN_CHAR_TEST(char_control, "char-iso-control?", scheme_iscontrol)
00336 GEN_CHAR_TEST(char_punctuation, "char-punctuation?", scheme_ispunc)
00337 GEN_CHAR_TEST(char_symbolic, "char-symbolic?", scheme_issymbol)
00338 GEN_CHAR_TEST(char_graphic, "char-graphic?", scheme_isgraphic)
00339 GEN_CHAR_TEST(char_upper_case, "char-upper-case?", scheme_isupper)
00340 GEN_CHAR_TEST(char_lower_case, "char-lower-case?", scheme_islower)
00341 GEN_CHAR_TEST(char_title_case, "char-title-case?", scheme_istitle)
00342 
00343 static Scheme_Object *
00344 char_to_integer (int argc, Scheme_Object *argv[])
00345 {
00346   mzchar c;
00347 
00348   if (!SCHEME_CHARP(argv[0]))
00349     scheme_wrong_type("char->integer", "character", 0, argc, argv);
00350 
00351   c = SCHEME_CHAR_VAL(argv[0]);
00352 
00353   return scheme_make_integer_value(c);
00354 }
00355 
00356 static Scheme_Object *
00357 integer_to_char (int argc, Scheme_Object *argv[])
00358 {
00359   if (SCHEME_INTP(argv[0])) {
00360     long v;
00361     v = SCHEME_INT_VAL(argv[0]);
00362     if ((v >= 0) 
00363        && (v <= 0x10FFFF)
00364        && ((v < 0xD800) || (v > 0xDFFF)))
00365       return _scheme_make_char(v);
00366   } else if (SCHEME_BIGNUMP(argv[0])
00367             && SCHEME_BIGPOS(argv[0])) {
00368     /* On 32-bit machines, there's still a chance... */
00369     long y;
00370     if (scheme_get_int_val(argv[0], &y)) {
00371       if (y <= 0x10FFFF)
00372        return _scheme_make_char(y);
00373     }
00374   }
00375 
00376   scheme_wrong_type("integer->char", 
00377                   "exact integer in [0,#x10FFFF], not in [#xD800,#xDFFF]", 
00378                   0, argc, argv);
00379   return NULL;
00380 }
00381 
00382 #define GEN_RECASE(func_name, scheme_name, cvt) \
00383 static Scheme_Object *func_name (int argc, Scheme_Object *argv[]) \
00384 { \
00385   mzchar c, nc;    \
00386   if (!SCHEME_CHARP(argv[0]))  \
00387     scheme_wrong_type(scheme_name, "character", 0, argc, argv); \
00388   c = SCHEME_CHAR_VAL(argv[0]);                    \
00389   nc = cvt(c);                                      \
00390   if (nc == c) return argv[0];       \
00391   return scheme_make_character(nc);  \
00392 }
00393 
00394 GEN_RECASE(char_upcase, "char-upcase", scheme_toupper)
00395 GEN_RECASE(char_downcase, "char-downcase", scheme_tolower)
00396 GEN_RECASE(char_titlecase, "char-titlecase", scheme_totitle)
00397 GEN_RECASE(char_foldcase, "char-foldcase", scheme_tofold)
00398 
00399 static Scheme_Object *char_general_category (int argc, Scheme_Object *argv[])
00400 {
00401   mzchar c;
00402   int cat;
00403 
00404   if (!SCHEME_CHARP(argv[0]))
00405     scheme_wrong_type("char-general-category", "character", 0, argc, argv);
00406 
00407   c = SCHEME_CHAR_VAL(argv[0]);
00408   cat = scheme_general_category(c);
00409   if (!general_category_symbols[cat]) {
00410     Scheme_Object *s;
00411     s = scheme_intern_symbol(general_category_names[cat]);
00412     general_category_symbols[cat] = s;
00413   }
00414 
00415   return general_category_symbols[cat];
00416 }
00417 
00418 static Scheme_Object *char_utf8_length (int argc, Scheme_Object *argv[])
00419 {
00420   mzchar wc;
00421   if (!SCHEME_CHARP(argv[0]))
00422     scheme_wrong_type("char-utf-8-length", "character", 0, argc, argv);
00423 
00424   wc = SCHEME_CHAR_VAL(argv[0]);
00425   if (wc < 0x80) {
00426     return scheme_make_integer(1);
00427   } else if (wc < 0x800) {
00428     return scheme_make_integer(2);
00429   } else if (wc < 0x10000) {
00430     return scheme_make_integer(3);
00431   } else if (wc < 0x200000) {
00432     return scheme_make_integer(4);
00433   } else if (wc < 0x4000000) {
00434     return scheme_make_integer(5);
00435   } else {
00436     return scheme_make_integer(6);
00437   }
00438 }
00439 
00440 static Scheme_Object *char_map_list (int argc, Scheme_Object *argv[])
00441 {
00442   int i, bottom, top, uniform;
00443   Scheme_Object *l = scheme_null;
00444 
00445 # define cons scheme_make_pair
00446 
00447   for (i = 2 * (NUM_UCHAR_RANGES - 1); i >= 0; i -= 2) {
00448     bottom = mapped_uchar_ranges[i];
00449     top = mapped_uchar_ranges[i + 1];
00450     if (top & URANGE_VARIES) {
00451       top -= URANGE_VARIES;
00452       uniform = 0;
00453     } else
00454       uniform = 1;
00455     l = cons(cons(scheme_make_integer_value(bottom),
00456                   cons(scheme_make_integer_value(top),
00457                        cons((uniform ? scheme_true : scheme_false),
00458                             scheme_null))),
00459              l);
00460   }
00461 
00462   return l;
00463 }