Back to index

texmacs  1.0.7.15
analyze.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : analyze.cpp
00004 * DESCRIPTION: Properties of characters and strings
00005 * COPYRIGHT  : (C) 1999  Joris van der Hoeven
00006 *******************************************************************************
00007 * This software falls under the GNU general public license version 3 or later.
00008 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
00009 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
00010 ******************************************************************************/
00011 
00012 #include "analyze.hpp"
00013 #include "merge_sort.hpp"
00014 #include "converter.hpp"
00015 #include "scheme.hpp"
00016 
00017 /******************************************************************************
00018 * Tests for caracters
00019 ******************************************************************************/
00020 
00021 bool
00022 is_alpha (register char c) {
00023   return ((c>='a') && (c<='z')) || ((c>='A') && (c<='Z'));
00024 }
00025 
00026 bool
00027 is_iso_alpha (register char c) {
00028   int i= ((int) ((unsigned char) c));
00029   return
00030     ((c>='a') && (c<='z')) ||
00031     ((c>='A') && (c<='Z')) ||
00032     ((i >= 128) && (i != 159) && (i != 189) && (i != 190) && (i != 191));
00033 }
00034 
00035 bool
00036 is_locase (register char c) {
00037   int code= (int) ((unsigned char) c);
00038   return
00039     ((c>='a') && (c<='z')) ||
00040     ((code >= 160) && (code < 189)) ||
00041     (code >= 224);
00042 }
00043 
00044 bool
00045 is_upcase (register char c) {
00046   int code= (int) ((unsigned char) c);
00047   return
00048     ((c>='A') && (c<='Z')) ||
00049     ((code >= 128) && (code < 159)) ||
00050     ((code >= 192) && (code < 224));
00051 }
00052 
00053 bool
00054 is_digit (register char c) {
00055   return (c>='0') && (c<='9');
00056 }
00057 
00058 bool
00059 is_numeric (register char c) {
00060   return ((c>='0') && (c<='9')) || (c=='.');
00061 }
00062 
00063 bool
00064 is_punctuation (register char c) {
00065   return
00066     (c=='.') || (c==',') || (c==':') || (c=='\'') || (c=='`') ||
00067     (c==';') || (c=='!') || (c=='?');
00068 }
00069 
00070 bool
00071 is_space (register char c) {
00072   return (c == ' ') || (c == '\11') || (c == '\12') || (c == '\15');\
00073 }
00074 
00075 /******************************************************************************
00076 * Tests for strings
00077 ******************************************************************************/
00078 
00079 bool
00080 is_alpha (string s) {
00081   int i;
00082   if (N(s)==0) return false;
00083   for (i=0; i<N(s); i++)
00084     if (!is_alpha (s[i])) return false;
00085   return true;
00086 }
00087 
00088 bool
00089 is_locase_alpha (string s) {
00090   int i;
00091   if (N(s)==0) return false;
00092   for (i=0; i<N(s); i++)
00093     if (s[i]<'a' || s[i]>'z') return false;
00094   return true;
00095 }
00096 
00097 bool
00098 is_iso_alpha (string s) {
00099   int i;
00100   if (N(s)==0) return false;
00101   for (i=0; i<N(s); i++)
00102     if (!is_iso_alpha (s[i])) return false;
00103   return true;
00104 }
00105 
00106 bool
00107 is_numeric (string s) {
00108   int i;
00109   if (N(s)==0) return false;
00110   for (i=0; i<N(s); i++)
00111     if (!is_numeric (s[i])) return false;
00112   return true;
00113 }
00114 
00115 /******************************************************************************
00116 * Changing cases
00117 ******************************************************************************/
00118 
00119 char
00120 upcase (char c) {
00121   if (is_locase (c))
00122     return (char) (((int) ((unsigned char) c)) - 32);
00123   else return c;
00124 }
00125 
00126 char
00127 locase (char c) {
00128   if (is_upcase (c))
00129     return (char) (((int) ((unsigned char) c)) + 32);
00130   else return c;
00131 }
00132 
00133 string
00134 upcase_first (string s) {
00135   if ((N(s)==0) || (!is_locase (s[0]))) return s;
00136   return string ((char) (((int) ((unsigned char) s[0]))-32)) * s (1, N(s));
00137 }
00138 
00139 string
00140 locase_first (string s) {
00141   if ((N(s)==0) || (!is_upcase (s[0]))) return s;
00142   return string ((char) (((int) ((unsigned char) s[0]))+32)) * s (1, N(s));
00143 }
00144 
00145 string
00146 upcase_all (string s) {
00147   int i;
00148   string r (N(s));
00149   for (i=0; i<N(s); i++)
00150     if (!is_locase (s[i])) r[i]= s[i];
00151     else r[i]= (char) (((int) ((unsigned char) s[i]))-32);
00152   return r;
00153 }
00154 
00155 string
00156 locase_all (string s) {
00157   int i;
00158   string r (N(s));
00159   for (i=0; i<N(s); i++)
00160     if (!is_upcase (s[i])) r[i]= s[i];
00161     else r[i]= (char) (((int) ((unsigned char) s[i]))+32);
00162   return r;
00163 }
00164 
00165 /******************************************************************************
00166 * Inserting or removing a character into a string as a set of characters
00167 ******************************************************************************/
00168 
00169 string
00170 string_union (string s1, string s2) {
00171   return string_minus (s1, s2) * s2;
00172 }
00173 
00174 string
00175 string_minus (string s1, string s2) {
00176   string r;
00177   int i1, n1= N(s1), i2, n2= N(s2);
00178   for (i1=0; i1<n1; i1++) {
00179     for (i2=0; i2<n2; i2++)
00180       if (s1[i1] == s2[i2]) break;
00181     if (i2==n2) r << s1[i1];
00182   }
00183   return r;
00184 }
00185 
00186 /******************************************************************************
00187 * Spanish in relation with ispell
00188 ******************************************************************************/
00189 
00190 string
00191 ispanish_to_spanish (string s) {
00192   int i, n= N(s);
00193   string r;
00194   for (i=0; i<n; i++)
00195     if ((s[i] == '\'') && ((i+1)<n)) {
00196       switch (s[i+1]) {
00197       case 'A': r << ''; break;
00198       case 'E': r << ''; break;
00199       case 'I': r << ''; break;
00200       case 'N': r << ''; break;
00201       case 'O': r << ''; break;
00202       case 'U': r << ''; break;
00203       case 'Y': r << ''; break;
00204       case 'a': r << ''; break;
00205       case 'e': r << ''; break;
00206       case 'i': r << ''; break;
00207       case 'n': r << ''; break;
00208       case 'o': r << ''; break;
00209       case 'u': r << ''; break;
00210       case 'y': r << ''; break;
00211       default : r << '\'' << s[i+1];
00212       }
00213       i++;
00214     }
00215     else r << s[i];
00216   return r;
00217 }
00218 
00219 string
00220 spanish_to_ispanish (string s) {
00221   int i, n= N(s);
00222   string r;
00223   for (i=0; i<n; i++)
00224     switch (s[i]) {
00225     case '': r << "'A"; break;
00226     case '': r << "'E"; break;
00227     case '': r << "'I"; break;
00228     case '': r << "'N"; break;
00229     case '': r << "'O"; break;
00230     case '': r << "'U"; break;
00231     case '': r << "'Y"; break;
00232     case '': r << "'a"; break;
00233     case '': r << "'e"; break;
00234     case '': r << "'i"; break;
00235     case '': r << "'n"; break;
00236     case '': r << "'o"; break;
00237     case '': r << "'u"; break;
00238     case '': r << "'y"; break;
00239     default : r << s[i];
00240     }
00241   return r;
00242 }
00243 
00244 string
00245 igerman_to_german (string s) {
00246   int i, n= N(s);
00247   string r;
00248   for (i=0; i<n; i++)
00249     if (s[i] == '') r << '';
00250     else r << s[i];
00251   return r;
00252 }
00253 
00254 string
00255 german_to_igerman (string s) {
00256   int i, n= N(s);
00257   string r;
00258   for (i=0; i<n; i++)
00259     if (s[i] == '') r << '';
00260     else r << s[i];
00261   return r;
00262 }
00263 
00264 /******************************************************************************
00265 * Iso latin 2 encoding for polish and czech
00266 ******************************************************************************/
00267 
00268 static string il2_to_cork_string=
00269   "   €ĈǃɆ˅΄ЋԎ.ږݕ䨢/ ";
00270 static string cork_to_il2_string=
00271   "áGť ئYIIg嵳 yi!?LAAAAEEIINOOOOUU Saaaaeeiinoooouu ";
00272 
00273 static char
00274 il2_to_cork (char c) {
00275   int i= (int) ((unsigned char) c);
00276   if (i<128) return c;
00277   return il2_to_cork_string [i-128];
00278 }
00279 
00280 static char
00281 cork_to_il2 (char c) {
00282   int i= (int) ((unsigned char) c);
00283   if (i<128) return c;
00284   return cork_to_il2_string [i-128];
00285 }
00286 
00287 string
00288 il2_to_cork (string s) {
00289   int i, n= N(s);
00290   string r (n);
00291   for (i=0; i<n; i++)
00292     r[i]= il2_to_cork (s[i]);
00293   return r;
00294 }
00295 
00296 string
00297 cork_to_il2 (string s) {
00298   int i, n= N(s);
00299   string r (n);
00300   for (i=0; i<n; i++)
00301     r[i]= cork_to_il2 (s[i]);
00302   return r;
00303 }
00304 
00305 /******************************************************************************
00306 * Koi8 encoding for russian
00307 ******************************************************************************/
00308 
00309 static string koi8_to_iso_string=
00310   "";
00311 static string iso_to_koi8_string=
00312   "";
00313 
00314 static char
00315 koi8_to_iso (char c, bool ukrainian) {
00316   int i= (int) ((unsigned char) c);
00317   if (i==156) return '';
00318   if (i==188) return '';
00319   if (ukrainian)
00320   {
00321      switch(c)
00322      {
00323          case 'I':return '';
00324          case '':return '';
00325          case '':return '';
00326          case '':return '';
00327          case 'i':return '';
00328          case '':return '';
00329          case '':return '';
00330          case '':return '';
00331      }
00332   }
00333   if (i<192) return c;
00334   return koi8_to_iso_string [i-192];
00335 }
00336 
00337 static char
00338 iso_to_koi8 (char c, bool ukrainian) {
00339   int i= (int) ((unsigned char) c);
00340   if (c=='') return (char) 156;
00341   if (c=='') return (char) 188;
00342   if (ukrainian)
00343   {
00344      switch(c)
00345      {
00346          case '':return 'I';
00347          case '':return '';
00348          case '':return '';
00349          case '':return '';
00350          case '':return 'i';
00351          case '':return '';
00352          case '':return '';
00353          case '':return '';
00354      }
00355   }
00356   if (i<192) return c;
00357   return iso_to_koi8_string [i-192];
00358 }
00359 
00360 string
00361 koi8_to_iso (string s) {
00362   int i, n= N(s);
00363   string r (n);
00364   for (i=0; i<n; i++)
00365     r[i]= koi8_to_iso (s[i], false);
00366   return r;
00367 }
00368 
00369 string
00370 iso_to_koi8 (string s) {
00371   int i, n= N(s);
00372   string r (n);
00373   for (i=0; i<n; i++)
00374     r[i]= iso_to_koi8 (s[i], false);
00375   return r;
00376 }
00377 
00378 string
00379 koi8uk_to_iso (string s) {
00380   int i, n= N(s);
00381   string r (n);
00382   for (i=0; i<n; i++)
00383     r[i]= koi8_to_iso (s[i], true);
00384   return r;
00385 }
00386 
00387 string
00388 iso_to_koi8uk (string s) {
00389   int i, n= N(s);
00390   string r (n);
00391   for (i=0; i<n; i++)
00392     r[i]= iso_to_koi8 (s[i], true);
00393   return r;
00394 }
00395 
00396 /******************************************************************************
00397 * Convert between TeXmacs and XML strings
00398 ******************************************************************************/
00399 
00400 static bool
00401 is_xml_name (char c) {
00402   return
00403     is_alpha (c) || is_numeric (c) ||
00404     (c == '.') || (c == '-') || (c == ':');
00405 }
00406 
00407 string
00408 tm_to_xml_name (string s) {
00409   string r;
00410   int i, n= N(s);
00411   for (i=0; i<n; i++)
00412     if (is_xml_name (s[i])) r << s[i];
00413     else r << "_" << as_string ((int) ((unsigned char) s[i])) << "_";
00414   return r;
00415 }
00416 
00417 string
00418 xml_name_to_tm (string s) {
00419   string r;
00420   int i, n= N(s);
00421   for (i=0; i<n; i++)
00422     if (s[i] != '_') r << s[i];
00423     else {
00424       int start= ++i;
00425       while ((i<n) && (s[i]!='_')) i++;
00426       r << (char) ((unsigned char) as_int (s (start, i)));
00427     }
00428   return r;
00429 }
00430 
00431 string
00432 old_tm_to_xml_cdata (string s) {
00433   string r;
00434   int i, n= N(s);
00435   for (i=0; i<n; i++)
00436     if (s[i] == '&') r << "&amp;";
00437     else if (s[i] == '>') r << "&gt;";
00438     else if (s[i] != '<') r << s[i];
00439     else {
00440       int start= ++i;
00441       while ((i<n) && (s[i]!='>')) i++;
00442       r << "&" << tm_to_xml_name (s (start, i)) << ";";
00443     }
00444   return r;
00445 }
00446 
00447 object
00448 tm_to_xml_cdata (string s) {
00449   array<object> a;
00450   a << symbol_object ("!concat");
00451   string r;
00452   int i, n= N(s);
00453   for (i=0; i<n; i++)
00454     if (s[i] == '&') r << "&amp;";
00455     else if (s[i] == '>') r << "&gt;";
00456     else if (s[i] == '\\') r << "\\";
00457     else if (s[i] != '<') r << cork_to_utf8 (s (i, i+1));
00458     else {
00459       int start= i++;
00460       while ((i<n) && (s[i]!='>')) i++;
00461       string ss= s (start, i+1);
00462       string rr= cork_to_utf8 (ss);
00463       string qq= utf8_to_cork (rr);
00464       if (rr != ss && qq == ss) r << rr;
00465       else {
00466        if (r != "") a << object (r);
00467        a << cons (symbol_object ("tm-sym"),
00468                  cons (ss (1, N(ss)-1),
00469                       null_object ()));
00470        r= "";
00471       }
00472     }
00473   if (r != "") a << object (r);
00474   if (N(a) == 1) return object ("");
00475   else if (N(a) == 2) return a[1];
00476   else return call ("list", a);
00477 }
00478 
00479 string
00480 old_xml_cdata_to_tm (string s) {
00481   string r;
00482   int i, n= N(s);
00483   for (i=0; i<n; i++)
00484     if (s[i] == '<') r << "<less>";
00485     else if (s[i] == '>') r << "<gtr>";
00486     else if (s[i] != '&') r << s[i];
00487     else {
00488       int start= ++i;
00489       while ((i<n) && (s[i]!=';')) i++;
00490       string x= "<" * xml_name_to_tm (s (start, i)) * ">";
00491       if (x == "<amp>") r << "&";
00492       else r << x;
00493     }
00494   return r;
00495 }
00496 
00497 string
00498 xml_unspace (string s, bool first, bool last) {
00499   string r;
00500   int i= 0, n= N(s);
00501   if (first) while ((i<n) && is_space (s[i])) i++;
00502   while (i<n)
00503     if (!is_space (s[i])) r << s[i++];
00504     else {
00505       while ((i<n) && is_space (s[i])) i++;
00506       if ((i<n) || (!last)) r << ' ';
00507     }
00508   return r;
00509 }
00510 
00511 bool
00512 contains_unicode_char (string s) {
00513   int i= 0, n= N(s);
00514   while (i+1<n) {
00515     if (s[i] == '<' && s[i+1] == '#') return true;
00516     tm_char_forwards (s, i);
00517   }
00518   return false;
00519 }
00520 
00521 /******************************************************************************
00522 * Roman and alpha numbers
00523 ******************************************************************************/
00524 
00525 static string ones[10]= {
00526   "", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix" };
00527 static string tens[10]= {
00528   "", "x", "xx", "xxx", "xl", "l", "lx", "lxx", "lxxx", "xc" };
00529 static string hundreds[10]= {
00530   "", "c", "cc", "ccc", "cd", "d", "dc", "dcc", "dccc", "cm" };
00531 
00532 string
00533 roman_nr (int nr) {
00534   if (nr<0) return "-" * roman_nr (nr);
00535   if (nr==0) return "o";
00536   if (nr>1000) return "m" * roman_nr (nr-1000);
00537   if (nr==1000) return "m";
00538   if (nr==999) return "im";
00539   if (nr==499) return "id";
00540   if ((nr%100)==99) return hundreds[nr/100] * "ic";
00541   if ((nr%100)==49) return hundreds[nr/100] * "il";
00542   return hundreds[nr/100] * tens[(nr%100)/10] * ones[nr%10];
00543 }
00544 
00545 string
00546 Roman_nr (int nr) {
00547   return upcase_all (roman_nr (nr));
00548 }
00549 
00550 string
00551 alpha_nr (int nr) {
00552   if (nr<0) return "-" * alpha_nr (nr);
00553   if (nr==0) return "0";
00554   if (nr<=26) return string ((char) (((int) 'a')+ nr-1));
00555   return alpha_nr ((nr-1)/26) * alpha_nr (((nr-1)%26)+1);
00556 }
00557 
00558 string
00559 Alpha_nr (int nr) {
00560   return upcase_all (alpha_nr (nr));
00561 }
00562 
00563 string
00564 fnsymbol_nr (int nr) {
00565   string sym, r;
00566   int i, m= (nr-1)%3, n= ((nr-1)/3)+1;
00567   switch (m) {
00568   case 0: sym= "<ast>"; break;
00569   case 1: sym= "<dag>"; break;
00570   case 2: sym= "<ddag>"; break;
00571   }
00572   for (i=0; i<n; i++) r << sym;
00573   return r;
00574 }
00575 
00576 /******************************************************************************
00577 * Conversions to and from hexadecimal
00578 ******************************************************************************/
00579 
00580 static const char* hex_string= "0123456789ABCDEF";
00581 
00582 string
00583 as_hexadecimal (int i) {
00584   if (i<0) return "-" * as_hexadecimal (-i);
00585   if (i<16) return hex_string [i & 15];
00586   return as_hexadecimal (i >> 4) * hex_string [i & 15];
00587 }
00588 
00589 string
00590 as_hexadecimal (pointer ptr) {
00591   intptr_t i= (intptr_t) ptr;
00592   if (i<0) return "-" * as_hexadecimal (-i);
00593   if (i<16) return hex_string [i & 15];
00594   return as_hexadecimal (i >> 4) * hex_string [i & 15];
00595 }
00596 
00597 string
00598 as_hexadecimal (int i, int len) {
00599   if (len==1) return hex_string [i & 15];
00600   else return as_hexadecimal (i >> 4, len-1) * hex_string [i & 15];
00601 }
00602 
00603 int
00604 from_hexadecimal (string s) {
00605   int i, n= N(s), res= 0;
00606   if ((n>0) && (s[0]=='-'))
00607     return -from_hexadecimal (s (1, n));
00608   for (i=0; i<n; i++) {
00609     res= res << 4;
00610     if ((s[i] >= '0') && (s[i] <= '9')) res += (int) (s[i] - '0');
00611     if ((s[i] >= 'A') && (s[i] <= 'F')) res += (int) (s[i] + 10 - 'A');
00612     if ((s[i] >= 'a') && (s[i] <= 'f')) res += (int) (s[i] + 10 - 'a');
00613   }
00614   return res;
00615 }
00616 
00617 /******************************************************************************
00618 * Routines for the TeXmacs encoding
00619 ******************************************************************************/
00620 
00621 string
00622 tm_encode (string s) {
00623   // verbatim to TeXmacs encoding
00624   register int i;
00625   string r;
00626   for (i=0; i<N(s); i++) {
00627     if (s[i]=='<') r << "<less>";
00628     else if (s[i]=='>') r << "<gtr>";
00629     else r << s[i];
00630   }
00631   return r;
00632 }
00633 
00634 string
00635 tm_decode (string s) {
00636   // TeXmacs encoding to verbatim
00637   register int i;
00638   string r;
00639   for (i=0; i<N(s); i++) {
00640     if (s[i]=='<') {
00641       register int j;
00642       for (j=i+1; j<N(s); j++)
00643        if (s[j]=='>') break;
00644       if (j<N(s)) j++;
00645       if (s(i,j) == "<less>") r << "<";
00646       else if (s(i,j) == "<gtr>") r << ">";
00647       i=j-1;
00648       if (s[i]!='>') return r;
00649     }
00650     else if (s[i]!='>') r << s[i];
00651   }
00652   return r;
00653 }
00654 
00655 string
00656 tm_var_encode (string s) {
00657   register int i, n= N(s);
00658   string r;
00659   for (i=0; i<n; i++) {
00660     if (s[i]=='<') {
00661       if (i+1 < n && s[i+1] == '#') {
00662        while (i<n && s[i] != '>') r << s[i++];
00663        if (i<n) r << s[i];
00664       }
00665       else r << "<less>";
00666     }
00667     else if (s[i]=='>') r << "<gtr>";
00668     else r << s[i];
00669   }
00670   return r;
00671 }
00672 
00673 string
00674 tm_correct (string s) {
00675   register int i;
00676   string r;
00677   for (i=0; i<N(s); i++) {
00678     if (s[i]=='<') {
00679       register bool flag= true;
00680       register int j, k;
00681       for (j=i+1; j<N(s); j++)
00682        if (s[j]=='>') break;
00683       if (j==N(s)) return r;
00684       for (k=i+1; k<j; k++)
00685        if (s[k]=='<') flag= false;
00686       if (flag) r << s(i,j+1);
00687       i=j;
00688     }
00689     else if (s[i]!='>') r << s[i];
00690   }
00691   return r;
00692 }
00693 
00694 void
00695 tm_char_forwards (string s, int& pos) {
00696   ASSERT (pos >= 0 && pos <= N(s), "out of range");
00697   int n= N(s);
00698   if (pos == n);
00699   else if (s[pos] != '<') pos++;
00700   else {
00701     while (pos<n && s[pos] != '>') pos++;
00702     if (pos<n) pos++;
00703   }
00704 }
00705 
00706 void
00707 tm_char_backwards (string s, int& pos) {
00708   ASSERT (pos >= 0 && pos <= N(s), "out of range");
00709   if (pos == 0);
00710   else if (s[pos-1] != '>') pos--;
00711   else {
00712     while (pos>0 && s[pos-1] != '<') pos--;
00713     if (pos>0) pos--;
00714   }
00715 }
00716 
00717 int
00718 tm_char_next (string s, int pos) {
00719   tm_char_forwards (s, pos);
00720   return pos;
00721 }
00722 
00723 int
00724 tm_char_previous (string s, int pos) {
00725   tm_char_backwards (s, pos);
00726   return pos;
00727 }
00728 
00729 string
00730 tm_forward_access (string s, int k) {
00731   int pos= 0;
00732   for (int i=0; i<k; i++)
00733     tm_char_forwards (s, pos);
00734   int start= pos;
00735   tm_char_forwards (s, pos);
00736   return s (start, pos);
00737 }
00738 
00739 string
00740 tm_backward_access (string s, int k) {
00741   int pos= N(s);
00742   for (int i=0; i<k; i++)
00743     tm_char_backwards (s, pos);
00744   int end= pos;
00745   tm_char_backwards (s, pos);
00746   return s (pos, end);
00747 }
00748 
00749 int
00750 tm_string_length (string s) {
00751   int i= 0, pos= 0;
00752   while (pos < N(s)) {
00753     tm_char_forwards (s, pos);
00754     i++;
00755   }
00756   return i;
00757 }
00758 
00759 array<string>
00760 tm_tokenize (string s) {
00761   array<string> r;
00762   int pos= 0;
00763   while (pos < N(s)) {
00764     int start= pos;
00765     tm_char_forwards (s, pos);
00766     r << s (start, pos);
00767   }
00768   return r;
00769 }
00770 
00771 string
00772 tm_recompose (array<string> a) {
00773   string r;
00774   for (int i=0; i<N(a); i++)
00775     r << a[i];
00776   return r;
00777 }
00778 
00779 /******************************************************************************
00780 * Quoting
00781 ******************************************************************************/
00782 
00783 string
00784 scm_quote (string s) {
00785   // R5RS compliant external string representation.
00786   int i, n= N(s);
00787   string r;
00788   r << '"';
00789   for (i=0; i<n; i++)
00790     switch (s[i]) {
00791     case '\"':
00792     case '\\':
00793       r << '\\' << s[i];
00794       break;
00795     default:
00796       r << s[i];
00797     }
00798   r << '"';
00799   return r;
00800 }
00801 
00802 string
00803 scm_unquote (string s) {
00804   if ((N(s)>=2) && (s[0]=='\"') && (s[N(s)-1]=='\"')) {
00805     int i, n= N(s);
00806     string r;
00807     for (i=1; i<n-1; i++)
00808       if (s[i] == '\\' && (s[i+1] == '\"' || s[i+1] == '\\')) r << s[++i];
00809       else r << s[i];
00810     return r;
00811   }
00812   else return s;
00813 }
00814 
00815 string
00816 raw_quote (string s) {
00817   // Mark the label of a STRING tree as representing a string and not a symbol.
00818   return "\"" * s * "\"";
00819 }
00820  
00821 string
00822 raw_unquote (string s) {
00823   // Get the string value of a STRING tree label representing a string.
00824   if ((N(s)>=2) && (s[0]=='\"') && (s[N(s)-1]=='\"'))
00825     return s (1, N(s)-1);
00826   else return s;
00827 }
00828 
00829 /******************************************************************************
00830 * Handling escape characters
00831 ******************************************************************************/
00832 
00833 string
00834 escape_sh (string s) {
00835 #if defined (__MINGW__) || defined (__MINGW32__) || defined (OS_WIN32)
00836   return raw_quote (s);
00837 #else
00838   int i, n= N(s);
00839   string r;
00840   for (i=0; i<n; i++)
00841     switch (s[i]) {
00842     case '?':
00843     case '&':
00844     case '$':
00845     case '`':
00846     case '\"':
00847     case '\\':
00848     case ' ':
00849       r << '\\' << s[i];
00850       break;
00851     default:
00852       r << s[i];
00853     }
00854   return r;
00855 #endif
00856 }
00857 
00858 string
00859 escape_generic (string s) {
00860   int i, n= N(s);
00861   string r;
00862   for (i=0; i<n; i++) {
00863     if ((s[i] == '\2') || (s[i] == '\5') || (s[i] == '\33')) r << '\33';
00864     r << s[i];
00865   }
00866   return r;
00867 }
00868 
00869 string
00870 escape_verbatim (string s) {
00871   int i, n= N(s);
00872   string r;
00873   for (i=0; i<n; i++) {
00874     unsigned char c= (unsigned char) s[i];
00875     if ((c == '\n') || (c == '\t')) r << ' ';
00876     else if (((int) c) >= 32) r << s[i];
00877   }
00878   return r;
00879 }
00880 
00881 string
00882 escape_spaces (string s) {
00883   int i, n= N(s);
00884   string r;
00885   for (i=0; i<n; i++) {
00886     unsigned char c= (unsigned char) s[i];
00887     if (c == ' ') r << '\\';
00888     r << c;
00889   }
00890   return r;
00891 }
00892 
00893 string
00894 dos_to_better (string s) {
00895   int i, n= N(s);
00896   string r;
00897   for (i=0; i<n; i++)
00898     if (s[i] == '\015');
00899     else r << s[i];
00900   return r;
00901 }
00902 
00903 /******************************************************************************
00904 * Reading input from a string
00905 ******************************************************************************/
00906 
00907 bool
00908 test (string s, int i, const char* test) {
00909   int n= N(s), j=0;
00910   while (test[j]!='\0') {
00911     if (i>=n) return false;
00912     if (s[i]!=test[j]) return false;
00913     i++; j++;
00914   }
00915   return true;
00916 }
00917 
00918 bool
00919 test (string s, int i, string test) {
00920   int n= N(s), m= N(test), j=0;
00921   while (j<m) {
00922     if (i>=n) return false;
00923     if (s[i]!=test[j]) return false;
00924     i++; j++;
00925   }
00926   return true;
00927 }
00928 
00929 bool
00930 starts (string s, const char* what) {
00931   return test (s, 0, what);
00932 }
00933 
00934 bool
00935 starts (string s, const string what) {
00936   return test (s, 0, what);
00937 }
00938 
00939 bool
00940 ends (string s, const char* what) {
00941   string r (what);
00942   if (N(r) > N(s)) return false;
00943   return s (N(s)-N(r), N(s)) == r;
00944 }
00945 
00946 bool
00947 ends (string s, const string r) {
00948   if (N(r) > N(s)) return false;
00949   return s (N(s)-N(r), N(s)) == r;
00950 }
00951 
00952 bool
00953 read (string s, int& i, const char* test) {
00954   int n= N(s), j=0, k=i;
00955   while (test[j]!='\0') {
00956     if (k>=n) return false;
00957     if (s[k]!=test[j]) return false;
00958     j++; k++;
00959   }
00960   i=k;
00961   return true;
00962 }
00963 
00964 bool
00965 read (string s, string test) {
00966   int i = 0;
00967   return read (s, i, test);
00968 }
00969 
00970 bool
00971 read (string s, int& i, string test) {
00972   int n= N(s), m= N(test), j=0, k=i;
00973   while (j<m) {
00974     if (k>=n) return false;
00975     if (s[k]!=test[j]) return false;
00976     j++; k++;
00977   }
00978   i=k;
00979   return true;
00980 }
00981 
00982 bool
00983 read_line (string s, int& i, string& result) {
00984   int start= i;
00985   for (; i<N(s); i++) {
00986     if (s[i]=='\n') {
00987       result= s(start,i++);
00988       return true;
00989     }
00990   }
00991   result= s(start,i);
00992   return false;
00993 }
00994 
00995 bool
00996 read_int (string s, int& i, int& result) {
00997   int n= N(s), start= i;
00998   result= 0;
00999   if (i==n) return false;
01000   if (s[i]=='-') {
01001     if (i+1==n) return false;
01002     if (!is_digit (s[i+1])) return false;
01003     i++;
01004   }
01005   else if (!is_digit (s[i])) return false;
01006   while ((i<n) && is_digit (s[i])) i++;
01007   result= as_int (s(start,i));
01008   return true;
01009 }
01010 
01011 bool
01012 read_double (string s, int& i, double& result) {
01013   int n= N(s), start= i;
01014   result= 0.0;
01015   if (i==n) return false;
01016   if (s[i]=='-') {
01017     if (i+1==n) return false;
01018     if (!is_numeric (s[i+1])) return false;
01019     i++;
01020   }
01021   else if (!is_numeric (s[i])) return false;
01022   while ((i<n) && is_digit (s[i])) i++;
01023   if ((i<n) && (s[i]=='.')) i++;
01024   while ((i<n) && is_digit (s[i])) i++;
01025   if ((i<n) && ((s[i]=='e') || (s[i]=='E'))) {
01026     i++;
01027     if ((i<n) && (s[i]=='-')) i++;
01028     if ((i==n) || (!is_digit (s[i]))) { i=start; return false; }
01029     while ((i<n) && is_digit (s[i])) i++;
01030   }
01031   result= as_double (s(start,i));
01032   return true;
01033 }
01034 
01035 void
01036 skip_spaces (string s, int& i) {
01037   int n=N(s);
01038   while ((i<n) && ((s[i]==' ') || (s[i]=='\t'))) i++;
01039 }
01040 
01041 void
01042 skip_line (string s, int& i) {
01043   int n=N(s);
01044   while ((i<n) && (s[i]!='\n')) i++;
01045   if (i<n) i++;
01046 }
01047 
01048 void
01049 skip_symbol (string s, int& i) {
01050   int n=N(s);
01051   if (i<n) {
01052     if (s[i]=='<') {
01053       for (i++; i<n; i++)
01054        if (s[i-1]=='>') break;
01055     }
01056     else i++;
01057   }
01058 }
01059 
01060 /******************************************************************************
01061 * Parsing binary data
01062 ******************************************************************************/
01063 
01064 void
01065 parse (string s, int& pos, QI& ret) {
01066   ret= (QI) s[pos++];
01067 }
01068 
01069 void
01070 parse (string s, int& pos, QN& ret) {
01071   ret= (QN) s[pos++];
01072 }
01073 
01074 void
01075 parse (string s, int& pos, HI& ret) {
01076   QI c1= (QI) s[pos++];
01077   QN c2= (QN) s[pos++];
01078   ret= (((HI) c1)<<8)+ c2;
01079 }
01080 
01081 void
01082 parse (string s, int& pos, HN& ret) {
01083   QN c1= (QN) s[pos++];
01084   QN c2= (QN) s[pos++];
01085   ret= (((HN) c1)<<8)+ c2;
01086 }
01087 
01088 void
01089 parse (string s, int& pos, SI& ret) {
01090   QI c1= (QI) s[pos++];
01091   QN c2= (QN) s[pos++];
01092   QN c3= (QN) s[pos++];
01093   QN c4= (QN) s[pos++];
01094   ret= (((((((SI) c1)<<8)+ ((SI) c2))<<8)+ ((SI) c3))<<8)+ c4;
01095 }
01096 
01097 void
01098 parse (string s, int& pos, SI*& a, int len) {
01099   int i;
01100   a= tm_new_array<int> (len);
01101   for (i=0; i<len; i++) parse (s, pos, a[i]);
01102 }
01103 
01104 /******************************************************************************
01105 * Searching, replacing and pattern matching
01106 ******************************************************************************/
01107 
01108 int
01109 search_forwards (string s, int pos, string in) {
01110   int k= N(s), n= N(in);
01111   if (k == 0) return pos;
01112   char c= s[0];
01113   while (pos+k <= n) {
01114     if (in[pos] == c && test (in, pos, s)) return pos;
01115     pos++;
01116   }
01117   return -1;
01118 }
01119 
01120 int
01121 search_forwards (string s, string in) {
01122   return search_forwards (s, 0, in);
01123 }
01124 
01125 bool
01126 occurs (string what, string in) {
01127   return search_forwards (what, 0, in) >= 0;
01128 }
01129 
01130 int
01131 search_backwards (string s, int pos, string in) {
01132   while (pos >= 0) {
01133     if (test (in, pos, s)) return pos;
01134     pos--;
01135   }
01136   return -1;
01137 }
01138 
01139 int
01140 search_backwards (string s, string in) {
01141   return search_backwards (s, N(in)-N(s), in);
01142 }
01143 
01144 int
01145 count_occurrences (string s, string in) {
01146   int count= 0;
01147   int i=0, next, n= N(s);
01148   while (i<n) {
01149     next= search_forwards (s, i, in);
01150     if (next == -1) break;
01151     count++;
01152     i= next+1;
01153   }
01154   return count;
01155 }
01156 
01157 string
01158 replace (string s, string what, string by) {
01159   int i, n= N(s);
01160   string r;
01161   for (i=0; i<n; )
01162     if (test (s, i, what)) {
01163       r << by;
01164       i += N(what);
01165     }
01166     else {
01167       r << s[i];
01168       i++;
01169     }
01170   return r;
01171 }
01172 
01173 static bool
01174 match_wildcard (string s, int spos, string w, int wpos) {
01175   if (wpos == N(w)) return spos == N(s);
01176   if (w[wpos] != '*')
01177     return (spos < N(s)) && (s[spos] == w[wpos]) &&
01178       match_wildcard (s, spos+1, w, wpos+1);
01179   while ((wpos<N(w)) && (w[wpos]=='*')) wpos++;
01180   while (spos <= N(s)) {
01181     if (match_wildcard (s, spos, w, wpos)) return true;
01182     spos++;
01183   }
01184   return false;
01185 }
01186 
01187 bool
01188 match_wildcard (string s, string w) {
01189   return match_wildcard (s, 0, w, 0);
01190 }
01191 
01192 array<string>
01193 tokenize (string s, string sep) {
01194   int start=0;
01195   array<string> a;
01196   for (int i=0; i<N(s); )
01197     if (test (s, i, sep)) {
01198       a << s (start, i);
01199       i += N(sep);
01200       start= i;
01201     }
01202     else i++;
01203   return a;
01204 }
01205 
01206 string
01207 recompose (array<string> a, string sep) {
01208   string r;
01209   for (int i=0; i<N(a); i++) {
01210     if (i != 0) r << sep;
01211     r << a[i];
01212   }
01213   return r;
01214 }
01215 
01216 string
01217 trim_spaces (string s) {
01218   int start, end;
01219   for (start=0; start<N(s) && is_space (s[start]); start++);
01220   for (end=N(s); end>start && is_space (s[end]); end--);
01221   return s (start, end);
01222 }
01223 
01224 array<string>
01225 trim_spaces (array<string> a) {
01226   array<string> b (N(a));
01227   for (int i=0; i<N(a); i++)
01228     b[i]= trim_spaces (a[i]);
01229   return b;
01230 }
01231 
01232 /******************************************************************************
01233 * Computations with completions
01234 ******************************************************************************/
01235 
01236 array<string>
01237 as_completions (hashset<string> h) {
01238   tree t= (tree) h;
01239   int i, n= N(t);
01240   array<string> a (n);
01241   for (i=0; i<n; i++) a[i]= t[i]->label;
01242   merge_sort (a);
01243   return a;
01244 }
01245 
01246 /*
01247 static void
01248 close_completions (hashset<string>& h) {
01249   array<string> a= as_completions (h);
01250   int i, j, n= N(a);
01251   for (i=1; i<n; i++) {
01252     for (j=0; j < min (N(a[i-1]), N(a[i])); j++)
01253       if (a[i-1][j] != a[i][j]) break;
01254     if (j < min (N(a[i-1]), N(a[i])))
01255       h->insert (a[i](0,j));
01256   }
01257 }
01258 
01259 array<string>
01260 close_completions (array<string> a) {
01261   int i, n= N(a);
01262   hashset<string> h;
01263   for (i=0; i<n; i++) h->insert (a[i]);
01264   close_completions (h);
01265   return as_completions (h);
01266 }
01267 */
01268 
01269 array<string>
01270 close_completions (array<string> a) {
01271   if (N(a) == 0) return a;
01272   merge_sort (a);
01273   int i, j, n= N(a), l= N(a[0]);
01274   for (i=1; i<n; i++) {
01275     for (j=0; j<l && j<N(a[i]); j++)
01276       if (a[i-1][j] != a[i][j]) break;
01277     l= j;
01278   }
01279   array<string> r;
01280   r << a[0] (0, l);
01281   for (i=0; i<n; i++)
01282     if (a[i] != r[N(r)-1])
01283       r << a[i];
01284   return r;
01285 }
01286 
01287 array<string>
01288 strip_completions (array<string> a, string prefix) {
01289   int i, n= N(a);
01290   array<string> b;
01291   for (i=0; i<n; i++)
01292     if (starts (a[i], prefix))
01293       b << a[i] (N(prefix), N(a[i]));
01294   return b;
01295 }