Back to index

texmacs  1.0.7.15
fromtm.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : fromtm.cpp
00004 * DESCRIPTION: conversion from the TeXmacs file format to TeXmacs trees
00005 *              older versions are automatically converted into the present one
00006 * COPYRIGHT  : (C) 1999  Joris van der Hoeven
00007 *******************************************************************************
00008 * This software falls under the GNU general public license version 3 or later.
00009 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
00010 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
00011 ******************************************************************************/
00012 
00013 #include "convert.hpp"
00014 #include "path.hpp"
00015 #include "vars.hpp"
00016 #include "drd_std.hpp"
00017 
00018 /******************************************************************************
00019 * Conversion of TeXmacs strings of the present format to TeXmacs trees
00020 ******************************************************************************/
00021 
00022 struct tm_reader {
00023   string  version;            // document was composed using this version
00024   hashmap<string,int> codes;  // codes for to present version
00025   tree_label EXPAND_APPLY;    // APPLY (version < 0.3.3.22) or EXPAND (otherw)
00026   bool    backslash_ok;       // true for versions >= 1.0.1.23
00027   bool    with_extensions;    // true for versions >= 1.0.2.4
00028   string  buf;                // the string being read from
00029   int     pos;                // the current position of the reader
00030   string  last;               // last read string
00031 
00032   tm_reader (string buf2):
00033     version (TEXMACS_VERSION),
00034     codes (STD_CODE),
00035     EXPAND_APPLY (EXPAND),
00036     backslash_ok (true),
00037     with_extensions (true),
00038     buf (buf2), pos (0), last ("") {}
00039   tm_reader (string buf2, string version2):
00040     version (version2),
00041     codes (get_codes (version)),
00042     EXPAND_APPLY (version_inf (version, "0.3.3.22")? APPLY: EXPAND),
00043     backslash_ok (version_inf (version, "1.0.1.23")? false: true),
00044     with_extensions (version_inf (version, "1.0.2.4")? false: true),
00045     buf (buf2), pos (0), last ("") {}
00046 
00047   int    skip_blank ();
00048   string decode (string s);
00049   string read_char ();
00050   string read_next ();
00051   string read_function_name ();
00052   tree   read_apply (string s, bool skip_flag);
00053   tree   read (bool skip_flag);
00054 };
00055 
00056 int
00057 tm_reader::skip_blank () {
00058   int n=0;
00059   for (; pos < N(buf); pos++) {
00060     if (buf[pos]==' ') continue;
00061     if (buf[pos]=='\t') continue;
00062     if (buf[pos]=='\r') continue;
00063     if (buf[pos]=='\n') { n++; continue; }
00064     break;
00065   }
00066   return n;
00067 }
00068 
00069 string
00070 tm_reader::decode (string s) {
00071   int i, n=N(s);
00072   string r;
00073   for (i=0; i<n; i++)
00074     if (((i+1)<n) && (s[i]=='\\')) {
00075       i++;
00076       if (s[i] == ';');
00077       else if (s[i] == '0') r << '\0';
00078       else if (s[i] == 't') r << '\t';
00079       else if (s[i] == 'r') r << '\r';
00080       else if (s[i] == 'n') r << '\n';
00081       else if (s[i] == '\\') r << '\\';
00082       else if ((s[i] >= '@') && (s[i] < '`')) r << (s[i] - '@');
00083       else r << s[i];
00084     }
00085     else r << s[i];
00086   return r;
00087 }
00088 
00089 string
00090 tm_reader::read_char () {
00091   while (((pos+1) < N(buf)) && (buf[pos] == '\\') && (buf[pos+1] == '\n')) {
00092     pos += 2;
00093     skip_spaces (buf, pos);
00094   }
00095   if (pos >= N(buf)) return "";
00096   pos++;
00097   return buf (pos-1, pos);
00098 }
00099 
00100 string
00101 tm_reader::read_next () {
00102   int old_pos= pos;
00103   string c= read_char ();
00104   if (c == "") return c;
00105   switch (c[0]) {
00106   case '\t':
00107   case '\n':
00108   case '\r':
00109   case ' ': 
00110     pos--;
00111     if (skip_blank () <= 1) return " ";
00112     else return "\n";
00113   case '<':
00114     {
00115       old_pos= pos;
00116       c= read_char ();
00117       if (c == "") return "";
00118       if (c == "#") return "<#";
00119       if ((c == "\\") || (c == "|") || (c == "/")) return "<" * c;
00120       if (is_iso_alpha (c[0]) || (c == ">")) {
00121        pos= old_pos;
00122        return "<";
00123       }
00124       pos= old_pos;
00125       return "<";
00126       /*
00127       string d= read_char ();
00128       if ((d == "\\") || (d == "|") || (d == "/")) return "<" * c * d;
00129       pos= old_pos;
00130       return "<" * c;
00131       */
00132     }
00133   case '|':
00134   case '>':
00135     return c;
00136   }
00137 
00138   string r;
00139   pos= old_pos;
00140   while (true) {
00141     old_pos= pos;
00142     c= read_char ();
00143     if (c == "") return r;
00144     else if (c == "\\") {
00145       if ((pos < N(buf)) && (buf[pos] == '\\') && backslash_ok) {
00146        r << c << "\\";
00147        pos++;
00148       }
00149       else r << c << read_char ();
00150     }
00151     else if (c == "\t") break;
00152     else if (c == "\r") break;
00153     else if (c == "\n") break;
00154     else if (c == " ") break;
00155     else if (c == "<") break;
00156     else if (c == "|") break;
00157     else if (c == ">") break;
00158     else r << c;
00159   }
00160   pos= old_pos;
00161   return r;
00162 }
00163 
00164 string
00165 tm_reader::read_function_name () {
00166   string name= decode (read_next ());
00167   // cout << "==> " << name << "\n";
00168   while (true) {
00169     last= read_next ();
00170     // cout << "~~> " << last << "\n";
00171     if ((last == "") || (last == "|") || (last == ">")) break;
00172   }
00173   return name;
00174 }
00175 
00176 static void
00177 get_collection (tree& u, tree t) {
00178   if (is_func (t, COLLECTION) ||
00179           is_func (t, DOCUMENT) ||
00180           is_func (t, CONCAT)) {
00181     int i;
00182     for (i=0; i<N(t); i++)
00183       get_collection (u, t[i]);
00184   }
00185   else if (is_compound (t)) u << t;
00186 }
00187 
00188 tree
00189 tm_reader::read_apply (string name, bool skip_flag) {
00190   // cout << "Read apply " << name << INDENT << LF;
00191   tree t (make_tree_label (name));
00192   if (!with_extensions)
00193     t= tree (EXPAND_APPLY, name);
00194   if (codes->contains (name)) {
00195     // cout << "  " << name << " -> " << as_string ((tree_label) codes [name]) << "\n";
00196     t= tree ((tree_label) codes [name]);
00197   }
00198 
00199   bool closed= !skip_flag;
00200   while (pos < N(buf)) {
00201     // cout << "last= " << last << LF;
00202     bool sub_flag= (skip_flag) && ((last == "") || (last[N(last)-1] != '|'));
00203     if (sub_flag) (void) skip_blank ();
00204     t << read (sub_flag);
00205     if ((last == "/>") || (last == "/|")) closed= true;
00206     if (closed && ((last == ">") || (last == "/>"))) break;
00207   }
00208   // cout << "last= " << last << UNINDENT << LF;
00209   // cout << "Done" << LF;
00210 
00211   if (is_func (t, COLLECTION)) {
00212     tree u (COLLECTION);
00213     get_collection (u, t);
00214     return u;
00215   }
00216   return t;
00217 }
00218 
00219 static void
00220 flush (tree& D, tree& C, string& S, bool& spc_flag, bool& ret_flag) {
00221   if (spc_flag) S << " ";
00222   if (S != "") {
00223     if ((N(C) == 0) || (!is_atomic (C[N(C)-1]))) C << S;
00224     else C[N(C)-1]->label << S;
00225     S= "";
00226     spc_flag= false;
00227   }
00228 
00229   if (ret_flag) {
00230     if (N(C) == 0) D << "";
00231     else if (N(C) == 1) D << C[0];
00232     else D << C;
00233     C= tree (CONCAT);
00234     ret_flag= false;
00235   }
00236 }
00237 
00238 tree
00239 tm_reader::read (bool skip_flag) {
00240   tree   D (DOCUMENT);
00241   tree   C (CONCAT);
00242   string S ("");
00243   bool   spc_flag= false;
00244   bool   ret_flag= false;
00245 
00246   while (true) {
00247     last= read_next ();
00248     // cout << "--> " << last << "\n";
00249     if (last == "") break;
00250     if (last == "|") break;
00251     if (last == ">") break;
00252     
00253     if (last[0] == '<') {
00254       if (last[N(last)-1] == '\\') {
00255        flush (D, C, S, spc_flag, ret_flag);
00256        string name= read_function_name ();
00257        if (last == ">") last= "\\>";
00258        else last= "\\|";
00259        C << read_apply (name, true);
00260       }
00261       else if (last[N(last)-1] == '|') {
00262        (void) read_function_name ();
00263        if (last == ">") last= "|>";
00264        else last= "||";
00265        break;
00266       }
00267       else if (last[N(last)-1] == '/') {
00268        (void) read_function_name ();
00269        if (last == ">") last= "/>";
00270        else last= "/|";
00271        break;
00272       }
00273       else if (last[N(last)-1] == '#') {
00274        string r;
00275        while ((buf[pos] != '>') && (pos+2<N(buf))) {
00276          r << ((char) from_hexadecimal (buf (pos, pos+2)));
00277          pos += 2;
00278        }
00279        if (buf[pos] == '>') pos++;
00280        flush (D, C, S, spc_flag, ret_flag);
00281        C << tree (RAW_DATA, r);
00282        last= read_next ();
00283        break;
00284       }
00285       else {
00286        flush (D, C, S, spc_flag, ret_flag);
00287        string name= decode (read_next ());
00288        string sep = ">";
00289        if (name == ">") name= "";
00290        else sep = read_next ();
00291        // cout << "==> " << name << "\n";
00292        // cout << "~~> " << sep << "\n";
00293        if (sep == '|') {
00294          last= "|";
00295          C << read_apply (name, false);
00296        }
00297        else {
00298          tree t (make_tree_label (name));
00299          if (!with_extensions)
00300            t= tree (EXPAND_APPLY, name);
00301          if (codes->contains (name)) {
00302            // cout << name << " -> " << as_string ((tree_label) codes [name]) << "\n";
00303            t= tree ((tree_label) codes [name]);
00304          }
00305          C << t;
00306        }
00307       }
00308     }
00309     else if (last == " ") spc_flag= true;
00310     else if (last == "\n") ret_flag= true;
00311     else {
00312       flush (D, C, S, spc_flag, ret_flag);
00313       // cout << "<<< " << last << "\n";
00314       // cout << ">>> " << decode (last) << "\n";
00315       S << decode (last);
00316       if ((S == "") && (N(C) == 0)) C << "";
00317     }
00318   }
00319 
00320   if (skip_flag) spc_flag= ret_flag= false;
00321   flush (D, C, S, spc_flag, ret_flag);
00322   if (N(C) == 1) D << C[0];
00323   else if (N(C)>1) D << C;
00324   // cout << "*** " << D << "\n";
00325   if (N(D)==0) return "";
00326   if (N(D)==1) {
00327     if (!skip_flag) return D[0];
00328     if (version_inf_eq (version, "0.3.4.10")) return D[0];
00329     if (is_func (D[0], COLLECTION)) return D[0];
00330   }
00331   return D;
00332 }
00333 
00334 tree
00335 texmacs_to_tree (string s) {
00336   tm_reader tmr (s);
00337   return tmr.read (true);
00338 }
00339 
00340 tree
00341 texmacs_to_tree (string s, string version) {
00342   tm_reader tmr (s, version);
00343   return tmr.read (true);
00344 }
00345 
00346 /******************************************************************************
00347 * Conversion of TeXmacs strings to TeXmacs trees
00348 ******************************************************************************/
00349 
00350 static bool
00351 is_apply (tree t, string s, int n) {
00352   return (L(t) == APPLY) && (N(t) == n+1) && (t[0] == s);
00353 }
00354 
00355 static bool
00356 is_expand (tree t, string s, int n) {
00357   return (L(t) == EXPAND) && (N(t) == n+1) && (t[0] == s);
00358 }
00359 
00360 tree
00361 texmacs_document_to_tree (string s) {
00362   tree error (ERROR, "bad format or data");
00363   if (starts (s, "edit") ||
00364       starts (s, "TeXmacs") ||
00365       starts (s, "\\(\\)(TeXmacs"))
00366   {
00367     string version= "0.0.0.0";
00368     tree t= string_to_tree (s, version);
00369     if (is_tuple (t) && (N(t)>0)) t= t (1, N(t));
00370     int n= arity (t);
00371 
00372     tree doc (DOCUMENT);
00373     doc << compound ("TeXmacs", version);
00374     if (n<3) return error;
00375     else if (n<4)
00376       doc << compound ("body", t[2])
00377          << compound ("style", t[0])
00378          << compound ("initial", t[1]);
00379     else if (n<7)
00380       doc << compound ("body", t[0])
00381          << compound ("style", t[1])
00382          << compound ("initial", t[2])
00383          << compound ("references", t[3]);
00384     else
00385       doc << compound ("body", t[0])
00386          << compound ("project", t[1])
00387          << compound ("style", t[2])
00388          << compound ("initial", t[3])
00389          << compound ("final", t[4])
00390          << compound ("references", t[5])
00391          << compound ("auxiliary", t[6]);
00392     return upgrade (doc, version);
00393   }
00394 
00395   if (starts (s, "<TeXmacs|")) {
00396     int i;
00397     for (i=9; i<N(s); i++)
00398       if (s[i] == '>') break;
00399     string version= s (9, i);
00400     tree doc= texmacs_to_tree (s, version);
00401     if (is_compound (doc, "TeXmacs", 1) ||
00402        is_expand (doc, "TeXmacs", 1) ||
00403        is_apply (doc, "TeXmacs", 1))
00404       doc= tree (DOCUMENT, doc);
00405     if (!is_document (doc)) return error;
00406     if (N(doc) == 0 || !is_compound (doc[0], "TeXmacs", 1)) {
00407       tree d (DOCUMENT);
00408       d << compound ("TeXmacs", version);
00409       d << A(doc);
00410       doc= d;
00411     }
00412     return upgrade (doc, version);
00413   }
00414   return error;
00415 }
00416 
00417 /******************************************************************************
00418 * Extracting attributes from a TeXmacs document tree
00419 ******************************************************************************/
00420 
00421 tree
00422 extract (tree doc, string attr) {
00423   int i, n= arity (doc);
00424   for (i=0; i<n; i++)
00425     if (is_compound (doc[i], attr, 1) ||
00426        is_expand (doc[i], attr, 1) ||
00427        is_apply (doc[i], attr, 1))
00428       {
00429        tree r= doc[i][N(doc[i])-1];
00430        if ((attr == "body") && (!is_document (r))) return tree (DOCUMENT, r);
00431        if (attr == "style") {
00432          if (r == "none") return tree (TUPLE);
00433          if (r == "") return tree (TUPLE);
00434          if (r == "style") return tree (TUPLE);
00435          if (is_atomic (r)) return tree (TUPLE, r);
00436          if (!is_func (r, TUPLE)) return tree (TUPLE);
00437        }
00438        return r;
00439       }
00440 
00441   if (attr == "TeXmacs") return "";
00442   if (attr == "body") return tree (DOCUMENT, "");
00443   if (attr == "project") return "";
00444   if (attr == "style") return tree (TUPLE);
00445   if (attr == "initial") return tree (COLLECTION);
00446   if (attr == "final") return tree (COLLECTION);
00447   if (attr == "references") return tree (COLLECTION);
00448   if (attr == "auxiliary") return tree (COLLECTION);
00449   return "";
00450 }
00451 
00452 tree
00453 extract_document (tree doc) {
00454   if (is_func (doc, ERROR)) return doc;
00455   tree body= extract (doc, "body");
00456   tree init= extract (doc, "initial");
00457   if (is_func (init, COLLECTION)) {
00458     tree w (WITH);
00459     int i, n= N(init);
00460     for (i=0; i<n; i++)
00461       if (is_func (init[i], ASSOCIATE, 2)) {
00462        tree l= init[i][0];
00463        tree r= init[i][1];
00464        if ((l == PAGE_MEDIUM) ||
00465            (l == PAGE_PRINTED) ||
00466            (l == PAGE_TYPE) ||
00467            (l == PAGE_ORIENTATION) ||
00468            (l == PAGE_WIDTH_MARGIN) ||
00469            (l == PAGE_SCREEN_MARGIN) ||
00470            (l == PAGE_NR) ||
00471            (l == PAGE_WIDTH) ||
00472            (l == PAGE_HEIGHT) ||
00473            (l == PAGE_ODD) ||
00474            (l == PAGE_EVEN) ||
00475            (l == PAGE_RIGHT) ||
00476            (l == PAGE_ODD_SHIFT) ||
00477            (l == PAGE_EVEN_SHIFT) ||
00478            (l == PAGE_TOP) ||
00479            (l == PAGE_BOT) ||
00480            (l == PAGE_SCREEN_WIDTH) ||
00481            (l == PAGE_SCREEN_HEIGHT) ||
00482            (l == PAGE_SCREEN_LEFT) ||
00483            (l == PAGE_SCREEN_RIGHT) ||
00484            (l == PAGE_SCREEN_TOP) ||
00485            (l == PAGE_SCREEN_BOT) ||
00486            (l == PAGE_SHOW_HF)) continue;
00487        w << l << r;
00488       }
00489     if (N(w)>0) {
00490       w << body;
00491       body= w;
00492     }
00493   }
00494   return body;
00495 }
00496 
00497 tree
00498 change_doc_attr (tree doc, string attr, tree val) {
00499   int i, n= arity (doc);
00500   tree r (doc, n);
00501   bool done= false;
00502   for (i=0; i<n; i++)
00503     if (is_compound (doc[i], attr, 1)) {
00504       r[i]= tree (L(doc[i]), val);
00505       done= true;
00506     }
00507     else r[i]= doc[i];
00508   if (!done) r << compound (attr, val);
00509   return r;
00510 }