Back to index

texmacs  1.0.7.15
tree_correct.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : tree_correct.cpp
00004 * DESCRIPTION: make a tree syntactically match a drd
00005 * COPYRIGHT  : (C) 2005  Joris van der Hoeven
00006 *******************************************************************************
00007 * This software falls under the GNU general public license version 3 or later.
00008 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
00009 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
00010 ******************************************************************************/
00011 
00012 #include "tree_correct.hpp"
00013 #include "tree_analyze.hpp"
00014 #include "scheme.hpp"
00015 #include "packrat.hpp"
00016 
00017 /******************************************************************************
00018 * DRD based correction
00019 ******************************************************************************/
00020 
00021 tree
00022 drd_correct (drd_info drd, tree t) {
00023   if (is_atomic (t)) return t;
00024   else {
00025     int i, n= N(t);
00026     if (drd->contains (as_string (L(t))) &&
00027        !drd->correct_arity (L(t), n))
00028       return "";
00029     tree r (t, n);
00030     for (i=0; i<n; i++)
00031       r[i]= drd_correct (drd, t[i]);
00032     return r;
00033   }
00034 }
00035 
00036 /******************************************************************************
00037 * Correct WITHs or WITH-like macros
00038 ******************************************************************************/
00039 
00040 tree
00041 with_correct (tree t) {
00042   if (is_atomic (t)) return t;
00043   else {
00044     //cout << "Correcting " << t << LF << INDENT;
00045     tree u (t, N(t));
00046     for (int k=0; k<N(t); k++)
00047       u[k]= with_correct (t[k]);
00048     array<tree> a= concat_decompose (u);
00049     int i, n= N(a);
00050     array<tree> r;
00051     for (i=0; i<n; i++) {
00052       if (is_with_like (a[i])) {
00053        array<tree> b= with_decompose (a[i], with_body (a[i]));
00054        int p= N(b), k1, k2;
00055        for (k1=0; k1<p ; k1++)
00056          if (is_with_like (b[k1]) && with_similar_type (a[i], b[k1]));
00057          else break;
00058        for (k2=p; k2>k1; k2--)
00059          if (is_with_like (b[k2-1]) && with_similar_type (a[i], b[k2-1]));
00060          else break;
00061        array<tree> x;
00062        if (0  < k1) x << range (b, 0, k1);
00063        if (k1 < k2) x << with_recompose (a[i], range (b, k1, k2));
00064        if (k2 < p ) x << range (b, k2, p);
00065        if (N(x) == 0) continue;
00066        if (N(r) != 0 &&
00067            is_with_like (r[N(r)-1]) &&
00068            with_same_type (r[N(r)-1], x[0]))
00069          {
00070            array<tree> c= concat_decompose (with_body (r[N(r)-1]));
00071            c << concat_decompose (with_body (x[0]));
00072            r[N(r)-1]= with_recompose (x[0], c);
00073            r << range (x, 1, N(x));
00074          }
00075        else r << x;
00076       }
00077       else r << a[i];
00078     }
00079     //cout << UNINDENT << "Corrected " << t << " -> "
00080     //<< concat_recompose (r) << LF;
00081     return concat_recompose (r);
00082   }
00083 }
00084 
00085 static tree
00086 superfluous_with_correct (tree t, tree env) {
00087   if (is_atomic (t)) return t;
00088   else {
00089     //cout << "Superfluous correcting " << t << ", " << env << LF;
00090     if (is_compound (t, "body", 1))
00091       return compound ("body", superfluous_with_correct (t[0], env));
00092     if (is_func (t, WITH) && ((N(t) & 1) == 0))
00093       t= t * tree (WITH, "");
00094     tree r (t, N(t));
00095     for (int i=0; i<N(t); i++)
00096       r[i]= superfluous_with_correct
00097              (t[i], the_drd->get_env_child (t, i, env));
00098     if (is_compound (r, "math", 1) && r[0] == "") return "";
00099     else if (is_compound (r, "text", 1) && r[0] == "") return "";
00100     else if (is_compound (r, "math", 1) && drd_env_read (env, MODE) == "math")
00101       return r[0];
00102     else if (is_compound (r, "text", 1) && drd_env_read (env, MODE) == "text")
00103       return r[0];
00104     else if (is_func (r, WITH)) {
00105       for (int i=0; i+1<N(r); i+=2)
00106        if (!is_atomic (r[i])) return r;
00107        else if (drd_env_read (env, r[i]->label) != r[i+1]) return r;
00108       return r[N(r)-1];
00109     }
00110     else if (is_func (r, CONCAT)) {
00111       array<tree> a= concat_decompose (r);
00112       return concat_recompose (a);
00113     }
00114     return r;
00115   }
00116 }
00117 
00118 tree
00119 superfluous_with_correct (tree t) {
00120   with_drd drd (get_document_drd (t));
00121   return superfluous_with_correct (t, tree (WITH, MODE, "text"));
00122 }
00123 
00124 /******************************************************************************
00125 * Replace symbols by appropriate homoglyphs
00126 ******************************************************************************/
00127 
00128 static array<tree>
00129 homoglyph_correct (array<tree> a) {
00130   array<int>  tp= symbol_types (a);
00131   array<tree> r;
00132   //cout << a << ", " << tp << "\n";
00133   for (int i=0; i<N(a); i++)
00134     if (a[i] == "<minus>") r << tree ("-");
00135     else if (a[i] == "\\" || a[i] == "<backslash>") {
00136       int j1, j2;
00137       for (j1= i-1; j1>=0; j1--)
00138        if (tp[j1] != SYMBOL_SKIP && tp[j1] != SYMBOL_SCRIPT) break;
00139       for (j2= i+1; j2<N(a); j2++)
00140        if (tp[j2] != SYMBOL_SKIP && tp[j2] != SYMBOL_SCRIPT) break;
00141       if (j1 < 0 || j2 >= N(a));
00142       else if ((a[i] == "\\" ||
00143               a[i] == "<backslash>") &&
00144               ((tp[j1] == SYMBOL_BASIC) ||
00145               (tp[j1] == SYMBOL_POSTFIX)) &&
00146               ((tp[j2] == SYMBOL_BASIC) ||
00147               (tp[j2] == SYMBOL_PREFIX)))
00148        r << tree ("<setminus>");
00149       else r << a[i];
00150     }
00151     else if (is_func (a[i], NEG, 1) && is_atomic (a[i][0])) {
00152       string s= a[i][0]->label;
00153       if (s == "=") r << tree ("<neq>");
00154       else if (s == "<less>") r << tree ("<nless>");
00155       else if (s == "<gtr>") r << tree ("<ngtr>");
00156       else if (s == "<leq>") r << tree ("<nleq>");
00157       else if (s == "<geq>") r << tree ("<ngeq>");
00158       else if (s == "<leqslant>") r << tree ("<nleqslant>");
00159       else if (s == "<geqslant>") r << tree ("<ngeqslant>");
00160       else if (s == "<prec>") r << tree ("<nprec>");
00161       else if (s == "<succ>") r << tree ("<nsucc>");
00162       else if (s == "<preceq>") r << tree ("<npreceq>");
00163       else if (s == "<succeq>") r << tree ("<nsucceq>");
00164       else if (s == "<preccurlyeq>") r << tree ("<npreccurlyeq>");
00165       else if (s == "<succcurlyeq>") r << tree ("<nsucccurlyeq>");
00166       else if (s == "<rightarrow>") r << tree ("<nrightarrow>");
00167       else if (s == "<Rightarrow>") r << tree ("<nRightarrow>");
00168       else if (s == "<leftarrow>") r << tree ("<nleftarrow>");
00169       else if (s == "<Leftarrow>") r << tree ("<nLeftarrow>");
00170       else if (s == "<leftrightarrow>") r << tree ("<nleftrightarrow>");
00171       else if (s == "<Leftrightarrow>") r << tree ("<nLeftrightarrow>");
00172       else if (s == "<equiv>") r << tree ("<nequiv>");
00173       else if (s == "<sim>") r << tree ("<nsim>");
00174       else if (s == "<simeq>") r << tree ("<nsimeq>");
00175       else if (s == "<approx>") r << tree ("<napprox>");
00176       else if (s == "<cong>") r << tree ("<ncong>");
00177       else if (s == "<asymp>") r << tree ("<nasymp>");
00178       else if (s == "<in>") r << tree ("<nin>");
00179       else if (s == "<ni>") r << tree ("<nni>");
00180       else if (s == "<subset>") r << tree ("<nsubset>");
00181       else if (s == "<supset>") r << tree ("<nsupset>");
00182       else if (s == "<subseteq>") r << tree ("<nsubseteq>");
00183       else if (s == "<supseteq>") r << tree ("<nsupseteq>");
00184       else if (s == "<sqsubset>") r << tree ("<nsqsubset>");
00185       else if (s == "<sqsupset>") r << tree ("<nsqsupset>");
00186       else if (s == "<sqsubseteq>") r << tree ("<nsqsubseteq>");
00187       else if (s == "<sqsupseteq>") r << tree ("<nsqsupseteq>");
00188       else if (s == "<leadsto>") r << tree ("<nleadsto>");
00189       else r << a[i];
00190     }
00191     else if (a[i] == ":" && i+1 < N(a) && a[i+1] == "=") {
00192       r << tree ("<assign>");
00193       i++;
00194     }
00195     else r << a[i];
00196   return r;
00197 }
00198 
00199 static string
00200 get_submode (tree t, int i, string mode) {
00201   if (is_func (t, WITH, 3) && t[0] == MATH_FONT_FAMILY && i == 2) return "text";
00202   tree tmode= the_drd->get_env_child (t, i, MODE, mode);
00203   return (is_atomic (tmode)? tmode->label: string ("text"));
00204 }
00205 
00206 static tree
00207 homoglyph_correct (tree t, string mode) {
00208   //cout << "Correct " << t << ", " << mode << "\n";
00209   tree r= t;
00210   if (is_compound (t)) {
00211     int i, n= N(t);
00212     r= tree (t, n);
00213     for (i=0; i<n; i++) {
00214       string smode= get_submode (t, i, mode);
00215       if (is_correctable_child (t, i))
00216        r[i]= homoglyph_correct (t[i], smode);
00217       else r[i]= t[i];
00218     }
00219   }
00220 
00221   if (mode == "math") {
00222     array<tree> a= concat_tokenize (r);
00223     a= homoglyph_correct (a);
00224     tree ret= concat_recompose (a);
00225     //if (ret != r) cout << "< " << r << " >" << LF
00226     //<< "> " << ret << " <" << LF;
00227     return ret;
00228   }
00229   else return r;
00230 }
00231 
00232 tree
00233 homoglyph_correct (tree t) {
00234   with_drd drd (get_document_drd (t));
00235   return homoglyph_correct (t, "text");
00236 }
00237 
00238 /******************************************************************************
00239 * Remove incorrect spaces and multiplications
00240 ******************************************************************************/
00241 
00242 static array<tree>
00243 superfluous_invisible_correct (array<tree> a) {
00244   array<int>  tp= symbol_types (a);
00245   array<tree> r;
00246   //cout << a << ", " << tp << "\n";
00247   for (int i=0; i<N(a); i++)
00248     if (a[i] == " " || a[i] == "*") {
00249       int j1, j2;
00250       for (j1= i-1; j1>=0; j1--)
00251        if (tp[j1] != SYMBOL_SKIP && tp[j1] != SYMBOL_SCRIPT) break;
00252        else if (a[j1] == " ") break;
00253       for (j2= i+1; j2<N(a); j2++)
00254        if (tp[j2] != SYMBOL_SKIP && tp[j2] != SYMBOL_SCRIPT)
00255          if (a[j2] != " " && a[j2] != "*") break;
00256       //cout << "  " << i << ": " << j1 << ", " << j2
00257       //<< "; " << tp[j1] << ", " << tp[j2] << "\n";
00258       if (j1 < 0 || j2 >= N(a));
00259       else if (a[j1] == " " || a[j1] == "*");
00260       else if (tp[j1] == SYMBOL_PREFIX ||
00261               tp[j1] == SYMBOL_INFIX ||
00262               tp[j1] == SYMBOL_SEPARATOR ||
00263                tp[j1] == SYMBOL_PROBABLE_MIDDLE);
00264       else if (tp[j2] == SYMBOL_POSTFIX ||
00265               tp[j2] == SYMBOL_INFIX ||
00266               tp[j2] == SYMBOL_SEPARATOR ||
00267                tp[j2] == SYMBOL_PROBABLE_MIDDLE);
00268       else r << a[i];
00269     }
00270     else if (is_func (a[i], SQRT, 2) && a[i][1] == "")
00271       r << tree (SQRT, a[i][0]);
00272     else if (is_script (a[i]) && a[i][0] == "")
00273       r << tree (L(a[i]), "<nosymbol>");
00274     else r << a[i];
00275   return r;
00276 }
00277 
00278 static tree
00279 superfluous_invisible_correct (tree t, string mode) {
00280   //cout << "Correct " << t << ", " << mode << "\n";
00281   tree r= t;
00282   if (is_compound (t)) {
00283     int i, n= N(t);
00284     r= tree (t, n);
00285     for (i=0; i<n; i++) {
00286       string smode= get_submode (t, i, mode);
00287       //cout << "  " << i << ": " << is_correctable_child (t, i)
00288       //<< ", " << smode << "\n";
00289       if (is_func (t, WITH) && i != N(t)-1)
00290        r[i]= t[i];
00291       else if (is_correctable_child (t, i))
00292        r[i]= superfluous_invisible_correct (t[i], smode);
00293       else r[i]= t[i];
00294     }
00295   }
00296   
00297   if (is_func (r, CONCAT)) {
00298     bool ok= true;
00299     int i, found= -1;
00300     for (i=0; i<N(r); i++)
00301       if (is_compound (r[i], "hide-preamble") ||
00302          is_compound (r[i], "show-preamble"))
00303        {
00304          ok= (found == -1);
00305          found= i;
00306        }
00307       else if (!is_atomic (r[i])) ok= false;
00308       else {
00309        string s= r[i]->label;
00310        for (int j=0; j<N(s); j++)
00311          if (s[j] != ' ') ok= false;
00312       }
00313     if (ok) r= r[found];
00314   }
00315 
00316   if (is_func (r, INACTIVE, 1) && is_func (r[0], RIGID))
00317     return r[0];
00318   else if (mode == "math") {
00319     array<tree> a= concat_tokenize (r);
00320     a= superfluous_invisible_correct (a);
00321     tree ret= concat_recompose (a);
00322     //if (ret != r) cout << "< " << r << " >" << LF
00323     //<< "> " << ret << " <" << LF;
00324     return ret;
00325   }
00326   else return r;
00327 }
00328 
00329 tree
00330 superfluous_invisible_correct (tree t) {
00331   with_drd drd (get_document_drd (t));
00332   return superfluous_invisible_correct (t, "text");
00333 }
00334 
00335 /******************************************************************************
00336 * Insert missing multiplications or function applications
00337 ******************************************************************************/
00338 
// Status codes returned by invisible_corrector::get_status for one side
// of a pair of adjacent tokens.  invisible_corrector::correct combines
// the codes of both sides in order to decide whether a multiplication
// "*", a space " " or nothing has to be inserted between the tokens.

// nothing may be inserted next to this token
#define SURE_NOTHING     0
// a multiplication is expected
#define SURE_TIMES       1
// a space is expected
#define SURE_SPACE       2
// a multiplication is the more likely choice
#define PROBABLE_TIMES   3
// a space is the more likely choice
#define PROBABLE_SPACE   4
// no preference; the other token decides
#define BOTH_WAYS        5
00345 
00346 struct invisible_corrector {
00347   int force;
00348   hashmap<string,int> times_before;
00349   hashmap<string,int> times_after;
00350   hashmap<string,int> space_before;
00351   hashmap<string,int> space_after;
00352 
00353 protected:
00354   bool is_letter_like (string s);
00355   bool contains_infix (tree t);
00356   bool contains_plus_like (tree t);
00357   void count_invisible (array<tree> a);
00358   void count_invisible (tree t, string mode);
00359   int  get_status (tree t, bool left, bool script_flag);
00360   array<tree> correct (array<tree> a);
00361 
00362 public:
00363   inline invisible_corrector (tree t, int force2):
00364     force (force2), times_before (0), times_after (0), space_after (0) {
00365       count_invisible (t, "text"); }
00366   tree correct (tree t, string mode);
00367 };
00368 
00369 bool
00370 invisible_corrector::is_letter_like (string s) {
00371   static language lan= math_language ("std-math");
00372   if (s != "" && is_iso_alpha (s)) return true;
00373   return lan->get_group (s) == "Letter-symbol";
00374 }
00375 
00376 bool
00377 invisible_corrector::contains_infix (tree t) {
00378   array<int> tp= symbol_types (concat_tokenize (t));
00379   for (int i=0; i<N(tp); i++)
00380     if (tp[i] == SYMBOL_INFIX)
00381       return true;
00382   return false;
00383 }
00384 
00385 bool
00386 invisible_corrector::contains_plus_like (tree t) {
00387   array<tree> a= concat_tokenize (t);
00388   for (int i=1; i<N(a)-1; i++)
00389     if (a[i] == "+" || a[i] == "-")
00390       return true;
00391   return false;
00392 }
00393 
00394 void
00395 invisible_corrector::count_invisible (array<tree> a) {
00396   array<int>  tp= symbol_types (a);
00397   for (int i=0; i<N(a); i++)
00398     if (is_atomic (a[i]) && is_letter_like (a[i]->label)) {
00399       int j1, j2;
00400       for (j1= i-1; j1>=0; j1--)
00401        if (tp[j1] != SYMBOL_SKIP && tp[j1] != SYMBOL_SCRIPT) break;
00402        else if (a[j1] == " ") break;
00403       for (j2= i+1; j2<N(a); j2++)
00404        if (tp[j2] != SYMBOL_SKIP && tp[j2] != SYMBOL_SCRIPT) break;
00405        else if (a[j2] == " ") break;
00406       string s= a[i]->label;
00407       if (j1 >= 0) {
00408        if (a[j1] == "*")
00409          times_before (s)= times_before[s] + 1;
00410        if (a[j1] == " ")
00411          space_before (s)= space_before[s] + 1;
00412       }
00413       if (j2 < N(a)) {
00414        if (a[j2] == "*")
00415          times_after (s)= times_after[s] + 1;
00416        if (a[j2] == " ")
00417          space_after (s)= space_after[s] + 1;
00418         // NOTE: this heuristic might not be a good idea,
00419         // because it inhibits the correction of QR -> Q*R,
00420         // if Q is a polynomial which is applied somewhere Q(1).
00421         // We might introduce a table 'apply_after'.
00422        //if (is_around (a[j2]) && a[j2][0] == "(" &&
00424         //space_after (s)= space_after[s] + 1;
00425       }
00426     }
00427 }
00428 
00429 void
00430 invisible_corrector::count_invisible (tree t, string mode) {
00431   if (is_compound (t)) {
00432     int i, n= N(t);
00433     for (i=0; i<n; i++) {
00434       string smode= get_submode (t, i, mode);
00435       if (is_func (t, WITH) && i != N(t)-1);
00436       else if (is_correctable_child (t, i))
00437        count_invisible (t[i], smode);
00438     }
00439   }
00440   if (mode == "math")
00441     count_invisible (concat_tokenize (t));
00442 }
00443 
00444 int
00445 invisible_corrector::get_status (tree t, bool left, bool script_flag) {
00446   if (is_atomic (t)) {
00447     static language lan= math_language ("std-math");
00448     string s= t->label;
00449     string g= lan->get_group (t->label);
00450     if (is_numeric (s))
00451       return (left? SURE_TIMES: PROBABLE_TIMES);
00452     else if (starts (g, "Unary-operator-textual"))
00453       return (left? SURE_SPACE: BOTH_WAYS);
00454     else if (starts (g, "Binary-operator"))
00455       return SURE_SPACE;
00456     else if (starts (g, "N-ary-operator"))
00457       return (left? SURE_SPACE: BOTH_WAYS);
00458     else if (is_letter_like (s)) {
00459       if (left) {
00460        if (times_after[s] > 0 && space_after[s] == 0)
00461          return SURE_TIMES;
00462        else if (space_after[s] > 0 && times_after[s] == 0)
00463          return SURE_SPACE;
00464        else if (times_after[s] > space_after[s])
00465          return PROBABLE_TIMES;
00466        else if (space_after[s] > times_after[s])
00467          return PROBABLE_SPACE;
00468        else if (N(s)>1 && is_iso_alpha (s))
00469          return PROBABLE_SPACE;
00470         else if (script_flag)
00471           return PROBABLE_TIMES;
00472        else return BOTH_WAYS;
00473       }
00474       else {
00475        if (times_before[s] > space_before[s])
00476          return PROBABLE_TIMES;
00477        else if (times_after[s] > 0 && space_after[s] == 0)
00478          return PROBABLE_TIMES;
00479         else if (script_flag && (N(s) == 1 || !is_iso_alpha (s)))
00480           return PROBABLE_TIMES;
00481        else return BOTH_WAYS;
00482       }
00483     }
00484     else if (s == "<cdots>" || s == "<ldots>")
00485       return PROBABLE_TIMES;
00486     else return ((force > 0)? BOTH_WAYS: SURE_NOTHING);
00487   }
00488   else {
00489     if (is_around (t)) {
00490       if (left && contains_plus_like (t[1]))
00491        return ((force > 0)? SURE_TIMES: PROBABLE_TIMES);
00492       else if (contains_plus_like (t[1]))
00493        return ((force > 0)? PROBABLE_TIMES: BOTH_WAYS);
00494       else if (!contains_infix (t[1]))
00495        return (left? BOTH_WAYS: SURE_SPACE);
00496       else return BOTH_WAYS;
00497     }
00498     else if (is_func (t, FRAC) ||
00499             is_func (t, SQRT))
00500       return (left? SURE_TIMES: BOTH_WAYS);
00501     else if (!left && is_func (t, BIG_AROUND, 2) &&
00502              (t[0] == "<sum>" || t[0] == "<amalg>" ||
00503               t[0] == "<oplus>" || t[0] == "<uplus>" ||
00504               t[0] == "<int>" || t[0] == "<oint>" ||
00505               t[0] == "<intlim>" || t[0] == "<ointlim>" ||
00506               t[0] == "<prod>" || t[0] == "<odot>" || t[0] == "<otimes>"))
00507       return PROBABLE_TIMES;
00508     else if (is_func (t, WIDE, 2))
00509       return get_status (t[0], left, script_flag);
00510     else if (is_func (t, WITH))
00511       return get_status (t[N(t)-1], left, script_flag);
00512     else if (N(t) == 0 && L(t) >= START_EXTENSIONS) {
00513       tree def= the_drd->get_syntax (L(t));
00514       if (is_func (def, MACRO, 1))
00515         return get_status (def[0], left, script_flag);
00516       else return SURE_NOTHING;
00517     }
00518     else return SURE_NOTHING;
00519   }
00520 }
00521 
00522 static bool
00523 admits_script (array<int> tp, int i) {
00524   i++;
00525   while (i<N(tp))
00526     if (tp[i] == SYMBOL_SCRIPT) return true;
00527     else if (tp[i] == SYMBOL_SKIP) i++;
00528     else return false;
00529   return false;
00530 }
00531 
00532 array<tree>
00533 invisible_corrector::correct (array<tree> a) {
00534   //cout << "Correct " << a << "\n";
00535   array<tree> r;
00536   array<int> tp= symbol_types (a);
00537   for (int i=0; i<N(a); i++) {
00538     r << a[i];
00539     if (a[i] != " " && tp[i] == SYMBOL_BASIC) {
00540       int j;
00541       for (j= i+1; j<N(a); j++)
00542        if (tp[j] != SYMBOL_SKIP && tp[j] != SYMBOL_SCRIPT) break;
00543        else if (a[j] == " ") break;
00544       if (j >= N(a) || a[j] == " " || tp[j] != SYMBOL_BASIC)
00545        continue;
00546       
00547       string ins= "";
00548       int sti= get_status (a[i], true, admits_script (tp, i));
00549       int stj= get_status (a[j], false, admits_script (tp, j));
00550       //cout << "Pair (" << a[i] << ", " << a[j] << ")"
00551       //<< " -> (" << sti << ", " << stj << ")" << LF;
00552       if (sti == SURE_NOTHING || stj == SURE_NOTHING)
00553        ins= "";
00554       else if (sti == SURE_TIMES && stj != SURE_SPACE)
00555        ins= "*";
00556       else if (sti == SURE_SPACE && stj != SURE_TIMES)
00557        ins= " ";
00558       else if (sti == PROBABLE_TIMES && stj == PROBABLE_TIMES)
00559        ins= "*";
00560       else if (sti == PROBABLE_SPACE && stj == PROBABLE_SPACE)
00561        ins= " ";
00562       else if (sti == PROBABLE_TIMES && stj == BOTH_WAYS)
00563        ins= "*";
00564       else if (sti == PROBABLE_SPACE && stj == BOTH_WAYS)
00565        ins= " ";
00566       else if (sti == BOTH_WAYS && stj == PROBABLE_TIMES)
00567        ins= "*";
00568       else if (sti == BOTH_WAYS && stj == PROBABLE_SPACE)
00569        ins= " ";
00570       else if (sti == BOTH_WAYS && stj == BOTH_WAYS && force == 1 &&
00571               (is_atomic (a[i]) || is_around (a[i])) &&
00572               (is_atomic (a[j]) || is_around (a[j])))
00573        ins= "*";
00574 
00575       if (is_around (a[j]))
00576        if (ins == " " || (ins == "*" && force == -1))
00577          ins= "";
00578       if (a[j] == ".") ins= "";
00579       while (i+1 < N(a) && (is_func (a[i+1], RSUB, 1) ||
00580                          is_func (a[i+1], RSUP, 1) ||
00581                          is_func (a[i+1], RPRIME, 1))) {
00582        i++;
00583        r << a[i];
00584       }
00585       if (ins != "") r << tree (ins);
00586     }
00587   }
00588   return r;
00589 }
00590 
00591 tree
00592 invisible_corrector::correct (tree t, string mode) {
00593   //cout << "Correct " << t << ", " << mode << "\n";
00594   tree r= t;
00595   if (is_compound (t)) {
00596     int i, n= N(t);
00597     r= tree (t, n);
00598     for (i=0; i<n; i++) {
00599       string smode= get_submode (t, i, mode);
00600       if (is_func (t, WITH) && i != N(t)-1)
00601        r[i]= t[i];
00602       else if (is_correctable_child (t, i))
00603        r[i]= correct (t[i], smode);
00604       else r[i]= t[i];
00605     }
00606   }
00607   
00608   if (mode == "math") {
00609     array<tree> a= concat_tokenize (r);
00610     a= correct (a);
00611     tree ret= concat_recompose (a);
00612     //if (ret != r)
00613     //  cout << "<< " << r << " >>" << LF
00614     //       << ">> " << ret << " <<" << LF;
00615     return ret;
00616   }
00617   else return r;
00618 }
00619 
00620 tree
00621 missing_invisible_correct (tree t, int force) {
00622   // force = -1, only correct when sure, and when old markup is incorrect
00623   // force = 0 , only correct when pretty sure
00624   // force = 1 , correct whenever reasonable (used for LaTeX import)
00625   with_drd drd (get_document_drd (t));
00626   invisible_corrector corrector (t, force);
00627   //cout << "Times before " << corrector.times_before << "\n";
00628   //cout << "Space before " << corrector.space_before << "\n";
00629   //cout << "Times after  " << corrector.times_after  << "\n";
00630   //cout << "Space after  " << corrector.space_after  << "\n";
00631   return corrector.correct (t, "text");
00632 }
00633 
00634 tree
00635 missing_invisible_correct_twice (tree t, int force= -1) {
00636   tree u= missing_invisible_correct (t, force);
00637   if (u == t) return t;
00638   return missing_invisible_correct (u, force);
00639 }
00640 
00641 /******************************************************************************
00642 * Miscellaneous corrections
00643 ******************************************************************************/
00644 
00645 tree
00646 misc_math_correct (tree t) {
00647   if (is_atomic (t)) return t;
00648   else if (is_compound (t, "math", 1) && is_func (t[0], RSUB, 1))
00649     return tree (RSUB, compound ("math", misc_math_correct (t[0][0])));
00650   else if (is_compound (t, "math", 1) && is_func (t[0], RSUP, 1))
00651     return tree (RSUP, compound ("math", misc_math_correct (t[0][0])));
00652   else if (is_func (t, RSUB, 1) && is_func (t[0], RSUB, 1))
00653     return misc_math_correct (t[0]);
00654   else if (is_func (t, RSUB, 1) && is_func (t[0], RSUP, 1))
00655     return misc_math_correct (tree (RSUB, t[0][0]));
00656   else if (is_func (t, RSUP, 1) && is_func (t[0], RSUB, 1))
00657     return misc_math_correct (tree (RSUP, t[0][0]));
00658   else if (is_func (t, RSUP, 1) && is_func (t[0], RSUP, 1))
00659     return misc_math_correct (t[0]);
00660   else if (is_func (t, RSUP, 1) && is_func (t[0], RPRIME, 1))
00661     return misc_math_correct (t[0]);
00662   else if (is_script (t) && is_compound (t[0], "text", 1) &&
00663            is_atomic (t[0][0]) && is_alpha (t[0][0]->label))
00664     {
00665       if (N(t[0][0]->label) != 1) return tree (L(t), t[0][0]);
00666       else return tree (L(t), tree (WITH, "math-font-family", "trm",
00667                                     misc_math_correct (t[0])));
00668     }
00669   else if (is_compound (t, "math", 1)) {
00670     tree arg = misc_math_correct (t[0]);
00671     tree last= arg;
00672     if (is_concat (last) && N(last) > 0) last= last[N(last)-1];
00673     if (is_atomic (last) && N(last->label) > 0 &&
00674         is_punctuation (last->label [N(last->label)-1]))
00675       {
00676         string s= last->label;
00677         int i= N(s);
00678         while (i>0 && is_punctuation (s[i-1])) i--;
00679         if (i == N(s)) return compound ("math", arg);
00680         string tail= s (i, N(s));
00681         s= s (0, i);
00682         if (last == arg) {
00683           if (N(s) == 0) return tail;
00684           else return concat (compound ("math", s), tail);
00685         }
00686         else {
00687           tree cc= arg (0, N(arg) - 1);
00688           if (N(s) != 0) cc << tree (s);
00689           if (N(cc) == 1) cc= cc[0];
00690           return concat (compound ("math", cc), tail);
00691         }        
00692       }
00693     else return compound ("math", arg);
00694   }
00695   else {
00696     int i, n= N(t);
00697     tree r (t, n);
00698     for (i=0; i<n; i++)
00699       r[i]= misc_math_correct (t[i]);
00700     if (is_concat (r))
00701       r= concat_recompose (concat_decompose (r));
00702     return r;
00703   }
00704 }
00705 
00706 /******************************************************************************
00707 * Count errors
00708 ******************************************************************************/
00709 
00710 static int
00711 count_math_formula_errors (tree t, int mode) {
00712   if (mode == 1) return 1;
00713   if (packrat_correct ("std-math", "Main", t)) return 0;
00714   else {
00715     if (mode == 2) cout << "  ERROR> " << t << "\n";
00716     return 1;
00717   }
00718 }
00719 
00720 static int
00721 count_math_table_errors (tree t, int mode) {
00722   if (is_atomic (t)) return 0;
00723   else if (is_func (t, CELL, 1)) {
00724     if (t[0] == "" || t[0] == tree (DOCUMENT, "")) return 0;
00725     if (mode == 1) return 1;
00726     if (packrat_correct ("std-math", "Cell", t[0])) return 0;
00727     else {
00728       if (mode == 2) cout << "  ERROR> " << t << "\n";
00729       return 1;
00730     }
00731   }
00732   else {
00733     int sum= 0;
00734     for (int i=0; i<N(t); i++)
00735       sum += count_math_table_errors (t[i], mode);
00736     return sum;
00737   }
00738 }
00739 
00740 int
00741 count_math_errors (tree t, int mode) {
00742   if (is_atomic (t)) return 0;
00743   else {
00744     int sum= 0;
00745     for (int i=0; i<N(t); i++) {
00746       tree cmode= the_drd->get_env_child (t, i, MODE, "text");
00747       if (cmode != "math") sum += count_math_errors (t[i], mode);
00748       else {
00749         tree u= t[i];
00750         while (is_func (u, DOCUMENT, 1) ||
00751                is_func (u, TFORMAT) ||
00752                is_func (u, WITH))
00753           u= u[N(u)-1];
00754         if (is_func (u, TABLE)) sum += count_math_table_errors (u, mode);
00755         else sum += count_math_formula_errors (u, mode);
00756       }
00757     }
00758     return sum;
00759   }
00760 }
00761 
00762 /******************************************************************************
00763 * Print mathematical status
00764 ******************************************************************************/
00765 
// Global statistics which are accumulated by math_status_cumul.

// Number of formulas seen, and number of incorrect formulas before and
// after the standard sequence of corrections
static int count_formula= 0;
static int count_initial_errors= 0;
static int count_final_errors= 0;

// Number of errors removed by each individual correction pass
static int corrected_with= 0;
static int corrected_superfluous_with= 0;
static int corrected_brackets= 0;
static int corrected_move_brackets= 0;
static int corrected_misc= 0;
static int corrected_superfluous_invisible= 0;
static int corrected_homoglyph= 0;
static int corrected_missing_invisible= 0;
static int corrected_zealous_invisible= 0;
00779 
00780 void
00781 math_status_cumul_sub (tree t, int& cumul, int& errors) {
00782   int new_errors= count_math_errors (t);
00783   cumul += (errors - new_errors);
00784   errors= new_errors;
00785 }
00786 
00787 void
00788 math_status_cumul (tree t) {
00789   with_drd drd (get_document_drd (t));
00790   if (is_func (t, DOCUMENT))
00791     for (int i=0; i<N(t); i++)
00792       if (is_compound (t[i], "body", 1)) {
00793         t= t[i][0];
00794         break;
00795       }
00796 
00797   int errors= count_math_errors (t);
00798   count_formula += count_math_errors (t, 1);
00799   count_initial_errors += errors;
00800   t= with_correct (t);
00801   math_status_cumul_sub (t, corrected_with, errors);
00802   t= superfluous_with_correct (t);
00803   math_status_cumul_sub (t, corrected_superfluous_with, errors);
00804   t= upgrade_brackets (t);
00805   math_status_cumul_sub (t, corrected_brackets, errors);
00806   t= move_brackets (t);
00807   math_status_cumul_sub (t, corrected_move_brackets, errors);
00808   t= misc_math_correct (t);
00809   math_status_cumul_sub (t, corrected_misc, errors);
00810   t= superfluous_invisible_correct (t);
00811   math_status_cumul_sub (t, corrected_superfluous_invisible, errors);
00812   t= homoglyph_correct (t);
00813   math_status_cumul_sub (t, corrected_homoglyph, errors);
00814   t= superfluous_invisible_correct (t);
00815   math_status_cumul_sub (t, corrected_superfluous_invisible, errors);
00816   t= missing_invisible_correct (t);
00817   math_status_cumul_sub (t, corrected_missing_invisible, errors);
00818   count_final_errors += errors;
00819   //cout << "Errors= " << errors << "\n";
00820   //(void) count_math_errors (t, 2);
00821   t= missing_invisible_correct (t, 1);
00822   math_status_cumul_sub (t, corrected_zealous_invisible, errors);
00823 }
00824 
00825 void
00826 math_status_reset () {
00827   count_formula= 0;
00828   count_initial_errors= 0;
00829   count_final_errors= 0;
00830   corrected_with= 0;
00831   corrected_superfluous_with= 0;
00832   corrected_brackets= 0;
00833   corrected_move_brackets= 0;
00834   corrected_misc= 0;
00835   corrected_superfluous_invisible= 0;
00836   corrected_homoglyph= 0;
00837   corrected_missing_invisible= 0;
00838 }
00839 
00840 void
00841 math_status_print () {
00842   cout << "Formulas       : " << count_formula << "\n";
00843   cout << "Initial errors : " << count_initial_errors << "\n";
00844   cout << "Final errors   : " << count_final_errors << "\n";
00845   cout << "\n";
00846   cout << "With corrected                  : "
00847        << corrected_with << "\n";
00848   cout << "Superfluous with corrected      : "
00849        << corrected_superfluous_with << "\n";
00850   cout << "Upgraded brackets               : "
00851        << corrected_brackets << "\n";
00852   cout << "Moved brackets                  : "
00853        << corrected_move_brackets << "\n";
00854   cout << "Miscellaneous corrected         : "
00855        << corrected_misc << "\n";
00856   cout << "Superfluous invisible corrected : "
00857        << corrected_superfluous_invisible << "\n";
00858   cout << "Homoglyphs corrected            : "
00859        << corrected_homoglyph << "\n";
00860   cout << "Missing invisible corrected     : "
00861        << corrected_missing_invisible << "\n";
00862   cout << "Zealous invisible corrected     : "
00863        << corrected_zealous_invisible << "\n";
00864   cout << "\n";
00865 }
00866 
00867 /******************************************************************************
00868 * Master routines
00869 ******************************************************************************/
00870 
00871 bool
00872 enabled_preference (string s) {
00873   return call ("get-preference", s) == object ("on");
00874 }
00875 
00876 tree
00877 latex_correct (tree t) {
00878   // NOTE: matching brackets corrected in upgrade_tex
00879   t= misc_math_correct (t);
00880   //if (enabled_preference ("remove superfluous invisible"))
00881   t= superfluous_invisible_correct (t);
00882   //if (enabled_preference ("homoglyph correct"))
00883   t= homoglyph_correct (t);
00884   //if (enabled_preference ("remove superfluous invisible"))
00885   t= superfluous_invisible_correct (t);
00886   //if (enabled_preference ("insert missing invisible"))
00887   t= missing_invisible_correct_twice (t);
00888   //if (enabled_preference ("insert missing invisible"))
00889   t= missing_invisible_correct (t, 1);
00890   t= downgrade_big (t);
00891   return t;
00892 }
00893 
00894 tree
00895 automatic_correct (tree t, string version) {
00896   if (version_inf_eq (version, "1.0.7.9")) {
00897     t= misc_math_correct (t);
00898     if (enabled_preference ("remove superfluous invisible"))
00899       t= superfluous_invisible_correct (t);
00900     if (enabled_preference ("homoglyph correct"))
00901       t= homoglyph_correct (t);
00902     if (enabled_preference ("remove superfluous invisible"))
00903       t= superfluous_invisible_correct (t);
00904     if (enabled_preference ("insert missing invisible"))
00905       t= missing_invisible_correct_twice (t);
00906     if (enabled_preference ("zealous invisible correct"))
00907       t= missing_invisible_correct (t, 1);
00908   }
00909   t= downgrade_big (t);
00910   return t;
00911 }
00912 
00913 tree
00914 manual_correct (tree t) {
00915   t= with_correct (t);
00916   t= superfluous_with_correct (t);
00917   t= upgrade_brackets (t);
00918   t= misc_math_correct (t);
00919   if (enabled_preference ("manual remove superfluous invisible"))
00920     t= superfluous_invisible_correct (t);
00921   if (enabled_preference ("manual homoglyph correct"))
00922     t= homoglyph_correct (t);
00923   if (enabled_preference ("manual remove superfluous invisible"))
00924     t= superfluous_invisible_correct (t);
00925   if (enabled_preference ("manual insert missing invisible"))
00926     t= missing_invisible_correct_twice (t);
00927   if (enabled_preference ("manual zealous invisible correct"))
00928     t= missing_invisible_correct (t, 1);
00929   t= downgrade_big (t);
00930   return t;
00931 }