Back to index

texmacs  1.0.7.15
dictionary.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : dictionary.cpp
00004 * DESCRIPTION: used for translations and analyzing text
00005 * COPYRIGHT  : (C) 1999  Joris van der Hoeven
00006 *******************************************************************************
00007 * This software falls under the GNU general public license version 3 or later.
00008 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
00009 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
00010 ******************************************************************************/
00011 
00012 #include "dictionary.hpp"
00013 #include "file.hpp"
00014 #include "convert.hpp"
00015 #include "converter.hpp"
00016 #include "drd_std.hpp"
00017 
00018 RESOURCE_CODE(dictionary);
00019 
00020 /******************************************************************************
00021 * Dictionary initialization
00022 ******************************************************************************/
00023 
00024 dictionary_rep::dictionary_rep (string from2, string to2):
00025   rep<dictionary> (from2 * "-" * to2), table ("?"), from (from2), to (to2) {}
00026 
00027 void
00028 dictionary_rep::load (url u) {
00029   if (is_none (u)) return;
00030   if (is_or (u)) {
00031     load (u[1]);
00032     load (u[2]);
00033     return;
00034   }
00035 
00036   string s;
00037   if (load_string (u, s, false)) return;
00038   tree t= block_to_scheme_tree (s);
00039   if (!is_tuple (t)) return;
00040 
00041   int i, n= N(t);
00042   for (i=0; i<n; i++)
00043     if (is_func (t[i], TUPLE, 2) &&
00044        is_atomic (t[i][0]) && is_atomic (t[i][1]))
00045       {
00046        string l= t[i][0]->label; if (is_quoted (l)) l= scm_unquote (l);
00047        string r= t[i][1]->label; if (is_quoted (r)) r= scm_unquote (r);
00048        if (to == "chinese" || to == "japanese" ||
00049            to == "korean" || to == "taiwanese")
00050          r= utf8_to_cork (r);
00051        table (l)= r;
00052       }
00053 }
00054 
00055 void
00056 dictionary_rep::load (string fname) {
00057   fname= fname * ".scm";
00058   if (DEBUG_VERBOSE) cout << "TeXmacs] Loading " << fname << "\n";
00059   url u= url ("$TEXMACS_DIC_PATH") * url_wildcard ("*" * fname);
00060   load (expand (complete (u)));
00061 }
00062 
00063 dictionary
00064 load_dictionary (string from, string to) {
00065   string name= from * "-" * to;
00066   if (dictionary::instances -> contains (name))
00067     return dictionary (name);
00068   dictionary dict= tm_new<dictionary_rep> (from, to);
00069   if (from != to) dict->load (name);
00070   return dict;
00071 }
00072 
00073 /******************************************************************************
00074 * Translation routines
00075 ******************************************************************************/
00076 
00077 string
00078 dictionary_rep::translate (string s) {
00079   // Is s in dictionary?
00080   if (s == "" || from == to) return s;
00081   //cout << "Translate <" << s << ">\n";
00082   if (table->contains (s) && table[s] != "")
00083     return table[s];
00084 
00085   // remove trailing non iso_alpha characters
00086   int i, n= N(s);
00087   for (i=0; i<n; i++)
00088     if (is_iso_alpha (s[i]))
00089       break;
00090   int start= i;
00091   for (i=n; i>0; i--)
00092     if (is_iso_alpha (s[i-1]))
00093       break;
00094   int end= i;
00095   if (start >= n || end <= 0) return s;
00096   if (start != 0 || end != n) {
00097     ASSERT (start < end, "invalid situation");
00098     string s1= translate (s (0, start));
00099     string s2= translate (s (start, end));
00100     string s3= translate (s (end, n));
00101     if (to == "french") {
00102       if (s3 == ":") s3= " :";
00103       if (s3 == "!") s3= " !";
00104       if (s3 == "?") s3= " ?";
00105     }
00106     return s1 * s2 * s3;
00107   }
00108 
00109   // Is lowercase version of s in dictionary?
00110   string ls= locase_first (s);
00111   if (table->contains (ls) && table[ls] != "")
00112     return upcase_first (table[ls]);
00113 
00114   // break at last non iso_alpha character which is not a space
00115   for (i=n; i>0; i--)
00116     if (!is_iso_alpha (s[i-1]) && s[i-1] != ' ')
00117       break;
00118   if (i > 0) {
00119     string s1= translate (s (0, i));
00120     string s2= translate (s (i, n));
00121     return s1 * s2;
00122   }
00123 
00124   // no translation available
00125   return s;
00126 }
00127 
00128 /******************************************************************************
00129 * Interface
00130 ******************************************************************************/
00131 
00132 static string in_lan ("english");
00133 static string out_lan ("english");
00134 
00135 void set_input_language (string s) { in_lan= s; }
00136 string get_input_language () { return in_lan; }
00137 void set_output_language (string s) { out_lan= s; }
00138 string get_output_language () { return out_lan; }
00139 
00140 string
00141 translate (string s, string from, string to) {
00142   if (N(from)==0) return s;
00143   dictionary dict= load_dictionary (from, to);
00144   return dict->translate (s);
00145 }
00146 
00147 string
00148 translate (string s) {
00149   return translate (s, "english", out_lan);
00150 }
00151 
00152 string
00153 translate (const char* s) {
00154   return translate (string (s), "english", out_lan);
00155 }
00156 
00157 /******************************************************************************
00158 * Translation of trees
00159 ******************************************************************************/
00160 
00161 tree
00162 tree_translate (tree t, string from, string to) {
00163   //cout << "Translating " << t << " from " << from << " into " << to << "\n";
00164   if (is_atomic (t))
00165     return translate (t->label, from, to);
00166   else if (is_compound (t, "verbatim", 1))
00167     return t[0];
00168   else if (is_compound (t, "localize", 1))
00169     return tree_translate (t[0], "english", out_lan);
00170   else if (is_compound (t, "render-key", 1))
00171     return compound ("render-key", tree_translate (t[0], from, to));
00172   else {
00173     tree r (t, N(t));
00174     for (int i=0; i<N(t); i++)
00175       if (!the_drd->is_accessible_child (t, i)) r[i]= t[i];
00176       else r[i]= tree_translate (t[i], from, to);
00177     return r;
00178   }
00179 }
00180 
00181 tree
00182 tree_translate (tree t) {
00183   return tree_translate (t, "english", out_lan);
00184 }
00185 
00186 /******************************************************************************
00187 * Translate and serialize
00188 ******************************************************************************/
00189 
00190 static string
00191 serialize (tree t) {
00192   if (is_atomic (t))
00193     return t->label;
00194   else if (is_concat (t)) {
00195     string s;
00196     for (int i=0; i<N(t); i++) {
00197       tree u= t[i];
00198       while (is_concat (u) && N(u) > 0) u= u[0];
00199       if (i > 0 && is_compound (u, "render-key"))
00200        if (!is_atomic (t[i-1]) || !ends (t[i-1]->label, " ")) {
00201          if (use_macos_fonts () || gui_is_qt ()) s << "  ";
00202          else s << " ";
00203        }
00204       s << serialize (t[i]);
00205     }
00206     return s;
00207   }
00208   else if (is_compound (t, "render-key", 1))
00209     return serialize (t[0]);
00210   else if (is_func (t, WITH))
00211     return serialize (t[N(t)-1]);
00212   else if (is_compound (t, "math", 1))
00213     return serialize (t[0]);
00214   else if (is_compound (t, "op", 1)) {
00215     t= t[0];
00216     if (gui_is_qt ()) {
00217       if (t == "<leftarrow>") return "Left";
00218       if (t == "<rightarrow>") return "Right";
00219       if (t == "<uparrow>") return "Up";
00220       if (t == "<downarrow>") return "Down";
00221     }
00222     else {
00223       if (t == "<leftarrow>") return "left";
00224       if (t == "<rightarrow>") return "right";
00225       if (t == "<uparrow>") return "up";
00226       if (t == "<downarrow>") return "down";
00227     }
00228     return serialize (t);
00229   }
00230   else return "";
00231 }
00232 
00233 string
00234 translate (tree t, string from, string to) {
00235   return serialize (tree_translate (t, from, to));
00236 }
00237 
00238 string
00239 translate (tree t) {
00240   return serialize (tree_translate (t));
00241 }