Back to index

texmacs  1.0.7.15
parsebib.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : parsebib.cpp
00004 * DESCRIPTION: conversion of bibtex strings into logical bibtex trees
00005 * COPYRIGHT  : (C) 2010  David MICHEL
00006 *******************************************************************************
00007 * This software falls under the GNU general public license version 3 or later.
00008 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
00009 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
00010 ******************************************************************************/
00011 
00012 #include "convert.hpp"
00013 #include "analyze.hpp"
00014 #include "list.hpp"
00015 #include "tree_traverse.hpp"
00016 #include "Bibtex/bibtex_functions.hpp"
00017 
00018 static string bib_current_tag= "";
00019 
00020 bool
00021 bib_ok (string s, int pos) {
00022   return 0 <= pos && pos < N(s);
00023 }
00024 
00025 void
00026 bib_error () {
00027   if (bib_current_tag == "")
00028     cerr << "TeXmacs] BibTeX parse error encountered\n";
00029   else
00030     cerr << "TeXmacs] BibTeX parse error in " << bib_current_tag << "\n";
00031 }
00032 
00033 void
00034 bib_char (string s, int& pos, char c) {
00035   if (!bib_ok (s, pos)) return;
00036   if (s[pos] == c) pos++;
00037   else {
00038     bib_error ();
00039     if (c) cerr << "       ] Invalid char: \'" << s[pos]
00040               << "\', expected \'" << c << "\'\n";
00041     pos= -1;
00042   }
00043 }
00044 
00045 bool
00046 bib_open (string s, int& pos, char& cend) {
00047   switch (s[pos]) {
00048   case '{': cend= '}'; return false;
00049   case '(': cend= ')'; return false;
00050   default:
00051     bib_error ();
00052     cerr << "       ] Expected '{' or '(' instead of '" << s[pos] << "'\n";
00053     while (pos < N(s) && s[pos] != '{' && s[pos] != '(') pos++;
00054     if (pos < N(s)) return bib_open (s, pos, cend);
00055     pos= -1;
00056     return true;
00057   }
00058 }
00059 
00060 bool
00061 bib_is_in (char c, string cs) {
00062   int i= 0;
00063   while (i < N(cs) && cs[i] != c) i++;
00064   return i != N(cs);
00065 }
00066 
00067 void
00068 bib_blank (string s, int& pos) {
00069   if (!bib_ok (s, pos)) return;
00070   string cs= " \t\n\r";
00071   while (bib_ok (s, pos) && bib_is_in (s[pos], cs)) pos++;
00072 }
00073 
00074 void
00075 bib_within (string s, int& pos, char cbegin, char cend, string& content) {
00076   if (!bib_ok (s, pos)) return;
00077   int depth= 0;
00078   bib_char (s, pos, cbegin);
00079   while (bib_ok (s, pos) && (s[pos] != cend || depth > 0)) {
00080     if (cbegin != cend) {
00081       if (s[pos] == cbegin) depth++;
00082       else if (s[pos] == cend) depth--;
00083     }
00084     if (s[pos] == '\\' && bib_ok (s, pos+1)) {
00085       content << '\\';
00086       pos++;
00087     }
00088     content << s[pos];
00089     pos++;
00090   }
00091   bib_char (s, pos, cend);
00092 }
00093 
00094 void
00095 bib_until (string s, int& pos, string cs, string& content) {
00096   if (!bib_ok (s, pos)) return;
00097   while (bib_ok (s, pos) && !bib_is_in (s[pos], cs)) {
00098     content << s[pos];
00099     pos++;
00100   }
00101 }
00102 
00103 void
00104 bib_comment (string s, int& pos, tree& t) {
00105   if (!bib_ok (s, pos)) return;
00106   string content;
00107   while (bib_ok (s, pos) && s[pos] == '%') {
00108     bib_char (s, pos, '%');
00109     while (bib_ok (s, pos) && s[pos] != '\n') {
00110       content << s[pos];
00111       pos++;
00112     }
00113     t << compound ("bib-line", content);
00114     content= "";
00115     pos++;
00116   }
00117 }
00118 
00119 void bib_atomic_arg (string s, int& pos, string ce, tree& a) {
00120   if (!bib_ok (s, pos)) return;
00121   string sa;
00122   string f, v, j, l;
00123   switch (s[pos]) {
00124     case '\"': {
00125       bib_within (s, pos, '\"', '\"', sa);
00126       a= sa;
00127       break;
00128     }
00129     case '{': {
00130       bib_within (s, pos, '{', '}', sa);
00131       a= sa;
00132       break;
00133     }
00134     default: {
00135       string cs= ", \t\n\r";
00136       cs << ce;
00137       if (!is_digit (s[pos])) {
00138         bib_until (s, pos, cs, sa);
00139         a= compound ("bib-var", sa);
00140       }
00141       else {
00142         bib_until (s, pos, cs, sa);
00143         a= sa;
00144       }
00145       break;
00146     }
00147   }
00148 }
00149 
00150 void
00151 bib_arg (string s, int& pos, string ce, tree& arg) {
00152   if (!bib_ok (s, pos)) return;
00153   string cs= ",";
00154   cs << ce;
00155   while (bib_ok (s, pos) && !bib_is_in (s[pos], cs)) {
00156     tree a;
00157     bib_atomic_arg (s, pos, ce, a);
00158     arg << a;
00159     bib_blank (s, pos);
00160     if (bib_ok (s, pos) && s[pos] == '#') {
00161       pos++;
00162       bib_blank (s, pos);
00163     }
00164     else break;
00165   }
00166 }
00167 
00168 void
00169 bib_fields (string s, int& pos, string ce, string tag, tree& fields) {
00170   if (!bib_ok (s, pos)) return;
00171   int savpos;
00172   bib_blank (s, pos);
00173   while (bib_ok (s, pos) && s[pos] == ',') {
00174     pos++;
00175     bib_blank (s, pos);
00176   }
00177   while (bib_ok (s, pos) && !bib_is_in (s[pos], ce)) {
00178     savpos= pos;
00179     string param;
00180     tree arg (CONCAT);
00181     bib_until (s, pos, string ("={( \t\n\r"), param);
00182     if (bib_ok (s, pos) && (s[pos]=='{' || s[pos]=='(')) {
00183       pos= savpos;
00184       return;
00185     }
00186     bib_blank (s, pos);
00187     bib_char (s, pos, '=');
00188     bib_blank (s, pos);
00189     bib_arg (s, pos, ce, arg);
00190     if (tag == "bib-field") param= locase_all (param);
00191     arg= simplify_correct (arg);
00192     fields << compound (tag, param, arg);
00193     bib_blank (s, pos);
00194     string cend= ce;
00195     cend << ",";
00196     while (bib_ok (s, pos) && !bib_is_in (s[pos], cend)) pos++;
00197     while (bib_ok (s, pos) && s[pos] == ',') {
00198       pos++;
00199       bib_blank (s, pos);
00200     }
00201   }
00202 }
00203 
00204 void
00205 bib_string (string s, int& pos, tree& t) {
00206   if (!bib_ok (s, pos)) return;
00207   tree fields= tree (DOCUMENT);
00208   string cs= ", \t\n\r";
00209   char cend;
00210   if (bib_open (s, pos, cend)) return;
00211   pos++;
00212   cs << cend;
00213   bib_blank (s, pos);
00214   string ce;
00215   ce << cend;
00216   bib_fields (s, pos, ce, string ("bib-assign"), fields);
00217   bib_blank (s, pos);
00218   bib_char (s, pos, cend);
00219   t << A (fields);
00220 }
00221 
00222 void
00223 bib_preamble (string s, int& pos, tree& t) {
00224   if (!bib_ok (s, pos)) return;
00225   string cs= ",";
00226   char cend;
00227   if (bib_open (s, pos, cend)) return;
00228   pos++;
00229   cs << cend;
00230   bib_blank (s, pos);
00231   while (bib_ok (s, pos) && s[pos] == ',') {
00232     pos++;
00233     bib_blank (s, pos);
00234   }
00235   while (bib_ok (s, pos) && s[pos] != cend) {
00236     bib_blank (s, pos);
00237     tree arg (CONCAT);
00238     bib_arg (s, pos, cs, arg);
00239     arg= simplify_correct (arg);
00240     t << compound ("bib-latex", arg);
00241     bib_blank (s, pos);
00242     while (bib_ok (s, pos) && s[pos] == ',') {
00243       pos++;
00244       bib_blank (s, pos);
00245     }
00246   }
00247   bib_blank (s, pos);
00248   bib_char (s, pos, cend);
00249 }
00250 
00251 void
00252 bib_entry (string s, int& pos, tree type, tree& t) {
00253   if (!bib_ok (s, pos)) return;
00254   tree entry;
00255   tree fields= tree (DOCUMENT);
00256   string cs= ",\t\n\r";
00257   char cend;
00258   if (bib_open (s, pos, cend)) return;
00259   pos++;
00260   cs << cend;
00261   bib_blank (s, pos);
00262   string tag;
00263   bib_until (s, pos, cs, tag);
00264   bib_current_tag= copy (tag);
00265   bib_blank (s, pos);
00266   string ce;
00267   ce << cend;
00268   bib_fields (s, pos, ce, string ("bib-field"), fields);
00269   bib_blank (s, pos);
00270   bib_char (s, pos, cend);
00271   entry= compound ("bib-entry");
00272   entry << type << tag << fields;
00273   t << entry;
00274 }
00275 
00276 void
00277 bib_list (string s, int& pos, tree& t) {
00278   if (!bib_ok (s, pos)) return;
00279   tree tentry (DOCUMENT);
00280   tree tpreamble (DOCUMENT);
00281   tree tstring (DOCUMENT);
00282   string type;
00283   bool comment= true;
00284   int savpos;
00285   while (bib_ok (s, pos)) {
00286     bib_blank (s, pos);
00287     if (!bib_ok (s, pos)) break;
00288     switch (s[pos]) {
00289       case '%': {
00290         tree tc= tree (DOCUMENT);
00291         bib_comment (s, pos, tc);
00292         tentry << compound ("bib-comment", tc);
00293         break;
00294       }
00295       case '@': {
00296         pos++;
00297         comment= false;
00298       }
00299       default: {
00300         bib_blank (s, pos);
00301         savpos= pos;
00302         type= "";
00303         bib_until (s, pos, string ("{(= \t\n\r"), type);
00304         bib_blank (s, pos);
00305         if (bib_ok (s, pos) && s[pos] == '=') {
00306           tree fields (DOCUMENT);
00307           pos= savpos;
00308           bib_fields (s, pos, string (")}@"), string ("bib-field"), tentry);
00309           bib_blank (s, pos);
00310           if (bib_ok (s, pos) && (s[pos]==')' || s[pos]=='}')) {
00311             if (N(tpreamble) != 0) t << compound ("bib-preamble", tpreamble);
00312             if (N(tstring) != 0) t << compound ("bib-string", tstring);
00313             t << A(tentry);
00314             return;
00315           }
00316         }
00317        else {
00318           string stype= locase_all (type);
00319           if (stype == "string") {
00320             tree ts;
00321             if (comment) ts= tree (DOCUMENT);
00322             else ts= tstring;
00323             bib_string (s, pos, ts);
00324             if (comment) {
00325               if (N(ts) == 1) tstring << compound ("bib-comment", ts[0]);
00326               else tstring << compound ("bib-comment", ts);
00327             }
00328           }
00329          else if (stype == "preamble") {
00330             tree tp;
00331             if (comment) tp= tree (DOCUMENT);
00332             else tp= tpreamble;
00333             bib_preamble (s, pos, tp);
00334             if (comment) {
00335               if (N(tp) == 1) tpreamble << compound ("bib-comment", tp[0]);
00336               else tpreamble << compound ("bib-comment", tp);
00337             }
00338           }
00339          else {
00340             tree te;
00341             if (stype == "comment") comment= true;
00342             if (comment) te= tree (DOCUMENT);
00343             else te= tentry;
00344             bib_entry (s, pos, stype, te);
00345             if (comment) {
00346               if (N(te) == 1) tentry << compound ("bib-comment", te[0]);
00347               else tentry << compound ("bib-comment", te);
00348             }
00349           }
00350           comment= true;
00351         }
00352         break;
00353       }
00354     }
00355   }
00356 //  cerr << "ENTRIES: " << tentry << "\n";
00357 //  cerr << "PREAMBLE: " << tpreamble << "\n";
00358 //  cerr << "STRING: " << tstring << "\n";
00359 //  if (N(tpreamble) != 0) t << compound ("bib-preamble", tpreamble);
00360 //  if (N(tstring) != 0) t << compound ("bib-string", tstring);
00361 //  t << A(tentry);
00362   hashmap<string,string> dict=
00363     bib_strings_dict (tree (DOCUMENT, compound ("bib-string", tstring)));
00364   t << A(bib_subst_vars (tentry, dict));
00365   bib_parse_fields (t);
00366 }
00367 
00368 tree
00369 parse_bib (string s) {
00370   int pos= 0;
00371   tree r (DOCUMENT);
00372   bib_current_tag= "";
00373   bib_list (s, pos, r);
00374   if (N(s) == 0 || N(r) == 0) return tree ();
00375   if (pos < 0) {
00376     cerr << "TeXmacs] Error: failed to load BibTeX file.\n";
00377     return tree ();
00378   }
00379   return r;
00380 }
00381