Back to index

texmacs  1.0.7.15
cpp_language.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : cpp_language.cpp
00004 * DESCRIPTION: the "cpp" language
00005 * COPYRIGHT  : (C) 2008  Francis Jamet
00006 *******************************************************************************
00007 * This software falls under the GNU general public license and comes WITHOUT
00008 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
00009 * If you don't have this file, write to the Free Software Foundation, Inc.,
00010 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00011 ******************************************************************************/
00012 
00013 #include "analyze.hpp"
00014 #include "impl_language.hpp"
00015 #include "scheme.hpp"
00016 
00017 extern tree the_et;
00018 
00019 /*
00020 static bool
00021 is_line (tree t) {
00022  path p= obtain_ip (t);
00023   if (is_nil (p) || last_item (p) < 0) return false;
00024   tree pt= subtree (the_et, reverse (p->next));
00025   if (!is_func (pt, DOCUMENT)) return false;
00026   return true;
00027 }
00028 */
00029 
00030 static int
00031 line_number (tree t) {
00032   path p= obtain_ip (t);
00033   if (is_nil (p) || last_item (p) < 0) return -1;
00034   tree pt= subtree (the_et, reverse (p->next));
00035   if (!is_func (pt, DOCUMENT)) return -1;
00036   return p->item;
00037 }
00038 
00039 static int
00040 number_of_line (tree t) {
00041   path p= obtain_ip (t);
00042   if (is_nil (p) || last_item (p) < 0) return -1;
00043   tree pt= subtree (the_et, reverse (p->next));
00044   if (!is_func (pt, DOCUMENT)) return -1;
00045   return N(pt);
00046 }
00047 
00048 static tree
00049 line_inc (tree t, int i) {
00050   path p= obtain_ip (t);
00051   if (is_nil (p) || last_item (p) < 0) return tree (ERROR);
00052   tree pt= subtree (the_et, reverse (p->next));
00053   if (!is_func (pt, DOCUMENT)) return tree (ERROR);
00054   if ((p->item + i < 0) || (p->item + i >= N(pt))) return tree (ERROR);
00055   return pt[p->item + i];
00056 }
00057 
00058 static void parse_number (string s, int& pos);
00059 static void parse_string (string s, int& pos);
00060 static void parse_alpha (string s, int& pos);
00061 
00062 cpp_language_rep::cpp_language_rep (string name):
00063   language_rep (name), colored ("")
00064 { 
00065   eval ("(use-modules (utils misc tm-keywords))");
00066   list<string> l= as_list_string (eval ("(map symbol->string highlight-any)"));
00067   while (!is_nil (l)) {
00068     colored (l->item)= "blue";
00069     l= l->next;
00070   }
00071 }
00072 
00073 text_property
00074 cpp_language_rep::advance (tree t, int& pos) {
00075   string s= t->label;
00076   if (pos==N(s)) return &tp_normal_rep;
00077   char c= s[pos];
00078   if (c == ' ') {
00079     pos++; return &tp_space_rep; }
00080   if (c >= '0' && c <= '9') {
00081     parse_number (s, pos); return &tp_normal_rep; }
00082   if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
00083       (c == '_')) {
00084     parse_alpha (s, pos); return &tp_normal_rep; }
00085   tm_char_forwards (s, pos);
00086   return &tp_normal_rep;
00087 }
00088 
00089 array<int>
00090 cpp_language_rep::get_hyphens (string s) {
00091   int i;
00092   array<int> penalty (N(s)+1);
00093   penalty[0]= HYPH_INVALID;
00094   for (i=1; i<N(s); i++)
00095     if (s[i-1] == '-' && is_alpha (s[i]))
00096       penalty[i]= HYPH_STD;
00097     else penalty[i]= HYPH_INVALID;
00098   penalty[i]= HYPH_INVALID;
00099   return penalty;
00100 }
00101 
00102 void
00103 cpp_language_rep::hyphenate (
00104   string s, int after, string& left, string& right)
00105 { 
00106   left = s (0, after);
00107   right= s (after, N(s));
00108 }
00109 
00110 static void
00111 cpp_color_setup_constants (hashmap<string, string> & t) {
00112   string c= "#2060c0";
00113   t ("true")= c;
00114   t ("false")= c;
00115   t ("cout")= c;
00116   t ("cin")= c;
00117   t ("cerr")= c;
00118 }
00119 
00120 static void
00121 cpp_color_setup_keywords (hashmap<string, string> & t)  {
00122   string c= "#8020c0"; 
00123   t ("asm")= c;
00124   t ("auto")= c;
00125   t ("break")= c;
00126   t ("case")= c;
00127   t ("catch")= c;
00128   t ("class")= c;
00129   t ("concrete")= c;
00130   t ("constant")= c;
00131   t ("continue")= c;
00132   t ("default")= c;
00133   t ("delete")= c;
00134   t ("do")= c;
00135   t ("else")= c;
00136   t ("enum")= c;
00137   t ("extern")= c;
00138   t ("explicit")= c;
00139   t ("for")= c;
00140   t ("friend")= c;
00141   t ("goto")= c;
00142   t ("if")= c;
00143   t ("inline")= c;
00144   t ("mutable")= c;
00145   t ("new")= c;
00146   t ("operator")= c;
00147   t ("private")= c;
00148   t ("protected")= c;
00149   t ("public")= c;
00150   t ("register")= c;
00151   t ("return")= c ;
00152   t ("sizeof")= c;
00153   t ("static")= c;
00154   t ("struct")= c;
00155   t ("switch")= c;
00156   t ("template")= c;
00157   t ("this")= c;
00158   t ("throw")= c;
00159   t ("to")= c;
00160   t ("try")= c;
00161   t ("typedef")= c;
00162   t ("union")= c;
00163   t ("virtual")= c;
00164   t ("volatile")= c;
00165   t ("while")= c;
00166   t ("malloc")= c;
00167   t ("realloc")= c;
00168   t ("calloc")= c;
00169   t ("free")= c;
00170 }
00171 
00172 static void
00173 cpp_color_setup_otherlexeme (hashmap<string, string>& t) {
00174   string c= "black";
00175   t ("+")= c;
00176   t ("-")= c;
00177   t ("/")= c;
00178   t ("=")= c;
00179   t (".")= c;
00180   t ("<less>")= c;
00181   t ("gtr")= c;
00182   t ("|")= c;
00183   t ("!")= c;
00184   t ("...")= c;
00185   t (",")= c;
00186   t ("+=")= c;
00187   t ("-=")= c; 
00188   t ("*=")= c;
00189   t ("/=")= c;
00190   t (",")= c;
00191   t (";")= c;
00192   t ("(")= c;
00193   t (")")= c;
00194   t ("[")= c;
00195   t ("]")= c;
00196   t ("{")= c;
00197   t ("}")= c;
00198   t ("<less><less>")= c;
00199   t ("<gtr><gtr>")= c;
00200   t ("<less>=")= c;
00201   t ("==")= c;
00202   t ("<gtr>=")= c;
00203   t ("&&")= c;
00204   t ("||")= c;
00205   t ("!=")= c;
00206   
00207 }
00208 
// True when c may appear inside an identifier: an ASCII letter, an ASCII
// digit, or an underscore.
static inline bool
belongs_to_identifier (char c) {
  if (c == '_') return true;
  if (c >= '0' && c <= '9') return true;
  if (c >= 'A' && c <= 'Z') return true;
  return c >= 'a' && c <= 'z';
}
00216 
// True when c is an ASCII decimal digit.
static inline bool
is_number (char c) {
  return !(c < '0' || c > '9');
}
00221 
00222 static void
00223 parse_identifier (hashmap<string, string>& t, string s, int& pos) {
00224   int i=pos;
00225   if (pos >= N(s)) return;
00226   if (is_number (s[i])) return;
00227   while (i<N(s) && belongs_to_identifier (s[i])) i++;
00228   if (!(t->contains (s (pos, i)))) pos= i;
00229 }
00230 
00231 static void
00232 parse_alpha (string s, int& pos) {
00233   static hashmap<string,string> empty;
00234   parse_identifier (empty, s, pos);
00235 }
00236 
00237 static void
00238 parse_blanks (string s, int& pos) {
00239   while (pos<N(s) && (s[pos]==' ' || s[pos]=='\t')) pos++;
00240 }
00241 
00242 static void
00243 parse_string (string s, int& pos) {
00244   if (pos>=N(s)) return;
00245   switch (s[pos])  {
00246   case '\042':
00247     do pos++;
00248     while((pos<N(s)) &&
00249          ((s[pos-1]=='\\' && s[pos]=='\042') || s[pos]!='\042'));
00250     if (s[pos]=='\042') pos++;
00251     return;
00252   case '/':
00253     if (pos+1<N(s) && s[pos+1]=='\042') {
00254       pos=pos+2;
00255       do {
00256        if (pos+1<N(s) && s[pos]=='\042' && s[pos+1]=='/') {
00257          pos=pos+2; return; }
00258        pos++;
00259       } while (pos<N(s));
00260     }
00261   }
00262 }
00263   
00264 static void
00265 parse_keyword (hashmap<string,string>& t, string s, int& pos) {
00266   int i= pos;
00267   if (pos>=N(s)) return;
00268   if (is_number (s[i])) return;
00269   while ((i<N(s)) && belongs_to_identifier (s[i])) i++;
00270   string r= s (pos, i);
00271   if (t->contains (r) && t(r)=="#8020c0") { pos=i; return; }
00272 }
00273 
00274 static void
00275 parse_constant (hashmap<string,string>& t, string s, int& pos) {
00276   int i=pos;
00277   if (pos>=N(s)) return;
00278   if (is_number (s[i])) return;
00279   while ((i<N(s)) && belongs_to_identifier (s[i])) i++;
00280   string r= s (pos, i);
00281   if (t->contains (r) && t(r)=="#2060c0") { pos=i; return; }
00282 }
00283 
00284 static void
00285 parse_other_lexeme (hashmap<string,string>& t, string s, int& pos) {
00286   int i;
00287   for (i=12; i>=1; i--) {
00288     string r=s(pos,pos+i);
00289     if (t->contains(r) && t(r)=="black") {
00290       pos=pos+i; return; }
00291   }
00292 }
00293 
00294 static void
00295 parse_number (string s, int& pos) {
00296   int i= pos;
00297   if (pos>=N(s)) return;
00298   if (s[i] == '.') return;
00299   while (i<N(s) && 
00300         (is_number (s[i]) ||
00301          (s[i] == '.' && (i+1<N(s)) &&
00302           (is_number (s[i+1]) ||
00303            s[i+1] == 'e' || s[i+1] == 'E')))) i++;
00304   if (i == pos) return;
00305   if (i<N(s) && (s[i] == 'e' || s[i] == 'E')) {
00306     i++;
00307     if (i<N(s) && s[i] == '-') i++;
00308     while (i<N(s) && (is_number (s[i]))) i++;
00309   }
00310   pos= i;
00311 }
00312 
00313 static void
00314 parse_comment_multi_lines (string s, int& pos) {
00315   if (pos>=N(s)) return;
00316   if (s[pos]!='/') return;
00317   if (pos+1<N(s) && s[pos+1]=='*') {
00318     pos= pos+2;
00319     while ((pos<N(s) && s[pos]!='*') || (pos+1<N(s) && s[pos+1]!='/')) pos++;
00320     pos= min(pos+2,N(s));
00321   }
00322 }
00323 
00324 static void
00325 parse_comment_single_line (string s, int& pos) {
00326   if (pos>=N(s)) return;
00327   if (s[pos]!='/') return;
00328   if (pos+1<N(s) && s[pos+1]=='/') {pos=N(s);return;}
00329 }
00330 
00331 static void
00332 parse_end_comment (string s, int& pos) {
00333   if (pos+1<N(s) && s[pos]=='*' && s[pos+1]=='/') pos=pos+2; 
00334 }
00335 
00336 static void parse_diese (string s, int& pos) {
00337   if (s[pos] == '#') pos++;
00338   }
00339 
00340 static void parse_preprocessing (string s, int & pos) {
00341   int i= pos;
00342   if (pos>=N(s)) return;
00343   if (is_number (s[i])) return;
00344   while ((i<N(s)) && belongs_to_identifier (s[i])) i++;
00345   string r= s (pos, i);
00346   if (r == "include" ||
00347        r == "if" ||
00348        r == "ifdef" ||
00349        r == "ifndef" ||
00350        r == "else" ||
00351        r == "elif" ||
00352        r == "endif" ||
00353        r == "define" ||
00354        r == "undef" ||
00355        r == "pragma" ||
00356        r == "error") { pos=i; return; }
00357   }
00358   
00359   
00360   
00361 static bool begin_comment (string s, int i) {
00362   bool comment;
00363   int pos= 0;
00364   int opos; 
00365   do {
00366     do {
00367     opos= pos;
00368     comment= false;
00369     parse_string (s, pos);
00370     if (opos < pos) break;
00371     parse_comment_multi_lines (s, pos);
00372     if (opos < pos) {comment= true; break;}
00373     pos++;
00374     }
00375   while (false);
00376   }
00377   while (pos<=i);
00378   return comment;  
00379 }
00380 
00381 static bool end_comment (string s, int i) {
00382   bool comment;
00383   int pos= 0; int opos;
00384   do {
00385     do {
00386     opos= pos;
00387     comment= false;
00388     parse_string (s, pos);
00389        if (opos < pos) break;
00390     parse_end_comment (s, pos);
00391     if (opos < pos && pos>i) return true;
00392     pos ++;
00393     }
00394   while (false);
00395   }
00396   while (pos<N(s));
00397   return false;
00398 }
00399 
00400 static bool after_begin_comment (string s, int i, tree t) {
00401   if (begin_comment(s, i)) return true;
00402   tree t2= t;
00403   string s2= s;
00404   if (N(s2)==0) return false;
00405   int pos=0;
00406   parse_blanks(s2,pos);
00407   if (s2[pos]!='*') return false;
00408   while (line_number(t2) > 0) {
00409     t2= line_inc(t2,-1);
00410     // line_inc return tree(ERROR) upon error
00411     if (!is_atomic(t2)) return false;
00412     s2= t2->label;
00413     if (N(s2)>0 && begin_comment (s2, N(s2)-1)) return true;
00414     if (N(s2)==0) return false;
00415     int pos=0;
00416     parse_blanks(s2,pos);
00417     if (s2[pos]!='*') return false;
00418     } 
00419   return false;
00420 }
00421 
00422 static bool before_end_comment (string s, int i, tree t) {
00423   int number= number_of_line(t);
00424   tree t2= t;
00425   string s2=s;
00426   if (N(s2)==0) return false;
00427   int pos=0;
00428   if (!begin_comment(s,i)) {
00429     parse_blanks(s2,pos);
00430     if (s2[pos]!='*') return false;
00431   }
00432   if (end_comment(s, i)) return true;
00433   while (line_number(t2) < number-1) {
00434     t2= line_inc(t2,1);
00435     // line_inc return tree(ERROR) upon error
00436     if (!is_atomic(t2)) return false;
00437     s2= t2->label;
00438     if (N(s2)==0) return false;
00439     pos=0;
00440     parse_blanks(s2, pos);
00441     if (s2[pos]!='*') return false;
00442     if (N(s2)>0 && end_comment (s2, 0)) return true;
00443   } 
00444   return false;
00445 }
00446 
00447 static bool in_comment(string s, int pos, tree t) {
00448   if (after_begin_comment(s, pos, t) && before_end_comment(s, pos, t)) return true;
00449   return false;
00450 }  
00451 
00452 static bool end_preprocessing( string s) {
00453   int pos=N(s)-1;
00454   if (N(s)==0) return false;
00455   while (s[pos]==' ' && pos>0) {pos--;}
00456   if (s[pos]=='/') return true;
00457   return false;
00458 }  
00459   
00460 static bool begin_preprocessing( string s) { 
00461   if (N(s)>0 && s[0]=='#') return true;
00462   return false;
00463 }
00464 
00465 static bool in_preprocessing (string s, tree t) {
00466   if (begin_preprocessing(s)) return true;
00467   tree t2= t;
00468   string s2= s;
00469   while (line_number(t2) > 0) {
00470     t2= line_inc(t2,-1);
00471     // line_inc return tree(ERROR) upon error
00472     if (!is_atomic(t2)) return false;
00473     s2= t2->label;
00474     if (!end_preprocessing(s2)) return false;
00475     if (begin_preprocessing(s2)) return true;
00476   } 
00477   return false;
00478 }
00479 
00480 string
00481 cpp_language_rep::get_color (tree t, int start, int end) {
00482   static bool setup_done= false;
00483   if (!setup_done) {
00484     cpp_color_setup_constants (colored);
00485     cpp_color_setup_keywords (colored);
00486     cpp_color_setup_otherlexeme (colored);
00487     setup_done= true;
00488   }
00489   static string none= "";
00490   if (start >= end) return none;
00491   string s= t->label;
00492   if (in_comment(s,start,t)) return "brown";
00493   int pos= 0;
00494   int opos=0;
00495   string type;
00496   if (in_preprocessing(s, t)){
00497   do {
00498     do {
00499     opos= pos;
00500     type=none;
00501     parse_blanks (s, pos);
00502     if (opos < pos) break;
00503     parse_diese(s, pos);
00504     if (opos < pos) {type="preprocessing"; break;}
00505     parse_preprocessing (s, pos);
00506     if (opos < pos) {type= "preprocessing"; break; }
00507     pos++;
00508        }
00509   while (false);}
00510   while (pos <= start);
00511   if (type == "preprocessing") return "#20a000";
00512   return "#004000";
00513   }
00514   pos= 0;
00515   do {
00516     type= none;
00517     do {
00518       opos= pos;
00519       parse_blanks (s, pos);
00520       if (opos < pos) break;
00521       parse_string (s, pos);
00522       if (opos < pos) {
00523        type= "string";
00524        break;
00525       }
00526       parse_comment_single_line (s, pos);
00527       if (opos < pos) {
00528        type= "comment";
00529        break;
00530       }
00531       parse_keyword (colored, s, pos);
00532       if (opos < pos) {
00533        type= "keyword";
00534        break;
00535       }
00536       parse_other_lexeme (colored, s, pos);  //not left parenthesis
00537       if (opos < pos) {
00538        type= "other_lexeme";
00539        break;
00540       }
00541       parse_constant (colored, s, pos);
00542       if (opos < pos) {
00543        type= "constant";
00544        break;
00545       }
00546       parse_number (s, pos);
00547       if (opos < pos) {
00548        type= "number";
00549        break;
00550       }
00551       parse_identifier (colored, s, pos);
00552       if (opos < pos) {
00553        type="identifier";
00554        break;
00555       }
00556       pos= opos;
00557       pos++;
00558     }
00559     while (false);
00560   }
00561   while (pos <= start);
00562   if (type=="string") return "#a06040";
00563   if (type=="comment") return "brown";
00564   if (type=="keyword") return "#8020c0";
00565   if (type=="constant") return "#2060c0";
00566   if (type=="number") return "#2060c0";
00567   return none;
00568 }