Back to index

texmacs  1.0.7.15
packrat_parser.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : packrat_parser.cpp
00004 * DESCRIPTION: efficient packrat parsing
00005 * COPYRIGHT  : (C) 2010  Joris van der Hoeven
00006 *******************************************************************************
00007 * This software falls under the GNU general public license version 3 or later.
00008 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
00009 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
00010 ******************************************************************************/
00011 
00012 #include "packrat_parser.hpp"
00013 #include "analyze.hpp"
00014 #include "drd_std.hpp"
00015 
00016 extern tree the_et;
00017 bool packrat_invalid_colors= false;
00018 
00019 /******************************************************************************
00020 * Constructor
00021 ******************************************************************************/
00022 
00023 packrat_parser_rep::packrat_parser_rep (packrat_grammar gr):
00024   grammar (gr->grammar),
00025   productions (gr->productions),
00026   properties (gr->properties),
00027   current_tree (packrat_uninit),
00028   current_string (""),
00029   current_start (-1),
00030   current_end (-1),
00031   current_path_pos (-1),
00032   current_pos_path (-1),
00033   current_cursor (-1),
00034   current_input (),
00035   current_cache (PACKRAT_UNDEFINED),
00036   current_production (packrat_uninit) {}
00037 
00038 packrat_parser
00039 make_packrat_parser (string lan, tree in) {
00040   static string         last_lan   = "";
00041   static tree           last_in    = "";
00042   static packrat_parser last_par;
00043   if (lan != last_lan || in != last_in) {
00044     packrat_grammar gr= find_packrat_grammar (lan);
00045     last_lan   = lan;
00046     last_in    = copy (in);
00047     last_par   = packrat_parser (gr, in);
00048   }
00049   return last_par;
00050 }
00051 
00052 packrat_parser
00053 make_packrat_parser (string lan, tree in, path in_pos) {
00054   static string         last_lan   = "";
00055   static tree           last_in    = "";
00056   static path           last_in_pos= path ();
00057   static packrat_parser last_par;
00058   if (lan != last_lan || in != last_in || in_pos != last_in_pos) {
00059     packrat_grammar gr= find_packrat_grammar (lan);
00060     last_lan   = lan;
00061     last_in    = copy (in);
00062     last_in_pos= copy (last_in_pos);
00063     last_par   = packrat_parser (gr, in, in_pos);
00064   }
00065   return last_par;
00066 }
00067 
00068 /******************************************************************************
00069 * Setting up the input
00070 ******************************************************************************/
00071 
00072 void
00073 packrat_parser_rep::set_input (tree t) {
00074   current_string= "";
00075   current_tree  = t;
00076   serialize (t, path ());
00077   if (DEBUG_FLATTEN)
00078     cout << "Input " << current_string << "\n";
00079   current_input= encode_tokens (current_string);
00080 }
00081 
00082 void
00083 packrat_parser_rep::set_cursor (path p) {
00084   if (is_nil (p)) current_cursor= -1;
00085   else current_cursor= encode_tree_position (p);
00086   //cout << current_input << ", " << current_cursor << "\n";
00087 }
00088 
00089 /******************************************************************************
00090 * Encoding and decoding of cursor positions in the input
00091 ******************************************************************************/
00092 
00093 C
00094 packrat_parser_rep::encode_string_position (int i) {
00095   if (i < 0) return PACKRAT_FAILED;
00096   int j=0;
00097   C k=0;
00098   while (j<i && j<N(current_string)) {
00099     tm_char_forwards (current_string, j);
00100     k++;
00101   }
00102   return k;
00103 }
00104 
00105 int
00106 packrat_parser_rep::encode_path (tree t, path p, path pos) {
00107   //cout << "Search " << pos << " in " << t << ", " << p << "\n";
00108   //cout << "Range " << current_start[p] << " -- " << current_end[p] << "\n";
00109   if (is_nil (pos) || !current_start->contains (p)) return -1;
00110   else if (is_atomic (t)) {
00111     if (current_path_pos->contains (p * pos))
00112       return current_path_pos[p * pos];
00113     else if (pos->item < 0 || pos->item > N(t->label)) return -1;
00114     return current_start[p] + pos->item;
00115   }
00116   else {
00117     if (pos == path (0)) return current_start[p];
00118     if (pos == path (1)) return current_end[p];
00119     if (pos->item < 0 || pos->item > N(t) || is_nil (pos->next)) return -1;
00120     return encode_path (t[pos->item], p * pos->item, pos->next);
00121   }
00122 }
00123 
00124 C
00125 packrat_parser_rep::encode_tree_position (path p) {
00126   if (is_nil (p) || p->item < 0) return PACKRAT_FAILED;
00127   int i= encode_path (current_tree, path (), p);
00128   return encode_string_position (i);
00129 }
00130 
00131 int
00132 packrat_parser_rep::decode_string_position (C pos) {
00133   //cout << "Decode " << pos << "\n";
00134   if (pos == PACKRAT_FAILED) return -1;
00135   int i=0;
00136   C k=0;
00137   while (i<N(current_string) && k<pos) {
00138     tm_char_forwards (current_string, i);
00139     k++;
00140   }
00141   return i;
00142 }
00143 
00144 path
00145 packrat_parser_rep::decode_path (tree t, path p, int pos) {
00146   //cout << "Search " << pos << " in " << t << ", " << p << "\n";
00147   //cout << "Range " << current_start[p] << " -- " << current_end[p] << "\n";
00148   if (is_atomic (t)) {
00149     if (current_pos_path->contains (pos))
00150       return current_pos_path[pos];
00151     else return p * (pos - current_start[p]);
00152   }
00153   else {
00154     for (int i=0; i<N(t); i++)
00155       if (pos >= current_start[p*i] && pos <= current_end[p*i])
00156        return decode_path (t[i], p * i, pos);
00157     if (pos <= current_start[p]) return p * 0;
00158     if (pos >= current_end[p]) return p * 1;
00159     return p * 0;
00160   }
00161 }
00162 
00163 path
00164 packrat_parser_rep::decode_tree_position (C pos) {
00165   int i= decode_string_position (pos);
00166   if (i < 0) return path (i);
00167   return decode_path (current_tree, path (), i);
00168 }
00169 
00170 /******************************************************************************
00171 * Packrat parsing
00172 ******************************************************************************/
00173 
00174 bool
00175 starts (tree t, string s) {
00176   return is_atomic (t) && starts (t->label, s);
00177 }
00178 
00179 C
00180 packrat_parser_rep::parse (C sym, C pos) {
00181   D key= (((D) sym) << 32) + ((D) (sym^pos));
00182   C im = current_cache [key];
00183   if (im != PACKRAT_UNDEFINED) {
00184     //cout << "Cached " << sym << " at " << pos << " -> " << im << LF;
00185     return im;
00186   }
00187   current_cache (key)= PACKRAT_FAILED;
00188   if (DEBUG_PACKRAT)
00189     cout << "Parse " << packrat_decode[sym] << " at " << pos << INDENT << LF;
00190   if (sym >= PACKRAT_TM_OPEN) {
00191     array<C> inst= grammar [sym];
00192     //cout << "Parse " << inst << " at " << pos << LF;
00193     switch (inst[0]) {
00194     case PACKRAT_OR:
00195       im= PACKRAT_FAILED;
00196       for (int i=1; i<N(inst); i++) {
00197        im= parse (inst[i], pos);
00198        if (im != PACKRAT_FAILED) break;
00199       }
00200       break;
00201     case PACKRAT_CONCAT:
00202       im= pos;
00203       for (int i=1; i<N(inst); i++) {
00204        im= parse (inst[i], im);
00205        if (im == PACKRAT_FAILED) break;
00206       }
00207       break;
00208     case PACKRAT_WHILE:
00209       im= pos;
00210       while (true) {
00211        C next= parse (inst[1], im);
00212        if (next == PACKRAT_FAILED || (next >= 0 && next <= im)) break;
00213        im= next;
00214       }
00215       break;
00216     case PACKRAT_REPEAT:
00217       im= parse (inst[1], pos);
00218       if (im != PACKRAT_FAILED)
00219        while (true) {
00220          C next= parse (inst[1], im);
00221          if (next == PACKRAT_FAILED || (next >= 0 && next <= im)) break;
00222          im= next;
00223        }
00224       break;
00225     case PACKRAT_RANGE:
00226       if (pos < N (current_input) &&
00227          current_input [pos] >= inst[1] &&
00228          current_input [pos] <= inst[2])
00229        im= pos + 1;
00230       else im= PACKRAT_FAILED;
00231       break;
00232     case PACKRAT_NOT:
00233       if (parse (inst[1], pos) == PACKRAT_FAILED) im= pos;
00234       else im= PACKRAT_FAILED;
00235       break;
00236     case PACKRAT_EXCEPT:
00237       im= parse (inst[1], pos);
00238       if (im != PACKRAT_FAILED)
00239        if (parse (inst[2], pos) != PACKRAT_FAILED)
00240          im= PACKRAT_FAILED;
00241       break;
00242     case PACKRAT_TM_OPEN:
00243       if (pos < N (current_input) &&
00244          starts (packrat_decode[current_input[pos]], "<\\"))
00245        im= pos + 1;
00246       else im= PACKRAT_FAILED;
00247       break;
00248     case PACKRAT_TM_ANY:
00249       im= pos;
00250       while (true) {
00251        C old= im;
00252        im= parse (PACKRAT_TM_OPEN, old);
00253        if (im == PACKRAT_FAILED)
00254          im= parse (PACKRAT_TM_LEAF, old);
00255        else {
00256          im= parse (PACKRAT_TM_ARGS, im);
00257          if (im != PACKRAT_FAILED)
00258            im= parse (encode_token ("</>"), im);
00259        }
00260        if (old == im) break;
00261       }
00262       break;
00263     case PACKRAT_TM_ARGS:
00264       im= parse (PACKRAT_TM_ANY, pos);
00265       while (im < N (current_input))
00266        if (current_input[im] != encode_token ("<|>")) break;
00267        else im= parse (PACKRAT_TM_ANY, im + 1);
00268       break;
00269     case PACKRAT_TM_LEAF:
00270       im= pos;
00271       while (im < N (current_input)) {
00272        tree t= packrat_decode[current_input[im]];
00273        if (starts (t, "<\\") || t == "<|>" || t == "</>") break;
00274        else im++;
00275       }
00276       break;
00277     case PACKRAT_TM_CHAR:
00278       if (pos >= N (current_input)) im= PACKRAT_FAILED;
00279       else {
00280        tree t= packrat_decode[current_input[pos]];
00281        if (starts (t, "<\\") || t == "<|>" || t == "</>") im= PACKRAT_FAILED;
00282        else im= pos + 1;
00283       }
00284       break;
00285     case PACKRAT_TM_CURSOR:
00286       if (pos == current_cursor) im= pos;
00287       else im= PACKRAT_FAILED;
00288       break;
00289     case PACKRAT_TM_FAIL:
00290       im= PACKRAT_FAILED;
00291       break;
00292     default:
00293       im= parse (inst[0], pos);
00294       break;
00295     }
00296   }
00297   else {
00298     if (pos < N (current_input) && current_input[pos] == sym) im= pos + 1;
00299     else im= PACKRAT_FAILED;
00300   }
00301   current_cache (key)= im;
00302   if (DEBUG_PACKRAT)
00303     cout << UNINDENT << "Parsed " << packrat_decode[sym]
00304         << " at " << pos << " -> " << im << LF;
00305   return im;
00306 }
00307 
00308 /******************************************************************************
00309 * Inspecting the parse tree
00310 ******************************************************************************/
00311 
00312 void
00313 packrat_parser_rep::inspect (C sym, C pos, array<C>& syms, array<C>& poss) {
00314   syms= array<C> ();
00315   poss= array<C> ();
00316   C next= parse (sym, pos);
00317   if (next == PACKRAT_FAILED) return;
00318   if (sym >= PACKRAT_TM_OPEN) {
00319     array<C> inst= grammar [sym];
00320     //cout << "Parse " << inst << " at " << pos << LF;
00321     switch (inst[0]) {
00322     case PACKRAT_OR:
00323       for (int i=1; i<N(inst); i++)
00324        if (parse (inst[i], pos) != PACKRAT_FAILED) {
00325          inspect (inst[i], pos, syms, poss);
00326          break;
00327        }
00328       break;
00329     case PACKRAT_CONCAT:
00330       for (int i=1; i<N(inst); i++) {
00331        next= parse (inst[i], pos);
00332        if (next == PACKRAT_FAILED) break;
00333         syms << inst[i];
00334         poss << pos;
00335        pos= next;
00336       }
00337       break;
00338     case PACKRAT_WHILE:
00339     case PACKRAT_REPEAT:
00340       while (true) {
00341         C next= parse (inst[1], pos);
00342         if (next == PACKRAT_FAILED) break;
00343         syms << inst[1];
00344         poss << pos;
00345         pos= next;
00346       }
00347       break;
00348     case PACKRAT_RANGE:
00349     case PACKRAT_NOT:
00350       break;
00351     case PACKRAT_EXCEPT:
00352       inspect (inst[1], pos, syms, poss);
00353       break;
00354     case PACKRAT_TM_OPEN:
00355     case PACKRAT_TM_ANY:
00356     case PACKRAT_TM_ARGS:
00357     case PACKRAT_TM_LEAF:
00358     case PACKRAT_TM_CHAR:
00359     case PACKRAT_TM_CURSOR:
00360     case PACKRAT_TM_FAIL:
00361       break;
00362     default:
00363       inspect (inst[0], pos, syms, poss);
00364       break;
00365     }
00366   }
00367 }
00368 
00369 bool
00370 packrat_parser_rep::is_left_recursive (C sym) {
00371   if (sym < PACKRAT_TM_OPEN) return false;
00372   array<C> inst= grammar [sym];
00373   if (inst[0] != PACKRAT_CONCAT || N(inst) != 3) return false;
00374   if (inst[1] < PACKRAT_TM_OPEN) return false;
00375   tree t= packrat_decode[inst[1]];
00376   return is_compound (t, "symbol", 1) && ends (t[0]->label, "-head");
00377 }
00378 
00379 bool
00380 packrat_parser_rep::is_associative (C sym) {
00381   static C prop= encode_symbol (compound ("property", "associativity"));
00382   D key = (((D) prop) << 32) + ((D) (sym ^ prop));
00383   if (!properties->contains (key)) return false;
00384   return properties[key] == "associative";
00385 }
00386 
00387 bool
00388 packrat_parser_rep::is_anti_associative (C sym) {
00389   static C prop= encode_symbol (compound ("property", "associativity"));
00390   D key = (((D) prop) << 32) + ((D) (sym ^ prop));
00391   if (!properties->contains (key)) return false;
00392   return properties[key] == "anti-associative";
00393 }
00394 
00395 bool
00396 packrat_parser_rep::is_list_like (C sym) {
00397   (void) sym;
00398   return false;
00399 }
00400 
00401 bool
00402 packrat_parser_rep::is_selectable (C sym) {
00403   tree t= packrat_decode[sym];
00404   if (is_compound (t, "partial", 1)) return true;
00405   if (!is_compound (t, "symbol", 1)) return false;
00406   string s= t[0]->label;
00407   return !ends (s, "-head") && !ends (s, "-tail");
00408 }
00409 
00410 /******************************************************************************
00411 * Finding all enclosing structures at a given position
00412 ******************************************************************************/
00413 
00414 void
00415 packrat_parser_rep::context
00416   (C sym, C pos, C w1, C w2, int mode,
00417    array<C>& kind, array<C>& begin, array<C>& end)
00418 {
00419   C next= parse (sym, pos);
00420   if (next < 0 || pos > w1 || next < w2) return;
00421 
00422   if (mode == 2 && (pos == w1 || next == w2)) {
00423     static C prop= encode_symbol (compound ("property", "operator"));
00424     D key = (((D) prop) << 32) + ((D) (sym ^ prop));
00425     if (properties->contains (key)) return;
00426   }
00427 
00428   if (true) {
00429     static C sel_prop= encode_symbol (compound ("property", "selectable"));
00430     static C foc_prop= encode_symbol (compound ("property", "focus"));
00431     D sel_key = (((D) sel_prop) << 32) + ((D) (sym ^ sel_prop));
00432     D foc_key = (((D) foc_prop) << 32) + ((D) (sym ^ foc_prop));
00433     if (properties->contains (sel_key) &&
00434         properties[sel_key] == "inside");
00435     else if (properties->contains (foc_key) &&
00436              properties[foc_key] == "disallow" &&
00437              mode == 2);
00438     else {
00439       int n= N(kind);
00440       if (n >= 1 && begin[n-1] == pos && end[n-1] == next) {
00441         if (is_selectable (sym) || !is_selectable (kind[n-1]))
00442           kind[n-1]= sym;
00443       }
00444       else {
00445         kind  << sym;
00446         begin << pos;
00447         end   << next;
00448       }
00449     }
00450   }
00451 
00452   if (mode >= 0) {
00453     static C prop= encode_symbol (compound ("property", "atomic"));
00454     D key = (((D) prop) << 32) + ((D) (sym ^ prop));
00455     if (properties->contains (key)) return;
00456   }
00457 
00458   if (is_left_recursive (sym) && mode == 0) {
00459     array<C> inst= grammar [sym];
00460     C before= pos;
00461     C middle= parse (inst[1], before);
00462     if (middle == PACKRAT_FAILED) return;
00463     C after = parse (inst[2], middle);
00464     if (after == PACKRAT_FAILED) return;
00465     array<C> csym;
00466     array<C> cpos;
00467     inspect (inst[2], middle, csym, cpos);
00468     csym= append (inst[1], csym);
00469     cpos= append (before, cpos);
00470     cpos << after;
00471     int i1, i2;
00472     for (i1=0; i1<N(csym); i1++)
00473       if (cpos[i1+1] > w1) break;
00474     for (i2=i1; i2<N(csym); i2++)
00475       if (cpos[i2+1] >= w2) break;
00476     if (i1 == i2) {
00477       int i, n= N(kind);
00478       context (csym[i1], cpos[i1], w1, w2, mode, kind, begin, end);
00479       for (i=n; i<N(kind); i++)
00480         if (is_selectable (kind[i]))
00481           return;
00482       kind  -> resize (n);
00483       begin -> resize (n);
00484       end   -> resize (n);
00485     }
00486     C alt_start= -1;
00487     while (i1 > 0) {
00488       array<C> ccsym;
00489       array<C> ccpos;
00490       inspect (csym[i1], cpos[i1], ccsym, ccpos);
00491       if (N(ccsym)>1 && is_associative (ccsym[0])) {
00492         if (w1 >= ccpos[1]) alt_start= ccpos[1];
00493         break;
00494       }
00495       if (N(ccsym)>0 && is_anti_associative (ccsym[0])) break;
00496       i1--;
00497     }
00498     tree sel= compound ("partial", packrat_decode[sym]);
00499     kind  << encode_symbol (sel);
00500     begin << (alt_start<0? cpos[i1]: alt_start);
00501     end   << cpos[i2+1];
00502     return;
00503   }
00504 
00505   if (sym >= PACKRAT_TM_OPEN) {
00506     array<C> inst= grammar [sym];
00507     //cout << "Context " << inst << " at " << pos << LF;
00508     switch (inst[0]) {
00509     case PACKRAT_OR:
00510       for (int i=1; i<N(inst); i++)
00511        if (parse (inst[i], pos) != PACKRAT_FAILED) {
00512          context (inst[i], pos, w1, w2, mode, kind, begin, end);
00513          break;
00514        }
00515       break;
00516     case PACKRAT_CONCAT:
00517       for (int i=1; i<N(inst); i++) {
00518        next= parse (inst[i], pos);
00519        if (next == PACKRAT_FAILED) break;
00520        if (pos <= w1 && w2 <= next)
00521          context (inst[i], pos, w1, w2, mode, kind, begin, end);
00522        if (next > w2) break;
00523        pos= next;
00524       }
00525       break;
00526     case PACKRAT_WHILE:
00527     case PACKRAT_REPEAT:
00528       while (true) {
00529        C next= parse (inst[1], pos);
00530        if (next == PACKRAT_FAILED) break;
00531        if (pos <= w1 && w2 <= next)
00532          context (inst[1], pos, w1, w2, mode, kind, begin, end);
00533        if (next > w2) break;
00534        pos= next;
00535       }
00536       break;
00537     case PACKRAT_RANGE:
00538     case PACKRAT_NOT:
00539       break;
00540     case PACKRAT_EXCEPT:
00541       context (inst[1], pos, w1, w2, mode, kind, begin, end);
00542       break;
00543     case PACKRAT_TM_OPEN:
00544     case PACKRAT_TM_ANY:
00545     case PACKRAT_TM_ARGS:
00546     case PACKRAT_TM_LEAF:
00547     case PACKRAT_TM_CHAR:
00548     case PACKRAT_TM_CURSOR:
00549     case PACKRAT_TM_FAIL:
00550       break;
00551     default:
00552       context (inst[0], pos, w1, w2, mode, kind, begin, end);
00553       break;
00554     }
00555   }
00556 }
00557 
00558 void
00559 packrat_parser_rep::compress
00560   (array<C>& kind, array<C>& begin, array<C>& end)
00561 {
00562   array<C> new_kind, new_begin, new_end;
00563   for (int i=0; i<N(kind); i++) {
00564     int n= N(new_kind);
00565     if (is_selectable (kind[i]))
00566       if (N(new_kind) == 0 ||
00567          new_kind [n-1] != kind[i] ||
00568          (new_begin[n-1] != begin[i] && new_end[n-1] != end[i])) {
00569        new_kind  << kind[i];
00570        new_begin << begin[i];
00571        new_end   << end[i];
00572       }
00573   }
00574   kind = new_kind;
00575   begin= new_begin;
00576   end  = new_end;
00577 }
00578 
00579 /******************************************************************************
00580 * Syntax highlighting
00581 ******************************************************************************/
00582 
00583 void
00584 packrat_parser_rep::highlight (tree t, path tp, path p1, path p2, int col) {
00585   if (p1 == p2);
00586   else if (is_atomic (t)) {
00587     string s= t->label;
00588     ASSERT (is_atom (p1) && is_atom (p2), "invalid selection");
00589     ASSERT (0 <= p1->item && p1->item <= p2->item && p2->item <= N(s),
00590            "invalid selection");
00591     attach_highlight (t, current_hl_lan, col, p1->item, p2->item);
00592   }
00593   else if (N(t) == 0);
00594   else {
00595     ASSERT (!is_nil (p1) && !is_nil (p2) && p1->item <= p2->item,
00596            "invalid selection");
00597     if (p1 == path (0)) p1= path (0, 0);
00598     if (p2 == path (1)) p2= path (N(t) - 1, right_index (t[N(t) -1]));
00599     for (int i= max (0, p1->item); i <= min (p2->item, N(t)-1); i++) {
00600       path q1= (i == p1->item? p1->next: path (0));
00601       path q2= (i == p2->item? p2->next: path (right_index (t[i])));
00602       highlight (t[i], tp * i, q1, q2, col);
00603     }
00604   }
00605 }
00606 
00607 void
00608 packrat_parser_rep::highlight (C sym, C pos) {
00609   C next= parse (sym, pos);
00610   if (next < 0) return;
00611   if (sym >= PACKRAT_SYMBOLS) {
00612     static C prop= encode_symbol (compound ("property", "highlight"));
00613     D key = (((D) prop) << 32) + ((D) (sym ^ prop));
00614     if (properties->contains (key)) {
00615       int  col  = encode_color (properties [key]);
00616       path start= decode_tree_position (pos);
00617       path end  = decode_tree_position (next);
00618       highlight (current_tree, path (), start, end, col);
00619       static C prop= encode_symbol (compound ("property", "transparent"));
00620       D key = (((D) prop) << 32) + ((D) (sym ^ prop));
00621       if (!properties->contains (key)) return;
00622     }
00623   }
00624 
00625   if (sym >= PACKRAT_TM_OPEN) {
00626     array<C> inst= grammar [sym];
00627     //cout << "Parse " << inst << " at " << pos << LF;
00628     switch (inst[0]) {
00629     case PACKRAT_OR:
00630       for (int i=1; i<N(inst); i++)
00631        if (parse (inst[i], pos) != PACKRAT_FAILED) {
00632          highlight (inst[i], pos);
00633          break;
00634        }
00635       break;
00636     case PACKRAT_CONCAT:
00637       for (int i=1; i<N(inst); i++) {
00638        next= parse (inst[i], pos);
00639        highlight (inst[i], pos);
00640        pos= next;
00641       }
00642       break;
00643     case PACKRAT_WHILE:
00644     case PACKRAT_REPEAT:
00645       while (true) {
00646        C next= parse (inst[1], pos);
00647        if (next == PACKRAT_FAILED) break;
00648        highlight (inst[1], pos);
00649        if (next == pos) break;
00650        pos= next;
00651       }
00652       break;
00653     case PACKRAT_RANGE:
00654     case PACKRAT_NOT:
00655       break;
00656     case PACKRAT_EXCEPT:
00657       highlight (inst[1], pos);
00658       break;      
00659     case PACKRAT_TM_OPEN:
00660     case PACKRAT_TM_ANY:
00661     case PACKRAT_TM_ARGS:
00662     case PACKRAT_TM_LEAF:
00663     case PACKRAT_TM_CHAR:
00664     case PACKRAT_TM_CURSOR:
00665     case PACKRAT_TM_FAIL:
00666       break;
00667     default:
00668       highlight (inst[0], pos);
00669       break;
00670     }
00671   }
00672 }
00673 
00674 /******************************************************************************
00675 * Memoized and accelerated highlighting
00676 ******************************************************************************/
00677 
00678 static bool
00679 empty_line (tree t) {
00680   if (!is_atomic (t)) return false;
00681   string s= t->label;
00682   for (int i=0; i<N(s); i++)
00683     if (s[i] != ' ') return false;
00684   return true;
00685 }
00686 
00687 static bool
00688 consistent_portion (tree t, int begin, int end) {
00689   int level= 0;
00690   for (int i=begin; i<end; i++)
00691     if (is_atomic (t[i])) {
00692       string s= t[i]->label;
00693       for (int j=0; j<N(s); j++)
00694        switch (s[j]) {
00695        case '(': level++; break;
00696        case ')': if (level <= 0) return false; level--; break;
00697        case '[': level++; break;
00698        case ']': if (level <= 0) return false; level--; break;
00699        case '{': level++; break;
00700        case '}': if (level <= 0) return false; level--; break;
00701        default : break;
00702        }
00703     }
00704   return level == 0;
00705 }
00706 
00707 static void
00708 consistent_enlargement (tree t, int& begin, int& end) {
00709   while (begin > 0 || end < N(t)) {
00710     while (begin > 0    && !empty_line (t[begin-1])) begin--;
00711     while (end   < N(t) && !empty_line (t[end    ])) end++;
00712     if (consistent_portion (t, begin, end)) return;
00713     //cout << "Inconsistent " << begin << " -- " << end << "\n";
00714     begin= max (0   , begin - max (end - begin, 1));
00715     end  = min (N(t), end   + max (end - begin, 1));
00716     //cout << "  Try " << begin << " -- " << end << "\n";
00717   }
00718 }
00719 
00720 /******************************************************************************
00721 * User interface
00722 ******************************************************************************/
00723 
00724 path
00725 packrat_parse (string lan, string sym, tree in) {
00726   packrat_parser par= make_packrat_parser (lan, in);
00727   C pos= par->parse (encode_symbol (compound ("symbol", sym)), 0);
00728   return par->decode_tree_position (pos);
00729 }
00730 
00731 bool
00732 packrat_correct (string lan, string sym, tree in) {
00733   packrat_parser par= make_packrat_parser (lan, in);
00734   C pos= par->parse (encode_symbol (compound ("symbol", sym)), 0);
00735   return pos == N(par->current_input);
00736 }
00737 
00738 bool
00739 packrat_available_path (string lan, tree in, path in_p) {
00740   packrat_parser par= make_packrat_parser (lan, in);
00741   return par->current_start->contains (in_p);
00742 }
00743 
00744 object
00745 packrat_context (string lan, string s, tree in, path in_pos) {
00746   //cout << "Context " << in << " at " << in_pos
00747   //     << " (" << lan << ", " << s << ")" << LF;
00748   packrat_parser par= make_packrat_parser (lan, in);
00749   C sym= encode_symbol (compound ("symbol", s));
00750   if (par->parse (sym, 0) != N(par->current_input))
00751     par= make_packrat_parser (lan, in, in_pos);
00752   C pos= par->encode_tree_position (in_pos);
00753   if (pos == PACKRAT_FAILED) return object (false);
00754   array<C> kind, begin, end;
00755   par->context (sym, 0, pos-1, pos+1, 0, kind, begin, end);
00756   par->compress (kind, begin, end);
00757   object ret= null_object ();
00758   for (int i=0; i<N(kind); i++) {
00759     object x1 (symbol_object (packrat_decode[kind[i]][0]->label));
00760     object x2 (par->decode_tree_position (begin[i]));
00761     object x3 (par->decode_tree_position (end[i]));
00762     ret= cons (list_object (x1, x2, x3), ret);
00763   }
00764   return ret;
00765 }
00766 
00767 bool
00768 packrat_select (string lan, string s, tree in, path in_pos,
00769               path& p1, path& p2, int mode)
00770 {
00771   // mode= 0: genuine semantic selection
00772   // mode= 1: strictly larger selection for select_enlarge
00773   // mode= 2: determine environment rectangles
00774   if (path_less (p2, p1))
00775     return packrat_select (lan, s, in, in_pos, p2, p1, mode);
00776   //cout << "Enlarge " << p1 << " -- " << p2 << " in " << in
00777   //<< " (" << lan << ", " << s << ")" << LF;
00778   packrat_parser par= make_packrat_parser (lan, in);
00779   C sym = encode_symbol (compound ("symbol", s));
00780   if (par->parse (sym, 0) != N(par->current_input))
00781     par= make_packrat_parser (lan, in, in_pos);
00782   C pos1= par->encode_tree_position (p1);
00783   C pos2= par->encode_tree_position (p2);
00784   //cout << "Encoded " << pos1 << " -- " << pos2
00785   //     << " in " << par->current_string << LF;
00786   if (par->parse (sym, 0) != N(par->current_input)) return false;
00787   if (pos1 == PACKRAT_FAILED || pos2 == PACKRAT_FAILED) return false;
00788   array<C> kind, begin, end;
00789   C pos0= pos1;
00790   if ((mode == 1 && pos1 == pos2) || mode == 2) pos0= max (pos1 - 1, 0);
00791   par->context (sym, 0, pos0, pos2, mode, kind, begin, end);
00792   //for (int i=0; i<N(kind); i++)
00793   //  cout << i << ":\t"
00794   //       << par->decode_tree_position (begin[i]) << "\t"
00795   //       << par->decode_tree_position (end[i]) << "\t"
00796   //       << packrat_decode[kind[i]] << LF;
00797   par->compress (kind, begin, end);
00798   int n= N(kind);
00799   if (n == 0) return false;
00800   if (mode == 1) {
00801     if (pos1 == begin[n-1] && pos2 == end[n-1]) n--;
00802     if (n == 0) return false;
00803   }
00804   p1= par->decode_tree_position (begin[n-1]);
00805   p2= par->decode_tree_position (end[n-1]);
00806   //cout << "Selected " << packrat_decode[kind[n-1]] << LF;
00807   return true;
00808 }
00809 
00810 void
00811 packrat_highlight_subtree (string lan, string s, tree in) {
00812   //cout << "Highlight " << lan << ", " << s << " in " << in << "\n";
00813   int hl_lan= packrat_abbreviation (lan, s);
00814   if (hl_lan == 0) return;
00815   packrat_parser par= make_packrat_parser (lan, in);
00816   C sym = encode_symbol (compound ("symbol", s));
00817   if (par->parse (sym, 0) == N(par->current_input)) {
00818     par->current_hl_lan= hl_lan;
00819     par->highlight (sym, 0);
00820   }
00821 }
00822 
00823 void
00824 packrat_highlight (string lan, string s, tree in) {
00825   int hl_lan= packrat_abbreviation (lan, s);
00826   if (hl_lan == 0) return;
00827   //cout << "Highlight " << in << "\n";
00828   if (is_func (in, DOCUMENT)) {
00829     int i, begin, end;
00830     for (begin=0; begin<N(in); begin++)
00831       if (!has_highlight (in[begin], hl_lan))
00832        break;
00833     for (end=N(in)-1; end>begin; end--)
00834       if (!has_highlight (in[end-1], hl_lan))
00835        break;
00836     consistent_enlargement (in, begin, end);    
00837     for (i=begin; i<end; i++)
00838       detach_highlight (in[i], hl_lan);
00839     attach_highlight (in, hl_lan);
00840     packrat_highlight_subtree (lan, s, in (begin, end));
00841   }
00842   else {
00843     if (is_compound (in))
00844       for (int i=0; i<N(in); i++)
00845        detach_highlight (in[i], hl_lan);
00846     attach_highlight (in, hl_lan);
00847     packrat_highlight_subtree (lan, s, in);
00848   }
00849 }