Back to index

texmacs  1.0.7.15
url.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : url.cpp
00004 * DESCRIPTION: unified resource location handling
00005 * COPYRIGHT  : (C) 1999  Joris van der Hoeven
00006 *******************************************************************************
00007 * The url class uses a tree representation for urls.
00008 * This allows us to generalize the concept of an url and allow paths and
00009 * patterns to be regarded as urls too. An url is either a string or a tuple
00010 * of one of the following types:
00011 *   "." -- here
00012 *   ".." -- parent
00013 *   none -- invalid url
00014 *   concat -- a/b/c is represented as (concat "a" (concat "b" "c"));
00015 *   or -- the path a:b/c is represented as (or "a" (concat "b" "c"));
00016 *   root -- the url http://gnu.org yields (concat (root "http") "gnu.org");
00017 *   wildcard -- (wildcard) corresponds to any url, (wildcard "*.tm")
00018 *     to all strings which end with .tm and (wildcard "*.tm" "file")
00019 *     to all TeXmacs files (i.e. discarding directories ending with .tm).
00020 *******************************************************************************
00021 * There are three main types of urls:
00022 *   - rootless urls, like a/b/c. These urls are mainly used in computations.
00023 *     For example, they can be appended to another url.
00024 *   - Standard rooted urls, like file:///usr or http://www.texmacs.org.
00025 *     These are the same as those used on the web.
00026 *   - System urls, characterized by a "default" root.
00027 *     These urls are similar to standard rooted urls, but they behave
00028 *     in a slightly different way with respect to concatenation.
00029 *     For instance http://www.texmacs.org/Web * file:///tmp would yield
00030 *     file:///tmp, where as http://www.texmacs.org/Web * /tmp yields
00031 *     http://www.texmacs.org/tmp
00032 *******************************************************************************
00033 * There are several formats for parsing (and printing) urls:
00034 *   - System format: the usual format on your operating system.
00035 *     On unix systems "/usr/bin:/usr/local/bin" would be a valid url
00036 *     representing a path and on windows systems "c:\windows;c:\TeXmacs"
00037 *     would be OK.
00038 *   - Unix format: this format forces unix-like notation even for
00039 *     other systems like Windows. This is convenient for url's in
00040 *     the source code. Unix environment variables like ~ and $TEXMACS_PATH
00041 *     can also be part of the url.
00042 *   - Standard format: the format which is used on the web.
00043 *     Notice that ftp://www.texmacs.org/pub and ftp://www.texmacs.org/pub/
00044 *     represent different urls. The second one is represented by concating
00045 *     on the right with an empty name.
00046 *******************************************************************************
00047 * When an explicit operation on urls need to be performed,
00048 * like reading a file, the url is first "resolved" into a simple url
00049 * with a unique name (modulo symbolic links) for the resource.
00050 * Next, the url is "concretized" as a file name which is understood
00051 * by the operating system. This may require searching the file from the web.
00052 * Concretized urls should be used quickly and not memorized,
00053 * since such names may be the names of temporary files,
00054 * which may be destroyed soon afterwards.
00055 *******************************************************************************
00056 * This software falls under the GNU general public license version 3 or later.
00057 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
00058 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
00059 ******************************************************************************/
00060 
00061 #include "boot.hpp"
00062 #include "url.hpp"
00063 #include "sys_utils.hpp"
00064 #include "web_files.hpp"
00065 #include "file.hpp"
00066 #include "analyze.hpp"
00067 
00068 #include <ctype.h>
00069 
00070 #if defined(OS_WIN32)  || defined(__MINGW32__)
00071 #define WINPATHS
00072 #endif
00073 
00074 #ifdef WINPATHS
00075 #define URL_CONCATER  '\\'
00076 #define URL_SEPARATOR ';'
00077 #else
00078 #define URL_CONCATER  '/'
00079 #define URL_SEPARATOR ':'
00080 #endif
00081 
00082 /******************************************************************************
00083 * Unrooted url constructors
00084 ******************************************************************************/
00085 
00086 static url
00087 url_get_atom (string s, int type) {
00088   if (type < URL_STANDARD) {
00089     if (s == "~") return url_system (get_env ("HOME"));
00090     if (starts (s, "$")) {
00091       string val= get_env (s (1, N(s)));
00092       if (val == "") return url_none ();
00093       return unblank (url_system (val));
00094     }
00095   }
00096   return as_url (tree (s));
00097 }
00098 
00099 static url
00100 url_get_name (string s, int type= URL_STANDARD, int i=0) {
00101   char sep= (type == URL_SYSTEM)? URL_CONCATER: '/';
00102   int start= i, n= N(s);
00103   while ((i<n) && (s[i] != sep) && (s[i] != '/')) i++;
00104   url u= url_get_atom (s (start, i), type);
00105   // url u= tree (s (start, i));
00106   if (i == n) return u;
00107   if (start == i) return url_get_name (s, type, i+1);
00108   return u * url_get_name (s, type, i+1);
00109 }
00110 
00111 static url
00112 url_get_path (string s, int type= URL_STANDARD, int i=0) {
00113   char sep= (type == URL_SYSTEM)? URL_SEPARATOR: ':';
00114   int start= i, n= N(s);
00115   if (i == n) return url_none ();
00116   while ((i<n) && (s[i] != sep)) i++;
00117   url u= url_general (s (start, i), type);
00118   if (i == n) return u;
00119   if (start == i) return url_get_path (s, type, i+1);
00120   return u | url_get_path (s, type, i+1);
00121 }
00122 
00123 /******************************************************************************
00124 * Rooted url constructors
00125 ******************************************************************************/
00126 
00127 url
00128 url_root (string protocol) {
00129   return as_url (tuple ("root", protocol));
00130 }
00131 
00132 url
00133 url_ramdisc (string contents) {
00134   return as_url (tuple ("root", "ramdisc", contents));
00135 }
00136 
00137 static url
00138 url_default (string name, int type= URL_SYSTEM) {
00139   url u= url_get_name (name, type);
00140 #ifdef WINPATHS
00141   // FIXME: this hack seems a bit too simple
00142   if (is_concat (u) && (u[2]->t == "")) u= u[1];
00143   // cout << name << " -> " << url_root ("default") * u << "\n";
00144   return url_root ("default") * u;
00145 #else
00146   if (u->t == "") return url_root ("default");
00147   return url_root ("default") * u;
00148 #endif
00149 }
00150 
00151 static url
00152 url_path (string s, int type= URL_SYSTEM) {
00153   url u= url_get_path (s, type);
00154   return u;
00155 }
00156 
00157 static url
00158 url_local (string name) {
00159   url u= url_get_name (name, URL_SYSTEM);
00160   return reroot (u, "file");
00161 }
00162 
00163 static url
00164 url_file (string name) {
00165   url u= url_get_name (name);
00166   return url_root ("file") * u;
00167 }
00168 
00169 static url
00170 url_http (string name) {
00171   url u= url_get_name (name);
00172   return url_root ("http") * u;
00173 }
00174 
00175 static url
00176 url_ftp (string name) {
00177   url u= url_get_name (name);
00178   return url_root ("ftp") * u;
00179 }
00180 
00181 static url
00182 url_tmfs (string name) {
00183   url u= url_get_name (name);
00184   return url_root ("tmfs") * u;
00185 }
00186 
00187 static url
00188 url_blank (string name) {
00189   url u= url_get_name (name);
00190   return url_root ("blank") * u;
00191 }
00192 
00193 /******************************************************************************
00194 * Generic url constructor
00195 ******************************************************************************/
00196 
00197 static bool
00198 heuristic_is_path (string name, int type) {
00199   char sep= (type==0)? URL_SEPARATOR: ':';
00200   int i, n= N(name);
00201   for (i=0; i<n; i++)
00202     if (name[i] == sep)
00203       return true;
00204   return false;
00205 }
00206 
00207 static bool
00208 heuristic_is_default (string name, int type) {
00209 #ifdef WINPATHS
00210   // FIXME: we probably should take into account 'type' too
00211   if (N(name) < 2) return false;
00212   if ((name[0] == '\\') && (name[1] == '\\')) return true;
00213   return
00214     isalpha (name[0]) && (name[1] == ':') &&
00215     ((N(name)==2) || (name[2] == '\\') || (name[2] == '/'));
00216 #else
00217   char sep= (type==0)? URL_CONCATER: '/';
00218   return (name != "") && (name[0] == sep);
00219 #endif
00220 }
00221 
00222 static bool
00223 heuristic_is_http (string name) {
00224   return starts (name, "www.");
00225   // FIXME: we might want to recognize some other ones like google.com too
00226 }
00227 
00228 static bool
00229 heuristic_is_ftp (string name) {
00230   return starts (name, "ftp.");
00231 }
00232 
00233 url
00234 url_general (string name, int type= URL_SYSTEM) {
00235   if (starts (name, "local:")) return url_local (name (6, N (name)));
00236   if (starts (name, "file://")) return url_file (name (7, N (name)));
00237   if (starts (name, "http://")) return url_http (name (7, N (name)));
00238   if (starts (name, "ftp://")) return url_ftp (name (6, N (name)));
00239   if (starts (name, "tmfs://")) return url_tmfs (name (7, N (name)));
00240   if (starts (name, "//")) return url_blank (name (2, N (name)));
00241   if (heuristic_is_path (name, type)) return url_path (name, type);
00242   if (heuristic_is_default (name, type)) return url_default (name, type);
00243   if (heuristic_is_http (name)) return url_http (name);
00244   if (heuristic_is_ftp (name)) return url_ftp (name);
00245   return url_get_name (name, type);
00246 }
00247 
00248 url
00249 url_unix (string name) {
00250   return url_general (name, URL_UNIX);
00251 }
00252 
00253 url
00254 url_unix (string dir, string name) {
00255   return url_unix (dir) * url_unix (name);
00256 }
00257 
00258 url
00259 url_system (string name) {
00260   return url_general (name, URL_SYSTEM);
00261 }
00262 
00263 url
00264 url_system (string dir, string name) {
00265   return url_system (dir) * url_system (name);
00266 }
00267 
00268 url
00269 url_standard (string name) {
00270   return url_general (name, URL_STANDARD);
00271 }
00272 
00273 url
00274 url_standard (string dir, string name) {
00275   return url_standard (dir) * url_standard (name);
00276 }
00277 
00278 url::url (const char* name): rep (tm_new<url_rep> (url_unix (name)->t)) {}
00279 url::url (string name): rep (tm_new<url_rep> (url_unix (name)->t)) {}
00280 url::url (string path_name, string name):
00281   rep (tm_new<url_rep> (url_unix (path_name, name)->t)) {}
00282 
00283 /******************************************************************************
00284 * Computational url constructors
00285 ******************************************************************************/
00286 
00287 static bool
00288 is_semi_root (url u) {
00289   // url u such that u/.. == u (website or windows drive name)
00290 #ifdef WINPATHS
00291   return is_concat (u) && is_root (u[1]) && is_atomic (u[2]);
00292 #else
00293   return is_concat (u) && is_root_web (u[1]) && is_atomic (u[2]);
00294 #endif
00295 }
00296 
00297 url
00298 operator * (url u1, url u2) {
00299   //cout << "concat " << u1->t << " * " << u2->t << "\n";
00300   if (is_root (u2) || (is_concat (u2) && is_root (u2[1]))) {
00301     if (is_concat (u1) && is_root_web (u1[1])) {
00302       if (is_root (u2, "default") ||
00303           (is_concat (u2) && is_root (u2[1], "default")))
00304         {
00305           url v= u1[2];
00306           while (is_concat (v)) v= v[1];
00307           if (is_root (u2)) return u1[1] * v;
00308           return u1[1] * v * u2[2];
00309         }
00310       if (is_root (u2, "blank") ||
00311           (is_concat (u2) && is_root (u2[1], "blank")))
00312         return reroot (u2, u1[1][1]->t->label);
00313     }
00314     return u2;
00315   }
00316   if (is_here (u1) || (u1->t == "")) return u2;
00317   if (is_here (u2)) return u1;
00318   if (is_none (u1)) return url_none ();
00319   if (is_none (u2)) return url_none ();
00320   if (u2 == url_parent ()) {
00321     if (is_root (u1)) return u1;
00322     if (is_atomic (u1) && (!is_parent (u1))) return url_here ();
00323     if (is_semi_root (u1)) return u1;
00324   }
00325   if (is_concat (u2) && (u2[1] == url_parent ())) {
00326     if (is_root (u1)) return u1 * u2[2];
00327     if (is_atomic (u1) && (!is_parent (u1))) return u2[2];
00328     if (is_semi_root (u1)) return u1 * u2[2];
00329   }
00330   if (is_concat (u1)) return u1[1] * (u1[2] * u2);
00331   return as_url (tuple ("concat", u1->t, u2->t));
00332 }
00333 
00334 url
00335 operator * (url u1, const char* name) {
00336   return u1 * url (name);
00337 }
00338 
00339 url
00340 operator * (url u1, string name) {
00341   return u1 * url (name);
00342 }
00343 
00344 url
00345 operator | (url u1, url u2) {
00346   if (is_none (u1)) return u2;
00347   if (is_none (u2)) return u1;
00348   if (is_or (u1)) return u1[1] | (u1[2] | u2);
00349   if (u1 == u2) return u2;
00350   if (is_or (u2) && (u1 == u2[1])) return u2;
00351   return as_url (tuple ("or", u1->t, u2->t));
00352 }
00353 
00354 url
00355 url_wildcard () {
00356   return as_url (tuple ("wildcard"));
00357 }
00358 
00359 url
00360 url_wildcard (string name) {
00361   return as_url (tuple ("wildcard", name));
00362 }
00363 
00364 /******************************************************************************
00365 * url predicates
00366 ******************************************************************************/
00367 
00368 bool
00369 is_rooted (url u) {
00370   return
00371     is_root (u) ||
00372     (is_concat (u) && is_rooted (u[1])) ||
00373     (is_or (u) && is_rooted (u[1]) && is_rooted (u[2]));
00374 }
00375 
00376 bool
00377 is_rooted (url u, string protocol) {
00378   return
00379     is_root (u, protocol) ||
00380     (is_concat (u) && is_rooted (u[1], protocol)) ||
00381     (is_or (u) && is_rooted (u[1], protocol) && is_rooted (u[2], protocol));
00382 }
00383 
00384 bool
00385 is_rooted_web (url u) {
00386   return
00387     is_root_web (u) ||
00388     (is_concat (u) && is_rooted_web (u[1])) ||
00389     (is_or (u) && is_rooted_web (u[1]) && is_rooted_web (u[2]));
00390 }
00391 
00392 bool
00393 is_rooted_tmfs (url u) {
00394   return
00395     is_root_tmfs (u) ||
00396     (is_concat (u) && is_rooted_tmfs (u[1])) ||
00397     (is_or (u) && is_rooted_tmfs (u[1]) && is_rooted_tmfs (u[2]));
00398 }
00399 
00400 bool
00401 is_rooted_blank (url u) {
00402   return
00403     is_root_blank (u) ||
00404     (is_concat (u) && is_rooted_blank (u[1])) ||
00405     (is_or (u) && is_rooted_blank (u[1]) && is_rooted_blank (u[2]));
00406 }
00407 
00408 bool
00409 is_name (url u) {
00410   if (is_atomic (u)) return true;
00411   if (!is_concat (u)) return false;
00412   return is_name (u[1]) && is_name (u[2]);
00413 }
00414 
00415 bool
00416 is_rooted_name (url u) {
00417   return is_concat (u) && is_root (u[1]) && is_name (u[2]);
00418 }
00419 
00420 bool
00421 is_name_in_path (url u) {
00422   if (is_name (u)) return true;
00423   return is_concat (u) && is_root (u[1], "default") && is_name (u[2]);
00424 }
00425 
00426 bool
00427 is_path (url u) {
00428   if (is_atomic (u)) return true;
00429   if ((!is_or (u)) && (!is_concat (u))) return false;
00430   return is_path (u[1]) && is_path (u[2]);
00431 }
00432 
00433 bool
00434 is_rooted_path (url u) {
00435   return is_rooted (u) && is_path (u);
00436 }
00437 
00438 bool
00439 is_ramdisc (url u) {
00440   return is_concat (u) && is_root (u[1], "ramdisc");
00441 }
00442 
00443 /******************************************************************************
00444 * Conversion routines for urls
00445 ******************************************************************************/
00446 
00447 string
00448 as_string (url u, int type) {
00449   // This routine pritty prints an url as a string.
00450   // FIXME: the current algorithm is quadratic in time.
00451   if (is_none (u)) return "{}";
00452   if (is_atomic (u)) return u->t->label;
00453   if (is_concat (u)) {
00454     int stype= type;
00455     if (is_root (u[1]) && (!is_root (u[1], "default"))) stype= URL_STANDARD;
00456     string sep= (stype==URL_SYSTEM? string (URL_CONCATER): string ("/"));
00457     string s1 = as_string (u[1], type);
00458     string s2 = as_string (u[2], stype);
00459     if (is_root (u[1], "default")) s1= "";
00460     if ((!is_name (u[1])) && (!is_root (u[1]))) s1= "{" * s1 * "}";
00461     if ((!is_concat (u[2])) && (!is_atomic (u[2])) && (!is_wildcard (u[2], 1)))
00462       s2= "{" * s2 * "}";
00463 #ifdef WINPATHS
00464     if (is_semi_root (u)) {
00465       if (ends (s2, ":")) return s2 * "\\";
00466       else return s2;
00467     }
00468     if (is_root (u[1]) && stype == URL_SYSTEM) return s2;
00469 #endif
00470     return s1 * sep * s2;
00471   }
00472   if (is_or (u)) {
00473     string s1= as_string (u[1], type);
00474     string s2= as_string (u[2], type);
00475     if (!is_name_in_path (u[1])) s1= "{" * s1 * "}";
00476     if ((!is_or (u[2])) && (!is_name_in_path (u[2]))) s2= "{" * s2 * "}";
00477 #ifdef WINPATHS
00478     if (type == URL_STANDARD) return s1 * ":" * s2;
00479     else return s1 * string (URL_SEPARATOR) * s2;
00480 #else
00481     return s1 * string (URL_SEPARATOR) * s2;
00482 #endif
00483   }
00484 #ifdef WINPATHS
00485   if (is_root (u, "default")) {
00486     int stype= type;
00487     if (is_root (u[1]) && (!is_root (u[1], "default"))) stype= URL_STANDARD;
00488        if (stype == URL_SYSTEM) return ""; else return "/";
00489   }
00490 #else
00491   if (is_root (u, "default")) return "/";
00492 #endif
00493   if (is_root (u, "blank")) return "/";
00494   if (is_root (u, "file")) return u[1]->t->label * "://";
00495   if (is_root (u)) return u[1]->t->label * ":/";
00496   if (is_wildcard (u, 0)) return "**";
00497   if (is_wildcard (u, 1)) return u->t[1]->label;
00498   FAILED ("bad url");
00499   return "";
00500 }
00501 
00502 tm_ostream&
00503 operator << (tm_ostream& out, url u) {
00504   return out << as_string (u, URL_SYSTEM);
00505 }
00506 
00507 /******************************************************************************
00508 * Operations on urls
00509 ******************************************************************************/
00510 
00511 url
00512 head (url u) {
00513   return u * url_parent ();
00514 }
00515 
00516 url
00517 tail (url u) {
00518   if (is_concat (u)) {
00519     if (is_root_web (u[1]) && is_atomic (u[2])) return url_here ();
00520     return tail (u[2]);
00521   }
00522   if (is_or (u)) return tail (u[1]) | tail (u[2]);
00523   if (is_root (u)) return url_here ();
00524   return u;
00525 }
00526 
00527 string
00528 suffix (url u) {
00529   u= tail (u);
00530   if (!is_atomic (u)) return "";
00531   string s= as_string (u);
00532   int i, n= N(s);
00533   for (i=n-1; i>=0; i--)
00534     if (s[i]=='.') break;
00535   if ((i>0) && (i<n-1)) {
00536     string r= s (i+1, n);
00537     while ((N(r)>0) && (r[N(r)-1]=='~' || r[N(r)-1]=='#')) r= r(0, N(r)-1);
00538     return r;
00539   }
00540   return "";
00541 }
00542 
00543 url
00544 glue (url u, string s) {
00545   if (is_atomic (u)) return as_url (tree (u->t->label * s));
00546   if (is_concat (u)) return u[1] * glue (u[2], s);
00547   if (is_or (u)) return glue (u[1], s) | glue (u[2], s);
00548   cerr << "\nu= " << u << "\n";
00549   cerr << "s= " << s << "\n";
00550   FAILED ("can't glue string to url");
00551   return u;
00552 }
00553 
00554 url
00555 unglue (url u, int nr) {
00556   if (is_atomic (u))
00557     return as_url (tree (u->t->label (0, N(u->t->label) - nr)));
00558   if (is_concat (u)) return u[1] * unglue (u[2], nr);
00559   if (is_or (u)) return unglue (u[1], nr) | unglue (u[2], nr);
00560   cerr << "\nu= " << u << "\n";
00561   cerr << "nr= " << nr << "\n";
00562   FAILED ("can't unglue from url");
00563   return u;
00564 }
00565 
00566 url
00567 unblank (url u) {
00568   if (is_concat (u) && (u[2]->t == "")) return u[1];
00569   if (is_concat (u)) return u[1] * unblank (u[2]);
00570   if (is_or (u)) return unblank (u[1]) | unblank (u[2]);
00571   return u;
00572 }
00573 
00574 url
00575 relative (url base, url u) {
00576   return head (base) * u;
00577 }
00578 
00579 url
00580 delta_sub (url base, url u) {
00581 #ifdef WINPATHS
00582   if (is_atomic (base) || heuristic_is_default (as_string (base), URL_SYSTEM))
00583     return u;
00584 #else
00585   if (is_atomic (base))
00586     return u;
00587 #endif
00588   if (is_concat (base) && is_concat (u) && (base[1] == u[1]))
00589     return delta_sub (base[2], u[2]);
00590   if (is_concat (base))
00591     return url_parent () * delta_sub (head (base), u);
00592   return url_none ();
00593 }
00594 
00595 url
00596 delta (url base, url u) {
00597   if (is_or (u))
00598     return delta (base, u[1]) | delta (base, u[2]);
00599   url res= delta_sub (base, u);
00600   if (is_none (res)) return u;
00601   return res;
00602 }
00603 
00604 static url
00605 expand (url u1, url u2) {
00606   if (is_or (u1)) return expand (u1[1], u2) | expand (u1[2], u2);
00607   if (is_or (u2)) return expand (u1, u2[1]) | expand (u1, u2[2]);
00608   if (is_ancestor (u2)) {
00609     if (is_concat (u1)) return u1 | expand (u1[1], u2);
00610     return u1 | u2;
00611   }
00612   if (is_concat (u2) && is_ancestor (u2[1]))
00613     return expand (expand (u1, u2[1]), u2[2]);
00614   return u1 * u2;
00615 }
00616 
00617 url
00618 expand (url u) {
00619   if (is_or (u)) return expand (u[1]) | expand (u[2]);
00620   if (is_concat (u)) return expand (expand (u[1]), expand (u[2]));
00621   return u;
00622 }
00623 
00624 bool
00625 descends (url u, url base) {
00626   if (is_or (base)) return descends (u, base[1]) || descends (u, base[2]);
00627   if (is_concat (u) && is_atomic (base))
00628     return u[1] == base;
00629   if (is_concat (u) && is_concat (base))
00630     return u[1] == base[1] && descends (u[2], base[2]);
00631   return false;
00632 }
00633 
00634 bool
00635 is_secure (url u) {
00636   return descends (u, expand (url_path ("$TEXMACS_SECURE_PATH")));
00637 }
00638 
00639 /******************************************************************************
00640 * Url sorting and factorization
00641 ******************************************************************************/
00642 
00643 static bool
00644 operator <= (url u1, url u2) {
00645   if (is_atomic (u1) && is_atomic (u2))
00646     return u1->t->label <= u2->t->label;
00647   if (is_atomic (u1)) return true;
00648   if (is_atomic (u2)) return false;
00649   if (is_concat (u1) && is_concat (u2)) {
00650     if (u1[1] == u2[1]) return u1[2] <= u2[2];
00651     else return u1[1] <= u2[1];
00652   }
00653   if (is_concat (u1)) return true;
00654   if (is_concat (u2)) return false;
00655   return true; // does not matter for sorting
00656 }
00657 
00658 static url
00659 sort_sub (url add, url to) {
00660   if (is_or (to)) {
00661     if (add <= to[1]) return add | to;
00662     return to[1] | sort_sub (add, to[2]);
00663   }
00664   if (add <= to) return add | to;
00665   else return to | add;
00666 }
00667 
00668 url
00669 sort (url u) {
00670   if (is_or (u))
00671     return sort_sub (u[1], sort (u[2]));
00672   else return u;
00673 }
00674 
00675 static url
00676 factor_sorted (url u) {
00677   if (!is_or (u)) return u;
00678   url v= factor_sorted (u[2]);
00679   if (is_concat (u[1])) {
00680     if (is_concat (v) && (u[1][1] == v[1]))
00681       return u[1][1] * (u[1][2] | v[2]);
00682     if (is_or (v) && is_concat (v[1]) && (u[1][1] == v[1][1]))
00683       return (u[1][1] * (u[1][2] | v[1][2])) | v[2];
00684   }
00685   return u[1] | v;
00686 }
00687 
00688 static url
00689 factor_sub (url u) {
00690   if (is_concat (u)) return u[1] * factor (u[2]);
00691   if (is_or (u)) return factor_sub (u[1]) | factor_sub (u[2]);
00692   return u;
00693 }
00694 
00695 url
00696 factor (url u) {
00697   return factor_sub (factor_sorted (sort (u)));
00698 }
00699 
00700 /******************************************************************************
00701 * Url resolution and wildcard expansion
00702 ******************************************************************************/
00703 
00704 url complete (url base, url u, string filter, bool flag);
00705 
00706 url
00707 reroot (url u, string protocol) {
00708   if (is_concat (u)) return reroot (u[1], protocol) * u[2];
00709   if (is_or (u)) return reroot (u[1], protocol) | reroot (u[2], protocol);
00710   if (is_root (u)) return url_root (protocol);
00711   return u;
00712 }
00713 
00714 static url
00715 complete (url base, url sub, url u, string filter, bool flag) {
00716   if (is_or (sub)) {
00717     url res1= complete (base, sub[1], u, filter, flag);
00718     if ((!is_none (res1)) && flag) return res1;
00719     return res1 | complete (base, sub[2], u, filter, flag);
00720   }
00721   if (is_concat (sub) && is_rooted (sub[1])) {
00722     url res= complete (sub[1], sub[2], u, filter, flag);
00723     return sub[1] * res;
00724   }
00725   return sub * complete (base * sub, u, filter, flag);
00726 }
00727 
00728 url
00729 complete (url base, url u, string filter, bool flag) {
00730  // cout << "complete " << base << " |||| " << u << LF;
00731   if (is_none (base)) return base;
00732   if (is_none (u)) return u;
00733   if ((!is_root (base)) && (!is_rooted_name (base))) {
00734     cerr << "base= " << base << LF;
00735     FAILED ("invalid base url");
00736   }
00737   if (is_name (u) || (is_concat (u) && is_root (u[1]) && is_name (u[2]))) {
00738     url comp= base * u;
00739     if (is_rooted (comp, "default") || is_rooted (comp, "file")) {
00740       if (is_of_type (comp, filter)) return reroot (u, "default");
00741       return url_none ();
00742     }
00743     if (is_rooted_web (comp) || is_rooted_tmfs (comp) || is_ramdisc (comp)) {
00744       if (is_of_type (comp, filter)) return u;
00745       return url_none ();
00746     }
00747     cerr << LF << "base= " << base << LF;
00748     ASSERT (is_rooted (comp), "unrooted url");
00749     FAILED ("bad protocol in url");
00750   }
00751   if (is_root (u)) {
00752     // FIXME: test filter flags here
00753     return u;
00754   }
00755   if (is_concat (u)) {
00756     url sub= complete (base, u[1], "", false);
00757     // "" should often be faster than the more correct "d" here
00758     return complete (base, sub, u[2], filter, flag);
00759   }
00760   if (is_or (u)) {
00761     url res1= complete (base, u[1], filter, flag);
00762     if ((!is_none (res1)) && flag) return res1;
00763     return res1 | complete (base, u[2], filter, flag);
00764   }
00765   if (is_wildcard (u)) {
00766     // FIXME: ret= ret | ... is unefficient (quadratic) in main loop
00767     if (!(is_rooted (base, "default") || is_rooted (base, "file"))) {
00768       cerr << LF << "base= " << base << LF;
00769       FAILED ("wildcards only implemented for files");
00770     }
00771     url ret= url_none ();
00772     if (is_wildcard (u, 0) && is_of_type (base, filter)) ret= url_here ();
00773     bool error_flag;
00774     array<string> dir= read_directory (base, error_flag);
00775     int i, n= N(dir);
00776     for (i=0; i<n; i++) {
00777       if ((!is_none (ret)) && flag) return ret;
00778       if ((dir[i] == ".") || (dir[i] == "..")) continue;
00779       if (is_wildcard (u, 0))
00780        ret= ret | (dir[i] * complete (base * dir[i], u, filter, flag));
00781       else if (match_wildcard (dir[i], u[1]->t->label))
00782        ret= ret | complete (base, dir[i], filter, flag);
00783     }
00784     return ret;
00785   }
00786   cout << LF << "url= " << u << LF;
00787   FAILED ("bad url");
00788   return u;
00789 }
00790 
00791 url
00792 complete (url u, string filter, bool flag) {
00793   url home= url_pwd ();
00794   return home * complete (home, u, filter, flag);
00795 }
00796 
00797 url
00798 complete (url u, string filter) {
00799   // This routine can be used in order to find all possible matches
00800   // for the wildcards in an url and replace the wildcards by these matches.
00801   // Moreover, matches are normalized (file root -> default root).
00802   return complete (u, filter, false);
00803 }
00804 
00805 url
00806 resolve (url u, string filter) {
00807   // This routine does the same thing as complete, but it stops at
00808   // the first match. It is particularly useful for finding files in paths.
00809   return complete (u, filter, true);
00810   /*
00811   url res= complete (u, filter, true);
00812   if (is_none (res))
00813     cout << "Failed resolution of " << u << ", " << filter << LF;
00814   return res;
00815   */
00816 }
00817 
00818 url
00819 resolve_in_path (url u) {
00820   if (use_which) {
00821     string name = escape_sh (as_string (u));
00822     string which= var_eval_system ("which " * name * " 2> /dev/null");
00823     if (ends (which, name))
00824       return which;
00825     else if ((which != "") &&
00826             (!starts (which, "which: ")) &&
00827             (!starts (which, "no ")))
00828       cout << "TeXmacs] " << which << "\n";
00829   }
00830   return resolve (url_path ("$PATH") * u, "x");
00831 }
00832 
00833 bool
00834 exists (url u) {
00835   return !is_none (resolve (u, "r"));
00836 }
00837 
00838 bool
00839 exists_in_path (url u) {
00840 #if defined (OS_WIN32) || defined (__MINGW__) || defined (__MINGW32__)
00841   return !is_none (resolve_in_path (url (as_string (u) * ".exe")));
00842 #else
00843   return !is_none (resolve_in_path (u));
00844 #endif
00845 }
00846 
00847 bool
00848 has_permission (url u, string filter) {
00849   return !is_none (resolve (u, filter));
00850 }
00851 
00852 static url
00853 descendance_sub (url u) {
00854   if (is_or (u))
00855     return descendance_sub (u[1]) | descendance_sub (u[2]);
00856   return complete (u, url_wildcard (), "r", false);
00857 }
00858 
00859 url
00860 descendance (url u) {
00861   // Utility for style and package menus in tm_server.cpp
00862   // Compute and merge subdirectories of directories in path
00863   return factor (descendance_sub (u));
00864 }
00865 
00866 /******************************************************************************
00867 * Concretization of resolved urls
00868 ******************************************************************************/
00869 
00870 string
00871 concretize (url u) {
00872   // This routine transforms a resolved url into a system file name.
00873   // In the case of distant files from the web, a local copy is created.
00874 #ifdef WINPATHS
00875   // FIXME: this fix seems strange;
00876   // to start with, the if condition is not respected
00877   string s = as_string (u);
00878   if (starts (s, "file:///")) s= s (8, N(s));
00879   if (heuristic_is_default (s, 0)) return s;
00880   if (is_rooted (u, "blank")) return as_string (reroot (u, "default"));
00881 #else
00882   if (is_rooted (u, "default") ||
00883       is_rooted (u, "file") ||
00884       is_rooted (u, "blank"))
00885     return as_string (reroot (u, "default"));
00886 #endif
00887   if (is_rooted_web (u)) return concretize (get_from_web (u));
00888   if (is_rooted_tmfs (u)) return concretize (get_from_server (u));
00889   if (is_ramdisc (u)) return concretize (get_from_ramdisc (u));
00890   if (is_here (u)) return as_string (url_pwd ());
00891   if (is_parent (u)) return as_string (url_pwd () * url_parent ());
00892   if (is_wildcard (u, 1)) return u->t[1]->label;
00893   cerr << "TeXmacs] couldn't concretize " << u->t << LF;
00894   // cerr << "\nu= " << u << LF;
00895   // FAILED ("url has no root");
00896   return "xxx";
00897 }
00898 
00899 string
00900 materialize (url u, string filter) {
00901   // Combines resolve and concretize
00902   url r= resolve (u, filter);
00903   if (!(is_rooted (r) || is_here (r) || is_parent (r))) {
00904     cerr << "\nu= " << u << LF;
00905     FAILED ("url could not be resolved");
00906   }
00907   return concretize (r);
00908 }