Back to index

texmacs  1.0.7.15
disk_table.cpp
Go to the documentation of this file.
00001 
00002 /******************************************************************************
00003 * MODULE     : disk_table.cpp
00004 * DESCRIPTION: large size string->string tables stored on disk
00005 * COPYRIGHT  : (C) 2007  Joris van der Hoeven
00006 *******************************************************************************
00007 * This software falls under the GNU general public license version 3 or later.
00008 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
00009 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
00010 ******************************************************************************/
00011 
00012 #include "tmfs.hpp"
00013 #include "file.hpp"
00014 
00015 void write_to_disk (url u, transaction t);
00016 transaction read_table_from_disk (url u);
00017 
00018 /******************************************************************************
00019 * Subroutines for hashing
00020 ******************************************************************************/
00021 
00022 string
00023 mix (string s) {
00024   unsigned char c= 0;
00025   int i, n= N(s);
00026   string r= copy (s);
00027   for (i=n-1; i>=0; i--) {
00028     c= s[i] ^ (c<<1) ^ (c>>7) ^ (c<<3) ^ (c>>5);
00029     r[i]= c;
00030   }
00031   return r;
00032 }
00033 
00034 int
00035 hash_index (string s, int level) {
00036   int i= level >> 1;
00037   if (i >= N(s)) return 0;
00038   string r= mix (s);
00039   int h= ((unsigned char) r[i]);
00040   if ((level&1) == 0) return h&15;
00041   else return h>>4;
00042 }
00043 
00044 url
00045 subdir (url u, int index) {
00046   return u * string ((char) ('A' + index));
00047 }
00048 
00049 /******************************************************************************
00050 * Encoding of a tables on disk
00051 ******************************************************************************/
00052 
00053 string
00054 disk_encode_string (string s) {
00055   string r;
00056   int i, n= N(s);
00057   for (i=0; i<n; i++)
00058     if (((unsigned char) s[i]) < 32) r << '%' << (s[i]+64);
00059     else if (s[i] == '%') r << '%' << '%';
00060     else r << s[i];
00061   return r;
00062 }
00063 
00064 string
00065 disk_decode_string (string s) {
00066   string r;
00067   int i, n= N(s);
00068   for (i=0; i<n; i++)
00069     if (s[i] != '%') r << s[i];
00070     else {
00071       if (++i >= n) break;
00072       if (s[i] == '%') r << '%';
00073       else r << (s[i]-64);
00074     }
00075   return r;
00076 }
00077 
00078 string
00079 disk_encode_collection (collection t) {
00080   string r;
00081   iterator<string> it= iterate (t);
00082   while (it->busy ()) {
00083     string s= it->next ();
00084     r << '\t';
00085     if (t[s] < 0) r << '!';
00086     if (N(s) != 0 && s[0] == '!') r << '%';
00087     r << s;
00088   }
00089   return r;
00090 }
00091 
00092 collection
00093 disk_decode_collection (string s) {
00094   int i, n= N(s);
00095   collection t;
00096   for (i=0; i<n; ) {
00097     int eps= 1;
00098     if (s[i] == '\t') i++;
00099     if (i<n && s[i] == '!') { i++; eps= -eps; }
00100     if (i+1<n && s[i] == '%' && s[i+1] == '!') i++;
00101     int start= i;
00102     for (; i<n; i++)
00103       if (s[i] == '\t') break;
00104     t (disk_decode_string (s (start, i)))= eps;
00105   }
00106   return t;
00107 }
00108 
00109 string
00110 disk_encode_transaction (transaction t) {
00111   string r;
00112   iterator<string> it= iterate (t);
00113   while (it->busy()) {
00114     string s= it->next();
00115     r << disk_encode_string (s) << disk_encode_collection (t[s]) << "\n";
00116   }
00117   return r;
00118 }
00119 
00120 transaction
00121 disk_decode_transaction (string s) {
00122   int i, n= N(s);
00123   transaction t;
00124   for (i=0; i<n; i++) {
00125     int start= i;
00126     for (; i<n; i++)
00127       if (s[i] == '\t' || s[i] == '\n') break;
00128     string key= disk_decode_string (s (start, i));
00129     start= i;
00130     for (; i<n; i++)
00131       if (s[i] == '\n') break;
00132     if (!t->contains (key)) t (key)= collection ();
00133     merge (t (key), disk_decode_collection (s (start, i)));
00134   }
00135   return t;
00136 }
00137 
00138 /******************************************************************************
00139 * Constructors and destructors and pending disk cache
00140 ******************************************************************************/
00141 
00142 disk_table_rep::disk_table_rep (url root2): root (root2) {
00143   if (!exists (root)) mkdir (root);
00144   if (exists (root * "pending")) {
00145     string s;
00146     load_string (root * "pending", s, true);
00147     transaction t= disk_decode_transaction (s);
00148     write_to_disk (root, t);
00149     remove (root * "pending");
00150   }
00151   open_pending_write ();
00152 }
00153 
00154 disk_table_rep::~disk_table_rep () {
00155   close_pending_write ();
00156 }
00157 
00158 void
00159 disk_table_rep::open_pending_write () {
00160   string name= concretize (root * "pending");
00161   char* _name= as_charp (name);
00162 #ifdef OS_WIN32
00163   pending_fp= _fopen (_name, "ab");
00164 #else
00165   pending_fp= fopen (_name, "a");
00166 #endif
00167   if (pending_fp == NULL)
00168     FAILED ("cannot store pending writes file for TeXmacs file system");
00169   tm_delete_array (_name);
00170 }
00171 
00172 void
00173 disk_table_rep::close_pending_write () {
00174   fclose (pending_fp);
00175 }
00176 
00177 void
00178 disk_table_rep::flush_pending_write () {
00179   if (N (pending_write) > 10000) {
00180     write_to_disk (root, pending_write);
00181     pending_write= transaction ();
00182     close_pending_write ();
00183     remove (root * "pending");
00184     open_pending_write ();
00185   }
00186 }
00187 
00188 void
00189 disk_table_rep::flush_pending_read () {
00190   if (N (pending_read) > 10000) {
00191     read_cache= pending_read;
00192     pending_read= transaction ();
00193   }
00194 }
00195 
00196 /******************************************************************************
00197 * Writing values to disk
00198 ******************************************************************************/
00199 
00200 void
00201 write_to_disk (url u, transaction t, int level) {
00202   //cout << "Write " << u << ", " << t << ", " << level << "\n";
00203   if (exists (u * "index")) {
00204     transaction h= read_table_from_disk (u);
00205     merge (h, t);
00206     t= h;
00207   }
00208   if (total_size (t) < (1000 << level)) {
00209     if (!exists (u)) mkdir (u);
00210     save_string (u * "index", disk_encode_transaction (t), false);
00211   }
00212   else {
00213     transaction h[16];
00214     iterator<string> it= iterate (t);
00215     while (it->busy()) {
00216       string s= it->next();
00217       int i= hash_index (s, level);
00218       h[i](s)= t[s];
00219     }
00220     for (int i=0; i<16; i++)
00221       if (N(h[i]) != 0)
00222        write_to_disk (subdir (u, i), h[i], level+1);
00223     remove (u * "index");
00224   }
00225 }
00226 
00227 void
00228 write_to_disk (url u, string key, string val, int eps, int level) {
00229   //cout << "Write " << u << ", " << key << ", " << val << ", " << eps
00230   //<< ", " << level << "\n";
00231   if (!exists (u)) mkdir (u);
00232   if (eps < 0) {
00233     remove (u * key);
00234     int index= hash_index (key, level);
00235     if (exists (subdir (u, index)))
00236       write_to_disk (subdir (u, index), key, val, eps, level + 1);
00237   }
00238   else {
00239     bool error_flag;
00240     if (N (read_directory (u, error_flag)) <= 32)
00241       save_string (u * key, val, false);
00242     else {
00243       int index= hash_index (key, level);
00244       write_to_disk (subdir (u, index), key, val, eps, level + 1);
00245     }
00246   }
00247 }
00248 
00249 void
00250 write_to_disk (url u, transaction t) {
00251   write_to_disk (u, filter (t, false), 0);
00252   transaction t2= filter (t, true);
00253   iterator<string> it= iterate (t2);
00254   while (it->busy ()) {
00255     string key= it->next ();
00256     string val= first (t2[key]);
00257     write_to_disk (u, key, val, t2[key][val], 0);
00258   }
00259 }
00260 
00261 void
00262 disk_table_rep::write (transaction t) {
00263   transaction pending;
00264   transaction r;
00265   iterator<string> it= iterate (t);
00266   while (it->busy ()) {
00267     string s= it->next ();
00268     collection val= t[s];
00269     if (N (filter (val, true)) == 0) pending (s)= val;
00270     else r (s)= val;
00271   }
00272 
00273   string app= disk_encode_transaction (pending);
00274   int i, n= N(app);
00275   for (i=0; i<n; i++)
00276     fputc (app[i], pending_fp);
00277   fflush (pending_fp);
00278   merge (pending_write, pending);
00279   merge (pending_read, pending);
00280   write_to_disk (root, r);
00281 
00282   flush_pending_write ();
00283   flush_pending_read ();
00284 }
00285 
00286 /******************************************************************************
00287 * Reading values from disk
00288 ******************************************************************************/
00289 
00290 transaction
00291 read_table_from_disk (url u) {
00292   string contents;
00293   load_string (u * "index", contents, false);
00294   return disk_decode_transaction (contents);
00295 }
00296 
00297 transaction
00298 read_from_disk (url u, collection keys, int level) {
00299   transaction t;
00300   if (N(keys) == 0 || !exists (u)) return t;
00301   //cout << "Read " << u << ", " << keys << ", " << level << "\n";
00302 
00303   collection c[16];
00304   iterator<string> it= iterate (keys);
00305   while (it->busy()) {
00306     string s= it->next();
00307     int i= hash_index (s, level);
00308     c[i](s)= keys[s];
00309   }
00310   for (int i=0; i<16; i++) {
00311     transaction d= read_from_disk (subdir (u, i), c[i], level+1);
00312     merge (t, d);
00313   }
00314 
00315   transaction d;
00316   if (exists (u * "index")) d= read_table_from_disk (u);
00317   it= iterate (keys);
00318   while (it->busy ()) {
00319     string s= it->next ();
00320     if (d->contains (s)) merge (t, atom (s, d[s]));
00321     if (keys[s] == 3 && exists (u * s)) {
00322       string contents;
00323       if (!load_string (u * s, contents, false))
00324        merge (t, atom (s, singleton (contents, 3)));
00325     }
00326   }
00327 
00328   //cout << "Return " << u << ", " << keys << ", " << t << "\n";
00329   return t;
00330 }
00331 
00332 transaction
00333 disk_table_rep::read (collection keys) {
00334   transaction done;
00335   collection  remaining;
00336   iterator<string> it= iterate (keys);
00337   while (it->busy ()) {
00338     string s= it->next ();
00339     if (keys [s] > 0 && keys [s] < 3) {
00340       if (pending_read->contains (s))
00341        done (s)= pending_read [s];
00342       else if (read_cache->contains (s))
00343        done (s)= read_cache [s];
00344       else remaining (s)= keys [s];
00345     }
00346     else remaining (s)= keys [s];
00347   }
00348 
00349   transaction extra= read_from_disk (root, remaining, 0);
00350   it= iterate (remaining);
00351   while (it->busy ()) {
00352     string s= it->next ();
00353     if (remaining [s] > 0 && remaining [s] < 3)
00354       pending_read (s)= extra [s];
00355   }
00356   flush_pending_read ();
00357   return done * extra;
00358 }