Back to index

nordugrid-arc-nox  1.1.0~rc6
FileCache.cpp
Go to the documentation of this file.
00001 // -*- indent-tabs-mode: nil -*-
00002 
00003 #ifdef HAVE_CONFIG_H
00004 #include <config.h>
00005 #endif
00006 
00007 #ifndef WIN32
00008 
00009 #include <cerrno>
00010 #include <cmath>
00011 #include <algorithm>
00012 
00013 #include <sys/types.h>
00014 #include <sys/stat.h>
00015 #include <fcntl.h>
00016 #include <dirent.h>
00017 #include <unistd.h>
00018 #include <sys/utsname.h>
00019 #include <sys/statvfs.h>
00020 
00021 #include <glibmm.h>
00022 
00023 #include <arc/Logger.h>
00024 
00025 #include "FileCache.h"
00026 
00027 namespace Arc {
00028 
00029   const std::string FileCache::CACHE_DATA_DIR = "data";
00030   const std::string FileCache::CACHE_JOB_DIR = "joblinks";
00031   const int FileCache::CACHE_DIR_LENGTH = 2;
00032   const int FileCache::CACHE_DIR_LEVELS = 1;
00033   const std::string FileCache::CACHE_LOCK_SUFFIX = ".lock";
00034   const std::string FileCache::CACHE_META_SUFFIX = ".meta";
00035   const int FileCache::CACHE_DEFAULT_AUTH_VALIDITY = 86400; // 24 h
00036 
00037   Logger FileCache::logger(Logger::getRootLogger(), "FileCache");
00038 
00039   FileCache::FileCache(std::string cache_path,
00040                        std::string id,
00041                        uid_t job_uid,
00042                        gid_t job_gid) {
00043 
00044     // make a vector of one item and call _init
00045     std::vector<std::string> caches;
00046     std::vector<std::string> remote_caches;
00047     std::vector<std::string> draining_caches;
00048     if (!cache_path.empty()) 
00049       caches.push_back(cache_path);
00050 
00051     // if problem in init, clear _caches so object is invalid
00052     if (!_init(caches, remote_caches, draining_caches, id, job_uid, job_gid))
00053       _caches.clear();
00054   }
00055 
00056   FileCache::FileCache(std::vector<std::string> caches,
00057                        std::string id,
00058                        uid_t job_uid,
00059                        gid_t job_gid) {
00060 
00061     std::vector<std::string> remote_caches;
00062     std::vector<std::string> draining_caches;
00063 
00064     // if problem in init, clear _caches so object is invalid
00065     if (!_init(caches, remote_caches, draining_caches, id, job_uid, job_gid))
00066       _caches.clear();
00067   }
00068 
00069   FileCache::FileCache(std::vector<std::string> caches,
00070                        std::vector<std::string> remote_caches,
00071                        std::vector<std::string> draining_caches,
00072                        std::string id,
00073                        uid_t job_uid,
00074                        gid_t job_gid,
00075                        int cache_max,
00076                        int cache_min) {
00077   
00078     // if problem in init, clear _caches so object is invalid
00079     if (! _init(caches, remote_caches, draining_caches, id, job_uid, job_gid, cache_max, cache_min))
00080       _caches.clear();
00081   }
00082  
00083   bool FileCache::_init(std::vector<std::string> caches,
00084                         std::vector<std::string> remote_caches,
00085                         std::vector<std::string> draining_caches,
00086                         std::string id,
00087                         uid_t job_uid,
00088                         gid_t job_gid,
00089                         int cache_max,
00090                         int cache_min) {
00091 
00092     _id = id;
00093     _uid = job_uid;
00094     _gid = job_gid;
00095     _max_used = cache_max;
00096     _min_used = cache_min;
00097 
00098     // for each cache
00099     for (int i = 0; i < (int)caches.size(); i++) {
00100       std::string cache = caches[i];
00101       std::string cache_path = cache.substr(0, cache.find(" "));
00102       if (cache_path.empty()) {
00103         logger.msg(ERROR, "No cache directory specified");
00104         return false;
00105       }
00106       std::string cache_link_path = "";
00107       if (cache.find(" ") != std::string::npos)
00108         cache_link_path = cache.substr(cache.find_last_of(" ") + 1, cache.length() - cache.find_last_of(" ") + 1);
00109 
00110       // tidy up paths - take off any trailing slashes
00111       if (cache_path.rfind("/") == cache_path.length() - 1)
00112         cache_path = cache_path.substr(0, cache_path.length() - 1);
00113       if (cache_link_path.rfind("/") == cache_link_path.length() - 1)
00114         cache_link_path = cache_link_path.substr(0, cache_link_path.length() - 1);
00115 
00116       // create cache dir and subdirs
00117       if (!_cacheMkDir(cache_path + "/" + CACHE_DATA_DIR, true)) {
00118         logger.msg(ERROR, "Cannot create directory \"%s\" for cache", cache_path + "/" + CACHE_DATA_DIR);
00119         return false;
00120       }
00121       if (!_cacheMkDir(cache_path + "/" + CACHE_JOB_DIR, true)) {
00122         logger.msg(ERROR, "Cannot create directory \"%s\" for cache", cache_path + "/" + CACHE_JOB_DIR);
00123         return false;
00124       }
00125       // add this cache to our list
00126       struct CacheParameters cache_params;
00127       cache_params.cache_path = cache_path;
00128       cache_params.cache_link_path = cache_link_path;
00129       _caches.push_back(cache_params);
00130     }
00131   
00132     // add remote caches
00133     for (int i = 0; i < (int)remote_caches.size(); i++) {
00134       std::string cache = remote_caches[i];
00135       std::string cache_path = cache.substr(0, cache.find(" "));
00136       if (cache_path.empty()) {
00137         logger.msg(ERROR, "No remote cache directory specified");
00138         return false;
00139       }
00140       std::string cache_link_path = "";
00141       if (cache.find(" ") != std::string::npos) cache_link_path = cache.substr(cache.find_last_of(" ")+1, cache.length()-cache.find_last_of(" ")+1);
00142       
00143       // tidy up paths - take off any trailing slashes
00144       if (cache_path.rfind("/") == cache_path.length()-1) cache_path = cache_path.substr(0, cache_path.length()-1);
00145       if (cache_link_path.rfind("/") == cache_link_path.length()-1) cache_link_path = cache_link_path.substr(0, cache_link_path.length()-1);
00146   
00147       // add this cache to our list
00148       struct CacheParameters cache_params;
00149       cache_params.cache_path = cache_path;
00150       cache_params.cache_link_path = cache_link_path;
00151       _remote_caches.push_back(cache_params);
00152     }
00153   
00154     // for each draining cache
00155     for (int i = 0; i < (int)draining_caches.size(); i++) {
00156       std::string cache = draining_caches[i];
00157       std::string cache_path = cache.substr(0, cache.find(" "));
00158       if (cache_path.empty()) {
00159         logger.msg(ERROR, "No cache directory specified");
00160         return false;
00161       }
00162       // tidy up paths - take off any trailing slashes
00163       if (cache_path.rfind("/") == cache_path.length()-1) cache_path = cache_path.substr(0, cache_path.length()-1);
00164   
00165       // add this cache to our list
00166       struct CacheParameters cache_params;
00167       cache_params.cache_path = cache_path;
00168       cache_params.cache_link_path = "";
00169       _draining_caches.push_back(cache_params);
00170     }
00171       // our hostname and pid
00172     struct utsname buf;
00173     if (uname(&buf) != 0) {
00174       logger.msg(ERROR, "Cannot determine hostname from uname()");
00175       return false;
00176     }
00177     _hostname = buf.nodename;
00178     int pid_i = getpid();
00179     std::stringstream ss;
00180     ss << pid_i;
00181     ss >> _pid;
00182     return true;
00183   }
00184 
00185   bool FileCache::Start(std::string url, bool& available, bool& is_locked, bool use_remote) {
00186 
00187     if (!(*this))
00188       return false;
00189 
00190     available = false;
00191     is_locked = false;
00192     std::string filename = File(url);
00193     std::string lock_file = _getLockFileName(url);
00194 
00195     // create directory structure if required, only readable by GM user
00196     if (!_cacheMkDir(lock_file.substr(0, lock_file.rfind("/")), false))
00197       return false;
00198 
00199     int lock_timeout = 86400; // one day timeout on lock TODO: make configurable?
00200 
00201     // locking mechanism:
00202     // - check if lock is there
00203     // - if not, create tmp file and check again
00204     // - if lock is still not there copy tmp file to cache lock file
00205     // - check pid inside lock file matches ours
00206 
00207     struct stat fileStat;
00208     int err = stat(lock_file.c_str(), &fileStat);
00209     if (0 != err) {
00210       if (errno == EACCES) {
00211         logger.msg(ERROR, "EACCES Error opening lock file %s: %s", lock_file, strerror(errno));
00212         return false;
00213       }
00214       else if (errno != ENOENT) {
00215         // some other error occurred opening the lock file
00216         logger.msg(ERROR, "Error opening lock file %s in initial check: %s", lock_file, strerror(errno));
00217         return false;
00218       }
00219       // lock does not exist - create tmp file
00220       std::string tmpfile = lock_file + ".XXXXXX";
00221       int h = Glib::mkstemp(tmpfile);
00222       if (h == -1) {
00223         logger.msg(ERROR, "Error creating file %s with mkstemp(): %s", tmpfile, strerror(errno));
00224         return false;
00225       }
00226       // write pid@hostname to the lock file
00227       std::string buf = _pid + "@" + _hostname;
00228       if (write(h, buf.c_str(), buf.length()) == -1) {
00229         logger.msg(ERROR, "Error writing to tmp lock file %s: %s", tmpfile, strerror(errno));
00230         // not much we can do if this doesn't work, but it is only a tmp file
00231         remove(tmpfile.c_str());
00232         close(h);
00233         return false;
00234       }
00235       if (close(h) != 0)
00236         // not critical as file will be removed after we are done
00237         logger.msg(WARNING, "Warning: closing tmp lock file %s failed", tmpfile);
00238       // check again if lock exists, in case creating the tmp file took some time
00239       err = stat(lock_file.c_str(), &fileStat);
00240       if (0 != err) {
00241         if (errno == ENOENT) {
00242           // ok, we can create lock
00243           if (rename(tmpfile.c_str(), lock_file.c_str()) != 0) {
00244             logger.msg(ERROR, "Error renaming tmp file %s to lock file %s: %s", tmpfile, lock_file, strerror(errno));
00245             remove(tmpfile.c_str());
00246             return false;
00247           }
00248           // check it's really there
00249           err = stat(lock_file.c_str(), &fileStat);
00250           if (0 != err) {
00251             logger.msg(ERROR, "Error renaming lock file, even though rename() did not return an error");
00252             return false;
00253           }
00254           // check the pid inside the lock file, just in case...
00255           if (!_checkLock(url)) {
00256             is_locked = true;
00257             return false;
00258           }
00259         }
00260         else if (errno == EACCES) {
00261           logger.msg(ERROR, "EACCES Error opening lock file %s: %s", lock_file, strerror(errno));
00262           remove(tmpfile.c_str());
00263           return false;
00264         }
00265         else {
00266           // some other error occurred opening the lock file
00267           logger.msg(ERROR, "Error opening lock file we just renamed successfully %s: %s", lock_file, strerror(errno));
00268           remove(tmpfile.c_str());
00269           return false;
00270         }
00271       }
00272       else {
00273         logger.msg(VERBOSE, "The file is currently locked with a valid lock");
00274         remove(tmpfile.c_str());
00275         is_locked = true;
00276         return false;
00277       }
00278     }
00279     else {
00280       // the lock already exists, check if it has expired
00281       // look at modification time
00282       time_t mod_time = fileStat.st_mtime;
00283       time_t now = time(NULL);
00284       logger.msg(VERBOSE, "%li seconds since lock file was created", now - mod_time);
00285 
00286       if ((now - mod_time) > lock_timeout) {
00287         logger.msg(VERBOSE, "Timeout has expired, will remove lock file");
00288         // TODO: kill the process holding the lock, only if we know it was the original
00289         // process which created it
00290         if (remove(lock_file.c_str()) != 0 && errno != ENOENT) {
00291           logger.msg(ERROR, "Failed to unlock file %s: %s", lock_file, strerror(errno));
00292           return false;
00293         }
00294         // lock has expired and has been removed. Try to remove cache file and call Start() again
00295         if (remove(filename.c_str()) != 0 && errno != ENOENT) {
00296           logger.msg(ERROR, "Error removing cache file %s: %s", filename, strerror(errno));
00297           return false;
00298         }
00299         return Start(url, available, is_locked, use_remote);
00300       }
00301 
00302       // lock is still valid, check if we own it
00303       FILE *pFile;
00304       char lock_info[100]; // should be long enough for a pid + hostname
00305       pFile = fopen((char*)lock_file.c_str(), "r");
00306       if (pFile == NULL) {
00307         // lock could have been released by another process, so call Start again
00308         if (errno == ENOENT) {
00309           logger.msg(VERBOSE, "Lock that recently existed has been deleted by another process, calling Start() again");
00310           return Start(url, available, is_locked, use_remote);
00311         }
00312         logger.msg(ERROR, "Error opening valid and existing lock file %s: %s", lock_file, strerror(errno));
00313         return false;
00314       }
00315       if (fgets(lock_info, 100, pFile) == NULL) {
00316         logger.msg(ERROR, "Error reading valid and existing lock file %s: %s", lock_file, strerror(errno));
00317         fclose(pFile);
00318         return false;
00319       }
00320       fclose(pFile);
00321 
00322       std::string lock_info_s(lock_info);
00323       std::string::size_type index = lock_info_s.find("@", 0);
00324       if (index == std::string::npos) {
00325         logger.msg(ERROR, "Error with formatting in lock file %s: %s", lock_file, lock_info_s);
00326         return false;
00327       }
00328 
00329       if (lock_info_s.substr(index + 1) != _hostname) {
00330         logger.msg(VERBOSE, "Lock is owned by a different host");
00331         // TODO: here do ssh login and check
00332         is_locked = true;
00333         return false;
00334       }
00335       std::string lock_pid = lock_info_s.substr(0, index);
00336       if (lock_pid == _pid)
00337         // safer to wait until lock expires than use cached file or re-download
00338         logger.msg(WARNING, "Warning: This process already owns the lock");
00339       else {
00340         // check if the pid owning the lock is still running - if not we can claim the lock
00341         // this is not really portable... but no other way to do it
00342         std::string procdir("/proc/");
00343         procdir = procdir.append(lock_pid);
00344         if (stat(procdir.c_str(), &fileStat) != 0 && errno == ENOENT) {
00345           logger.msg(VERBOSE, "The process owning the lock is no longer running, will remove lock");
00346           if (remove(lock_file.c_str()) != 0) {
00347             logger.msg(ERROR, "Failed to unlock file %s: %s", lock_file, strerror(errno));
00348             return false;
00349           }
00350           // lock has been removed. try to delete cache file and call Start() again
00351           if (remove(filename.c_str()) != 0 && errno != ENOENT) {
00352             logger.msg(ERROR, "Error removing cache file %s: %s", filename, strerror(errno));
00353             return false;
00354           }
00355           return Start(url, available, is_locked, use_remote);
00356         }
00357       }
00358 
00359       logger.msg(VERBOSE, "The file is currently locked with a valid lock");
00360       is_locked = true;
00361       return false;
00362     }
00363 
00364     // if we get to here we have acquired the lock
00365 
00366     // create the meta file to store the URL, if it does not exist
00367     std::string meta_file = _getMetaFileName(url);
00368     err = stat(meta_file.c_str(), &fileStat);
00369     if (0 == err) {
00370       // check URL inside file for possible hash collisions
00371       FILE *pFile;
00372       char mystring[fileStat.st_size+1];
00373       pFile = fopen((char*)_getMetaFileName(url).c_str(), "r");
00374       if (pFile == NULL) {
00375         logger.msg(ERROR, "Error opening meta file %s: %s", _getMetaFileName(url), strerror(errno));
00376         remove(lock_file.c_str());
00377         return false;
00378       }
00379       if (fgets(mystring, sizeof(mystring), pFile) == NULL) {
00380         logger.msg(ERROR, "Error reading valid and existing lock file %s: %s", lock_file, strerror(errno));
00381         fclose(pFile);
00382         return false;
00383       }
00384       fclose(pFile);
00385 
00386       std::string meta_str(mystring);
00387       // get the first line
00388       if (meta_str.find('\n') != std::string::npos)
00389         meta_str.resize(meta_str.find('\n'));
00390 
00391       std::string::size_type space_pos = meta_str.find(' ', 0);
00392       if (meta_str.substr(0, space_pos) != url) {
00393         logger.msg(ERROR, "Error: File %s is already cached at %s under a different URL: %s - this file will not be cached", url, filename, meta_str.substr(0, space_pos));
00394         remove(lock_file.c_str());
00395         return false;
00396       }
00397     }
00398     else if (errno == ENOENT) {
00399       // create new file
00400       FILE *pFile;
00401       pFile = fopen((char*)meta_file.c_str(), "w");
00402       if (pFile == NULL) {
00403         logger.msg(ERROR, "Failed to create info file %s: %s", meta_file, strerror(errno));
00404         remove(lock_file.c_str());
00405         return false;
00406       }
00407       fputs((char*)url.c_str(), pFile);
00408       fputs("\n", pFile);
00409       fclose(pFile);
00410       // make read/writeable only by GM user
00411       chmod(meta_file.c_str(), S_IRUSR | S_IWUSR);
00412     }
00413     else {
00414       logger.msg(ERROR, "Error looking up attributes of meta file %s: %s", meta_file, strerror(errno));
00415       remove(lock_file.c_str());
00416       return false;
00417     }
00418     // now check if the cache file is there already
00419     err = stat(filename.c_str(), &fileStat);
00420     if (0 == err)
00421       available = true;
00422       
00423     // if the file is not there. check remote caches
00424     else if (errno == ENOENT) {
00425       if (!use_remote) return true;    
00426       // get the hash of the url
00427       std::string hash = FileCacheHash::getHash(url);
00428     
00429       int index = 0;
00430       for(int level = 0; level < CACHE_DIR_LEVELS; level ++) {
00431         hash.insert(index + CACHE_DIR_LENGTH, "/");
00432         // go to next slash position, add one since we just inserted a slash
00433         index += CACHE_DIR_LENGTH + 1;
00434       }
00435       std::string remote_cache_file;
00436       std::string remote_cache_link;
00437       for (std::vector<struct CacheParameters>::iterator it = _remote_caches.begin(); it != _remote_caches.end(); it++) {
00438         std::string remote_file = it->cache_path+"/"+CACHE_DATA_DIR+"/"+hash;
00439         if (stat(remote_file.c_str(), &fileStat) == 0) {
00440           remote_cache_file = remote_file;
00441           remote_cache_link = it->cache_link_path;
00442           break;
00443         }
00444       }
00445       if (remote_cache_file.empty()) return true;
00446       
00447       logger.msg(INFO, "Found file %s in remote cache at %s", url, remote_cache_file);
00448       // if found, create lock file in remote cache
00449       std::string remote_lock_file = remote_cache_file+".lock";
00450       err = stat( remote_lock_file.c_str(), &fileStat );
00451       // if lock exists, exit
00452       if (0 == err) {
00453         logger.msg(VERBOSE, "File exists in remote cache at %s but is locked. Will download from source", remote_cache_file);
00454         return true;
00455       }
00456     
00457       // lock does not exist - create tmp file
00458       std::string remote_tmpfile = remote_lock_file + ".XXXXXX";
00459       int h = Glib::mkstemp(remote_tmpfile);
00460       if (h == -1) {
00461         logger.msg(WARNING, "Error creating tmp file %s for remote lock with mkstemp(): %s", remote_tmpfile, strerror(errno));
00462         return true;
00463       }
00464       // write pid@hostname to the lock file
00465       std::string buf2 = _pid + "@" + _hostname;
00466       if (write(h, buf2.c_str(), buf2.length()) == -1) {
00467         logger.msg(WARNING, "Error writing to tmp lock file for remote lock %s: %s", remote_tmpfile, strerror(errno));
00468         // not much we can do if this doesn't work, but it is only a tmp file
00469         remove(remote_tmpfile.c_str());
00470         close(h);
00471         return true;
00472       }
00473       if (close(h) != 0) {
00474         // not critical as file will be removed after we are done
00475         logger.msg(WARNING, "Warning: closing tmp lock file for remote lock %s failed", remote_tmpfile);
00476       }
00477       // check again if lock exists, in case creating the tmp file took some time
00478       err = stat( remote_lock_file.c_str(), &fileStat ); 
00479       if (0 != err) {
00480         if (errno == ENOENT) {
00481           // ok, we can create lock
00482           if (rename(remote_tmpfile.c_str(), remote_lock_file.c_str()) != 0) {
00483             logger.msg(WARNING, "Error renaming tmp file %s to lock file %s for remote lock: %s", remote_tmpfile, remote_lock_file, strerror(errno));
00484             remove(remote_tmpfile.c_str());
00485             return true;
00486           }
00487           // check it's really there
00488           err = stat( remote_lock_file.c_str(), &fileStat ); 
00489           if (0 != err) {
00490             logger.msg(WARNING, "Error renaming lock file for remote lock, even though rename() did not return an error: %s", strerror(errno));
00491             return true;
00492           }
00493         }
00494         else {
00495           // some error occurred opening the lock file
00496           logger.msg(WARNING, "Error opening lock file for remote lock we just renamed successfully %s: %s", remote_lock_file, strerror(errno));
00497           remove(remote_tmpfile.c_str());
00498           return true;
00499         }
00500       }
00501       else {
00502         logger.msg(VERBOSE, "The remote cache file is currently locked with a valid lock, will download from source");
00503         remove(remote_tmpfile.c_str());
00504         return true;
00505       }
00506       
00507       // we have locked the remote file - so find out what to do with it
00508       if (remote_cache_link == "replicate") {
00509         // copy the file to the local cache, remove remote lock and exit with available=true
00510         logger.msg(VERBOSE, "Replicating file %s to local cache file %s", remote_cache_file, filename);
00511           // do the copy - taken directly from old datacache.cc
00512         char copybuf[65536];
00513         int fdest = open(filename.c_str(), O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
00514         if(fdest == -1) {
00515           logger.msg(ERROR, "Failed to create file %s for writing: %s",filename, strerror(errno));
00516           return false;
00517         };
00518         
00519         int fsource = open(remote_cache_file.c_str(), O_RDONLY);
00520         if(fsource == -1) {
00521           close(fdest);
00522           logger.msg(ERROR, "Failed to open file %s for reading: %s", remote_cache_file, strerror(errno));
00523           return false;
00524         };
00525         
00526         // source and dest opened ok - copy in chunks
00527         for(;;) {
00528           ssize_t lin = read(fsource, copybuf, sizeof(copybuf));
00529           if(lin == -1) {
00530             close(fdest); close(fsource);
00531             logger.msg(ERROR, "Failed to read file %s: %s", remote_cache_file, strerror(errno));
00532             return false;
00533           };
00534           if(lin == 0) break; // eof
00535           
00536           for(ssize_t lout = 0; lout < lin;) {
00537             ssize_t lwritten = write(fdest, copybuf+lout, lin-lout);
00538             if(lwritten == -1) {
00539               close(fdest); close(fsource);
00540               logger.msg(ERROR, "Failed to write file %s: %s", filename, strerror(errno));
00541               return false;
00542             };
00543             lout += lwritten;
00544           };
00545         };
00546         close(fdest); close(fsource);
00547         if (remove(remote_lock_file.c_str()) != 0) {
00548           logger.msg(ERROR, "Failed to remove remote lock file %s: %s. Some manual intervention may be required", remote_lock_file, strerror(errno));
00549           return true;
00550         }
00551       }
00552       // create symlink from file in this cache to other cache
00553       else {
00554         logger.msg(VERBOSE, "Creating temporary link from %s to remote cache file %s", filename, remote_cache_file);
00555         if (symlink(remote_cache_file.c_str(), filename.c_str()) != 0) {
00556           logger.msg(ERROR, "Failed to create soft link to remote cache: %s Will download %s from source", strerror(errno), url);
00557           if (remove(remote_lock_file.c_str()) != 0) {
00558             logger.msg(ERROR, "Failed to remove remote lock file %s: %s Some manual intervention may be required", remote_lock_file, strerror(errno));
00559           }
00560           return true;
00561         }
00562       }
00563       available = true;
00564     }
00565     else {
00566       // this is ok, we will download again
00567       logger.msg(WARNING, "Warning: error looking up attributes of cached file: %s", strerror(errno));
00568     }
00569     return true;
00570   }
00571 
00572   bool FileCache::Stop(std::string url) {
00573 
00574     if (!(*this))
00575       return false;
00576 
00577     // if cache file is a symlink, remove remote cache lock and symlink
00578     std::string filename = File(url);
00579     struct stat fileStat;
00580     if (lstat(filename.c_str(), &fileStat) == 0 && S_ISLNK(fileStat.st_mode)) {
00581       char buf[1024];
00582       int link_size = readlink(filename.c_str(), buf, sizeof(buf));
00583       if (link_size == -1) {
00584         logger.msg(ERROR, "Could not read target of link %s: %s. Manual intervention may be required to remove lock in remote cache", filename, strerror(errno));
00585         return false;
00586       }
00587       std::string remote_lock(buf); remote_lock.resize(link_size); remote_lock += ".lock";
00588       if (remove(remote_lock.c_str()) != 0 && errno != ENOENT) {
00589         logger.msg(ERROR, "Failed to unlock remote cache lock %s: %s. Manual intervention may be required", remote_lock, strerror(errno));
00590         return false;
00591       }
00592       if (remove(filename.c_str()) != 0) {
00593         logger.msg(ERROR, "Error removing file %s: %s. Manual intervention may be required", filename, strerror(errno));
00594         return false;
00595       }
00596     }
00597     
00598      // check the lock is ok to delete
00599     if (!_checkLock(url))
00600       return false;
00601 
00602     // delete the lock
00603     if (remove(_getLockFileName(url).c_str()) != 0) {
00604       logger.msg(ERROR, "Failed to unlock file with lock %s: %s", _getLockFileName(url), strerror(errno));
00605       return false;
00606     }
00607     // get the hash of the url
00608     std::string hash = FileCacheHash::getHash(url);
00609     int index = 0;
00610     for(int level = 0; level < CACHE_DIR_LEVELS; level ++) {
00611       hash.insert(index + CACHE_DIR_LENGTH, "/");
00612       // go to next slash position, add one since we just inserted a slash
00613       index += CACHE_DIR_LENGTH + 1;
00614     }
00615     
00616     // remove the file from the cache map
00617     _cache_map.erase(hash);
00618     return true;
00619   }
00620 
00621   bool FileCache::StopAndDelete(std::string url) {
00622 
00623     if (!(*this))
00624       return false;
00625     
00626     // if cache file is a symlink, remove remote cache lock
00627     std::string filename = File(url);
00628     struct stat fileStat;
00629     if (lstat(filename.c_str(), &fileStat) == 0 && S_ISLNK(fileStat.st_mode)) {
00630       char buf[1024];
00631       int link_size = readlink(filename.c_str(), buf, sizeof(buf));
00632       if (link_size == -1) {
00633         logger.msg(ERROR, "Could not read target of link %s: %s. Manual intervention may be required to remove lock in remote cache", filename, strerror(errno));
00634         return false;
00635       }
00636       std::string remote_lock(buf); remote_lock.resize(link_size); remote_lock += ".lock";
00637       if (remove(remote_lock.c_str()) != 0 && errno != ENOENT) {
00638         logger.msg(ERROR, "Failed to unlock remote cache lock %s: %s. Manual intervention may be required", remote_lock, strerror(errno));
00639         return false;
00640       }
00641     }
00642 
00643     // check the lock is ok to delete, and if so, remove the file and the
00644     // associated lock
00645     if (!_checkLock(url))
00646       return false;
00647 
00648     // delete the cache file
00649     if (remove(filename.c_str()) != 0 && errno != ENOENT) {
00650       logger.msg(ERROR, "Error removing cache file %s: %s", filename, strerror(errno));
00651       return false;
00652     }
00653 
00654     // delete the meta file - not critical so don't fail on error
00655     if (remove(_getMetaFileName(url).c_str()) != 0)
00656       logger.msg(ERROR, "Failed to unlock file with lock %s: %s", _getLockFileName(url), strerror(errno));
00657 
00658     // delete the lock
00659     if (remove(_getLockFileName(url).c_str()) != 0) {
00660       logger.msg(ERROR, "Failed to unlock file with lock %s: %s", _getLockFileName(url), strerror(errno));
00661       return false;
00662     }
00663     
00664     // get the hash of the url
00665     std::string hash = FileCacheHash::getHash(url);
00666     int index = 0;
00667     for(int level = 0; level < CACHE_DIR_LEVELS; level ++) {
00668       hash.insert(index + CACHE_DIR_LENGTH, "/");
00669       // go to next slash position, add one since we just inserted a slash
00670       index += CACHE_DIR_LENGTH + 1;
00671     }
00672   
00673     // remove the file from the cache map
00674     _cache_map.erase(hash);
00675     return true;
00676   }
00677 
00678   std::string FileCache::File(std::string url) {
00679 
00680     if (!(*this))
00681       return "";
00682 
00683     // get the hash of the url
00684     std::string hash = FileCacheHash::getHash(url);
00685 
00686     int index = 0;
00687     for (int level = 0; level < CACHE_DIR_LEVELS; level++) {
00688       hash.insert(index + CACHE_DIR_LENGTH, "/");
00689       // go to next slash position, add one since we just inserted a slash
00690       index += CACHE_DIR_LENGTH + 1;
00691     }
00692     // look up the cache map to see if the file is already in
00693     std::map <std::string, int>::iterator iter = _cache_map.find(hash) ;
00694     if (iter != _cache_map.end()) {
00695       return _caches[iter->second].cache_path + "/" + CACHE_DATA_DIR + "/" + hash;
00696     } 
00697   
00698     // else choose a new cache and assign the file to it
00699     int chosen_cache = _chooseCache(url);
00700     std::string path  = _caches[chosen_cache].cache_path + "/" + CACHE_DATA_DIR + "/" + hash;
00701   
00702     // update the cache map with the new file
00703     _cache_map.insert(std::make_pair(hash, chosen_cache));
00704     return path;
00705   }
00706 
00707   bool FileCache::Link(std::string link_path, std::string url) {
00708 
00709     if (!(*this))
00710       return false;
00711 
00712     // check the original file exists
00713     std::string cache_file = File(url);
00714     struct stat fileStat;
00715   
00716     if (lstat(cache_file.c_str(), &fileStat) != 0) {
00717       if (errno == ENOENT)
00718         logger.msg(ERROR, "Error: Cache file %s does not exist", cache_file);
00719       else
00720         logger.msg(ERROR, "Error accessing cache file %s: %s", cache_file, strerror(errno));
00721       return false;
00722     }
00723   
00724     // get the hash of the url
00725     std::string hash = FileCacheHash::getHash(url);
00726     int index = 0;
00727     for (int level = 0; level < CACHE_DIR_LEVELS; level ++) {
00728       hash.insert(index + CACHE_DIR_LENGTH, "/");
00729       // go to next slash position, add one since we just inserted a slash
00730       index += CACHE_DIR_LENGTH + 1;
00731     }
00732   
00733     // look up the map file to see if the file is already mapped with a cache  
00734     std::map <std::string, int>::iterator iter = _cache_map.find(hash);
00735     int cache_no = 0;
00736     if (iter != _cache_map.end()) {
00737       cache_no = iter->second;}
00738     else {
00739       logger.msg(ERROR, "Error: Cache not found for file %s", cache_file);
00740       return false;
00741     }
00742 
00743     // choose cache
00744     struct CacheParameters cache_params = _caches[cache_no];
00745     std::string hard_link_path = cache_params.cache_path + "/" + CACHE_JOB_DIR + "/" +_id;
00746     std::string cache_link_path = cache_params.cache_link_path;
00747 
00748     // check if cached file is a symlink - if so get link path from the remote cache
00749     if (S_ISLNK(fileStat.st_mode)) {
00750       char link_target_buf[1024];
00751       int link_size = readlink(cache_file.c_str(), link_target_buf, sizeof(link_target_buf));
00752       if (link_size == -1) {
00753         logger.msg(ERROR, "Could not read target of link %s: %s", cache_file, strerror(errno));
00754         return false;
00755       }
00756       // need to match the symlink target against the list of remote caches
00757       std::string link_target(link_target_buf); link_target.resize(link_size);
00758       for (std::vector<struct CacheParameters>::iterator it = _remote_caches.begin(); it != _remote_caches.end(); it++) {
00759         std::string remote_data_dir = it->cache_path+"/"+CACHE_DATA_DIR;
00760         if (link_target.find(remote_data_dir) == 0) {
00761           hard_link_path = it->cache_path+"/"+CACHE_JOB_DIR + "/" + _id;
00762           cache_link_path = it->cache_link_path;
00763           cache_file = link_target;
00764           break;
00765         }
00766       }
00767       if (hard_link_path == cache_params.cache_path + "/" + CACHE_JOB_DIR + "/" +_id) {
00768         logger.msg(ERROR, "Couldn't match link target %s to any remote cache", link_target);
00769         return false;
00770       }
00771     }
00772 
00773     // if _cache_link_path is '.' then copy instead, bypassing the hard-link
00774     if (cache_params.cache_link_path == ".")
00775       return Copy(link_path, url);
00776 
00777     // create per-job hard link dir if necessary, making the final dir readable only by the job user
00778     if (!_cacheMkDir(hard_link_path, true)) {
00779       logger.msg(ERROR, "Cannot create directory \"%s\" for per-job hard links", hard_link_path);
00780       return false;
00781     }
00782     if (chown(hard_link_path.c_str(), _uid, _gid) != 0) {
00783       logger.msg(ERROR, "Cannot change owner of %s", hard_link_path);
00784       return false;
00785     }
00786     if (chmod(hard_link_path.c_str(), S_IRWXU) != 0) {
00787       logger.msg(ERROR, "Cannot change permissions of \"%s\" to 0700", hard_link_path);
00788       return false;
00789     }
00790 
00791     std::string filename = link_path.substr(link_path.rfind("/") + 1);
00792     std::string hard_link_file = hard_link_path + "/" + filename;
00793     std::string session_dir = link_path.substr(0, link_path.rfind("/"));
00794 
00795     // make the hard link
00796     if (link(cache_file.c_str(), hard_link_file.c_str()) != 0) {
00797       logger.msg(ERROR, "Failed to create hard link from %s to %s: %s", hard_link_file, cache_file, strerror(errno));
00798       return false;
00799     }
00800     // ensure the hard link is readable by all and owned by root (or GM user)
00801     // (to make cache file immutable but readable by all)
00802     if (chown(hard_link_file.c_str(), getuid(), getgid()) != 0) {
00803       logger.msg(ERROR, "Failed to change owner of hard link to %i: %s", getuid(), strerror(errno));
00804       return false;
00805     }
00806     if (chmod(hard_link_file.c_str(), S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0) {
00807       logger.msg(ERROR, "Failed to change permissions of hard link to 0644: %s", strerror(errno));
00808       return false;
00809     }
00810 
00811     // make necessary dirs for the soft link
00812     // this probably should have already been done... somewhere...
00813     if (!_cacheMkDir(session_dir, true))
00814       return false;
00815     if (chown(session_dir.c_str(), _uid, _gid) != 0) {
00816       logger.msg(ERROR, "Failed to change owner of session dir to %i: %s", _uid, strerror(errno));
00817       return false;
00818     }
00819     if (chmod(session_dir.c_str(), S_IRWXU) != 0) {
00820       logger.msg(ERROR, "Failed to change permissions of session dir to 0700: %s", strerror(errno));
00821       return false;
00822     }
00823 
00824     // make the soft link, changing the target if cache_link_path is defined
00825     if (!cache_params.cache_link_path.empty())
00826       hard_link_file = cache_params.cache_link_path + "/" + CACHE_JOB_DIR + "/" + _id + "/" + filename;
00827     if (symlink(hard_link_file.c_str(), link_path.c_str()) != 0) {
00828       logger.msg(ERROR, "Failed to create soft link: %s", strerror(errno));
00829       return false;
00830     }
00831 
00832     // change the owner of the soft link to the job user
00833     if (lchown(link_path.c_str(), _uid, _gid) != 0) {
00834       logger.msg(ERROR, "Failed to change owner of session dir to %i: %s", _uid, strerror(errno));
00835       return false;
00836     }
00837     return true;
00838   }
00839 
00840   bool FileCache::Copy(std::string dest_path, std::string url, bool executable) {
00841 
00842     if (!(*this))
00843       return false;
00844 
00845     // check the original file exists
00846     std::string cache_file = File(url);
00847     struct stat fileStat;
00848     if (stat(cache_file.c_str(), &fileStat) != 0) {
00849       if (errno == ENOENT)
00850         logger.msg(ERROR, "Cache file %s does not exist", cache_file);
00851       else
00852         logger.msg(ERROR, "Error accessing cache file %s: %s", cache_file, strerror(errno));
00853       return false;
00854     }
00855 
00856     // make necessary dirs for the copy
00857     // this probably should have already been done... somewhere...
00858     std::string dest_dir = dest_path.substr(0, dest_path.rfind("/"));
00859     if (!_cacheMkDir(dest_dir, true))
00860       return false;
00861     if (chown(dest_dir.c_str(), _uid, _gid) != 0) {
00862       logger.msg(ERROR, "Failed to change owner of destination dir to %i: %s", _uid, strerror(errno));
00863       return false;
00864     }
00865     if (chmod(dest_dir.c_str(), S_IRWXU) != 0) {
00866       logger.msg(ERROR, "Failed to change permissions of session dir to 0700: %s", strerror(errno));
00867       return false;
00868     }
00869 
00870     // do the copy - taken directly from old datacache.cc
00871     char buf[65536];
00872     mode_t perm = S_IRUSR | S_IWUSR;
00873     if (executable)
00874       perm |= S_IXUSR;
00875     int fdest = open(dest_path.c_str(), O_WRONLY | O_CREAT | O_EXCL, perm);
00876     if (fdest == -1) {
00877       logger.msg(ERROR, "Failed to create file %s for writing: %s", dest_path, strerror(errno));
00878       return false;
00879     }
00880     if (fchown(fdest, _uid, _gid) == -1) {
00881       logger.msg(ERROR, "Failed change ownership of destination file %s: %s", dest_path, strerror(errno));
00882       close(fdest);
00883       return false;
00884     }
00885 
00886     int fsource = open(cache_file.c_str(), O_RDONLY);
00887     if (fsource == -1) {
00888       close(fdest);
00889       logger.msg(ERROR, "Failed to open file %s for reading: %s", cache_file, strerror(errno));
00890       return false;
00891     }
00892 
00893     // source and dest opened ok - copy in chunks
00894     for (;;) {
00895       ssize_t lin = read(fsource, buf, sizeof(buf));
00896       if (lin == -1) {
00897         close(fdest);
00898         close(fsource);
00899         logger.msg(ERROR, "Failed to read file %s: %s", cache_file, strerror(errno));
00900         return false;
00901       }
00902       if (lin == 0)
00903         break;          // eof
00904 
00905       for (ssize_t lout = 0; lout < lin;) {
00906         ssize_t lwritten = write(fdest, buf + lout, lin - lout);
00907         if (lwritten == -1) {
00908           close(fdest);
00909           close(fsource);
00910           logger.msg(ERROR, "Failed to write file %s: %s", dest_path, strerror(errno));
00911           return false;
00912         }
00913         lout += lwritten;
00914       }
00915     }
00916     close(fdest);
00917     close(fsource);
00918     return true;
00919   }
00920 
00921   bool FileCache::Release() {
00922 
00923     // go through all caches (including remote caches and draining caches)
00924     // and remove per-job dirs for our job id
00925     std::vector<std::string> job_dirs;
00926     for (int i = 0; i < (int)_caches.size(); i++)
00927       job_dirs.push_back(_caches[i].cache_path + "/" + CACHE_JOB_DIR + "/" + _id);
00928     for (int i = 0; i < (int)_remote_caches.size(); i++)
00929       job_dirs.push_back(_remote_caches[i].cache_path + "/" + CACHE_JOB_DIR + "/" + _id);
00930     for (int i = 0; i < (int)_draining_caches.size(); i++)
00931       job_dirs.push_back(_draining_caches[i].cache_path + "/" + CACHE_JOB_DIR + "/" + _id); 
00932 
00933     for (int i = 0; i < (int)job_dirs.size(); i++) {
00934       std::string job_dir = job_dirs[i];
00935       // check if job dir exists
00936       DIR *dirp = opendir(job_dir.c_str());
00937       if (dirp == NULL) {
00938         if (errno == ENOENT)
00939           continue;
00940         logger.msg(ERROR, "Error opening per-job dir %s: %s", job_dir, strerror(errno));
00941         return false;
00942       }
00943 
00944       // list all files in the dir and delete them
00945       struct dirent *dp;
00946       errno = 0;
00947       while ((dp = readdir(dirp))) {
00948         if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)
00949           continue;
00950         std::string to_delete = job_dir + "/" + dp->d_name;
00951         logger.msg(VERBOSE, "Removing %s", to_delete);
00952         if (remove(to_delete.c_str()) != 0) {
00953           logger.msg(ERROR, "Failed to remove hard link %s: %s", to_delete, strerror(errno));
00954           closedir(dirp);
00955           return false;
00956         }
00957       }
00958       closedir(dirp);
00959 
00960       if (errno != 0) {
00961         logger.msg(ERROR, "Error listing dir %s: %s", job_dir, strerror(errno));
00962         return false;
00963       }
00964 
00965       // remove now-empty dir
00966       logger.msg(VERBOSE, "Removing %s", job_dir);
00967       if (rmdir(job_dir.c_str()) != 0) {
00968         logger.msg(ERROR, "Failed to remove cache per-job dir %s: %s", job_dir, strerror(errno));
00969         return false;
00970       }
00971     }
00972     return true;
00973   }
00974 
00975   bool FileCache::AddDN(std::string url, std::string DN, Time expiry_time) {
00976 
00977     if (DN.empty())
00978       return false;
00979     if (expiry_time == Time(0))
00980       expiry_time = Time(time(NULL) + CACHE_DEFAULT_AUTH_VALIDITY);
00981 
00982     // add DN to the meta file. If already there, renew the expiry time
00983     std::string meta_file = _getMetaFileName(url);
00984     struct stat fileStat;
00985     int err = stat(meta_file.c_str(), &fileStat);
00986     if (0 != err) {
00987       logger.msg(ERROR, "Error reading meta file %s: %s", meta_file, strerror(errno));
00988       return false;
00989     }
00990     FILE *pFile;
00991     char mystring[fileStat.st_size + 1];
00992     pFile = fopen(meta_file.c_str(), "r");
00993     if (pFile == NULL) {
00994       logger.msg(ERROR, "Error opening meta file %s: %s", meta_file, strerror(errno));
00995       return false;
00996     }
00997     // get the first line
00998     fgets(mystring, sizeof(mystring), pFile);
00999 
01000     // check for correct formatting and possible hash collisions between URLs
01001     std::string first_line(mystring);
01002     if (first_line.find('\n') == std::string::npos)
01003       first_line += '\n';
01004     std::string::size_type space_pos = first_line.rfind(' ');
01005     if (space_pos == std::string::npos)
01006       space_pos = first_line.length() - 1;
01007 
01008     if (first_line.substr(0, space_pos) != url) {
01009       logger.msg(ERROR, "Error: File %s is already cached at %s under a different URL: %s - will not add DN to cached list", url, File(url), first_line.substr(0, space_pos));
01010       fclose(pFile);
01011       return false;
01012     }
01013 
01014     // read in list of DNs
01015     std::vector<std::string> dnlist;
01016     dnlist.push_back(DN + ' ' + expiry_time.str(MDSTime) + '\n');
01017 
01018     char *res = fgets(mystring, sizeof(mystring), pFile);
01019     while (res) {
01020       std::string dnstring(mystring);
01021       space_pos = dnstring.rfind(' ');
01022       if (space_pos == std::string::npos) {
01023         logger.msg(WARNING, "Bad format detected in file %s, in line %s", meta_file, dnstring);
01024         res = fgets (mystring, sizeof(mystring), pFile);
01025         continue;
01026       }
01027       // remove expired DNs (after some grace period)
01028       if (dnstring.substr(0, space_pos) != DN) {
01029         if (dnstring.find('\n') != std::string::npos)
01030           dnstring.resize(dnstring.find('\n'));
01031         Time exp_time(dnstring.substr(space_pos + 1));
01032         if (exp_time > Time(time(NULL) - CACHE_DEFAULT_AUTH_VALIDITY))
01033           dnlist.push_back(dnstring + '\n');
01034       }
01035       res = fgets(mystring, sizeof(mystring), pFile);
01036     }
01037     fclose(pFile);
01038 
01039     // write everything back to the file
01040     pFile = fopen(meta_file.c_str(), "w");
01041     if (pFile == NULL) {
01042       logger.msg(ERROR, "Error opening meta file for writing %s: %s", meta_file, strerror(errno));
01043       return false;
01044     }
01045     fputs((char*)first_line.c_str(), pFile);
01046     for (std::vector<std::string>::iterator i = dnlist.begin(); i != dnlist.end(); i++)
01047       fputs((char*)i->c_str(), pFile);
01048     fclose(pFile);
01049     return true;
01050   }
01051 
01052   bool FileCache::CheckDN(std::string url, std::string DN) {
01053 
01054     if (DN.empty())
01055       return false;
01056 
01057     std::string meta_file = _getMetaFileName(url);
01058     struct stat fileStat;
01059     int err = stat(meta_file.c_str(), &fileStat);
01060     if (0 != err) {
01061       if (errno != ENOENT)
01062         logger.msg(ERROR, "Error reading meta file %s: %s", meta_file, strerror(errno));
01063       return false;
01064     }
01065     FILE *pFile;
01066     char mystring[fileStat.st_size + 1];
01067     pFile = fopen(meta_file.c_str(), "r");
01068     if (pFile == NULL) {
01069       logger.msg(ERROR, "Error opening meta file %s: %s", meta_file, strerror(errno));
01070       return false;
01071     }
01072     fgets(mystring, sizeof(mystring), pFile); // first line
01073 
01074     // read in list of DNs
01075     char *res = fgets(mystring, sizeof(mystring), pFile);
01076     while (res) {
01077       std::string dnstring(mystring);
01078       std::string::size_type space_pos = dnstring.rfind(' ');
01079       if (dnstring.substr(0, space_pos) == DN) {
01080         if (dnstring.find('\n') != std::string::npos)
01081           dnstring.resize(dnstring.find('\n'));
01082         std::string exp_time = dnstring.substr(space_pos + 1);
01083         if (Time(exp_time) > Time()) {
01084           logger.msg(VERBOSE, "DN %s is cached and is valid until %s for URL %s", DN, Time(exp_time).str(), url);
01085           fclose(pFile);
01086           return true;
01087         }
01088         else {
01089           logger.msg(VERBOSE, "DN %s is cached but has expired for URL %s", DN, url);
01090           fclose(pFile);
01091           return false;
01092         }
01093       }
01094       res = fgets(mystring, sizeof(mystring), pFile);
01095     }
01096     fclose(pFile);
01097     return false;
01098   }
01099 
01100   bool FileCache::CheckCreated(std::string url) {
01101 
01102     // check the cache file exists - if so we can get the creation date
01103     // follow symlinks
01104     std::string cache_file = File(url);
01105     struct stat fileStat;
01106     return (stat(cache_file.c_str(), &fileStat) == 0) ? true : false;
01107   }
01108 
01109   Time FileCache::GetCreated(std::string url) {
01110 
01111     // check the cache file exists
01112     std::string cache_file = File(url);
01113     // follow symlinks
01114     struct stat fileStat;
01115     if (stat(cache_file.c_str(), &fileStat) != 0) {
01116       if (errno == ENOENT)
01117         logger.msg(ERROR, "Cache file %s does not exist", cache_file);
01118       else
01119         logger.msg(ERROR, "Error accessing cache file %s: %s", cache_file, strerror(errno));
01120       return 0;
01121     }
01122 
01123     time_t ctime = fileStat.st_ctime;
01124     if (ctime <= 0)
01125       return Time(0);
01126     return Time(ctime);
01127   }
01128 
01129   bool FileCache::CheckValid(std::string url) {
01130     return (GetValid(url) != Time(0));
01131   }
01132 
01133   Time FileCache::GetValid(std::string url) {
01134 
01135     // open meta file and pick out expiry time if it exists
01136 
01137     FILE *pFile;
01138     char mystring[1024]; // should be long enough for a pid or url...
01139     pFile = fopen((char*)_getMetaFileName(url).c_str(), "r");
01140     if (pFile == NULL) {
01141       logger.msg(ERROR, "Error opening meta file %s: %s", _getMetaFileName(url), strerror(errno));
01142       return Time(0);
01143     }
01144     if (fgets(mystring, sizeof(mystring), pFile) == NULL) {
01145       logger.msg(ERROR, "Error reading meta file %s: %s", _getMetaFileName(url), strerror(errno));
01146       fclose(pFile);
01147       return Time(0);
01148     }
01149     fclose(pFile);
01150 
01151     std::string meta_str(mystring);
01152     // get the first line
01153     if (meta_str.find('\n') != std::string::npos)
01154       meta_str.resize(meta_str.find('\n'));
01155 
01156     // if the file contains only the url, we don't have an expiry time
01157     if (meta_str == url)
01158       return Time(0);
01159 
01160     // check sensible formatting - should be like "rls://rls1.ndgf.org/file1 20080101123456Z"
01161     if (meta_str.substr(0, url.length() + 1) != url + " ") {
01162       logger.msg(ERROR, "Mismatching url in file %s: %s Expected %s", _getMetaFileName(url), meta_str, url);
01163       return Time(0);
01164     }
01165     if (meta_str.length() != url.length() + 16) {
01166       logger.msg(ERROR, "Bad format in file %s: %s", _getMetaFileName(url), meta_str);
01167       return Time(0);
01168     }
01169     if (meta_str.substr(url.length(), 1) != " ") {
01170       logger.msg(ERROR, "Bad separator in file %s: %s", _getMetaFileName(url), meta_str);
01171       return Time(0);
01172     }
01173     if (meta_str.substr(url.length() + 1).length() != 15) {
01174       logger.msg(ERROR, "Bad value of expiry time in %s: %s", _getMetaFileName(url), meta_str);
01175       return Time(0);
01176     }
01177 
01178     // convert to Time object
01179     return Time(meta_str.substr(url.length() + 1));
01180   }
01181 
01182   bool FileCache::SetValid(std::string url, Time val) {
01183 
01184     std::string meta_file = _getMetaFileName(url);
01185     FILE *pFile;
01186     pFile = fopen((char*)meta_file.c_str(), "w");
01187     if (pFile == NULL) {
01188       logger.msg(ERROR, "Error opening meta file %s: %s", meta_file, strerror(errno));
01189       return false;
01190     }
01191     std::string file_data = url + " " + val.str(MDSTime);
01192     fputs((char*)file_data.c_str(), pFile);
01193     fclose(pFile);
01194     return true;
01195   }
01196 
01197   bool FileCache::operator==(const FileCache& a) {
01198     if (a._caches.size() != _caches.size())
01199       return false;
01200     for (int i = 0; i < (int)a._caches.size(); i++) {
01201       if (a._caches.at(i).cache_path != _caches.at(i).cache_path)
01202         return false;
01203       if (a._caches.at(i).cache_link_path != _caches.at(i).cache_link_path)
01204         return false;
01205     }
01206     return (
01207              a._id == _id &&
01208              a._uid == _uid &&
01209              a._gid == _gid
01210              );
01211   }
01212   bool FileCache::_checkLock(std::string url) {
01213 
01214     std::string filename = File(url);
01215     std::string lock_file = _getLockFileName(url);
01216 
01217     // check for existence of lock file
01218     struct stat fileStat;
01219     int err = stat(lock_file.c_str(), &fileStat);
01220     if (0 != err) {
01221       if (errno == ENOENT)
01222         logger.msg(ERROR, "Lock file %s doesn't exist", lock_file);
01223       else
01224         logger.msg(ERROR, "Error listing lock file %s: %s", lock_file, strerror(errno));
01225       return false;
01226     }
01227 
01228     // check the lock file's pid and hostname matches ours
01229     FILE *pFile;
01230     char lock_info[100]; // should be long enough for a pid + hostname
01231     pFile = fopen((char*)lock_file.c_str(), "r");
01232     if (pFile == NULL) {
01233       logger.msg(ERROR, "Error opening lock file %s: %s", lock_file, strerror(errno));
01234       return false;
01235     }
01236     if (fgets(lock_info, 100, pFile) == NULL) {
01237       logger.msg(ERROR, "Error reading lock file %s: %s", lock_file, strerror(errno));
01238       fclose(pFile);
01239       return false;
01240     }
01241     fclose(pFile);
01242 
01243     std::string lock_info_s(lock_info);
01244     std::string::size_type index = lock_info_s.find("@", 0);
01245     if (index == std::string::npos) {
01246       logger.msg(ERROR, "Error with formatting in lock file %s: %s", lock_file, lock_info_s);
01247       return false;
01248     }
01249 
01250     if (lock_info_s.substr(index + 1) != _hostname) {
01251       logger.msg(VERBOSE, "Lock is owned by a different host");
01252       // TODO: here do ssh login and check
01253       return false;
01254     }
01255     if (lock_info_s.substr(0, index) != _pid) {
01256       logger.msg(ERROR, "Another process owns the lock on file %s. Must go back to Start()", filename);
01257       return false;
01258     }
01259     return true;
01260   }
01261 
01262   std::string FileCache::_getLockFileName(std::string url) {
01263     return File(url) + CACHE_LOCK_SUFFIX;
01264   }
01265 
01266   std::string FileCache::_getMetaFileName(std::string url) {
01267     return File(url) + CACHE_META_SUFFIX;
01268   }
01269 
01270   bool FileCache::_cacheMkDir(std::string dir, bool all_read) {
01271 
01272     struct stat fileStat;
01273     int err = stat(dir.c_str(), &fileStat);
01274     if (0 != err) {
01275       logger.msg(VERBOSE, "Creating directory %s", dir);
01276       std::string::size_type slashpos = 0;
01277 
01278       // set perms based on all_read
01279       mode_t perm = S_IRWXU;
01280       if (all_read)
01281         perm |= S_IRGRP | S_IROTH | S_IXGRP | S_IXOTH;
01282 
01283       do {
01284         slashpos = dir.find("/", slashpos + 1);
01285         std::string dirname = dir.substr(0, slashpos);
01286         // list dir to see if it exists (we can't tell the difference between
01287         // dir already exists and permission denied)
01288         struct stat statbuf;
01289         if (stat(dirname.c_str(), &statbuf) == 0)
01290           continue;
01291 
01292         if (mkdir(dirname.c_str(), perm) != 0)
01293           if (errno != EEXIST) {
01294             logger.msg(ERROR, "Error creating required dirs: %s", strerror(errno));
01295             return false;
01296           }
01297         // chmod to get around GM umask setting
01298         if (chmod(dirname.c_str(), perm) != 0) {
01299           logger.msg(ERROR, "Error changing permission of dir %s: %s", dirname, strerror(errno));
01300           return false;
01301         }
01302       } while (slashpos != std::string::npos);
01303     }
01304     return true;
01305   }
01306 
01307   int FileCache::_chooseCache(std::string url) {
01308     
01309     // get the hash of the url
01310     std::string hash = FileCacheHash::getHash(url);
01311     int index = 0;
01312     for (int level = 0; level < CACHE_DIR_LEVELS; level ++) {
01313        hash.insert(index + CACHE_DIR_LENGTH, "/");
01314        // go to next slash position, add one since we just inserted a slash
01315        index += CACHE_DIR_LENGTH + 1;
01316     }
01317   
01318     int caches_size = _caches.size();
01319   
01320     // When there is only one cache directory   
01321     if (caches_size == 1) {
01322       return 0;
01323     }
01324     // check the fs to see if the file is already there
01325     for (int i = 0; i < caches_size ; i++) { 
01326       struct stat fileStat;  
01327       std::string c_file = _caches[i].cache_path + "/" + CACHE_DATA_DIR +"/" + hash;  
01328       if (stat(c_file.c_str(), &fileStat) == 0) {
01329         return i; 
01330       }  
01331     }
01332   
01333     // find a cache with the most unsed space and also the cache_size parameter defined in "arc.conf"
01334     std::map<int ,std::pair<unsigned long long, float> > cache_map;
01335     // caches which are under the usage percent of the "arc.conf": < cache number, chance to select this cache >
01336     std::map <int, int>  under_limit;
01337     // caches which are over the usage percent of the "arc.conf" < cache free space, cache number> 
01338     std::map<unsigned long long, int> over_limit;
01339     // sum of all caches 
01340     long total_size = 0; 
01341     // get the free spaces of the caches 
01342     for (int i = 0; i < caches_size; i++ ) {
01343       std::pair <unsigned long long, unsigned long long> p = _getCacheInfo(_caches[i].cache_path);
01344       cache_map.insert(std::make_pair(i, p));
01345       total_size = total_size + p.first;
01346     }
01347     for ( std::map< int, std::pair<unsigned long long,float> >::iterator cache_it = cache_map.begin(); cache_it != cache_map.end(); cache_it++) {
01348       // check if the usage percent is passed
01349       if ((100 - (100 * cache_it->second.second)/ cache_it->second.first) < _max_used) {                       
01350         // caches which are under the defined percentage 
01351         under_limit.insert(std::make_pair(cache_it->first, roundf((float) cache_it->second.first/total_size*10)));
01352       } else {
01353         // caches which are passed the defined percentage
01354         over_limit.insert(std::make_pair(cache_it->second.second, cache_it->first));
01355       }
01356     }
01357     int cache_no = 0;
01358     if (under_limit.size() > 0) {
01359       std::vector<int> utility_cache;
01360       for ( std::map<int,int> ::iterator cache_it = under_limit.begin(); cache_it != under_limit.end(); cache_it++) {
01361         // fill the vector with the frequency of cache number according to the cache size. 
01362         // for instance, a cache with 70% of the total cache space will appear 7 times in this vector and a cache with 30% will appear 3 times.           
01363         if (cache_it->second == 0) {
01364           utility_cache.push_back(cache_it->first);
01365         } else { 
01366           for (int i = 0; i < cache_it->second; i++) {
01367             utility_cache.push_back(cache_it->first);
01368           }
01369         }
01370       } 
01371       // choose a cache from the weighted list   
01372       cache_no = utility_cache.at((int)rand()%(utility_cache.size()));
01373     } else {
01374       // find a max free space amoung the caches that are passed the limit of usage
01375       cache_no = max_element(over_limit.begin(), over_limit.end(), over_limit.value_comp())->second;
01376     }
01377     return cache_no;
01378   }
01379   
01380   std::pair <unsigned long long, unsigned long long> FileCache::_getCacheInfo(std::string path) {
01381   
01382     struct statvfs info;
01383     if (statvfs(path.c_str(), &info) != 0) {
01384       logger.msg(ERROR, "Error getting info from statvfs for the path: %s", path);
01385     }
01386     // return a pair of <cache total size (KB), cache free space (KB)>
01387     return std::make_pair((info.f_blocks * info.f_bsize)/1024, (info.f_bfree * info.f_bsize)/1024); 
01388   }
01389 
01390 } // namespace Arc
01391 
01392 #endif /*WIN32*/