Back to index

nordugrid-arc-nox  1.1.0~rc6
info_log.cpp
Go to the documentation of this file.
00001 #ifdef HAVE_CONFIG_H
00002 #include <config.h>
00003 #endif
00004 
00005 #include <string>
00006 #include <list>
00007 #include <fstream>
00008 #include <sys/types.h>
00009 #include <sys/stat.h>
00010 #include <unistd.h>
00011 #include <fcntl.h>
00012 #include <pwd.h>
00013 
00014 #include <arc/StringConv.h>
00015 #include <arc/DateTime.h>
00016 #include <arc/client/JobDescription.h>
00017 #include "../jobs/job_desc.h"
00018 #include "info_files.h"
00019 #include "../conf/conf.h"
00020 //@ #include <arc/certificate.h>
00021 
00022 #include "info_log.h"
00023 
// Suffixes of the per-job control files ("job.<ID><suffix>") used below
const char* const sfx_local = ".local";        // job.ID.local information file
const char* const sfx_rsl   = ".description";  // job description
const char* const sfx_diag  = ".diag";         // job diagnostics
const char* const sfx_proxy = ".proxy";        // user certificate chain incl. proxy
00028 
// Truncate 's' at the first character at or after position 'n' which is not
// a decimal digit. Characters before 'n' are never examined and are kept.
// If every character from 'n' onwards is a digit (or 'n' is not less than
// the length of 's') the string is left unchanged.
static void extract_integer(std::string& s,std::string::size_type n = 0) {
  for(;n<s.length();n++) {
    // isdigit() must not be given a negative value (other than EOF), so go
    // through unsigned char - plain char may be signed and bytes >= 0x80
    // would otherwise cause undefined behaviour.
    if(!isdigit(static_cast<unsigned char>(s[n]))) {
      s.resize(n);
      break;
    };
  };
}
00036 
00037 static void extract_float(std::string& s,std::string::size_type n = 0) {
00038   for(;n<s.length();n++) {
00039     if(isdigit(s[n])) continue;
00040     if(s[n] == '.') { extract_integer(s,n+1); return; };
00041     s.resize(n); break;
00042   };
00043   return;
00044 }
00045 
00046 static bool string_to_number(std::string& s,float& f) {
00047   extract_float(s);
00048   if(s.length() == 0) return false;
00049   if(!Arc::stringto(s,f)) return false;
00050   return true;
00051 }
00052 
00053 /*
00054 static bool string_to_number(std::string& s,unsigned int& n) {
00055   extract_integer(s);
00056   if(s.length() == 0) return false;
00057   if(!Arc::stringto(s,n)) return false;
00058   return true;
00059 }
00060 */
00061 
00062 // Create multiple files for sending to logger
00063 // TODO - make it SOAP XML so that they could be sent directly
00064 bool job_log_make_file(const JobDescription &desc,JobUser &user,const std::string &url,std::list<std::string> &report_config) {
00065   std::string fname_dst = user.ControlDir()+"/logs/"+desc.get_id()+".XXXXXX";
00066   std::string fname_src;
00067   std::string status;
00068   int h_dst;
00069   int l;
00070   time_t t;
00071   char buf[256];
00072   if((h_dst=mkstemp((char*)(fname_dst.c_str()))) == -1) {
00073     return false;
00074   };
00075   (void)chmod(fname_dst.c_str(),S_IRUSR | S_IWUSR);
00076   fix_file_owner(fname_dst,desc,user);
00077   fix_file_permissions(fname_dst,false);
00078   std::ofstream o_dst(fname_dst.c_str());
00079   close(h_dst);
00080   // URL to send info to
00081   if(url.length()) {
00082     o_dst<<"loggerurl="<<url<<std::endl; if(o_dst.fail()) goto error;
00083   };
00084   // Configuration options for usage reporter tool
00085   for (std::list<std::string>::iterator sp = report_config.begin();
00086        sp != report_config.end();
00087        ++sp) 
00088     {
00089       o_dst<<*sp<<std::endl;
00090     }
00091   // Copy job description
00092   {
00093   fname_src = user.ControlDir() + "/job." + desc.get_id() + sfx_rsl;
00094   int h_src=open(fname_src.c_str(),O_RDONLY);
00095   if(h_src==-1) goto error; 
00096   o_dst<<"description=";
00097   for(;;) {
00098     l=read(h_src,buf,sizeof(buf));
00099     if(l==0) break;
00100     if(l==-1) goto error;
00101     for(char* p=buf;p;) { p=(char*)memchr(buf,'\r',l); if(p) (*p)=' '; };
00102     for(char* p=buf;p;) { p=(char*)memchr(buf,'\n',l); if(p) (*p)=' '; };
00103     o_dst.write(buf,l);
00104     if(o_dst.fail()) goto error;
00105   };
00106   o_dst<<std::endl;
00107   struct stat st;
00108   if(fstat(h_src,&st) == 0) {
00109     struct passwd pw_;
00110     struct passwd *pw;
00111     char buf[BUFSIZ];
00112     getpwuid_r(st.st_uid,&pw_,buf,BUFSIZ,&pw);
00113     if(pw != NULL) {
00114       if(pw->pw_name) o_dst<<"localuser="<<pw->pw_name<<std::endl;
00115     };
00116   };
00117   close(h_src);
00118   };
00119   // Start time and identifier
00120   t = job_mark_time(fname_src);
00121   o_dst<<"submissiontime="<<Arc::Time(t).str(Arc::MDSTime)<<std::endl;
00122   o_dst<<"ngjobid="<<desc.get_id()<<std::endl;
00123   if(o_dst.fail()) goto error;
00124   // Analyze job.ID.local and store relevant information
00125   {
00126   fname_src = user.ControlDir() + "/job." + desc.get_id() + sfx_local;
00127   std::ifstream i_src(fname_src.c_str());
00128   for(;;) {
00129     if(i_src.fail()) goto error;
00130     if(o_dst.fail()) goto error;
00131     if(i_src.eof()) break;
00132     std::string value;
00133     std::string key = config_read_line(i_src,value,'=');
00134     if(key=="subject") { o_dst<<"usersn="<<value<<std::endl; }
00135     else if(key=="lrms") { o_dst<<"lrms="<<value<<std::endl; }
00136     else if(key=="queue") { o_dst<<"queue="<<value<<std::endl; }
00137     else if(key=="localid") { o_dst<<"localjobid="<<value<<std::endl; }
00138     else if(key=="jobname") { o_dst<<"jobname="<<value<<std::endl; }
00139     else if(key=="globalid") { o_dst<<"globalid="<<value<<std::endl; }
00140     else if(key=="projectname") { o_dst<<"projectname="<<value<<std::endl; }
00141     else if(key=="clientname") { o_dst<<"clienthost="<<value<<std::endl; }
00142   };
00143   };
00144 
00145   // Copy public part of user certificate chain incl. proxy
00146   {
00147     std::string user_cert;
00148     fname_src = user.ControlDir() + "/job." + desc.get_id() + sfx_proxy;
00149     std::ifstream proxy_src(fname_src.c_str());
00150     bool in_private=false;
00151     for(;;) {
00152       if(proxy_src.bad()) goto error;
00153       if(proxy_src.eof()) break;
00154       std::string line;
00155       std::getline(proxy_src,line);
00156       if(in_private)  
00157   { // Skip private key
00158     if (line.find("-----END") != std::string::npos &&
00159         line.find("PRIVATE KEY-----") != std::string::npos
00160         )           // can be RSA, DSA etc.
00161       in_private=false;
00162   }
00163       else
00164   {
00165     if (line.find("-----BEGIN") != std::string::npos &&
00166         line.find("PRIVATE KEY-----") != std::string::npos
00167         )           // can be RSA, DSA etc.
00168       in_private=true;
00169     else
00170       {
00171         user_cert+=line;
00172         if (!proxy_src.eof()) user_cert+='\\';
00173       }
00174   }
00175     }
00176     if(user_cert.length()) {
00177       o_dst<<"usercert="<<user_cert<<std::endl; if(o_dst.fail()) goto error;
00178     }
00179   }
00180 
00181   // Extract requested resources
00182   {
00183     fname_src = user.ControlDir() + "/job." + desc.get_id() + sfx_rsl;
00184     Arc::JobDescription arc_job_desc;
00185     if(!get_arc_job_description(fname_src, arc_job_desc)) goto error;
00186     if(arc_job_desc.Resources.IndividualPhysicalMemory.max>=0) o_dst<<"requestedmemory="<<arc_job_desc.Resources.IndividualPhysicalMemory.max<<std::endl;
00187     if(arc_job_desc.Resources.TotalCPUTime.range.max>=0) o_dst<<"requestedcputime="<<arc_job_desc.Resources.TotalCPUTime.range.max<<std::endl;
00188     if(arc_job_desc.Resources.TotalWallTime.range.max>=0) o_dst<<"requestedwalltime="<<arc_job_desc.Resources.TotalWallTime.range.max<<std::endl;
00189     if(arc_job_desc.Resources.DiskSpaceRequirement.DiskSpace.max>=0) o_dst<<"requesteddisk="<<arc_job_desc.Resources.DiskSpaceRequirement.DiskSpace.max<<std::endl;
00190     if(arc_job_desc.Resources.RunTimeEnvironment.getSoftwareList().size()>0) {
00191       std::string rteStr;
00192       for (std::list<Arc::Software>::const_iterator itSW = arc_job_desc.Resources.RunTimeEnvironment.getSoftwareList().begin();
00193            itSW != arc_job_desc.Resources.RunTimeEnvironment.getSoftwareList().end(); itSW++) {
00194         if (!itSW->empty() && !itSW->getVersion().empty()) {
00195           if (!rteStr.empty()) rteStr += " ";
00196           rteStr += *itSW;
00197         }
00198       }
00199       if (!rteStr.empty()) o_dst<<"runtimeenvironment="<<rteStr<<std::endl;
00200     }
00201   };
00202   // Analyze diagnostics and store relevant information
00203   {
00204   fname_src = user.ControlDir() + "/job." + desc.get_id() + sfx_diag;
00205   std::ifstream i_src(fname_src.c_str());
00206   if(!i_src.fail()) {
00207     std::string nodenames;
00208     int nodecount = 0;
00209     float cputime = 0;
00210     for(;;) {
00211       if(i_src.fail()) goto error;
00212       if(o_dst.fail()) goto error;
00213       if(i_src.eof()) break;
00214       std::string value;
00215       std::string key = config_read_line(i_src,value,'=');
00216       if(key=="nodename") {
00217         if(nodecount) nodenames+=":"; nodenames+=value;
00218         nodecount++;
00219       } else if(strcasecmp(key.c_str(),"walltime") == 0) {
00220         float f;
00221         if(string_to_number(value,f))
00222           o_dst<<"usedwalltime="<<(unsigned int)f<<std::endl;
00223       } else if(strcasecmp(key.c_str(),"kerneltime") == 0) {
00224         float f;
00225         if(string_to_number(value,f)) {
00226           o_dst<<"usedkernelcputime="<<(unsigned int)f<<std::endl;
00227           cputime+=f;
00228         }
00229       } else if(strcasecmp(key.c_str(),"usertime") == 0) {
00230         float f;
00231         if(string_to_number(value,f)) {
00232           o_dst<<"usedusercputime="<<(unsigned int)f<<std::endl;
00233           cputime+=f;
00234         }
00235       } else if(strcasecmp(key.c_str(),"averagetotalmemory") == 0) {
00236         float f;
00237         if(string_to_number(value,f))
00238           o_dst<<"usedmemory="<<(unsigned int)f<<std::endl;
00239       } else if(strcasecmp(key.c_str(),"averageresidentmemory") == 0) {
00240         float f;
00241         if(string_to_number(value,f))
00242           o_dst<<"usedaverageresident="<<(unsigned int)f<<std::endl;
00243       } else if(strcasecmp(key.c_str(),"maxresidentmemory") == 0) {
00244         float f;
00245         if(string_to_number(value,f))
00246           o_dst<<"usedmaxresident="<<(unsigned int)f<<std::endl;
00247       } else if(strcasecmp(key.c_str(),"exitcode") == 0) {
00248         int n;
00249         if(Arc::stringto(value,n)) o_dst<<"exitcode="<<n<<std::endl;
00250       };
00251     };
00252     if(nodecount) {
00253       o_dst<<"nodename="<<nodenames<<std::endl;
00254       o_dst<<"nodecount="<<nodecount<<std::endl;
00255     };
00256     o_dst<<"usedcputime="<<(unsigned int)cputime<<std::endl;
00257   };
00258   };
00259   // Endtime and failure reason
00260   if(desc.get_state() == JOB_STATE_FINISHED) {
00261     status="completed";
00262     t = job_state_time(desc.get_id(),user);
00263     if(t == 0) t=::time(NULL);
00264     o_dst<<"endtime="<<Arc::Time(t).str(Arc::MDSTime)<<std::endl;
00265     if(job_failed_mark_check(desc.get_id(),user)) {
00266       std::string failure = job_failed_mark_read(desc.get_id(),user);
00267       o_dst<<"failurestring="<<failure<<std::endl;
00268       status="failed";
00269     };
00270   };
00271   if(status.length()) o_dst<<"status="<<status<<std::endl;
00272   // Identity of cluster
00273 //@   try {
00274 //@     Certificate cert(HOSTCERT);
00275 //@     o_dst<<"cluster="<<cert.GetSN()<<std::endl;
00276 //@   } catch (std::exception e) { };
00277   if(o_dst.fail()) goto error;
00278   o_dst.close();
00279   return true;
00280 error:
00281   o_dst.close();
00282   unlink(fname_dst.c_str());
00283   return false;
00284 }
00285