Back to index

salome-kernel  6.5.0
SALOME_ResourcesManager.cxx
Go to the documentation of this file.
00001 // Copyright (C) 2007-2012  CEA/DEN, EDF R&D, OPEN CASCADE
00002 //
00003 // Copyright (C) 2003-2007  OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
00004 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
00005 //
00006 // This library is free software; you can redistribute it and/or
00007 // modify it under the terms of the GNU Lesser General Public
00008 // License as published by the Free Software Foundation; either
00009 // version 2.1 of the License.
00010 //
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // Lesser General Public License for more details.
00015 //
00016 // You should have received a copy of the GNU Lesser General Public
00017 // License along with this library; if not, write to the Free Software
00018 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
00019 //
00020 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
00021 //
00022 
00023 #include "SALOME_ResourcesManager.hxx" 
00024 #include "Utils_ExceptHandlers.hxx"
00025 #include "Utils_CorbaException.hxx"
00026 #include "OpUtil.hxx"
00027 
00028 #include <stdlib.h>
00029 #include <stdio.h>
00030 #ifndef WIN32
00031 #include <unistd.h>
00032 #else
00033 #include <io.h>
00034 #include <process.h>
00035 #endif
00036 #include <fstream>
00037 #include <iostream>
00038 #include <sstream>
00039 #include <string.h>
00040 #include <map>
00041 #include <list>
00042 
00043 #include <sys/types.h>
00044 #include <sys/stat.h>
00045 #include "utilities.h"
00046 
// Maximum size, in characters, reserved for a host name buffer.
// No trailing semicolon: the macro must stay usable inside expressions
// and array bounds.
#define MAX_SIZE_FOR_HOSTNAME 256
00048 
// Naming-service entry under which the constructors register this servant
// and which Shutdown() destroys.
00049 const char *SALOME_ResourcesManager::_ResourcesManagerNameInNS = "/ResourcesManager";
00050 
00051 //=============================================================================
00055 //=============================================================================
00056 
00057 SALOME_ResourcesManager::
00058 SALOME_ResourcesManager(CORBA::ORB_ptr orb, 
00059                         PortableServer::POA_var poa, 
00060                         SALOME_NamingService *ns,
00061                         const char *xmlFilePath) : _rm(xmlFilePath)
00062 {
00063   MESSAGE("SALOME_ResourcesManager constructor");
00064   _NS = ns;
00065   _orb = CORBA::ORB::_duplicate(orb) ;
00066   _poa = PortableServer::POA::_duplicate(poa) ;
00067   PortableServer::ObjectId_var id = _poa->activate_object(this);
00068   CORBA::Object_var obj = _poa->id_to_reference(id);
00069   Engines::ResourcesManager_var refContMan = Engines::ResourcesManager::_narrow(obj);
00070   _NS->Register(refContMan,_ResourcesManagerNameInNS);
00071   MESSAGE("SALOME_ResourcesManager constructor end");
00072 }
00073 
00074 //=============================================================================
00083 //=============================================================================
00084 
00085 SALOME_ResourcesManager::SALOME_ResourcesManager(CORBA::ORB_ptr orb, 
00086                                                  PortableServer::POA_var poa, 
00087                                                  SALOME_NamingService *ns) : _rm()
00088 {
00089   MESSAGE("SALOME_ResourcesManager constructor");
00090   _NS = ns;
00091   _orb = CORBA::ORB::_duplicate(orb) ;
00092   _poa = PortableServer::POA::_duplicate(poa) ;
00093   PortableServer::ObjectId_var id = _poa->activate_object(this);
00094   CORBA::Object_var obj = _poa->id_to_reference(id);
00095   Engines::ResourcesManager_var refContMan = Engines::ResourcesManager::_narrow(obj);
00096   _NS->Register(refContMan,_ResourcesManagerNameInNS);
00097 
00098   MESSAGE("SALOME_ResourcesManager constructor end");
00099 }
00100 
00101 //=============================================================================
00105 //=============================================================================
00106 
00107 SALOME_ResourcesManager::~SALOME_ResourcesManager()
00108 {
00109   MESSAGE("SALOME_ResourcesManager destructor");
00110 }
00111 
00112 
00113 //=============================================================================
00117 //=============================================================================
00118 
00119 void SALOME_ResourcesManager::Shutdown()
00120 {
00121   MESSAGE("Shutdown");
00122   _NS->Destroy_Name(_ResourcesManagerNameInNS);
00123   PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
00124   _poa->deactivate_object(oid);
00125 }
00126 
00127 //=============================================================================
00129 
00139 //=============================================================================
00140 
00141 Engines::ResourceList *
00142 SALOME_ResourcesManager::GetFittingResources(const Engines::ResourceParameters& params)
00143 {
00144   MESSAGE("ResourcesManager::GetFittingResources");
00145   Engines::ResourceList * ret = new Engines::ResourceList;
00146 
00147   // CORBA -> C++
00148   resourceParams p;
00149   p.name = params.name;
00150   p.hostname = params.hostname;
00151   p.OS = params.OS;
00152   p.nb_proc = params.nb_proc;
00153   p.nb_node = params.nb_node;
00154   p.nb_proc_per_node = params.nb_proc_per_node;
00155   p.cpu_clock = params.cpu_clock;
00156   p.mem_mb = params.mem_mb;
00157   for(unsigned int i=0; i<params.componentList.length(); i++)
00158     p.componentList.push_back(std::string(params.componentList[i]));
00159   for(unsigned int i=0; i<params.resList.length(); i++)
00160     p.resourceList.push_back(std::string(params.resList[i]));
00161   
00162   try
00163   {
00164     // Call C++ ResourceManager
00165     std::vector <std::string> vec = _rm.GetFittingResources(p);
00166 
00167     // C++ -> CORBA
00168     ret->length(vec.size());
00169     for(unsigned int i=0;i<vec.size();i++)
00170       (*ret)[i] = (vec[i]).c_str();
00171   }
00172   catch(const ResourcesException &ex)
00173   {
00174     INFOS("Caught exception in GetFittingResources C++:  " << ex.msg);
00175     THROW_SALOME_CORBA_EXCEPTION(ex.msg.c_str(),SALOME::BAD_PARAM);
00176   }  
00177 
00178   return ret;
00179 }
00180 
00181 //=============================================================================
00185 //=============================================================================
00186 
00187 char *
00188 SALOME_ResourcesManager::FindFirst(const Engines::ResourceList& listOfResources)
00189 {
00190   // CORBA -> C++
00191   std::vector<std::string> rl;
00192   for(unsigned int i=0; i<listOfResources.length(); i++)
00193     rl.push_back(std::string(listOfResources[i]));
00194 
00195   return CORBA::string_dup(_rm.Find("first", rl).c_str());
00196 }
00197 
00198 char *
00199 SALOME_ResourcesManager::Find(const char* policy, const Engines::ResourceList& listOfResources)
00200 {
00201   // CORBA -> C++
00202   std::vector<std::string> rl;
00203   for(unsigned int i=0; i<listOfResources.length(); i++)
00204     rl.push_back(std::string(listOfResources[i]));
00205 
00206   return CORBA::string_dup(_rm.Find(policy, rl).c_str());
00207 }
00208 
00209 Engines::ResourceDefinition* 
00210 SALOME_ResourcesManager::GetResourceDefinition(const char * name)
00211 {
00212   ParserResourcesType resource = _rm.GetResourcesDescr(name);
00213   Engines::ResourceDefinition *p_ptr = new Engines::ResourceDefinition;
00214 
00215   p_ptr->name = CORBA::string_dup(resource.Name.c_str());
00216   p_ptr->hostname = CORBA::string_dup(resource.HostName.c_str());
00217   p_ptr->protocol = ParserResourcesType::protocolToString(resource.Protocol).c_str();
00218   p_ptr->iprotocol = ParserResourcesType::protocolToString(resource.ClusterInternalProtocol).c_str();
00219   p_ptr->username = CORBA::string_dup(resource.UserName.c_str());
00220   p_ptr->applipath = CORBA::string_dup(resource.AppliPath.c_str());
00221   p_ptr->componentList.length(resource.ComponentsList.size());
00222   for(unsigned int i=0;i<resource.ComponentsList.size();i++)
00223     p_ptr->componentList[i] = CORBA::string_dup(resource.ComponentsList[i].c_str());
00224   p_ptr->OS = CORBA::string_dup(resource.OS.c_str());
00225   p_ptr->mem_mb = resource.DataForSort._memInMB;
00226   p_ptr->cpu_clock = resource.DataForSort._CPUFreqMHz;
00227   p_ptr->nb_proc_per_node = resource.DataForSort._nbOfProcPerNode;
00228   p_ptr->nb_node = resource.DataForSort._nbOfNodes;
00229   p_ptr->is_cluster_head = resource.is_cluster_head;
00230   p_ptr->working_directory = CORBA::string_dup(resource.working_directory.c_str());
00231 
00232   if( resource.mpi == lam )
00233     p_ptr->mpiImpl = "lam";
00234   else if( resource.mpi == mpich1 )
00235     p_ptr->mpiImpl = "mpich1";
00236   else if( resource.mpi == mpich2 )
00237     p_ptr->mpiImpl = "mpich2";
00238   else if( resource.mpi == openmpi )
00239     p_ptr->mpiImpl = "openmpi";
00240   else if( resource.mpi == slurmmpi )
00241     p_ptr->mpiImpl = "slurmmpi";
00242   else if( resource.mpi == prun )
00243     p_ptr->mpiImpl = "prun";
00244 
00245   if( resource.Batch == pbs )
00246     p_ptr->batch = "pbs";
00247   else if( resource.Batch == lsf )
00248     p_ptr->batch = "lsf";
00249   else if( resource.Batch == sge )
00250     p_ptr->batch = "sge";
00251   else if( resource.Batch == ccc )
00252     p_ptr->batch = "ccc";
00253   else if( resource.Batch == slurm )
00254     p_ptr->batch = "slurm";
00255   else if( resource.Batch == ssh_batch )
00256     p_ptr->batch = "ssh";
00257   else if( resource.Batch == ll )
00258     p_ptr->batch = "ll";
00259   else if( resource.Batch == vishnu )
00260     p_ptr->batch = "vishnu";
00261 
00262   return p_ptr;
00263 }
00264 
00265 void 
00266 SALOME_ResourcesManager::AddResource(const Engines::ResourceDefinition& new_resource,
00267                                      CORBA::Boolean write,
00268                                      const char * xml_file)
00269 {
00270   ParserResourcesType resource;
00271   resource.Name = new_resource.name.in();
00272   resource.HostName = new_resource.hostname.in();
00273   resource.OS = new_resource.OS.in();
00274   resource.AppliPath = new_resource.applipath.in();
00275   resource.DataForSort._memInMB = new_resource.mem_mb;
00276   resource.DataForSort._CPUFreqMHz = new_resource.cpu_clock;
00277   resource.DataForSort._nbOfNodes = new_resource.nb_node;
00278   resource.DataForSort._nbOfProcPerNode = new_resource.nb_proc_per_node;
00279   resource.UserName = new_resource.username.in();
00280   resource.is_cluster_head = new_resource.is_cluster_head;
00281   resource.working_directory = new_resource.working_directory.in();
00282 
00283   std::string aBatch = new_resource.batch.in();
00284   if (aBatch == "pbs")
00285     resource.Batch = pbs;
00286   else if  (aBatch == "lsf")
00287     resource.Batch = lsf;
00288   else if  (aBatch == "sge")
00289     resource.Batch = sge;
00290   else if  (aBatch == "slurm")
00291     resource.Batch = slurm;
00292   else if  (aBatch == "ccc")
00293     resource.Batch = ccc;
00294   else if  (aBatch == "ssh_batch")
00295     resource.Batch = ssh_batch;
00296   else if  (aBatch == "ll")
00297     resource.Batch = ll;
00298   else if  (aBatch == "vishnu")
00299     resource.Batch = vishnu;
00300   else if (aBatch == "")
00301     resource.Batch = none;
00302   else {
00303     INFOS("Bad Batch definition in AddResource: " << aBatch);
00304     std::string message("Bad Batch definition in AddResource: ");
00305     message += aBatch;
00306     THROW_SALOME_CORBA_EXCEPTION(message.c_str(),SALOME::BAD_PARAM);
00307   }
00308 
00309   std::string anMpi = new_resource.mpiImpl.in();
00310   if (anMpi == "lam")
00311     resource.mpi = lam;
00312   else if (anMpi == "mpich1")
00313     resource.mpi = mpich1;
00314   else if (anMpi == "mpich2")
00315     resource.mpi = mpich2;
00316   else if (anMpi == "openmpi")
00317     resource.mpi = openmpi;
00318   else if  (anMpi == "slurmmpi")
00319     resource.mpi = slurmmpi;
00320   else if  (anMpi == "prun")
00321     resource.mpi = prun;
00322   else if (anMpi == "")
00323     resource.mpi = nompi;
00324   else {
00325     INFOS("Bad MPI definition in AddResource: " << anMpi);
00326     std::string message("Bad MPI definition in AddResource: ");
00327     message += anMpi;
00328     THROW_SALOME_CORBA_EXCEPTION(message.c_str(),SALOME::BAD_PARAM);
00329   }
00330 
00331   std::string mode_str = new_resource.mode.in();
00332   if (mode_str == "interactive")
00333     resource.Mode = interactive;
00334   else if (mode_str == "batch")
00335     resource.Mode = batch;
00336   else if (mode_str == "")
00337     resource.Mode = interactive;
00338   else {
00339     INFOS("Bad mode definition in AddResource: " << mode_str);
00340     std::string message("Bad mode definition in AddResource: ");
00341     message += mode_str;
00342     THROW_SALOME_CORBA_EXCEPTION(message.c_str(),SALOME::BAD_PARAM);
00343   }
00344   
00345   std::string protocol = new_resource.protocol.in();
00346   try
00347   {
00348     resource.Protocol = ParserResourcesType::stringToProtocol(protocol);
00349   }
00350   catch (SALOME_Exception e)
00351   {
00352     INFOS("Bad protocol definition in AddResource: " << protocol);
00353     std::string message("Bad protocol definition in AddResource: ");
00354     message += protocol;
00355     THROW_SALOME_CORBA_EXCEPTION(message.c_str(),SALOME::BAD_PARAM);
00356   }
00357 
00358   std::string iprotocol = new_resource.iprotocol.in();
00359   try
00360   {
00361     resource.ClusterInternalProtocol = ParserResourcesType::stringToProtocol(iprotocol);
00362   }
00363   catch (SALOME_Exception e)
00364   {
00365     INFOS("Bad iprotocol definition in AddResource: " << iprotocol);
00366     std::string message("Bad iprotocol definition in AddResource: ");
00367     message += iprotocol;
00368     THROW_SALOME_CORBA_EXCEPTION(message.c_str(),SALOME::BAD_PARAM);
00369   }
00370 
00371   for (CORBA::ULong i = 0; i < new_resource.componentList.length(); i++)
00372     resource.ComponentsList.push_back(new_resource.componentList[i].in());
00373 
00374   _rm.AddResourceInCatalog(resource);
00375 
00376   if (write)
00377   {
00378     _rm.WriteInXmlFile(std::string(xml_file));
00379     _rm.ParseXmlFiles();
00380   }
00381 }
00382 
00383 void
00384 SALOME_ResourcesManager::RemoveResource(const char * resource_name,
00385                                         CORBA::Boolean write,
00386                                         const char * xml_file)
00387 {
00388   _rm.DeleteResourceInCatalog(resource_name);
00389   if (write)
00390   {
00391     _rm.WriteInXmlFile(std::string(xml_file));
00392     _rm.ParseXmlFiles();
00393   }
00394 }
00395 
00396 std::string 
00397 SALOME_ResourcesManager::getMachineFile(std::string resource_name, 
00398                                         CORBA::Long nb_procs, 
00399                                         std::string parallelLib)
00400 {
00401   std::string machine_file_name("");
00402 
00403   if (parallelLib == "Dummy")
00404   {
00405     MESSAGE("[getMachineFile] parallelLib is Dummy");
00406     MapOfParserResourcesType resourcesList = _rm.GetList();
00407     if (resourcesList.find(resource_name) != resourcesList.end())
00408     {
00409       ParserResourcesType resource = resourcesList[resource_name];
00410 
00411       // Check if resource is cluster or not
00412       if (resource.ClusterMembersList.empty())
00413       {
00414         //It is not a cluster so we create a cluster with one machine
00415         ParserResourcesClusterMembersType fake_node;
00416         fake_node.HostName = resource.HostName;
00417         fake_node.Protocol = resource.Protocol;
00418         fake_node.ClusterInternalProtocol = resource.ClusterInternalProtocol;
00419         fake_node.UserName = resource.UserName;
00420         fake_node.AppliPath = resource.AppliPath;
00421         fake_node.DataForSort = resource.DataForSort;
00422 
00423         resource.ClusterMembersList.push_front(fake_node);
00424       }
00425 
00426       // Creating list of machines for creating the machine file
00427       std::list<std::string> list_of_machines;
00428       std::list<ParserResourcesClusterMembersType>::iterator cluster_it = 
00429         resource.ClusterMembersList.begin();
00430       while (cluster_it != resource.ClusterMembersList.end())
00431       {
00432         // For each member of the cluster we add a nbOfNodes * nbOfProcPerNode in the list
00433         unsigned int number_of_proc = (*cluster_it).DataForSort._nbOfNodes * 
00434                                       (*cluster_it).DataForSort._nbOfProcPerNode;
00435         for (unsigned int i = 0; i < number_of_proc; i++)
00436           list_of_machines.push_back((*cluster_it).HostName);
00437         cluster_it++;
00438       }
00439 
00440       // Creating machine file
00441       machine_file_name = tmpnam(NULL);
00442       std::ofstream machine_file(machine_file_name.c_str(), std::ios_base::out);
00443 
00444       CORBA::Long machine_number = 0;
00445       std::list<std::string>::iterator it = list_of_machines.begin();
00446       while (machine_number != nb_procs)
00447       {
00448         // Adding a new node to the machine file
00449         machine_file << *it << std::endl;
00450 
00451         // counting...
00452         it++;
00453         if (it == list_of_machines.end())
00454           it = list_of_machines.begin();
00455         machine_number++;
00456       }
00457     }
00458     else
00459       INFOS("[getMachineFile] Error resource_name not found in resourcesList -> " << resource_name);
00460   }
00461   else if (parallelLib == "Mpi")
00462   {
00463     MESSAGE("[getMachineFile] parallelLib is Mpi");
00464 
00465     MapOfParserResourcesType resourcesList = _rm.GetList();
00466     if (resourcesList.find(resource_name) != resourcesList.end())
00467     {
00468       ParserResourcesType resource = resourcesList[resource_name];
00469       // Check if resource is cluster or not
00470       if (resource.ClusterMembersList.empty())
00471       {
00472         //It is not a cluster so we create a cluster with one machine
00473         ParserResourcesClusterMembersType fake_node;
00474         fake_node.HostName = resource.HostName;
00475         fake_node.Protocol = resource.Protocol;
00476         fake_node.ClusterInternalProtocol = resource.ClusterInternalProtocol;
00477         fake_node.UserName = resource.UserName;
00478         fake_node.AppliPath = resource.AppliPath;
00479         fake_node.DataForSort = resource.DataForSort;
00480 
00481         resource.ClusterMembersList.push_front(fake_node);
00482       }
00483 
00484       // Choose mpi implementation -> each MPI implementation has is own machinefile...
00485       if (resource.mpi == lam)
00486       {
00487         // Creating machine file
00488         machine_file_name = tmpnam(NULL);
00489         std::ofstream machine_file(machine_file_name.c_str(), std::ios_base::out);
00490 
00491         // We add all cluster machines to the file
00492         std::list<ParserResourcesClusterMembersType>::iterator cluster_it = 
00493           resource.ClusterMembersList.begin();
00494         while (cluster_it != resource.ClusterMembersList.end())
00495         {
00496           unsigned int number_of_proc = (*cluster_it).DataForSort._nbOfNodes * 
00497             (*cluster_it).DataForSort._nbOfProcPerNode;
00498           machine_file << (*cluster_it).HostName << " cpu=" << number_of_proc << std::endl;
00499           cluster_it++;
00500         }
00501       }
00502       else if (resource.mpi == openmpi)
00503       {
00504         // Creating machine file
00505         machine_file_name = tmpnam(NULL);
00506         std::ofstream machine_file(machine_file_name.c_str(), std::ios_base::out);
00507 
00508         // We add all cluster machines to the file
00509         std::list<ParserResourcesClusterMembersType>::iterator cluster_it =
00510           resource.ClusterMembersList.begin();
00511         while (cluster_it != resource.ClusterMembersList.end())
00512         {
00513           unsigned int number_of_proc = (*cluster_it).DataForSort._nbOfNodes *
00514             (*cluster_it).DataForSort._nbOfProcPerNode;
00515           machine_file << (*cluster_it).HostName << " slots=" << number_of_proc << std::endl;
00516           cluster_it++;
00517         }
00518       }
00519       else if (resource.mpi == nompi)
00520       {
00521         INFOS("[getMachineFile] Error resource_name MPI implementation was defined for " << resource_name);
00522       }
00523       else
00524         INFOS("[getMachineFile] Error resource_name MPI implementation not currenly handled for " << resource_name);
00525     }
00526     else
00527       INFOS("[getMachineFile] Error resource_name not found in resourcesList -> " << resource_name);
00528   }
00529   else
00530     INFOS("[getMachineFile] Error parallelLib is not handled -> " << parallelLib);
00531 
00532   return machine_file_name;
00533 }