Back to index

salome-kernel  6.5.0
SALOME_ContainerManager.cxx
Go to the documentation of this file.
00001 // Copyright (C) 2007-2012  CEA/DEN, EDF R&D, OPEN CASCADE
00002 //
00003 // Copyright (C) 2003-2007  OPEN CASCADE, EADS/CCR, LIP6, CEA/DEN,
00004 // CEDRAT, EDF R&D, LEG, PRINCIPIA R&D, BUREAU VERITAS
00005 //
00006 // This library is free software; you can redistribute it and/or
00007 // modify it under the terms of the GNU Lesser General Public
00008 // License as published by the Free Software Foundation; either
00009 // version 2.1 of the License.
00010 //
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // Lesser General Public License for more details.
00015 //
00016 // You should have received a copy of the GNU Lesser General Public
00017 // License along with this library; if not, write to the Free Software
00018 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
00019 //
00020 // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com
00021 //
00022 
00023 #include "SALOME_ContainerManager.hxx"
00024 #include "SALOME_NamingService.hxx"
00025 #include "SALOME_ModuleCatalog.hh"
00026 #include "Basics_Utils.hxx"
00027 #include "Basics_DirUtils.hxx"
00028 #include <sys/types.h>
00029 #include <sys/stat.h>
00030 #include <signal.h>
00031 #ifndef WIN32
00032 #include <unistd.h>
00033 #endif
00034 #include <vector>
00035 #include "Utils_CorbaException.hxx"
00036 #include <sstream>
00037 #include <string>
00038 
00039 #ifdef WNT
00040 #include <process.h>
00041 #define getpid _getpid
00042 #endif
00043 
00044 #ifdef WITH_PACO_PARALLEL
00045 #include "PaCOPP.hxx"
00046 #endif
00047 
00048 #define TIME_OUT_TO_LAUNCH_CONT 60
00049 
00050 const char *SALOME_ContainerManager::_ContainerManagerNameInNS = 
00051   "/ContainerManager";
00052 
00053 omni_mutex SALOME_ContainerManager::_numInstanceMutex;
00054 
00055 
00056 //=============================================================================
00063 //=============================================================================
00064 
00065 SALOME_ContainerManager::SALOME_ContainerManager(CORBA::ORB_ptr orb, PortableServer::POA_var poa, SALOME_ResourcesManager *rm, SALOME_NamingService *ns):_nbprocUsed(1)
00066 {
00067   MESSAGE("constructor");
00068   _NS = ns;
00069   _ResManager = rm;
00070 
00071   PortableServer::POAManager_var pman = poa->the_POAManager();
00072   _orb = CORBA::ORB::_duplicate(orb) ;
00073   CORBA::PolicyList policies;
00074   policies.length(1);
00075   PortableServer::ThreadPolicy_var threadPol = 
00076     poa->create_thread_policy(PortableServer::SINGLE_THREAD_MODEL);
00077   policies[0] = PortableServer::ThreadPolicy::_duplicate(threadPol);
00078 
00079   _poa = poa->create_POA("SThreadPOA",pman,policies);
00080   threadPol->destroy();
00081   PortableServer::ObjectId_var id = _poa->activate_object(this);
00082   CORBA::Object_var obj = _poa->id_to_reference(id);
00083   Engines::ContainerManager_var refContMan =
00084     Engines::ContainerManager::_narrow(obj);
00085 
00086   _NS->Register(refContMan,_ContainerManagerNameInNS);
00087   _isAppliSalomeDefined = (getenv("APPLI") != 0);
00088 
00089 #ifdef HAVE_MPI2
00090 #ifdef WITHOPENMPI
00091   _pid_mpiServer = -1;
00092   // the urifile name depends on pid of the process
00093   std::stringstream urifile;
00094   urifile << getenv("HOME") << "/.urifile_" << getpid();
00095   setenv("OMPI_URI_FILE",urifile.str().c_str(),1);
00096   if( getenv("OMPI_URI_FILE") != NULL ){
00097     // get the pid of all ompi-server
00098     std::set<pid_t> thepids1 = getpidofprogram("ompi-server");
00099     // launch a new ompi-server
00100     std::string command;
00101     command = "ompi-server -r ";
00102     command += getenv("OMPI_URI_FILE");
00103     int status=system(command.c_str());
00104     if(status!=0)
00105       throw SALOME_Exception("Error when launching ompi-server");
00106     // get the pid of all ompi-server
00107     std::set<pid_t> thepids2 = getpidofprogram("ompi-server");
00108     // my ompi-server is the new one
00109     std::set<pid_t>::const_iterator it;
00110     for(it=thepids2.begin();it!=thepids2.end();it++)
00111       if(thepids1.find(*it) == thepids1.end())
00112         _pid_mpiServer = *it;
00113     if(_pid_mpiServer < 0)
00114       throw SALOME_Exception("Error when getting ompi-server id");
00115   }
00116 #elif defined(WITHMPICH)
00117   _pid_mpiServer = -1;
00118   // get the pid of all hydra_nameserver
00119   std::set<pid_t> thepids1 = getpidofprogram("hydra_nameserver");
00120   // launch a new hydra_nameserver
00121   std::string command;
00122   command = "hydra_nameserver &";
00123   system(command.c_str());
00124   // get the pid of all hydra_nameserver
00125   std::set<pid_t> thepids2 = getpidofprogram("hydra_nameserver");
00126   // my hydra_nameserver is the new one
00127   std::set<pid_t>::const_iterator it;
00128   for(it=thepids2.begin();it!=thepids2.end();it++)
00129     if(thepids1.find(*it) == thepids1.end())
00130       _pid_mpiServer = *it;
00131 #endif
00132 #endif
00133 
00134   MESSAGE("constructor end");
00135 }
00136 
00137 //=============================================================================
00141 //=============================================================================
00142 
00143 SALOME_ContainerManager::~SALOME_ContainerManager()
00144 {
00145   MESSAGE("destructor");
00146 #ifdef HAVE_MPI2
00147 #ifdef WITHOPENMPI
00148   if( getenv("OMPI_URI_FILE") != NULL ){
00149     // kill my ompi-server
00150     if( kill(_pid_mpiServer,SIGTERM) != 0 )
00151       throw SALOME_Exception("Error when killing ompi-server");
00152     // delete my urifile
00153     int status=system("rm -f ${OMPI_URI_FILE}");
00154     if(status!=0)
00155       throw SALOME_Exception("Error when removing urifile");
00156   }
00157 #elif defined(WITHMPICH)
00158   // kill my hydra_nameserver
00159   if(_pid_mpiServer > -1)
00160     if( kill(_pid_mpiServer,SIGTERM) != 0 )
00161       throw SALOME_Exception("Error when killing hydra_nameserver");
00162 #endif
00163 #endif
00164 }
00165 
00166 //=============================================================================
00168 
00170 //=============================================================================
00171 
00172 void SALOME_ContainerManager::Shutdown()
00173 {
00174   MESSAGE("Shutdown");
00175   ShutdownContainers();
00176   _NS->Destroy_Name(_ContainerManagerNameInNS);
00177   PortableServer::ObjectId_var oid = _poa->servant_to_id(this);
00178   _poa->deactivate_object(oid);
00179 }
00180 
00181 //=============================================================================
00183 
00185 //=============================================================================
00186 
00187 void SALOME_ContainerManager::ShutdownContainers()
00188 {
00189   MESSAGE("ShutdownContainers");
00190   bool isOK;
00191   isOK = _NS->Change_Directory("/Containers");
00192   if( isOK ){
00193     std::vector<std::string> vec = _NS->list_directory_recurs();
00194     std::list<std::string> lstCont;
00195     for(std::vector<std::string>::iterator iter = vec.begin();iter!=vec.end();iter++)
00196       {
00197         SCRUTE((*iter));
00198         CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
00199         try
00200           {
00201             Engines::Container_var cont=Engines::Container::_narrow(obj);
00202             if(!CORBA::is_nil(cont))
00203               lstCont.push_back((*iter));
00204           }
00205         catch(const CORBA::Exception& e)
00206           {
00207             // ignore this entry and continue
00208           }
00209       }
00210     MESSAGE("Container list: ");
00211     for(std::list<std::string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++){
00212       SCRUTE((*iter));
00213     }
00214     for(std::list<std::string>::iterator iter=lstCont.begin();iter!=lstCont.end();iter++)
00215     {
00216       try
00217       {
00218         SCRUTE((*iter));
00219         CORBA::Object_var obj=_NS->Resolve((*iter).c_str());
00220         Engines::Container_var cont=Engines::Container::_narrow(obj);
00221         if(!CORBA::is_nil(cont))
00222         {
00223           MESSAGE("ShutdownContainers: " << (*iter));
00224           cont->Shutdown();
00225         }
00226         else 
00227           MESSAGE("ShutdownContainers: no container ref for " << (*iter));
00228       }
00229       catch(CORBA::SystemException& e)
00230       {
00231         INFOS("CORBA::SystemException ignored : " << e);
00232       }
00233       catch(CORBA::Exception&)
00234       {
00235         INFOS("CORBA::Exception ignored.");
00236       }
00237       catch(...)
00238       {
00239         INFOS("Unknown exception ignored.");
00240       }
00241     }
00242   }
00243 }
00244 
00245 //=============================================================================
00247 
00251 //=============================================================================
00252 Engines::Container_ptr
00253 SALOME_ContainerManager::GiveContainer(const Engines::ContainerParameters& params)
00254 {
00255   std::string machFile;
00256   Engines::Container_ptr ret = Engines::Container::_nil();
00257 
00258   // Step 0: Default mode is start
00259   Engines::ContainerParameters local_params(params);
00260   if (std::string(local_params.mode.in()) == "")
00261     local_params.mode = CORBA::string_dup("start");
00262   std::string mode = local_params.mode.in();
00263   MESSAGE("[GiveContainer] starting with mode: " << mode);
00264 
00265   // Step 1: Find Container for find and findorstart mode
00266   if (mode == "find" || mode == "findorstart")
00267   {
00268     ret = FindContainer(params, params.resource_params.resList);
00269     if(!CORBA::is_nil(ret))
00270       return ret;
00271     else
00272     {
00273       if (mode == "find")
00274       {
00275         MESSAGE("[GiveContainer] no container found");
00276         return ret;
00277       }
00278       else
00279       {
00280         mode = "start";
00281       }
00282     }
00283   }
00284 
00285   // Step 2: Get all possibleResources from the parameters
00286   Engines::ResourceList_var possibleResources = _ResManager->GetFittingResources(local_params.resource_params);
00287   MESSAGE("[GiveContainer] - length of possible resources " << possibleResources->length());
00288   std::vector<std::string> local_resources;
00289 
00290   // Step 3: if mode is "get" keep only machines with existing containers 
00291   if(mode == "get")
00292   {
00293     for(unsigned int i=0; i < possibleResources->length(); i++)
00294     {
00295       Engines::Container_ptr cont = FindContainer(params, possibleResources[i].in());
00296       try
00297       {
00298         if(!cont->_non_existent())
00299           local_resources.push_back(std::string(possibleResources[i]));
00300       }
00301       catch(CORBA::Exception&) {}
00302     }
00303 
00304     // if local_resources is empty, we cannot give a container
00305     if (local_resources.size() == 0)
00306     {
00307       MESSAGE("[GiveContainer] cannot find a container for mode get");
00308       return ret;
00309     }
00310   }
00311   else
00312     for(unsigned int i=0; i < possibleResources->length(); i++)
00313       local_resources.push_back(std::string(possibleResources[i]));
00314 
00315   // Step 4: select the resource where to get/start the container
00316   bool resource_available = true;
00317   std::string resource_selected;
00318   std::vector<std::string> resources = local_resources;
00319   while (resource_available)
00320   {
00321     if (resources.size() == 0)
00322       resource_available = false;
00323     else
00324     {
00325       try
00326       {
00327         resource_selected = _ResManager->GetImpl()->Find(params.resource_params.policy.in(), resources);
00328         // Remove resource_selected from vector
00329         std::vector<std::string>::iterator it;
00330         for (it=resources.begin() ; it < resources.end(); it++ )
00331           if (*it == resource_selected)
00332           {
00333             resources.erase(it);
00334             break;
00335           }
00336       }
00337       catch(const SALOME_Exception &ex)
00338       {
00339         MESSAGE("[GiveContainer] Exception in ResourceManager find !: " << ex.what());
00340         return ret;
00341       }
00342       MESSAGE("[GiveContainer] Resource selected is: " << resource_selected);
00343 
00344       // Step 5: Create container name
00345       Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_selected.c_str());
00346       std::string hostname(resource_definition->hostname.in());
00347       std::string containerNameInNS;
00348       if(params.isMPI){
00349         int nbproc;
00350         if ( params.nb_proc <= 0 )
00351           nbproc = 1;
00352         else
00353           nbproc = params.nb_proc;
00354         try
00355         {
00356           if( getenv("LIBBATCH_NODEFILE") != NULL )
00357             machFile = machinesFile(nbproc);
00358         }
00359         catch(const SALOME_Exception & ex)
00360         {
00361           std::string err_msg = ex.what();
00362           err_msg += params.container_name;
00363           INFOS(err_msg.c_str());
00364           return ret;
00365         }
00366         // A mpi parallel container register on zero node in NS
00367         containerNameInNS = _NS->BuildContainerNameForNS(params, GetMPIZeroNode(hostname,machFile).c_str());
00368       }
00369       else
00370         containerNameInNS = _NS->BuildContainerNameForNS(params, hostname.c_str());
00371       MESSAGE("[GiveContainer] Container name in the naming service: " << containerNameInNS);
00372 
00373       // Step 6: check if the name exists in naming service
00374       //if params.mode == "getorstart" or "get" use the existing container
00375       //if params.mode == "start" shutdown the existing container before launching a new one with that name
00376       CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
00377       if (!CORBA::is_nil(obj))
00378       {
00379         try
00380         {
00381           Engines::Container_var cont=Engines::Container::_narrow(obj);
00382           if(!cont->_non_existent())
00383           {
00384             if(std::string(params.mode.in())=="getorstart" || std::string(params.mode.in())=="get"){
00385               return cont._retn(); /* the container exists and params.mode is getorstart or get use it*/
00386             }
00387             else
00388             {
00389               INFOS("[GiveContainer] A container is already registered with the name: " << containerNameInNS << ", shutdown the existing container");
00390               cont->Shutdown(); // shutdown the registered container if it exists
00391             }
00392           }
00393         }
00394         catch(CORBA::Exception&)
00395         {
00396           INFOS("[GiveContainer] CORBA::Exception ignored when trying to get the container - we start a new one");
00397         }
00398       }
00399       Engines::Container_var cont = LaunchContainer(params, resource_selected, hostname, machFile, containerNameInNS);
00400       if (!CORBA::is_nil(cont))
00401       {
00402         INFOS("[GiveContainer] container " << containerNameInNS << " launched");
00403         return cont._retn();
00404       }
00405       else
00406       {
00407         INFOS("[GiveContainer] Failed to launch container on resource " << resource_selected);
00408       }
00409     }
00410   }
00411 
00412   // We were not able to launch the container
00413   INFOS("[GiveContainer] Cannot launch the container on the following selected resources:")
00414   std::vector<std::string>::iterator it;
00415   for (it=local_resources.begin() ; it < local_resources.end(); it++ )
00416     INFOS("[GiveContainer] " << *it)
00417   return ret;
00418 }
00419 
00420 Engines::Container_ptr
00421 SALOME_ContainerManager::LaunchContainer(const Engines::ContainerParameters& params,
00422                                          const std::string & resource_selected,
00423                                          const std::string & hostname,
00424                                          const std::string & machFile,
00425                                          const std::string & containerNameInNS)
00426 {
00427 
00428   // Step 1: type of container: PaCO, Exe, Mpi or Classic
00429   // Mpi already tested in step 5, specific code on BuildCommandToLaunch Local/Remote Container methods
00430   // TODO -> separates Mpi from Classic/Exe
00431   // Classic or Exe ?
00432   std::string container_exe = "SALOME_Container"; // Classic container
00433   Engines::ContainerParameters local_params(params);
00434   Engines::Container_ptr ret = Engines::Container::_nil();
00435   int found=0;
00436   try
00437   {
00438     CORBA::String_var container_exe_tmp;
00439     CORBA::Object_var obj = _NS->Resolve("/Kernel/ModulCatalog");
00440     SALOME_ModuleCatalog::ModuleCatalog_var Catalog = SALOME_ModuleCatalog::ModuleCatalog::_narrow(obj) ;
00441     if (CORBA::is_nil (Catalog))
00442     {
00443       INFOS("[GiveContainer] Module Catalog is not found -> cannot launch a container");
00444       return ret;
00445     }
00446     // Loop through component list
00447     for(unsigned int i=0; i < local_params.resource_params.componentList.length(); i++)
00448     {
00449       const char* compoi = local_params.resource_params.componentList[i];
00450       SALOME_ModuleCatalog::Acomponent_var compoInfo = Catalog->GetComponent(compoi);
00451       if (CORBA::is_nil (compoInfo))
00452       {
00453         continue;
00454       }
00455       SALOME_ModuleCatalog::ImplType impl=compoInfo->implementation_type();
00456       container_exe_tmp=compoInfo->implementation_name();
00457       if(impl==SALOME_ModuleCatalog::CEXE)
00458       {
00459         if(found)
00460         {
00461           INFOS("ContainerManager Error: you can't have 2 CEXE component in the same container" );
00462           return Engines::Container::_nil();
00463         }
00464         MESSAGE("[GiveContainer] Exe container found !: " << container_exe_tmp);
00465         container_exe = container_exe_tmp.in();
00466         found=1;
00467       }
00468     }
00469   }
00470   catch (ServiceUnreachable&)
00471   {
00472     INFOS("Caught exception: Naming Service Unreachable");
00473     return ret;
00474   }
00475   catch (...)
00476   {
00477     INFOS("Caught unknown exception.");
00478     return ret;
00479   }
00480 
00481   // Step 2: test resource
00482   // Only if an application directory is set
00483   if(hostname != Kernel_Utils::GetHostname() && _isAppliSalomeDefined)
00484   {
00485     // Preparing remote command
00486     std::string command = "";
00487     const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesDescr(resource_selected);
00488     command = getCommandToRunRemoteProcess(resInfo.Protocol, resInfo.HostName, resInfo.UserName);
00489     if (resInfo.AppliPath != "")
00490       command += resInfo.AppliPath;
00491     else
00492     {
00493       ASSERT(getenv("APPLI"));
00494       command += getenv("APPLI");
00495     }
00496     command += "/runRemote.sh ";
00497     ASSERT(getenv("NSHOST")); 
00498     command += getenv("NSHOST"); // hostname of CORBA name server
00499     command += " ";
00500     ASSERT(getenv("NSPORT"));
00501     command += getenv("NSPORT"); // port of CORBA name server
00502     command += " ls /tmp";
00503 
00504     // Launch remote command
00505     int status = system(command.c_str());
00506     if (status != 0)
00507     {
00508       // Error on resource - cannot launch commands
00509       INFOS("[LaunchContainer] Cannot launch commands on machine " << hostname);
00510       INFOS("[LaunchContainer] Command was " << command);
00511 #ifndef WIN32
00512       INFOS("[LaunchContainer] Command status is " << WEXITSTATUS(status));
00513 #endif
00514       return Engines::Container::_nil();
00515     }
00516   }
00517 
00518   // Step 3: start a new container
00519   // Check if a PaCO container
00520   // PaCO++
00521   if (std::string(local_params.parallelLib.in()) != "")
00522   {
00523     ret = StartPaCOPPContainer(params, resource_selected);
00524     return ret;
00525   }
00526   // Other type of containers...
00527   MESSAGE("[GiveContainer] Try to launch a new container on " << resource_selected);
00528   std::string command;
00529   // if a parallel container is launched in batch job, command is: "mpirun -np nbproc -machinefile nodesfile SALOME_MPIContainer"
00530   if( getenv("LIBBATCH_NODEFILE") != NULL && params.isMPI )
00531     command = BuildCommandToLaunchLocalContainer(params, machFile, container_exe);
00532   // if a container is launched on localhost, command is "SALOME_Container" or "mpirun -np nbproc SALOME_MPIContainer"
00533   else if(hostname == Kernel_Utils::GetHostname())
00534     command = BuildCommandToLaunchLocalContainer(params, machFile, container_exe);
00535   // if a container is launched in remote mode, command is "ssh resource_selected SALOME_Container" or "ssh resource_selected mpirun -np nbproc SALOME_MPIContainer"
00536   else
00537     command = BuildCommandToLaunchRemoteContainer(resource_selected, params, container_exe);
00538 
00539   //redirect stdout and stderr in a file
00540 #ifdef WNT
00541   std::string logFilename=getenv("TEMP");
00542   logFilename += "\\";
00543   std::string user = getenv( "USERNAME" );
00544 #else
00545   std::string user = getenv( "USER" );
00546   std::string logFilename="/tmp";
00547   char* val = getenv("SALOME_TMP_DIR");
00548   if(val)
00549   {
00550     struct stat file_info;
00551     stat(val, &file_info);
00552     bool is_dir = S_ISDIR(file_info.st_mode);
00553     if (is_dir)logFilename=val;
00554     else std::cerr << "SALOME_TMP_DIR environment variable is not a directory use /tmp instead" << std::endl;
00555   }
00556   logFilename += "/";
00557 #endif
00558   logFilename += _NS->ContainerName(params)+"_"+ resource_selected +"_"+user;
00559   std::ostringstream tmp;
00560   tmp << "_" << getpid();
00561   logFilename += tmp.str();
00562   logFilename += ".log" ;
00563   command += " > " + logFilename + " 2>&1";
00564 #ifdef WNT
00565   command = "%PYTHONBIN% -c \"import win32pm ; win32pm.spawnpid(r'" + command + "', '')\"";
00566 #else
00567   command += " &";
00568 #endif
00569 
00570   // launch container with a system call
00571   int status=system(command.c_str());
00572 
00573   if (status == -1){
00574     INFOS("[LaunchContainer] command failed (system command status -1): " << command);
00575     RmTmpFile(_TmpFileName); // command file can be removed here
00576     _TmpFileName="";
00577     return Engines::Container::_nil();
00578   }
00579   else if (status == 217){
00580     INFOS("[LaunchContainer] command failed (system command status 217): " << command);
00581     RmTmpFile(_TmpFileName); // command file can be removed here
00582     _TmpFileName="";
00583     return Engines::Container::_nil();
00584   }
00585   else
00586   {
00587     // Step 4: Wait for the container
00588     int count = TIME_OUT_TO_LAUNCH_CONT;
00589     if (getenv("TIMEOUT_TO_LAUNCH_CONTAINER") != 0)
00590     {
00591       std::string new_count_str = getenv("TIMEOUT_TO_LAUNCH_CONTAINER");
00592       int new_count;
00593       std::istringstream ss(new_count_str);
00594       if (!(ss >> new_count))
00595       {
00596         INFOS("[LaunchContainer] TIMEOUT_TO_LAUNCH_CONTAINER should be an int");
00597       }
00598       else
00599         count = new_count;
00600     }
00601     INFOS("[GiveContainer] waiting " << count << " second steps container " << containerNameInNS);
00602     while (CORBA::is_nil(ret) && count)
00603     {
00604 #ifndef WIN32
00605       sleep( 1 ) ;
00606 #else
00607       Sleep(1000);
00608 #endif
00609       count--;
00610       MESSAGE("[GiveContainer] step " << count << " Waiting for container on " << resource_selected);
00611       CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
00612       ret=Engines::Container::_narrow(obj);
00613     }
00614     if (CORBA::is_nil(ret))
00615     {
00616       INFOS("[GiveContainer] was not able to launch container " << containerNameInNS);
00617     }
00618     else
00619     {
00620       // Setting log file name
00621       logFilename=":"+logFilename;
00622       logFilename="@"+Kernel_Utils::GetHostname()+logFilename;
00623       logFilename=user+logFilename;
00624       ret->logfilename(logFilename.c_str());
00625       RmTmpFile(_TmpFileName); // command file can be removed here
00626       _TmpFileName="";
00627     }
00628   }
00629   return ret;
00630 }
00631 
00632 //=============================================================================
00634 
00637 //=============================================================================
00638 
00639 Engines::Container_ptr
00640 SALOME_ContainerManager::FindContainer(const Engines::ContainerParameters& params,
00641                                        const Engines::ResourceList& possibleResources)
00642 {
00643   MESSAGE("[FindContainer] FindContainer on " << possibleResources.length() << " resources");
00644   for(unsigned int i=0; i < possibleResources.length();i++)
00645     {
00646       Engines::Container_ptr cont = FindContainer(params, possibleResources[i].in());
00647       if(!CORBA::is_nil(cont))
00648         return cont;
00649     }
00650   MESSAGE("[FindContainer] no container found");
00651   return Engines::Container::_nil();
00652 }
00653 
00654 //=============================================================================
00656 
00659 //=============================================================================
00660 
00661 Engines::Container_ptr
00662 SALOME_ContainerManager::FindContainer(const Engines::ContainerParameters& params,
00663                                        const std::string& resource)
00664 {
00665   Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource.c_str());
00666   std::string hostname(resource_definition->hostname.in());
00667   std::string containerNameInNS(_NS->BuildContainerNameForNS(params, hostname.c_str()));
00668   MESSAGE("[FindContainer] Try to find a container  " << containerNameInNS << " on resource " << resource);
00669   CORBA::Object_var obj = _NS->Resolve(containerNameInNS.c_str());
00670   try
00671   {
00672     if(obj->_non_existent())
00673       return Engines::Container::_nil();
00674     else
00675       return Engines::Container::_narrow(obj);
00676   }
00677   catch(const CORBA::Exception& e)
00678   {
00679     return Engines::Container::_nil();
00680   }
00681 }
00682 
00683 
bool isPythonContainer(const char* ContainerName);

/*!
 *  Tells whether a container name designates a Python container, i.e.
 *  whether the name ends with the two characters "Py" (case sensitive).
 *
 *  \param ContainerName null-terminated container name; a null pointer is
 *         accepted and treated as "not a Python container"
 *  \return true if the name is at least two characters long and ends with "Py"
 */
bool isPythonContainer(const char* ContainerName)
{
  if (ContainerName == NULL)
    return false;

  // size_t avoids narrowing the result of strlen
  const size_t len = strlen(ContainerName);
  return len >= 2 && strcmp(ContainerName + len - 2, "Py") == 0;
}
00702 
00703 //=============================================================================
00724 //=============================================================================
00725 
00726 std::string
00727 SALOME_ContainerManager::BuildCommandToLaunchRemoteContainer
00728 (const std::string& resource_name,
00729  const Engines::ContainerParameters& params, const std::string& container_exe)
00730 {
00731 
00732   std::string command;
00733   if (!_isAppliSalomeDefined)
00734     command = BuildTempFileToLaunchRemoteContainer(resource_name, params);
00735   else
00736   {
00737     int nbproc;
00738     Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_name.c_str());
00739     std::string hostname(resource_definition->hostname.in());
00740     const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesDescr(resource_name);
00741 
00742     if (params.isMPI)
00743     {
00744       if ( params.nb_proc <= 0 )
00745         nbproc = 1;
00746       else
00747         nbproc = params.nb_proc;
00748     }
00749 
00750     // "ssh -l user machine distantPath/runRemote.sh hostNS portNS WORKINGDIR workingdir \
00751     //  SALOME_Container containerName &"
00752     command = getCommandToRunRemoteProcess(resInfo.Protocol, resInfo.HostName, resInfo.UserName);
00753 
00754     if (resInfo.AppliPath != "")
00755       command += resInfo.AppliPath; // path relative to user@machine $HOME
00756     else
00757     {
00758       ASSERT(getenv("APPLI"));
00759       command += getenv("APPLI"); // path relative to user@machine $HOME
00760     }
00761 
00762     command += "/runRemote.sh ";
00763 
00764     ASSERT(getenv("NSHOST")); 
00765     command += getenv("NSHOST"); // hostname of CORBA name server
00766 
00767     command += " ";
00768     ASSERT(getenv("NSPORT"));
00769     command += getenv("NSPORT"); // port of CORBA name server
00770 
00771     std::string wdir = params.workingdir.in();
00772     if(wdir != "")
00773     {
00774       command += " WORKINGDIR ";
00775       command += " '";
00776       if(wdir == "$TEMPDIR")
00777         wdir="\\$TEMPDIR";
00778       command += wdir; // requested working directory
00779       command += "'"; 
00780     }
00781 
00782     if(params.isMPI)
00783     {
00784       command += " mpirun -np ";
00785       std::ostringstream o;
00786       o << nbproc << " ";
00787       command += o.str();
00788 #ifdef WITHLAM
00789       command += "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
00790 #elif defined(WITHOPENMPI)
00791       if( getenv("OMPI_URI_FILE") == NULL )
00792         command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace";
00793       else{
00794         command += "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:";
00795         command += getenv("OMPI_URI_FILE");
00796       }
00797 #elif defined(WITHMPICH)
00798       command += "-nameserver " + Kernel_Utils::GetHostname();
00799 #endif        
00800       command += " SALOME_MPIContainer ";
00801     }
00802     else
00803       command += " " +container_exe+ " ";
00804 
00805     command += _NS->ContainerName(params);
00806     command += " -";
00807     AddOmninamesParams(command);
00808 
00809     MESSAGE("command =" << command);
00810   }
00811 
00812   return command;
00813 }
00814 
00815 //=============================================================================
00819 //=============================================================================
00820 std::string
00821 SALOME_ContainerManager::BuildCommandToLaunchLocalContainer
00822 (const Engines::ContainerParameters& params, const std::string& machinesFile, const std::string& container_exe)
00823 {
00824   _TmpFileName = BuildTemporaryFileName();
00825   std::string command;
00826   int nbproc = 0;
00827 
00828   std::ostringstream o;
00829 
00830   if (params.isMPI)
00831     {
00832       o << "mpirun -np ";
00833 
00834       if ( params.nb_proc <= 0 )
00835         nbproc = 1;
00836       else
00837         nbproc = params.nb_proc;
00838 
00839       o << nbproc << " ";
00840 
00841       if( getenv("LIBBATCH_NODEFILE") != NULL )
00842         o << "-machinefile " << machinesFile << " ";
00843 
00844 #ifdef WITHLAM
00845       o << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
00846 #elif defined(WITHOPENMPI)
00847       if( getenv("OMPI_URI_FILE") == NULL )
00848         o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace";
00849       else
00850         {
00851           o << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:";
00852           o << getenv("OMPI_URI_FILE");
00853         }
00854 #elif defined(WITHMPICH)
00855       o << "-nameserver " + Kernel_Utils::GetHostname();
00856 #endif
00857 
00858       if (isPythonContainer(params.container_name))
00859         o << " pyMPI SALOME_ContainerPy.py ";
00860       else
00861         o << " SALOME_MPIContainer ";
00862     }
00863 
00864   else
00865     {
00866       std::string wdir=params.workingdir.in();
00867       if(wdir != "")
00868         {
00869           // a working directory is requested
00870           if(wdir == "$TEMPDIR")
00871             {
00872               // a new temporary directory is requested
00873               std::string dir = Kernel_Utils::GetTmpDir();
00874 #ifdef WIN32
00875               o << "cd /d " << dir << std::endl;
00876 #else
00877               o << "cd " << dir << ";";
00878 #endif
00879 
00880             }
00881           else
00882             {
00883               // a permanent directory is requested use it or create it
00884 #ifdef WIN32
00885               o << "mkdir " + wdir << std::endl;
00886               o << "cd /D " + wdir << std::endl;
00887 #else
00888               o << "mkdir -p " << wdir << " && cd " << wdir + ";";
00889 #endif
00890             }
00891         }
00892 
00893       if (isPythonContainer(params.container_name))
00894         o << "SALOME_ContainerPy.py ";
00895       else
00896         o << container_exe + " ";
00897 
00898     }
00899 
00900   o << _NS->ContainerName(params);
00901   o << " -";
00902   AddOmninamesParams(o);
00903 
00904   std::ofstream command_file( _TmpFileName.c_str() );
00905   command_file << o.str();
00906   command_file.close();
00907 
00908 #ifndef WIN32
00909   chmod(_TmpFileName.c_str(), 0x1ED);
00910 #endif
00911   command = _TmpFileName;
00912 
00913   MESSAGE("Command is file ... " << command);
00914   MESSAGE("Command is ... " << o.str());
00915   return command;
00916 }
00917 
00918 
00919 //=============================================================================
00923 //=============================================================================
00924 
00925 void SALOME_ContainerManager::RmTmpFile(std::string& tmpFileName)
00926 {
00927   int lenght = tmpFileName.size();
00928   if ( lenght  > 0)
00929     {
00930 #ifdef WIN32
00931       std::string command = "del /F ";
00932 #else
00933       std::string command = "rm ";      
00934 #endif
00935       if ( lenght > 4 )
00936         command += tmpFileName.substr(0, lenght - 3 );
00937       else
00938         command += tmpFileName;
00939       command += '*';
00940       system(command.c_str());
00941       //if dir is empty - remove it
00942       std::string tmp_dir = Kernel_Utils::GetDirByPath( tmpFileName );
00943       if ( Kernel_Utils::IsEmptyDir( tmp_dir ) )
00944         {
00945 #ifdef WIN32
00946           command = "del /F " + tmp_dir;
00947 #else
00948           command = "rmdir " + tmp_dir;
00949 #endif
00950           system(command.c_str());
00951         }
00952     }
00953 }
00954 
00955 //=============================================================================
00959 //=============================================================================
00960 
00961 void SALOME_ContainerManager::AddOmninamesParams(std::string& command) const
00962 {
00963   CORBA::String_var iorstr = _NS->getIORaddr();
00964   command += "ORBInitRef NameService=";
00965   command += iorstr;
00966 }
00967 
00968 //=============================================================================
00972 //=============================================================================
00973 
00974 void SALOME_ContainerManager::AddOmninamesParams(std::ofstream& fileStream) const
00975 {
00976   CORBA::String_var iorstr = _NS->getIORaddr();
00977   fileStream << "ORBInitRef NameService=";
00978   fileStream << iorstr;
00979 }
00980 
00981 //=============================================================================
00985 //=============================================================================
00986 
00987 void SALOME_ContainerManager::AddOmninamesParams(std::ostringstream& oss) const
00988 {
00989   CORBA::String_var iorstr = _NS->getIORaddr();
00990   oss << "ORBInitRef NameService=";
00991   oss << iorstr;
00992 }
00993 
00994 //=============================================================================
00998 //=============================================================================
00999 
01000 std::string SALOME_ContainerManager::BuildTemporaryFileName() const
01001 {
01002   //build more complex file name to support multiple salome session
01003   std::string aFileName = Kernel_Utils::GetTmpFileName();
01004 #ifndef WIN32
01005   aFileName += ".sh";
01006 #else
01007   aFileName += ".bat";
01008 #endif
01009   return aFileName;
01010 }
01011 
01012 //=============================================================================
01020 //=============================================================================
01021 
01022 std::string
01023 SALOME_ContainerManager::BuildTempFileToLaunchRemoteContainer
01024 (const std::string& resource_name,
01025  const Engines::ContainerParameters& params) throw(SALOME_Exception)
01026 {
01027   int status;
01028 
01029   _TmpFileName = BuildTemporaryFileName();
01030   std::ofstream tempOutputFile;
01031   tempOutputFile.open(_TmpFileName.c_str(), std::ofstream::out );
01032   const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesDescr(resource_name);
01033   tempOutputFile << "#! /bin/sh" << std::endl;
01034 
01035   // --- set env vars
01036 
01037   tempOutputFile << "export SALOME_trace=local" << std::endl; // mkr : 27.11.2006 : PAL13967 - Distributed supervision graphs - Problem with "SALOME_trace"
01038   //tempOutputFile << "source " << resInfo.PreReqFilePath << endl;
01039 
01040   // ! env vars
01041 
01042   if (params.isMPI)
01043     {
01044       tempOutputFile << "mpirun -np ";
01045       int nbproc;
01046 
01047       if ( params.nb_proc <= 0 )
01048         nbproc = 1;
01049       else
01050         nbproc = params.nb_proc;
01051 
01052       std::ostringstream o;
01053 
01054       tempOutputFile << nbproc << " ";
01055 #ifdef WITHLAM
01056       tempOutputFile << "-x PATH,LD_LIBRARY_PATH,OMNIORB_CONFIG,SALOME_trace ";
01057 #elif defined(WITHOPENMPI)
01058       if( getenv("OMPI_URI_FILE") == NULL )
01059         tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace";
01060       else{
01061         tempOutputFile << "-x PATH -x LD_LIBRARY_PATH -x OMNIORB_CONFIG -x SALOME_trace -ompi-server file:";
01062         tempOutputFile << getenv("OMPI_URI_FILE");
01063       }
01064 #elif defined(WITHMPICH)
01065       tempOutputFile << "-nameserver " + Kernel_Utils::GetHostname();
01066 #endif
01067     }
01068 
01069   tempOutputFile << getenv("KERNEL_ROOT_DIR") << "/bin/salome/";
01070 
01071   if (params.isMPI)
01072     {
01073       if (isPythonContainer(params.container_name))
01074         tempOutputFile << " pyMPI SALOME_ContainerPy.py ";
01075       else
01076         tempOutputFile << " SALOME_MPIContainer ";
01077     }
01078 
01079   else
01080     {
01081       if (isPythonContainer(params.container_name))
01082         tempOutputFile << "SALOME_ContainerPy.py ";
01083       else
01084         tempOutputFile << "SALOME_Container ";
01085     }
01086 
01087   tempOutputFile << _NS->ContainerName(params) << " -";
01088   AddOmninamesParams(tempOutputFile);
01089   tempOutputFile << " &" << std::endl;
01090   tempOutputFile.flush();
01091   tempOutputFile.close();
01092 #ifndef WIN32
01093   chmod(_TmpFileName.c_str(), 0x1ED);
01094 #endif
01095 
01096   // --- Build command
01097 
01098   std::string command;
01099 
01100   if (resInfo.Protocol == rsh)
01101     {
01102       command = "rsh ";
01103       std::string commandRcp = "rcp ";
01104       commandRcp += _TmpFileName;
01105       commandRcp += " ";
01106       commandRcp += resInfo.HostName;
01107       commandRcp += ":";
01108       commandRcp += _TmpFileName;
01109       status = system(commandRcp.c_str());
01110     }
01111 
01112   else if (resInfo.Protocol == ssh)
01113     {
01114       command = "ssh ";
01115       std::string commandRcp = "scp ";
01116       commandRcp += _TmpFileName;
01117       commandRcp += " ";
01118       commandRcp += resInfo.HostName;
01119       commandRcp += ":";
01120       commandRcp += _TmpFileName;
01121       status = system(commandRcp.c_str());
01122     }
01123 
01124   else if (resInfo.Protocol == srun)
01125     {
01126       command = "srun -n 1 -N 1 --share --nodelist=";
01127       std::string commandRcp = "rcp ";
01128       commandRcp += _TmpFileName;
01129       commandRcp += " ";
01130       commandRcp += resInfo.HostName;
01131       commandRcp += ":";
01132       commandRcp += _TmpFileName;
01133       status = system(commandRcp.c_str());
01134     }
01135   else
01136     throw SALOME_Exception("Unknown protocol");
01137 
01138   if(status)
01139     throw SALOME_Exception("Error of connection on remote host");    
01140 
01141   command += resInfo.HostName;
01142   _CommandForRemAccess = command;
01143   command += " ";
01144   command += _TmpFileName;
01145 
01146   SCRUTE(command);
01147 
01148   return command;
01149 
01150 }
01151 
01152 std::string SALOME_ContainerManager::GetMPIZeroNode(const std::string machine, const std::string machinesFile)
01153 {
01154   int status;
01155   std::string zeronode;
01156   std::string command;
01157   std::string tmpFile = BuildTemporaryFileName();
01158 
01159   if( getenv("LIBBATCH_NODEFILE") == NULL )
01160     {
01161       if (_isAppliSalomeDefined)
01162         {
01163           const ParserResourcesType& resInfo = _ResManager->GetImpl()->GetResourcesDescr(machine);
01164 
01165           if (resInfo.Protocol == rsh)
01166             command = "rsh ";
01167           else if (resInfo.Protocol == ssh)
01168             command = "ssh ";
01169           else if (resInfo.Protocol == srun)
01170             command = "srun -n 1 -N 1 --share --nodelist=";
01171           else
01172             throw SALOME_Exception("Unknown protocol");
01173 
01174           if (resInfo.UserName != "")
01175             {
01176               command += "-l ";
01177               command += resInfo.UserName;
01178               command += " ";
01179             }
01180 
01181           command += resInfo.HostName;
01182           command += " ";
01183 
01184           if (resInfo.AppliPath != "")
01185             command += resInfo.AppliPath; // path relative to user@machine $HOME
01186           else
01187             {
01188               ASSERT(getenv("APPLI"));
01189               command += getenv("APPLI"); // path relative to user@machine $HOME
01190             }
01191 
01192           command += "/runRemote.sh ";
01193 
01194           ASSERT(getenv("NSHOST")); 
01195           command += getenv("NSHOST"); // hostname of CORBA name server
01196 
01197           command += " ";
01198           ASSERT(getenv("NSPORT"));
01199           command += getenv("NSPORT"); // port of CORBA name server
01200 
01201           command += " mpirun -np 1 hostname -s > " + tmpFile;
01202         }
01203       else
01204         command = "mpirun -np 1 hostname -s > " + tmpFile;
01205     }
01206   else
01207     command = "mpirun -np 1 -machinefile " + machinesFile + " hostname -s > " + tmpFile;
01208 
01209   status = system(command.c_str());
01210   if( status == 0 ){
01211     std::ifstream fp(tmpFile.c_str(),std::ios::in);
01212     while(fp >> zeronode);
01213   }
01214 
01215   RmTmpFile(tmpFile);
01216 
01217   return zeronode;
01218 }
01219 
01220 std::string SALOME_ContainerManager::machinesFile(const int nbproc)
01221 {
01222   std::string tmp;
01223   std::string nodesFile = getenv("LIBBATCH_NODEFILE");
01224   std::string machinesFile = Kernel_Utils::GetTmpFileName();
01225   std::ifstream fpi(nodesFile.c_str(),std::ios::in);
01226   std::ofstream fpo(machinesFile.c_str(),std::ios::out);
01227 
01228   _numInstanceMutex.lock();
01229 
01230   for(int i=0;i<_nbprocUsed;i++)
01231     fpi >> tmp;
01232 
01233   for(int i=0;i<nbproc;i++)
01234     if( fpi >> tmp )
01235       fpo << tmp << std::endl;
01236     else
01237       throw SALOME_Exception("You need more processors than batch session have allocated for you! Unable to launch the mpi container: ");
01238 
01239   _nbprocUsed += nbproc;
01240   fpi.close();
01241   fpo.close();
01242 
01243   _numInstanceMutex.unlock();
01244 
01245   return machinesFile;
01246 
01247 }
01248 
01249 std::set<pid_t> SALOME_ContainerManager::getpidofprogram(const std::string program)
01250 {
01251   std::set<pid_t> thepids;
01252   std::string tmpFile = Kernel_Utils::GetTmpFileName();
01253   std::string cmd;
01254   std::string thepid;
01255   cmd = "pidof " + program + " > " + tmpFile;
01256   system(cmd.c_str());
01257   std::ifstream fpi(tmpFile.c_str(),std::ios::in);
01258   while(fpi >> thepid){
01259     thepids.insert(atoi(thepid.c_str()));
01260   }
01261   return thepids;
01262 }
01263 
01264 std::string SALOME_ContainerManager::getCommandToRunRemoteProcess(AccessProtocolType protocol,
01265                                                                   const std::string & hostname,
01266                                                                   const std::string & username)
01267 {
01268   std::ostringstream command;
01269   switch (protocol)
01270   {
01271   case rsh:
01272     command << "rsh ";
01273     if (username != "")
01274     {
01275       command << "-l " << username << " ";
01276     }
01277     command << hostname << " ";
01278     break;
01279   case ssh:
01280     command << "ssh ";
01281     if (username != "")
01282     {
01283       command << "-l " << username << " ";
01284     }
01285     command << hostname << " ";
01286     break;
01287   case srun:
01288     // no need to redefine the user with srun, the job user is taken by default
01289     // (note: for srun, user id can be specified with " --uid=<user>")
01290     command << "srun -n 1 -N 1 --share --nodelist=" << hostname << " ";
01291     break;
01292   case pbsdsh:
01293     command << "pbsdsh -o -h " << hostname << " ";
01294     break;
01295   case blaunch:
01296     command << "blaunch " << hostname << " ";
01297     break;
01298   default:
01299     throw SALOME_Exception("Unknown protocol");
01300   }
01301 
01302   return command.str();
01303 }
01304 
01305 bool 
01306 SALOME_ContainerManager::checkPaCOParameters(Engines::ContainerParameters & params, std::string resource_selected)
01307 {
01308   bool result = true;
01309  
01310   // Step 1 : check ContainerParameters
01311   // Check container_name, has to be defined
01312   if (std::string(params.container_name.in()) == "")
01313   {
01314     INFOS("[checkPaCOParameters] You must define a container_name to launch a PaCO++ container");
01315     result = false;
01316   }
01317   // Check parallelLib
01318   std::string parallelLib = params.parallelLib.in();
01319   if (parallelLib != "Mpi" && parallelLib != "Dummy")
01320   {
01321     INFOS("[checkPaCOParameters] parallelLib is not correctly defined");
01322     INFOS("[checkPaCOParameters] you can chosse between: Mpi and Dummy");
01323     INFOS("[checkPaCOParameters] you entered: " << parallelLib);
01324     result = false;
01325   }
01326   // Check nb_proc
01327   if (params.nb_proc <= 0)
01328   {
01329     INFOS("[checkPaCOParameters] You must define a nb_proc > 0");
01330     result = false;
01331   }
01332 
01333   // Step 2 : check resource_selected
01334   Engines::ResourceDefinition_var resource_definition = _ResManager->GetResourceDefinition(resource_selected.c_str());
01335   std::string protocol = resource_definition->protocol.in();
01336   std::string username = resource_definition->username.in();
01337   std::string applipath = resource_definition->applipath.in();
01338 
01339   if (protocol == "" || username == "" || applipath == "")
01340   {
01341     INFOS("[checkPaCOParameters] resource selected is not well defined");
01342     INFOS("[checkPaCOParameters] resource name: " << resource_definition->name.in());
01343     INFOS("[checkPaCOParameters] resource hostname: " << resource_definition->hostname.in());
01344     INFOS("[checkPaCOParameters] resource protocol: " << protocol);
01345     INFOS("[checkPaCOParameters] resource username: " << username);
01346     INFOS("[checkPaCOParameters] resource applipath: " << applipath);
01347     result = false;
01348   }
01349 
01350   return result;
01351 }
01352 #ifdef WITH_PACO_PARALLEL
01353 
01354 //=============================================================================
01360 //=============================================================================
01361 Engines::Container_ptr
01362 SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters& params_const,
01363                                               std::string resource_selected)
01364 {
01365   CORBA::Object_var obj;
01366   PaCO::InterfaceManager_var container_proxy;
01367   Engines::Container_ptr ret = Engines::Container::_nil();
01368   Engines::ContainerParameters params(params_const);
01369   params.resource_params.name = CORBA::string_dup(resource_selected.c_str());
01370 
01371   // Step 0 : Check parameters
01372   if (!checkPaCOParameters(params, resource_selected))
01373   {
01374     INFOS("[StartPaCOPPContainer] check parameters failed ! see logs...");
01375     return ret;
01376   }
01377 
01378   // Step 1 : Starting a new parallel container !
01379   INFOS("[StartPaCOPPContainer] Starting a PaCO++ parallel container");
01380   INFOS("[StartPaCOPPContainer] on resource : " << resource_selected);
01381 
01382   // Step 2 : Get a MachineFile for the parallel container
01383   std::string machine_file_name = _ResManager->getMachineFile(resource_selected, 
01384                                                               params.nb_proc,
01385                                                               params.parallelLib.in());
01386 
01387   if (machine_file_name == "")
01388   {
01389     INFOS("[StartPaCOPPContainer] Machine file generation failed");
01390     return ret;
01391   }
01392 
01393   // Step 3 : starting parallel container proxy
01394   std::string command_proxy("");
01395   std::string proxy_machine;
01396   try 
01397   {
01398     command_proxy = BuildCommandToLaunchPaCOProxyContainer(params, machine_file_name, proxy_machine);
01399   }
01400   catch(const SALOME_Exception & ex)
01401   {
01402     INFOS("[StartPaCOPPContainer] Exception in BuildCommandToLaunchPaCOContainer");
01403     INFOS(ex.what());
01404     return ret;
01405   }
01406   obj = LaunchPaCOProxyContainer(command_proxy, params, proxy_machine);
01407   if (CORBA::is_nil(obj))
01408   {
01409     INFOS("[StartPaCOPPContainer] LaunchPaCOContainer for proxy returns NIL !");
01410     return ret;
01411   }
01412   container_proxy = PaCO::InterfaceManager::_narrow(obj);
01413   MESSAGE("[StartPaCOPPContainer] PaCO container proxy is launched");
01414 
01415   // Step 4 : starting parallel container nodes
01416   std::string command_nodes("");
01417   SALOME_ContainerManager::actual_launch_machine_t nodes_machines;
01418   try 
01419   {
01420     command_nodes = BuildCommandToLaunchPaCONodeContainer(params, machine_file_name, nodes_machines, proxy_machine);
01421   }
01422   catch(const SALOME_Exception & ex)
01423   {
01424     INFOS("[StarPaCOPPContainer] Exception in BuildCommandToLaunchPaCONodeContainer");
01425     INFOS(ex.what());
01426     return ret;
01427   }
01428 
01429   std::string container_generic_node_name = std::string(params.container_name.in()) + std::string("Node");
01430   bool result = LaunchPaCONodeContainer(command_nodes, params, container_generic_node_name, nodes_machines);
01431   if (!result)
01432   {
01433     INFOS("[StarPaCOPPContainer] LaunchPaCONodeContainer failed !");
01434     // Il faut tuer le proxy
01435     try 
01436     {
01437       Engines::Container_var proxy = Engines::Container::_narrow(container_proxy);
01438       proxy->Shutdown();
01439     }
01440     catch (...)
01441     {
01442       INFOS("[StarPaCOPPContainer] Exception catched from proxy Shutdown...");
01443     }
01444     return ret;
01445   }
01446 
01447   // Step 4 : connecting nodes and the proxy to actually create a parallel container
01448   for (int i = 0; i < params.nb_proc; i++) 
01449   {
01450     std::ostringstream tmp;
01451     tmp << i;
01452     std::string proc_number = tmp.str();
01453     std::string container_node_name = container_generic_node_name + proc_number;
01454 
01455     std::string theNodeMachine(nodes_machines[i]);
01456     std::string containerNameInNS = _NS->BuildContainerNameForNS(container_node_name.c_str(), theNodeMachine.c_str());
01457     obj = _NS->Resolve(containerNameInNS.c_str());
01458     if (CORBA::is_nil(obj)) 
01459     {
01460       INFOS("[StarPaCOPPContainer] CONNECTION FAILED From Naming Service !");
01461       INFOS("[StarPaCOPPContainer] Container name is " << containerNameInNS);
01462       return ret;
01463     }
01464     try
01465     {
01466       MESSAGE("[StarPaCOPPContainer] Deploying node : " << container_node_name);
01467       PaCO::InterfaceParallel_var node = PaCO::InterfaceParallel::_narrow(obj);
01468       node->deploy();
01469       MESSAGE("[StarPaCOPPContainer] node " << container_node_name << " is deployed");
01470     }
01471     catch(CORBA::SystemException& e)
01472     {
01473       INFOS("[StarPaCOPPContainer] Exception in deploying node : " << containerNameInNS);
01474       INFOS("CORBA::SystemException : " << e);
01475       return ret;
01476     }
01477     catch(CORBA::Exception& e)
01478     {
01479       INFOS("[StarPaCOPPContainer] Exception in deploying node : " << containerNameInNS);
01480       INFOS("CORBA::Exception" << e);
01481       return ret;
01482     }
01483     catch(...)
01484     {
01485       INFOS("[StarPaCOPPContainer] Exception in deploying node : " << containerNameInNS);
01486       INFOS("Unknown exception !");
01487       return ret;
01488     }
01489   }
01490 
01491   // Step 5 : starting parallel container
01492   try 
01493   {
01494     MESSAGE ("[StarPaCOPPContainer] Starting parallel object");
01495     container_proxy->start();
01496     MESSAGE ("[StarPaCOPPContainer] Parallel object is started");
01497     ret = Engines::Container::_narrow(container_proxy);
01498   }
01499   catch(CORBA::SystemException& e)
01500   {
01501     INFOS("Caught CORBA::SystemException. : " << e);
01502   }
01503   catch(PortableServer::POA::ServantAlreadyActive&)
01504   {
01505     INFOS("Caught CORBA::ServantAlreadyActiveException");
01506   }
01507   catch(CORBA::Exception&)
01508   {
01509     INFOS("Caught CORBA::Exception.");
01510   }
01511   catch(std::exception& exc)
01512   {
01513     INFOS("Caught std::exception - "<<exc.what()); 
01514   }
01515   catch(...)
01516   {
01517     INFOS("Caught unknown exception.");
01518   }
01519   return ret;
01520 }
01521 
01522 std::string 
01523 SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::ContainerParameters& params,
01524                                                                 std::string machine_file_name,
01525                                                                 std::string & proxy_hostname)
01526 {
01527   // In the proxy case, we always launch a Dummy Proxy
01528   std::string exe_name = "SALOME_ParallelContainerProxyDummy";
01529   std::string container_name = params.container_name.in();
01530 
01531   // Convert nb_proc in string
01532   std::ostringstream tmp_string;
01533   tmp_string << params.nb_proc;
01534   std::string nb_proc_str = tmp_string.str();
01535 
01536   // Get resource definition
01537   Engines::ResourceDefinition_var resource_definition = 
01538     _ResManager->GetResourceDefinition(params.resource_params.name);
01539 
01540   // Choose hostname
01541   std::string hostname;
01542   std::ifstream machine_file(machine_file_name.c_str());
01543   std::getline(machine_file, hostname, ' ');
01544   size_t found = hostname.find('\n');
01545   if (found!=std::string::npos)
01546     hostname.erase(found, 1); // Remove \n
01547   proxy_hostname = hostname;
01548   MESSAGE("[BuildCommandToLaunchPaCOProxyContainer] machine file name extracted is " << hostname);
01549 
01550   // Remote execution
01551   bool remote_execution = false;
01552   if (hostname != std::string(Kernel_Utils::GetHostname()))
01553   {
01554     MESSAGE("[BuildCommandToLaunchPaCOProxyContainer] remote machine case detected !");
01555     remote_execution = true;
01556   }
01557   
01558   // Log environnement
01559   std::string log_type("");
01560   char * get_val = getenv("PARALLEL_LOG");
01561   if (get_val)
01562     log_type = get_val;
01563 
01564   // Generating the command
01565   std::string command_begin("");
01566   std::string command_end("");
01567   std::ostringstream command;
01568 
01569   LogConfiguration(log_type, "proxy", container_name, hostname, command_begin, command_end);
01570   command << command_begin;
01571 
01572   // Adding connection command
01573   // We can only have a remote execution with
01574   // a SALOME application
01575   if (remote_execution)
01576   {
01577     ASSERT(getenv("NSHOST")); 
01578     ASSERT(getenv("NSPORT"));
01579 
01580     command << resource_definition->protocol.in();
01581     command << " -l ";
01582     command << resource_definition->username.in();
01583     command << " " << hostname;
01584     command << " " << resource_definition->applipath.in();
01585     command << "/runRemote.sh ";
01586     command << getenv("NSHOST") << " "; // hostname of CORBA name server
01587     command << getenv("NSPORT") << " "; // port of CORBA name server
01588   }
01589 
01590   command << exe_name;
01591   command << " " << container_name;
01592   command << " Dummy";
01593   command << " " << hostname;
01594   command << " " << nb_proc_str;
01595   command << " -";
01596   AddOmninamesParams(command);
01597 
01598   // Final command
01599   command << command_end;
01600   MESSAGE("[BuildCommandToLaunchPaCOProxyContainer] Command is: " << command.str());
01601 
01602   return command.str();
01603 }
01604 
01605 std::string 
01606 SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::ContainerParameters& params,
01607                                                                const std::string & machine_file_name,
01608                                                                SALOME_ContainerManager::actual_launch_machine_t & vect_machine,
01609                                                                const std::string & proxy_hostname)
01610 {
01611   // Name of exe
01612   std::string exe_name = "SALOME_ParallelContainerNode";
01613   exe_name += params.parallelLib.in();
01614   std::string container_name = params.container_name.in();
01615 
01616   // Convert nb_proc in string
01617   std::ostringstream nb_proc_stream;
01618   nb_proc_stream << params.nb_proc;
01619 
01620   // Get resource definition
01621   Engines::ResourceDefinition_var resource_definition = 
01622     _ResManager->GetResourceDefinition(params.resource_params.name);
01623   
01624   // Log environnement
01625   std::string log_type("");
01626   char * get_val = getenv("PARALLEL_LOG");
01627   if (get_val)
01628     log_type = get_val;
01629 
01630   // Now the command is different according to paralleLib
01631   std::ostringstream command_nodes;
01632   std::ifstream machine_file(machine_file_name.c_str());
01633   if (std::string(params.parallelLib.in()) == "Dummy")
01634   {
01635     for (int i= 0; i < params.nb_proc; i++)
01636     {
01637       // Choose hostname
01638       std::string hostname;
01639       std::getline(machine_file, hostname);
01640       MESSAGE("[BuildCommandToLaunchPaCONodeContainer] machine file name extracted is " << hostname);
01641 
01642       // Remote execution
01643       bool remote_execution = false;
01644       if (hostname != std::string(Kernel_Utils::GetHostname()))
01645       {
01646         MESSAGE("[BuildCommandToLaunchPaCONodeContainer] remote machine case detected !");
01647         remote_execution = true;
01648       }
01649 
01650       // For each node we have a new command
01651       // Generating the command
01652       std::ostringstream command_node_stream;
01653       std::string command_node_begin("");
01654       std::string command_node_end("");
01655       std::ostringstream node_number;
01656       node_number << i;
01657       std::string container_node_name = container_name + node_number.str();
01658       LogConfiguration(log_type, "node", container_node_name, hostname, command_node_begin, command_node_end);
01659 
01660       // Adding connection command
01661       // We can only have a remote execution with
01662       // a SALOME application
01663       if (remote_execution)
01664       {
01665         ASSERT(getenv("NSHOST")); 
01666         ASSERT(getenv("NSPORT"));
01667 
01668         command_node_stream << resource_definition->protocol.in();
01669         command_node_stream << " -l ";
01670         command_node_stream << resource_definition->username.in();
01671         command_node_stream << " " << hostname;
01672         command_node_stream << " " << resource_definition->applipath.in();
01673         command_node_stream << "/runRemote.sh ";
01674         command_node_stream << getenv("NSHOST") << " "; // hostname of CORBA name server
01675         command_node_stream << getenv("NSPORT") << " "; // port of CORBA name server
01676       }
01677 
01678       command_node_stream << exe_name;
01679       command_node_stream << " " << container_name;
01680       command_node_stream << " " << params.parallelLib.in();
01681       command_node_stream << " " << proxy_hostname;
01682       command_node_stream << " " << node_number.str();
01683       command_node_stream << " -";
01684       AddOmninamesParams(command_node_stream);
01685 
01686       command_nodes << command_node_begin << command_node_stream.str() << command_node_end;
01687       vect_machine.push_back(hostname);
01688     }
01689   }
01690 
01691   else if (std::string(params.parallelLib.in()) == "Mpi")
01692   {
01693     // Choose hostname
01694     std::string hostname;
01695     std::getline(machine_file, hostname, ' ');
01696     MESSAGE("[BuildCommandToLaunchPaCONodeContainer] machine file name extracted is " << hostname);
01697 
01698     // Remote execution
01699     bool remote_execution = false;
01700     if (hostname != std::string(Kernel_Utils::GetHostname()))
01701     {
01702       MESSAGE("[BuildCommandToLaunchPaCONodeContainer] remote machine case detected !");
01703       remote_execution = true;
01704     }
01705 
01706     // In case of Mpi and Remote, we copy machine_file in the applipath
01707     // scp mpi_machine_file user@machine:Path
01708     std::ostringstream command_remote_stream;
01709     std::string::size_type last = machine_file_name.find_last_of("/");
01710     if (last == std::string::npos)
01711       last = -1;
01712 
01713     std::string protocol = resource_definition->protocol.in();
01714     if (protocol == "rsh")
01715       command_remote_stream << "rcp ";
01716     else 
01717       command_remote_stream << "scp ";
01718     command_remote_stream << machine_file_name << " ";
01719     command_remote_stream << resource_definition->username.in() << "@";
01720     command_remote_stream << hostname << ":" << resource_definition->applipath.in();
01721     command_remote_stream <<  "/" << machine_file_name.substr(last+1);
01722 
01723     int status = system(command_remote_stream.str().c_str());
01724     if (status == -1)
01725     {
01726       INFOS("copy of the MPI machine file failed ! - sorry !");
01727       return "";
01728     }
01729 
01730     // Generating the command
01731     std::string command_begin("");
01732     std::string command_end("");
01733 
01734     LogConfiguration(log_type, "nodes", container_name, hostname, command_begin, command_end);
01735     command_nodes << command_begin;
01736 
01737     // Adding connection command
01738     // We can only have a remote execution with
01739     // a SALOME application
01740     if (remote_execution)
01741     {
01742       ASSERT(getenv("NSHOST")); 
01743       ASSERT(getenv("NSPORT"));
01744 
01745       command_nodes << resource_definition->protocol.in();
01746       command_nodes << " -l ";
01747       command_nodes << resource_definition->username.in();
01748       command_nodes << " " << hostname;
01749       command_nodes << " " << resource_definition->applipath.in();
01750       command_nodes << "/runRemote.sh ";
01751       command_nodes << getenv("NSHOST") << " "; // hostname of CORBA name server
01752       command_nodes << getenv("NSPORT") << " "; // port of CORBA name server
01753     }
01754 
01755     if (std::string(resource_definition->mpiImpl.in()) == "lam")
01756     {
01757       command_nodes << "mpiexec -ssi boot ";
01758       command_nodes << "-machinefile "  << machine_file_name << " "; 
01759       command_nodes <<  "-n " << params.nb_proc;
01760     }
01761     else
01762     {
01763       command_nodes << "mpirun -np " << params.nb_proc;
01764     }
01765     command_nodes << " " << exe_name;
01766     command_nodes << " " << container_name;
01767     command_nodes << " " << params.parallelLib.in();
01768     command_nodes << " " << proxy_hostname;
01769     command_nodes << " -";
01770     AddOmninamesParams(command_nodes);
01771 
01772     // We don't put hostname, because nodes are registered in the resource of the proxy
01773     for (int i= 0; i < params.nb_proc; i++)
01774       vect_machine.push_back(proxy_hostname); 
01775 
01776     command_nodes << command_end;
01777   }
01778   return command_nodes.str();
01779 }
01780 
01781 void
01782 SALOME_ContainerManager::LogConfiguration(const std::string & log_type,
01783                                           const std::string & exe_type,
01784                                           const std::string & container_name,
01785                                           const std::string & hostname,
01786                                           std::string & begin, 
01787                                           std::string & end)
01788 {
01789   if(log_type == "xterm")
01790   {
01791     begin = "xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH;";
01792     end   = "\"&";
01793   }
01794   else if(log_type == "xterm_debug")
01795   {
01796     begin = "xterm -e \"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH; export PATH=$PATH;";
01797     end   = "; cat \" &";
01798   }
01799   else
01800   {
01801     // default into a file...
01802     std::string logFilename = "/tmp/" + container_name + "_" + hostname + "_" + exe_type + "_";
01803     logFilename += std::string(getenv("USER")) + ".log";
01804     end = " > " + logFilename + " 2>&1 & ";
01805   }
01806 }
01807 
01808 CORBA::Object_ptr 
01809 SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, 
01810                                                   const Engines::ContainerParameters& params,
01811                                                   const std::string & hostname)
01812 {
01813   PaCO::InterfaceManager_ptr container_proxy = PaCO::InterfaceManager::_nil();
01814 
01815   MESSAGE("[LaunchPaCOProxyContainer] Launch command");
01816   int status = system(command.c_str());
01817   if (status == -1) {
01818     INFOS("[LaunchPaCOProxyContainer] failed : system command status -1");
01819     return container_proxy;
01820   }
01821   else if (status == 217) {
01822     INFOS("[LaunchPaCOProxyContainer] failed : system command status 217");
01823     return container_proxy;
01824   }
01825 
01826   int count = TIME_OUT_TO_LAUNCH_CONT;
01827   CORBA::Object_var obj = CORBA::Object::_nil();
01828   std::string containerNameInNS = _NS->BuildContainerNameForNS(params.container_name.in(), 
01829                                                                hostname.c_str());
01830   MESSAGE("[LaunchParallelContainer]  Waiting for Parallel Container proxy : " << containerNameInNS);
01831 
01832   while (CORBA::is_nil(obj) && count) 
01833   {
01834     sleep(1);
01835     count--;
01836     obj = _NS->Resolve(containerNameInNS.c_str());
01837   }
01838 
01839   try 
01840   {
01841     container_proxy = PaCO::InterfaceManager::_narrow(obj);
01842   }
01843   catch(CORBA::SystemException& e)
01844   {
01845     INFOS("[StarPaCOPPContainer] Exception in _narrow after LaunchParallelContainer for proxy !");
01846     INFOS("CORBA::SystemException : " << e);
01847     return container_proxy;
01848   }
01849   catch(CORBA::Exception& e)
01850   {
01851     INFOS("[StarPaCOPPContainer] Exception in _narrow after LaunchParallelContainer for proxy !");
01852     INFOS("CORBA::Exception" << e);
01853     return container_proxy;
01854   }
01855   catch(...)
01856   {
01857     INFOS("[StarPaCOPPContainer] Exception in _narrow after LaunchParallelContainer for proxy !");
01858     INFOS("Unknown exception !");
01859     return container_proxy;
01860   }
01861   if (CORBA::is_nil(container_proxy))
01862   {
01863     INFOS("[StarPaCOPPContainer] PaCO::InterfaceManager::_narrow returns NIL !");
01864     return container_proxy;
01865   }
01866   return obj._retn();
01867 }
01868 
01869 //=============================================================================
01879 //=============================================================================
01880 bool
01881 SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, 
01882                                                  const Engines::ContainerParameters& params,
01883                                                  const std::string& name,
01884                                                  SALOME_ContainerManager::actual_launch_machine_t & vect_machine)
01885 {
01886   INFOS("[LaunchPaCONodeContainer] Launch command");
01887   int status = system(command.c_str());
01888   if (status == -1) {
01889     INFOS("[LaunchPaCONodeContainer] failed : system command status -1");
01890     return false;
01891   }
01892   else if (status == 217) {
01893     INFOS("[LaunchPaCONodeContainer] failed : system command status 217");
01894     return false;
01895   }
01896 
01897   INFOS("[LaunchPaCONodeContainer] Waiting for the nodes of the parallel container");
01898   // We are waiting all the nodes
01899   for (int i = 0; i < params.nb_proc; i++) 
01900   {
01901     CORBA::Object_var obj = CORBA::Object::_nil();
01902     std::string theMachine(vect_machine[i]);
01903     // Name of the node
01904     std::ostringstream tmp;
01905     tmp << i;
01906     std::string proc_number = tmp.str();
01907     std::string container_node_name = name + proc_number;
01908     std::string containerNameInNS = _NS->BuildContainerNameForNS((char*) container_node_name.c_str(), theMachine.c_str());
01909     INFOS("[LaunchPaCONodeContainer]  Waiting for Parallel Container node " << containerNameInNS << " on " << theMachine);
01910     int count = TIME_OUT_TO_LAUNCH_CONT;
01911     while (CORBA::is_nil(obj) && count) {
01912       sleep(1) ;
01913       count-- ;
01914       obj = _NS->Resolve(containerNameInNS.c_str());
01915     }
01916     if (CORBA::is_nil(obj))
01917     {
01918       INFOS("[LaunchPaCONodeContainer] Launch of node failed (or not found) !");
01919       return false;
01920     }
01921   }
01922   return true;
01923 }
01924 
01925 #else
01926 
01927 Engines::Container_ptr
01928 SALOME_ContainerManager::StartPaCOPPContainer(const Engines::ContainerParameters& params,
01929                                               std::string resource_selected)
01930 {
01931   Engines::Container_ptr ret = Engines::Container::_nil();
01932   INFOS("[StarPaCOPPContainer] is disabled !");
01933   INFOS("[StarPaCOPPContainer] recompile SALOME Kernel to enable PaCO++ parallel extension");
01934   return ret;
01935 }
01936 
01937 std::string 
01938 SALOME_ContainerManager::BuildCommandToLaunchPaCOProxyContainer(const Engines::ContainerParameters& params,
01939                                                                 std::string machine_file_name,
01940                                                                 std::string & proxy_hostname)
01941 {
01942   return "";
01943 }
01944 
01945 std::string 
01946 SALOME_ContainerManager::BuildCommandToLaunchPaCONodeContainer(const Engines::ContainerParameters& params,
01947                                                                const std::string & machine_file_name,
01948                                                                SALOME_ContainerManager::actual_launch_machine_t & vect_machine, 
01949                                                                const std::string & proxy_hostname) 
01950 {
01951   return "";
01952 }
01953 void 
01954 SALOME_ContainerManager::LogConfiguration(const std::string & log_type,
01955                                           const std::string & exe_type,
01956                                           const std::string & container_name,
01957                                           const std::string & hostname,
01958                                           std::string & begin, 
01959                                           std::string & end)
01960 {
01961 }
01962 
01963 CORBA::Object_ptr 
01964 SALOME_ContainerManager::LaunchPaCOProxyContainer(const std::string& command, 
01965                                                   const Engines::ContainerParameters& params,
01966                                                   const std::string& hostname)
01967 {
01968   CORBA::Object_ptr ret = CORBA::Object::_nil();
01969   return ret;
01970 }
01971 
01972 bool 
01973 SALOME_ContainerManager::LaunchPaCONodeContainer(const std::string& command, 
01974                         const Engines::ContainerParameters& params,
01975                         const std::string& name,
01976                         SALOME_ContainerManager::actual_launch_machine_t & vect_machine)
01977 {
01978   return false;
01979 }
01980 #endif
01981