Back to index

nagios-plugins  1.4.16
check_load.c
Go to the documentation of this file.
00001 /*                                                                          
00002         Source: check_load.c
00003         Author: Adam G. Bowen (agbowen@bealenet.com)
00004         $Revision: 1.7 $
00005         $Date: 1999/05/17 14:35:26 $
00006  
00007  * Program: System load plugin for NetSaint
00008  * License: GPL
00009  * Copyright (c) 1999 Adam G. Bowen (agbowen@bealenet.com)
00010  *
00011  * Description:
00012  *
00013  * This plugin will check the system load on the remote host at ipaddress and
00014  * generate an alert if average is above one of the threshold values.
00015  *
00016  * Other errors result in a STATE_UNKNOWN return.
00017  *
00018  * Command line:
00019  *
00020  * check_load ipaddress sys_type
00021  * check_load ipaddress sys_type <w1> <c1> <w5> <c5> <w15> <c15>
00022  *
00023  * Required input:
00024  *
00025  * ipaddress = The ipaddress of the remote system to run the check on.
00026  * sys_type = The remote system type.
00027  *
00028  * Optional input:
00029  *
00030  * <w1> = 1 min load average necessary to result in a WARNING state.
00031  * <c1> = 1 min load average necessary to result in a CRITICAL state.
00032  * <w5> = 5 min load average necessary to result in a WARNING state.
00033  * <c5> = 5 min load average necessary to result in a CRITICAL state.
00034  * <w15> = 15 min load average necessary to result in a WARNING state.
00035  * <c15> = 15 min load average necessary to result in a CRITICAL state.
00036  *
00037  * Notes:
00038  *
00039  * If <w1>, <c1>, <w5>, <c5>, <w15> and <c15> are not passed on the command
00040  * line, they will be set to the default values in the check_load config file.
00041  *
00042  * sys_type is used to determine which config file to use to generate the
00043  * remote command.
00044  *
00045  * The configuration file /usr/local/netsaint/config/check_load/local
00046  * contains the following values:
00047  *
00048  * RSH_COMMAND|<location of rsh command on netsaint system>|
00049  * WARN_LOAD_1|<default 1 min warn average>|
00050  * CRIT_LOAD_1|<default 1 min crit average>|
00051  * WARN_LOAD_5|<default 5 min warn average>|
00052  * CRIT_LOAD_5|<default 5 min crit average>|
00053  * WARN_LOAD_15|<default 15 min warn average>|
00054  * CRIT_LOAD_15|<default 15 min crit average>|
00055  *
00056  * The configuration file /usr/local/netsaint/config/check_load/<sys_type>
00057  * contains the following values:
00058  *
00059  * UPTIME_COMMAND|<location of uptime command on system sys_type>|
00060  *
00061  * $Log: check_load.c,v $
00062  * Revision 1.7  1999/05/17 14:35:26  netsaint
00063  * Changed the plugin to use a separate config directory.  This directory is the
00064  * CONFIG_DIR/command_name directory.
00065  *
00066  * Revision 1.6  1999/05/14 03:01:17  netsaint
00067  * Added the following integer variable:
00068  *     socket_name
00069  * Changed the call check_net to open_socket.  Added a call to recv_socket and
00070  * close_socket.  The check_net subroutine was changed to provide more
00071  * flexibility.
00072  * Added a call to the subroutine get_command_name.
00073  * Changed the error checking routines to ensure that any error causes the
00074  * program to terminate.
00075  *
00076  * Revision 1.5  1999/05/07 15:30:26  netsaint
00077  * Removed the char variable error_buffer and the FILE *error_fp variable.
00078  * These variables are no longer needed since the printing of the error file is
00079  * handled in a subroutine.
00080  * Added a call to the check_output_file subroutine.  This routine checks the
00081  * status of the output file.  Also removed the struct stat file_stat variable.
00082  * Added a call to the check_consistency subroutine.  This subroutine checks
00083  * that the warn value is less than the critical value.
00084  *
00085  * Revision 1.4  1999/05/03 14:48:17  netsaint
00086  * Changed the config_file_prefix to config_file_fs_prefix.  Added the following
00087  * character variables:
00088  *     config_file_net_prefix
00089  *     config_file_net
00090  *     expected
00091  *     protocol
00092  * Added the following integer variables:
00093  *     result
00094  *     telnet_port
00095  * Added signal handler for the SIGALRM signal.  All these changes were to
00096  * prevent the plugin from hanging when attempting to perform check on a remote
00097  * system that is down or not working properly.  Prior to issuing the rsh
00098  * command, the plugin will attempt to establish a telnet session to the
00099  * remote system; if that fails, the plugin will not issue the rsh command.  Prior to establishing the telnet
00100  * connection, an alarm is set.  If the telnet connection does not return
00101  * control to the plugin before the timer expires, a SIGALRM signal will be
00102  * sent to the process which will cause the plugin to exit with a
00103  * STATE_CRITICAL error.
00104  * Added struct stat file_stat.
00105  * This is used to get the size of the out_put_file.  If the size is zero,
00106  * the plugin exits with a STATE_UNKNOWN.  Also change the error_file printing
00107  * to a subroutine in the plugins.h file.
00108  *
00109  * Revision 1.3  1999/04/28 15:16:27  netsaint
00110  * Added a </dev/null redirect of the input to the rsh command.  This was to
00111  * prevent the netsaint program from stopping when running this plugin.  Also
00112  * changed the strcat of CONFIG_DIR to the local and remote config_files to
00113  * strcpy.  This was to ensure that no leading characters were left in the
00114  * variable before adding the CONFIG_DIR location.
00115  *
00116  * Revision 1.2  1999/04/27 12:03:32  netsaint
00117  * Removed the awk_command and awk_options variables.  Changed the program to
00118  * look for the string load average: from the output returned from the uptime
00119  * command.  This will prevent the program from crashing when checking the
00120  * load of a system that has been up for less than a day.
00121  *
00122  * Revision 1.1  1999/04/23 20:54:23  netsaint
00123  * Initial revision
00124  *
00125  *
00126 */
00127 
#include "/usr/local/src/netsaint/include/plugins.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
00129 
00130 int main(int argc, char *argv[])
00131 {
00132   char expected[MAX_CHARS]="";
00133   char token_sep[] = ":,";
00134 
00135   char command_line[MAX_CHARS];
00136   char command_name[MAX_CHARS];
00137   char config_file_local[MAX_CHARS];
00138   char config_file_remote[MAX_CHARS];
00139   char config_file_net[MAX_CHARS];
00140   char error_file[MAX_CHARS];
00141   char input_buffer[MAX_CHARS];
00142   char ip_address[MAX_CHARS];
00143   char load_crit_1[MAX_CHARS];
00144   char load_crit_5[MAX_CHARS];
00145   char load_crit_15[MAX_CHARS];
00146   char load_warn_1[MAX_CHARS];
00147   char load_warn_5[MAX_CHARS];
00148   char load_warn_15[MAX_CHARS];
00149   char out_put_file[MAX_CHARS];
00150   char protocol[MAX_CHARS];
00151   char port_telnet[MAX_CHARS];
00152   char rsh_command[MAX_CHARS];
00153   char sub_string[MAX_CHARS];
00154   char system_name[MAX_CHARS];
00155   char temp_value[MAX_CHARS];
00156   char uptime_command[MAX_CHARS];
00157 
00158   FILE *out_put_fp;
00159 
00160   float crit_load_1;
00161   float crit_load_5;
00162   float crit_load_15;
00163   float load_avrg_1;
00164   float load_avrg_5;
00165   float load_avrg_15;
00166   float warn_load_1;
00167   float warn_load_5;
00168   float warn_load_15;
00169 
00170   int get_defaults;
00171   int result;
00172   int return_value;
00173   int socket_name;
00174   int telnet_port;
00175 
00176   /* Initialize alarm signal handling */
00177 
00178   signal(SIGALRM,alarm_signal);
00179 
00180   strcpy(command_name,get_command_name(argv[0]));
00181   if(!((argc==3) || (argc==9)))
00182   {
00183     printf("\n");
00184     printf(" Incorrect number of arguments supplied\n");
00185     printf("\n");
00186     printf(" System load plugin for NetSaint\n");
00187     printf(" Copyright (c) 1999 Adam G. Bowen (agbowen@bealenet.com)\n");
00188     printf(" $Revision: 1.7 $\n");
00189     printf(" Last Modified $Date: 1999/05/17 14:35:26 $\n");
00190     printf(" License: GPL\n");
00191     printf("\n");
00192     printf(" Description:\n");
00193     printf("\n");
00194     printf(" This plugin will check the system load on the remote host at ipaddress and\n");
00195     printf(" generate an alert if average is above one of the threshold values.\n");
00196     printf("\n");
00197     printf(" Usage: %s ipaddress sys_type\n",command_name);
00198     printf(" Usage: %s ipaddress sys_type <w1> <c1> <w5> <c5> <w15> <c15>\n",command_name);
00199     printf("\n");
00200     printf(" Required input:\n");
00201     printf("\n");
00202     printf(" ipaddress = The ipaddress of the remote system to run the check on.\n");
00203     printf(" sys_type = The remote system type.\n");
00204     printf("\n");
00205     printf(" Optional input:\n");
00206     printf("\n");
00207     printf(" <w1> = 1 min load average necessary to result in a WARNING state.\n");
00208     printf(" <c1> = 1 min load average necessary to result in a CRITICAL state.\n");
00209     printf(" <w5> = 5 min load average necessary to result in a WARNING state.\n");
00210     printf(" <c5> = 5 min load average necessary to result in a CRITICAL state.\n");
00211     printf(" <w15> = 15 min load average necessary to result in a WARNING state.\n");
00212     printf(" <c15> = 15 min load average necessary to result in a CRITICAL state.\n");
00213     printf("\n");
00214     printf(" If <w1>, <c1>, <w5>, <c5>, <w15> and <c15> are not passed on the command line,\n");
00215     printf(" they will be set to the default values in the %s config file.\n", command_name);
00216     printf("\n");
00217     printf(" sys_type is used to determine which config file to use to generate the\n");
00218     printf(" remote command.\n");
00219     printf("\n");
00220     return_value = STATE_UNKNOWN;
00221   }
00222   else
00223   {
00224     /* Set up config files and get the command line information */
00225 
00226     strcpy(ip_address,argv[1]);
00227     strcpy(system_name,argv[2]);
00228 
00229     strcpy(config_file_local,CONFIG_DIR);
00230     strcpy(config_file_remote,CONFIG_DIR);
00231     strcpy(config_file_net,CONFIG_DIR);
00232     strcat(config_file_local,command_name);
00233     strcat(config_file_remote,command_name);
00234     strcat(config_file_net,CHECK_TELNET);
00235     strcat(config_file_local,"/local");
00236     strcat(config_file_remote,"/");
00237     strcat(config_file_net,"/");
00238     strcat(config_file_remote,system_name);
00239     strcat(config_file_net,system_name);
00240 
00241     if(argc == 3)
00242     {
00243       get_defaults = TRUE;
00244     }
00245     else
00246     {
00247       get_defaults = FALSE;
00248       strcpy(load_warn_1,argv[3]);
00249       strcpy(load_crit_1,argv[4]);
00250       strcpy(load_warn_5,argv[5]);
00251       strcpy(load_crit_5,argv[6]);
00252       strcpy(load_warn_15,argv[7]);
00253       strcpy(load_crit_15,argv[8]);
00254     }
00255 
00256     /* Check if config files exist */
00257 
00258     if (access(config_file_local, EXISTS) != 0 )
00259     {
00260       printf("Config file %s does not exist!\n",config_file_local);
00261       return_value = STATE_UNKNOWN;
00262     }
00263     else if (access(config_file_remote, EXISTS) != 0 )
00264     {
00265       printf("Config file %s does not exist!\n",config_file_remote);
00266       return_value = STATE_UNKNOWN;
00267     }
00268     else if (access(config_file_net, EXISTS) != 0 )
00269     {
00270       printf("Config file %s does not exist!\n",config_file_net);
00271       return_value = STATE_UNKNOWN;
00272     }
00273     else
00274     {
00275       /* Local config file variables */
00276 
00277       if((get_defaults == TRUE) && ((return_value=get_var("WARN_LOAD_1", config_file_local, load_warn_1)) != STATE_OK))
00278       {
00279         printf("WARN_LOAD_1 entry not found in config file %s!\n",config_file_local);
00280       }
00281       else if((get_defaults == TRUE) && ((return_value=get_var("CRIT_LOAD_1", config_file_local, load_crit_1)) != STATE_OK))
00282       {
00283         printf("CRIT_LOAD_1 entry not found in config file %s!\n",config_file_local);
00284       }
00285       else if((get_defaults == TRUE) && ((return_value=get_var("WARN_LOAD_5", config_file_local, load_warn_5)) != STATE_OK))
00286       {
00287         printf("WARN_LOAD_5 entry not found in config file %s!\n",config_file_local);
00288       }
00289       else if((get_defaults == TRUE) && ((return_value=get_var("CRIT_LOAD_5", config_file_local, load_crit_5)) != STATE_OK))
00290       {
00291         printf("CRIT_LOAD_5 entry not found in config file %s!\n",config_file_local);
00292       }
00293       else if((get_defaults == TRUE) && ((return_value=get_var("WARN_LOAD_15", config_file_local, load_warn_15)) != STATE_OK))
00294       {
00295         printf("WARN_LOAD_15 entry not found in config file %s!\n",config_file_local);
00296       }
00297       else if((get_defaults == TRUE) && ((return_value=get_var("CRIT_LOAD_15", config_file_local, load_crit_15)) != STATE_OK))
00298       {
00299         printf("CRIT_LOAD_15 entry not found in config file %s!\n",config_file_local);
00300       }
00301       else if((return_value=get_var("RSH_COMMAND", config_file_local, rsh_command)) != STATE_OK)
00302       {
00303         printf("RSH_COMMAND entry not found in config file %s!\n", config_file_local);
00304       }
00305 
00306       /* Remote config file variables */
00307 
00308       else if((return_value=get_var("UPTIME_COMMAND", config_file_remote, uptime_command)) != STATE_OK)
00309       {
00310         printf("UPTIME_COMMAND entry not found in config file %s!\n", config_file_remote);
00311       }
00312 
00313       /* Network config file variables */
00314 
00315       else if((return_value=get_var("TELNET_PORT", config_file_net, port_telnet)) != STATE_OK)
00316       {
00317         printf("TELNET_PORT entry not found in config file %s!\n",config_file_net);
00318       }
00319       else if((return_value=get_var("TELNET_PROTO", config_file_net, protocol)) != STATE_OK)
00320       {
00321         printf("TELNET_PROTO entry not found in config file %s!\n",config_file_net);
00322       }
00323       else
00324       {
00325 
00326         /* Check alert level consistency */
00327 
00328         warn_load_1=atof(load_warn_1);
00329         crit_load_1=atof(load_crit_1);
00330         warn_load_5=atof(load_warn_5);
00331         crit_load_5=atof(load_crit_5);
00332         warn_load_15=atof(load_warn_15);
00333         crit_load_15=atof(load_crit_15);
00334         if((result=check_consistency(warn_load_1, crit_load_1)) != STATE_OK)
00335         {
00336           return_value = result;
00337         }
00338         else if((result=check_consistency(warn_load_5, crit_load_5)) != STATE_OK)
00339         {
00340           return_value = result;
00341         }
00342         else if((result=check_consistency(warn_load_15, crit_load_15)) != STATE_OK)
00343         {
00344           return_value = result;
00345         }
00346         else
00347         {
00348 
00349           /* Check the network */
00350 
00351           telnet_port=atoi(port_telnet);
00352           alarm(TIME_OUT);
00353           if((result=open_socket(&socket_name, ip_address, telnet_port, protocol)) != STATE_OK)
00354           {
00355             return_value=exit_error(result,ip_address,protocol,telnet_port);
00356           }
00357           else if((result=recv_socket(&socket_name, expected)) != STATE_OK)
00358           {
00359             return_value=exit_error(result,ip_address,protocol,telnet_port);
00360           }
00361           else if((result=close_socket(&socket_name)) != STATE_OK)
00362           {
00363             return_value=exit_error(result,ip_address,protocol,telnet_port);
00364           }
00365           else
00366           {
00367             alarm(0);
00368 
00369             /* Generate out_put and error file names */
00370 
00371             strcpy(out_put_file, tmpnam(NULL));
00372             strcpy(error_file, tmpnam(NULL));
00373  
00374             /* Set the command line and arguments to use for the check */
00375 
00376             sprintf(command_line,"%s %s %s </dev/null >%s 2>%s",rsh_command, ip_address, uptime_command, out_put_file, error_file);
00377 
00378             /* Run the command */
00379   
00380             system(command_line);
00381 
00382             return_value=check_output_file(out_put_file);
00383             if (return_value != STATE_OK)
00384             {
00385               print_error(error_file);
00386             }
00387             else
00388             {
00389               out_put_fp=fopen(out_put_file,"r");
00390 
00391               /* Retrive single line from output file */
00392 
00393               fgets(input_buffer,MAX_CHARS-1,out_put_fp);
00394 
00395               /* Populate variables */
00396 
00397               strcpy(sub_string,strstr(input_buffer,"load average:"));
00398               strcpy(temp_value,strtok(sub_string,token_sep));
00399               load_avrg_1 = atof(strcpy(temp_value,strtok(NULL,token_sep)));
00400               load_avrg_5 = atof(strcpy(temp_value,strtok(NULL,token_sep)));
00401               load_avrg_15 = atof(strcpy(temp_value,strtok(NULL,token_sep)));
00402 
00403               /* Close output file */
00404 
00405               fclose(out_put_fp);
00406       
00407               /* Check the system load against warning and critical levels */
00408 
00409               if((load_avrg_1 >= crit_load_1)||(load_avrg_5 >= crit_load_5)||(load_avrg_15 >= crit_load_15))
00410               {
00411                 return_value=STATE_CRITICAL;
00412               }
00413               else if((load_avrg_1 >= warn_load_1)||(load_avrg_5 >= warn_load_5)||(load_avrg_15 >= warn_load_15))
00414               {
00415                 return_value=STATE_WARNING;
00416               }
00417   
00418               if(return_value==STATE_OK)
00419               {
00420                 printf("System load ok - load average: %.2f, %.2f, %.2f\n", load_avrg_1,load_avrg_5,load_avrg_15);
00421               }
00422               else
00423               {
00424                 printf("System load error - load average: %.2f, %.2f, %.2f\n", load_avrg_1,load_avrg_5,load_avrg_15);
00425               }
00426             }
00427 
00428             /* Remove the output and error files */
00429       
00430             remove(out_put_file);
00431             remove(error_file);
00432           }
00433         }
00434       } 
00435     }
00436   }
00437 
00438   return return_value;
00439 }