Back to index

nagios-plugins  1.4.16
check_nagios.c
Go to the documentation of this file.
00001 /*****************************************************************************
00002 * 
00003 * Nagios check_nagios plugin
00004 * 
00005 * License: GPL
00006 * Copyright (c) 1999-2007 Nagios Plugins Development Team
00007 * 
00008 * Description:
00009 * 
00010 * This file contains the check_nagios plugin
00011 * 
00012 * This plugin checks the status of the Nagios process on the local machine.
00013 * The plugin will check to make sure the Nagios status log is no older than
00014 * the number of minutes specified by the expires option.
00015 * It also checks the process table for a process matching the command
00016 * argument.
00017 * 
00018 * 
00019 * This program is free software: you can redistribute it and/or modify
00020 * it under the terms of the GNU General Public License as published by
00021 * the Free Software Foundation, either version 3 of the License, or
00022 * (at your option) any later version.
00023 * 
00024 * This program is distributed in the hope that it will be useful,
00025 * but WITHOUT ANY WARRANTY; without even the implied warranty of
00026 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00027 * GNU General Public License for more details.
00028 * 
00029 * You should have received a copy of the GNU General Public License
00030 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00031 * 
00032 * 
00033 *****************************************************************************/
00034 
/* Plugin identity strings: progname is passed to np_extra_opts() and
 * print_revision(); copyright and email fill in the COPYRIGHT banner
 * printed by print_help(). */
const char *progname = "check_nagios";
const char *copyright = "1999-2007";
const char *email = "nagiosplug-devel@lists.sourceforge.net";
00038 
00039 #include "common.h"
00040 #include "runcmd.h"
00041 #include "utils.h"
00042 
/* Forward declarations for the helpers defined after main(). */
int process_arguments (int argc, char **argv);
void print_help (void);
void print_usage (void);

/* Settings filled in by process_arguments(). */
char *status_log = NULL;     /* path of the status log to read (-F) */
char *process_string = NULL; /* substring looked for in process arguments (-C) */
int expire_minutes = 0;      /* maximum accepted age of the status log, in minutes (-e) */

/* Verbosity level; incremented once per -v on the command line. */
int verbose = 0;
00052 
00053 int
00054 main (int argc, char **argv)
00055 {
00056        int result = STATE_UNKNOWN;
00057        char input_buffer[MAX_INPUT_BUFFER];
00058        unsigned long latest_entry_time = 0L;
00059        unsigned long temp_entry_time = 0L;
00060        int proc_entries = 0;
00061        time_t current_time;
00062        char *temp_ptr;
00063        FILE *fp;
00064        int procuid = 0;
00065        int procpid = 0;
00066        int procppid = 0;
00067        int procvsz = 0;
00068        int procrss = 0;
00069        float procpcpu = 0;
00070        char procstat[8];
00071 #ifdef PS_USES_PROCETIME
00072        char procetime[MAX_INPUT_BUFFER];
00073 #endif /* PS_USES_PROCETIME */
00074        char procprog[MAX_INPUT_BUFFER];
00075        char *procargs;
00076        int pos, cols;
00077        int expected_cols = PS_COLS - 1;
00078        const char *zombie = "Z";
00079        char *temp_string;
00080        output chld_out, chld_err;
00081        size_t i;
00082 
00083        setlocale (LC_ALL, "");
00084        bindtextdomain (PACKAGE, LOCALEDIR);
00085        textdomain (PACKAGE);
00086 
00087        /* Parse extra opts if any */
00088        argv=np_extra_opts (&argc, argv, progname);
00089 
00090        if (process_arguments (argc, argv) == ERROR)
00091               usage_va(_("Could not parse arguments"));
00092 
00093        /* Set signal handling and alarm timeout */
00094        if (signal (SIGALRM, timeout_alarm_handler) == SIG_ERR) {
00095               usage_va(_("Cannot catch SIGALRM"));
00096        }
00097 
00098        /* handle timeouts gracefully... */
00099        alarm (timeout_interval);
00100 
00101        /* open the status log */
00102        fp = fopen (status_log, "r");
00103        if (fp == NULL) {
00104               die (STATE_CRITICAL, "NAGIOS %s: %s\n", _("CRITICAL"), _("Cannot open status log for reading!"));
00105        }
00106 
00107        /* get the date/time of the last item updated in the log */
00108        while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, fp)) {
00109               if ((temp_ptr = strstr (input_buffer, "created=")) != NULL) {
00110                      temp_entry_time = strtoul (temp_ptr + 8, NULL, 10);
00111                      latest_entry_time = temp_entry_time;
00112                      break;
00113               } else if ((temp_ptr = strtok (input_buffer, "]")) != NULL) {
00114                      temp_entry_time = strtoul (temp_ptr + 1, NULL, 10);
00115                      if (temp_entry_time > latest_entry_time)
00116                             latest_entry_time = temp_entry_time;
00117               }
00118        }
00119        fclose (fp);
00120 
00121        if (verbose >= 2)
00122               printf("command: %s\n", PS_COMMAND);
00123 
00124        /* run the command to check for the Nagios process.. */
00125        if((result = np_runcmd(PS_COMMAND, &chld_out, &chld_err, 0)) != 0)
00126               result = STATE_WARNING;
00127 
00128        /* count the number of matching Nagios processes... */
00129        for(i = 0; i < chld_out.lines; i++) {
00130               cols = sscanf (chld_out.line[i], PS_FORMAT, PS_VARLIST);
00131               /* Zombie processes do not give a procprog command */
00132               if ( cols == (expected_cols - 1) && strstr(procstat, zombie) ) {
00133                      cols = expected_cols;
00134                      /* Set some value for procargs for the strip command further below
00135                       * Seen to be a problem on some Solaris 7 and 8 systems */
00136                      chld_out.line[i][pos] = '\n';
00137                      chld_out.line[i][pos+1] = 0x0;
00138               }
00139               if ( cols >= expected_cols ) {
00140                      asprintf (&procargs, "%s", chld_out.line[i] + pos);
00141                      strip (procargs);
00142 
00143                      /* Some ps return full pathname for command. This removes path */
00144                      temp_string = strtok ((char *)procprog, "/");
00145                      while (temp_string) {
00146                             strcpy(procprog, temp_string);
00147                             temp_string = strtok (NULL, "/");
00148                      }
00149 
00150                      /* May get empty procargs */
00151                      if (!strstr(procargs, argv[0]) && strstr(procargs, process_string) && strcmp(procargs,"")) {
00152                             proc_entries++;
00153                             if (verbose >= 2) {
00154                                    printf (_("Found process: %s %s\n"), procprog, procargs);
00155                             }
00156                      }
00157               }
00158        }
00159 
00160        /* If we get anything on stderr, at least set warning */
00161        if(chld_err.buflen)
00162               result = max_state (result, STATE_WARNING);
00163 
00164        /* reset the alarm handler */
00165        alarm (0);
00166 
00167        if (proc_entries == 0) {
00168               die (STATE_CRITICAL, "NAGIOS %s: %s\n", _("CRITICAL"), _("Could not locate a running Nagios process!"));
00169        }
00170 
00171        if (latest_entry_time == 0L) {
00172               die (STATE_CRITICAL, "NAGIOS %s: %s\n", _("CRITICAL"), _("Cannot parse Nagios log file for valid time"));
00173        }
00174 
00175        time (&current_time);
00176        if ((int)(current_time - latest_entry_time) > (expire_minutes * 60)) {
00177               result = STATE_WARNING;
00178        } else {
00179               result = STATE_OK;
00180        }
00181 
00182        printf ("NAGIOS %s: ", (result == STATE_OK) ? _("OK") : _("WARNING"));
00183        printf (ngettext ("%d process", "%d processes", proc_entries), proc_entries);
00184        printf (", ");
00185        printf (
00186          ngettext ("status log updated %d second ago",
00187            "status log updated %d seconds ago",
00188            (int) (current_time - latest_entry_time) ),
00189            (int) (current_time - latest_entry_time) );
00190        printf ("\n");
00191 
00192        return result;
00193 }
00194 
00195 
00196 
00197 /* process command-line arguments */
00198 int
00199 process_arguments (int argc, char **argv)
00200 {
00201        int c;
00202 
00203        int option = 0;
00204        static struct option longopts[] = {
00205               {"filename", required_argument, 0, 'F'},
00206               {"expires", required_argument, 0, 'e'},
00207               {"command", required_argument, 0, 'C'},
00208               {"version", no_argument, 0, 'V'},
00209               {"help", no_argument, 0, 'h'},
00210               {"verbose", no_argument, 0, 'v'},
00211               {0, 0, 0, 0}
00212        };
00213 
00214        if (argc < 2)
00215               return ERROR;
00216 
00217        if (!is_option (argv[1])) {
00218               status_log = argv[1];
00219               if (is_intnonneg (argv[2]))
00220                      expire_minutes = atoi (argv[2]);
00221               else
00222                      die (STATE_UNKNOWN,
00223                                                          _("Expiration time must be an integer (seconds)\n"));
00224               process_string = argv[3];
00225               return OK;
00226        }
00227 
00228        while (1) {
00229               c = getopt_long (argc, argv, "+hVvF:C:e:", longopts, &option);
00230 
00231               if (c == -1 || c == EOF || c == 1)
00232                      break;
00233 
00234               switch (c) {
00235               case 'h':                                                             /* help */
00236                      print_help ();
00237                      exit (STATE_OK);
00238               case 'V':                                                             /* version */
00239                      print_revision (progname, NP_VERSION);
00240                      exit (STATE_OK);
00241               case 'F':                                                             /* status log */
00242                      status_log = optarg;
00243                      break;
00244               case 'C':                                                             /* command */
00245                      process_string = optarg;
00246                      break;
00247               case 'e':                                                             /* expiry time */
00248                      if (is_intnonneg (optarg))
00249                             expire_minutes = atoi (optarg);
00250                      else
00251                             die (STATE_UNKNOWN,
00252                                  _("Expiration time must be an integer (seconds)\n"));
00253                      break;
00254               case 'v':
00255                      verbose++;
00256                      break;
00257               default:                                                              /* print short usage_va statement if args not parsable */
00258                      usage5();
00259               }
00260        }
00261 
00262 
00263        if (status_log == NULL)
00264               die (STATE_UNKNOWN, _("You must provide the status_log\n"));
00265 
00266        if (process_string == NULL)
00267               die (STATE_UNKNOWN, _("You must provide a process string\n"));
00268 
00269        return OK;
00270 }
00271 
00272 
00273 
00274 void
00275 print_help (void)
00276 {
00277        print_revision (progname, NP_VERSION);
00278 
00279        printf (_(COPYRIGHT), copyright, email);
00280 
00281        printf ("%s\n", _("This plugin checks the status of the Nagios process on the local machine"));
00282   printf ("%s\n", _("The plugin will check to make sure the Nagios status log is no older than"));
00283   printf ("%s\n", _("the number of minutes specified by the expires option."));
00284   printf ("%s\n", _("It also checks the process table for a process matching the command argument."));
00285 
00286   printf ("\n\n");
00287 
00288        print_usage ();
00289 
00290        printf (UT_HELP_VRSN);
00291        printf (UT_EXTRA_OPTS);
00292 
00293        printf (" %s\n", "-F, --filename=FILE");
00294   printf ("    %s\n", _("Name of the log file to check"));
00295   printf (" %s\n", "-e, --expires=INTEGER");
00296   printf ("    %s\n", _("Minutes aging after which logfile is considered stale"));
00297   printf (" %s\n", "-C, --command=STRING");
00298   printf ("    %s\n", _("Substring to search for in process arguments"));
00299   printf (UT_VERBOSE);
00300 
00301   printf ("\n");
00302   printf ("%s\n", _("Examples:"));
00303   printf (" %s\n", "check_nagios -e 5 -F /usr/local/nagios/var/status.log -C /usr/local/nagios/bin/nagios");
00304 
00305   printf (UT_SUPPORT);
00306 }
00307 
00308 
00309 
00310 void
00311 print_usage (void)
00312 {
00313   printf ("%s\n", _("Usage:"));
00314        printf ("%s -F <status log file> -e <expire_minutes> -C <process_string>\n", progname);
00315 }