Back to index

nagios-plugins  1.4.16
check_ide_smart.c
Go to the documentation of this file.
00001 /*****************************************************************************
00002 * 
00003 * Nagios check_ide_smart plugin
00004 * ide-smart 1.3 - IDE S.M.A.R.T. checking tool
00005 * 
00006 * License: GPL
00007 * Copyright (C) 1998-1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>
00008 *               1998      Gadi Oxman <gadio@netvision.net.il>
00009 * Copyright (c) 2000 Robert Dale <rdale@digital-mission.com>
00010 * Copyright (c) 2000-2007 Nagios Plugins Development Team
00011 * 
00012 * Description:
00013 * 
00014 * This file contains the check_ide_smart plugin
00015 * 
00016 * This plugin checks a local hard drive with the (Linux specific) SMART
00017 * interface
00018 * 
00019 * 
00020 * This program is free software: you can redistribute it and/or modify
00021 * it under the terms of the GNU General Public License as published by
00022 * the Free Software Foundation, either version 3 of the License, or
00023 * (at your option) any later version.
00024 * 
00025 * This program is distributed in the hope that it will be useful,
00026 * but WITHOUT ANY WARRANTY; without even the implied warranty of
00027 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00028 * GNU General Public License for more details.
00029 * 
00030 * You should have received a copy of the GNU General Public License
00031 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00032 * 
00033 * 
00034 *****************************************************************************/
00035 
/*
 * Plugin identity strings.  print_help() hands `progname` to
 * print_revision() and feeds `copyright` and `email` to the COPYRIGHT
 * format; print_usage() prints `progname` in the usage line.
 */
const char *progname  = "check_ide_smart";
const char *copyright = "1998-2007";
const char *email     = "nagiosplug-devel@lists.sourceforge.net";
00039        
00040 #include "common.h"
00041 #include "utils.h"
00042 
00043 void print_help (void);
00044 void print_usage (void);
00045 
00046 #include <sys/stat.h>
00047 #include <sys/ioctl.h>
00048 #include <fcntl.h>
00049 #include <linux/hdreg.h>
00050 #include <linux/types.h>
00051 #include <errno.h>
00052        
/* Number of attribute slots in the SMART value and threshold tables. */
#define NR_ATTRIBUTES 30

#ifndef TRUE
#define TRUE 1
#endif

/*
 * Internal health classification used while attributes are evaluated.
 * nagios() maps PREFAILURE, ADVISORY and OPERATIONAL onto the plugin
 * states STATE_CRITICAL, STATE_WARNING and STATE_OK.
 * UNKNOWN is parenthesised so the negative literal cannot merge with
 * neighbouring operators when the macro is expanded inside an expression.
 */
#define PREFAILURE  2
#define ADVISORY    1
#define OPERATIONAL 0
#define UNKNOWN     (-1)
00063 
00064 typedef struct threshold_s
00065 {
00066        __u8 id;
00067        __u8 threshold;
00068        __u8 reserved[10];
00069 }
00070 __attribute__ ((packed)) threshold_t;
00071 
00072 typedef struct thresholds_s
00073 {
00074        __u16 revision;
00075        threshold_t thresholds[NR_ATTRIBUTES];
00076        __u8 reserved[18];
00077        __u8 vendor[131];
00078        __u8 checksum;
00079 }
00080 __attribute__ ((packed)) thresholds_t;
00081 
00082 typedef struct value_s
00083 {
00084        __u8 id;
00085        __u16 status;
00086        __u8 value;
00087        __u8 vendor[8];
00088 }
00089 __attribute__ ((packed)) value_t;
00090 
00091 typedef struct values_s
00092 {
00093        __u16 revision;
00094        value_t values[NR_ATTRIBUTES];
00095        __u8 offline_status;
00096        __u8 vendor1;
00097        __u16 offline_timeout;
00098        __u8 vendor2;
00099        __u8 offline_capability;
00100        __u16 smart_capability;
00101        __u8 reserved[16];
00102        __u8 vendor[125];
00103        __u8 checksum;
00104 }
00105 __attribute__ ((packed)) values_t;
00106 
00107 struct
00108 {
00109        __u8 value;
00110        char *text;
00111 }
00112 
00113 offline_status_text[] =
00114        {
00115               {0x00, "NeverStarted"},
00116               {0x02, "Completed"},
00117               {0x04, "Suspended"},
00118               {0x05, "Aborted"},
00119               {0x06, "Failed"},
00120               {0, 0}
00121        };
00122 
00123 struct
00124 {
00125        __u8 value;
00126        char *text;
00127 }
00128 
00129 smart_command[] =
00130        {
00131               {SMART_ENABLE, "SMART_ENABLE"},
00132               {SMART_DISABLE, "SMART_DISABLE"},
00133               {SMART_IMMEDIATE_OFFLINE, "SMART_IMMEDIATE_OFFLINE"},
00134               {SMART_AUTO_OFFLINE, "SMART_AUTO_OFFLINE"}
00135        };
00136 
00137 
/*
 * Indexes into the smart_command[] table.
 * Keep the enumerators in the same order as the table entries.
 */
enum SmartCommand {
	SMART_CMD_ENABLE = 0,
	SMART_CMD_DISABLE,
	SMART_CMD_IMMEDIATE_OFFLINE,
	SMART_CMD_AUTO_OFFLINE
};
00145 
00146 void print_values (values_t * p, thresholds_t * t);
00147 int smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0, char show_error); 
00148 
00149 int
00150 main (int argc, char *argv[]) 
00151 {
00152        char *device = NULL;
00153        int command = -1;
00154        int o, longindex;
00155        int retval = 0;
00156 
00157        thresholds_t thresholds;
00158        values_t values;
00159        int fd;
00160 
00161        static struct option longopts[] = { 
00162               {"device", required_argument, 0, 'd'}, 
00163               {"immediate", no_argument, 0, 'i'}, 
00164               {"quiet-check", no_argument, 0, 'q'}, 
00165               {"auto-on", no_argument, 0, '1'}, 
00166               {"auto-off", no_argument, 0, '0'}, 
00167               {"nagios", no_argument, 0, 'n'}, 
00168               {"help", no_argument, 0, 'h'}, 
00169               {"version", no_argument, 0, 'V'},
00170               {0, 0, 0, 0}
00171        };
00172 
00173        /* Parse extra opts if any */
00174        argv=np_extra_opts (&argc, argv, progname);
00175 
00176        setlocale (LC_ALL, "");
00177        bindtextdomain (PACKAGE, LOCALEDIR);
00178        textdomain (PACKAGE);
00179 
00180        while (1) {
00181               
00182               o = getopt_long (argc, argv, "+d:iq10nhV", longopts, &longindex);
00183 
00184               if (o == -1 || o == EOF || o == 1)
00185                      break;
00186 
00187               switch (o) {
00188               case 'd':
00189                      device = optarg;
00190                      break;
00191               case 'q':
00192                      command = 3;
00193                      break;
00194               case 'i':
00195                      command = 2;
00196                      break;
00197               case '1':
00198                      command = 1;
00199                      break;
00200               case '0':
00201                      command = 0;
00202                      break;
00203               case 'n':
00204                      command = 4;
00205                      break;
00206               case 'h':
00207                      print_help ();
00208                      return STATE_OK;
00209               case 'V':
00210                      print_revision (progname, NP_VERSION);
00211                      return STATE_OK;
00212               default:
00213                      usage5 ();
00214               }
00215        }
00216 
00217        if (optind < argc) {
00218               device = argv[optind];
00219        }
00220 
00221        if (!device) {
00222               print_help ();
00223               return STATE_OK;
00224        }
00225 
00226        fd = open (device, O_RDONLY);
00227 
00228        if (fd < 0) {
00229               printf (_("CRITICAL - Couldn't open device %s: %s\n"), device, strerror (errno));
00230               return STATE_CRITICAL;
00231        }
00232 
00233        if (smart_cmd_simple (fd, SMART_CMD_ENABLE, 0, TRUE)) {
00234               printf (_("CRITICAL - SMART_CMD_ENABLE\n"));
00235               return STATE_CRITICAL;
00236        }
00237 
00238        switch (command) {
00239        case 0:
00240               retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0, TRUE);
00241               break;
00242        case 1:
00243               retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0xF8, TRUE);
00244               break;
00245        case 2:
00246               retval = smart_cmd_simple (fd, SMART_CMD_IMMEDIATE_OFFLINE, 0, TRUE);
00247               break;
00248        case 3:
00249               smart_read_values (fd, &values);
00250               smart_read_thresholds (fd, &thresholds);
00251               retval = values_not_passed (&values, &thresholds);
00252               break;
00253        case 4:
00254               smart_read_values (fd, &values);
00255               smart_read_thresholds (fd, &thresholds);
00256               retval = nagios (&values, &thresholds);
00257               break;
00258        default:
00259               smart_read_values (fd, &values);
00260               smart_read_thresholds (fd, &thresholds);
00261               print_values (&values, &thresholds);
00262               break;
00263        }
00264        close (fd);
00265        return retval;
00266 }
00267 
00268 
00269 
00270 char *
00271 get_offline_text (int status) 
00272 {
00273        int i;
00274        for (i = 0; offline_status_text[i].text; i++) {
00275               if (offline_status_text[i].value == status) {
00276                      return offline_status_text[i].text;
00277               }
00278        }
00279        return "UNKNOW";
00280 }
00281 
00282 
00283 
00284 int
00285 smart_read_values (int fd, values_t * values) 
00286 {
00287        int e;
00288        __u8 args[4 + 512];
00289        args[0] = WIN_SMART;
00290        args[1] = 0;
00291        args[2] = SMART_READ_VALUES;
00292        args[3] = 1;
00293        if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
00294               e = errno;
00295               printf (_("CRITICAL - SMART_READ_VALUES: %s\n"), strerror (errno));
00296               return e;
00297        }
00298        memcpy (values, args + 4, 512);
00299        return 0;
00300 }
00301 
00302 
00303 
00304 int
00305 values_not_passed (values_t * p, thresholds_t * t) 
00306 {
00307        value_t * value = p->values;
00308        threshold_t * threshold = t->thresholds;
00309        int failed = 0;
00310        int passed = 0;
00311        int i;
00312        for (i = 0; i < NR_ATTRIBUTES; i++) {
00313               if (value->id && threshold->id && value->id == threshold->id) {
00314                      if (value->value <= threshold->threshold) {
00315                             ++failed;
00316                      }
00317                      else {
00318                             ++passed;
00319                      }
00320               }
00321               ++value;
00322               ++threshold;
00323        }
00324        return (passed ? -failed : 2);
00325 }
00326 
00327 
00328 
00329 int
00330 nagios (values_t * p, thresholds_t * t) 
00331 {
00332        value_t * value = p->values;
00333        threshold_t * threshold = t->thresholds;
00334        int status = OPERATIONAL;
00335        int prefailure = 0;
00336        int advisory = 0;
00337        int failed = 0;
00338        int passed = 0;
00339        int total = 0;
00340        int i;
00341        for (i = 0; i < NR_ATTRIBUTES; i++) {
00342               if (value->id && threshold->id && value->id == threshold->id) {
00343                      if (value->value <= threshold->threshold) {
00344                             ++failed;
00345                             if (value->status & 1) {
00346                                    status = PREFAILURE;
00347                                    ++prefailure;
00348                             }
00349                             else {
00350                                    status = ADVISORY;
00351                                    ++advisory;
00352                             }
00353                      }
00354                      else {
00355                             ++passed;
00356                      }
00357                      ++total;
00358               }
00359               ++value;
00360               ++threshold;
00361        }
00362        switch (status) {
00363        case PREFAILURE:
00364               printf (_("CRITICAL - %d Harddrive PreFailure%cDetected! %d/%d tests failed.\n"),
00365                       prefailure,
00366                       prefailure > 1 ? 's' : ' ',
00367                       failed,
00368                  total);
00369               status=STATE_CRITICAL;
00370               break;
00371        case ADVISORY:
00372               printf (_("WARNING - %d Harddrive Advisor%s Detected. %d/%d tests failed.\n"),
00373                       advisory,
00374                       advisory > 1 ? "ies" : "y",
00375                       failed,
00376                       total);
00377               status=STATE_WARNING;
00378               break;
00379        case OPERATIONAL:
00380               printf (_("OK - Operational (%d/%d tests passed)\n"), passed, total);
00381               status=STATE_OK;
00382               break;
00383        default:
00384               printf (_("ERROR - Status '%d' unkown. %d/%d tests passed\n"), status,
00385                                           passed, total);
00386               status = STATE_UNKNOWN;
00387               break;
00388        }
00389        return status;
00390 }
00391 
00392 
00393 
00394 void
00395 print_value (value_t * p, threshold_t * t) 
00396 {
00397        printf ("Id=%3d, Status=%2d {%s , %s}, Value=%3d, Threshold=%3d, %s\n",
00398                                    p->id, p->status, p->status & 1 ? "PreFailure" : "Advisory   ",
00399                                    p->status & 2 ? "OnLine " : "OffLine", p->value, t->threshold,
00400                                    p->value > t->threshold ? "Passed" : "Failed");
00401 }
00402 
00403 
00404 
00405 void
00406 print_values (values_t * p, thresholds_t * t)
00407 {
00408        value_t * value = p->values;
00409        threshold_t * threshold = t->thresholds;
00410        int i;
00411        for (i = 0; i < NR_ATTRIBUTES; i++) {
00412               if (value->id && threshold->id && value->id == threshold->id) {
00413                      print_value (value++, threshold++);
00414               }
00415        }
00416        printf
00417               (_("OffLineStatus=%d {%s}, AutoOffLine=%s, OffLineTimeout=%d minutes\n"),
00418                p->offline_status,
00419                get_offline_text (p->offline_status & 0x7f),
00420                (p->offline_status & 0x80 ? "Yes" : "No"),
00421                p->offline_timeout / 60);
00422        printf
00423               (_("OffLineCapability=%d {%s %s %s}\n"),
00424                p->offline_capability,
00425                p->offline_capability & 1 ? "Immediate" : "",
00426                p->offline_capability & 2 ? "Auto" : "",
00427                p->offline_capability & 4 ? "AbortOnCmd" : "SuspendOnCmd");
00428        printf
00429               (_("SmartRevision=%d, CheckSum=%d, SmartCapability=%d {%s %s}\n"),
00430                p->revision,
00431                p->checksum,
00432                p->smart_capability,
00433                p->smart_capability & 1 ? "SaveOnStandBy" : "",
00434                p->smart_capability & 2 ? "AutoSave" : "");
00435 }
00436 
00437 
00438 int
00439 smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0, char show_error) 
00440 {
00441        int e = 0;
00442        __u8 args[4];
00443        args[0] = WIN_SMART;
00444        args[1] = val0;
00445        args[2] = smart_command[command].value;
00446        args[3] = 0;
00447        if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
00448               e = errno;
00449               if (show_error) {
00450                      printf (_("CRITICAL - %s: %s\n"), smart_command[command].text, strerror (errno));
00451               }
00452        }
00453        return e;
00454 }
00455 
00456 
00457 
00458 int
00459 smart_read_thresholds (int fd, thresholds_t * thresholds) 
00460 {
00461        int e;
00462        __u8 args[4 + 512];
00463        args[0] = WIN_SMART;
00464   args[1] = 0;
00465   args[2] = SMART_READ_THRESHOLDS;
00466   args[3] = 1;
00467        if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
00468               e = errno;
00469               printf (_("CRITICAL - SMART_READ_THRESHOLDS: %s\n"), strerror (errno));
00470               return e;
00471        }
00472        memcpy (thresholds, args + 4, 512);
00473        return 0;
00474 }
00475 
00476 
00477 void
00478 print_help (void)
00479 {
00480        print_revision (progname, NP_VERSION);
00481 
00482        printf ("Nagios feature - 1999 Robert Dale <rdale@digital-mission.com>\n");
00483        printf ("(C) 1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>\n");
00484        printf (COPYRIGHT, copyright, email);
00485 
00486        printf (_("This plugin checks a local hard drive with the (Linux specific) SMART interface [http://smartlinux.sourceforge.net/smart/index.php]."));
00487 
00488   printf ("\n\n");
00489 
00490   print_usage ();
00491 
00492   printf (UT_HELP_VRSN);
00493   printf (UT_EXTRA_OPTS);
00494 
00495   printf (" %s\n", "-d, --device=DEVICE");
00496   printf ("    %s\n", _("Select device DEVICE"));
00497   printf ("    %s\n", _("Note: if the device is selected with this option, _no_ other options are accepted"));
00498   printf (" %s\n", "-i, --immediate");
00499   printf ("    %s\n", _("Perform immediately offline tests"));
00500   printf (" %s\n", "-q, --quiet-check");
00501   printf ("    %s\n", _("Returns the number of failed tests"));
00502   printf (" %s\n", "-1, --auto-on");
00503   printf ("    %s\n", _("Turn on automatic offline tests"));
00504   printf (" %s\n", "-0, --auto-off");
00505   printf ("    %s\n", _("Turn off automatic offline tests"));
00506   printf (" %s\n", "-n, --nagios");
00507   printf ("    %s\n", _("Output suitable for Nagios"));
00508 
00509   printf (UT_SUPPORT);
00510 }
00511 
/* TODO: add to the long manual as an example
 *
 *     Run with:  check_ide_smart --nagios [-d] <DRIVE>
 *     Where DRIVE is an IDE drive, e.g. /dev/hda, /dev/hdb, /dev/hdc
 *
 *       - Returns 0 on no errors
 *       - Returns 1 on advisories
 *       - Returns 2 on prefailure
 *       - Returns -1 not too often
 */
00522 
00523 
00524 void
00525 print_usage (void)
00526 {
00527   printf ("%s\n", _("Usage:"));
00528   printf ("%s [-d <device>] [-i <immediate>] [-q quiet] [-1 <auto-on>]",progname);
00529   printf (" [-O <auto-off>] [-n <nagios>]\n");
00530 }