Back to index

im-sdk  12.3.91
watchdog.c
Go to the documentation of this file.
00001 /*
00002 Copyright 1990-2001 Sun Microsystems, Inc. All Rights Reserved.
00003 
00004 Permission is hereby granted, free of charge, to any person obtaining a
00005 copy of this software and associated documentation files (the
00006 "Software"), to deal in the Software without restriction, including
00007 without limitation the rights to use, copy, modify, merge, publish,
00008 distribute, sublicense, and/or sell copies of the Software, and to
00009 permit persons to whom the Software is furnished to do so, subject to
00010 the following conditions: The above copyright notice and this
00011 permission notice shall be included in all copies or substantial
00012 portions of the Software.
00013 
00014 
00015 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00016 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00017 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00018 IN NO EVENT SHALL THE OPEN GROUP OR SUN MICROSYSTEMS, INC. BE LIABLE
00019 FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
00020 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH
00021 THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE EVEN IF
00022 ADVISED IN ADVANCE OF THE POSSIBILITY OF SUCH DAMAGES.
00023 
00024 
00025 Except as contained in this notice, the names of The Open Group and/or
00026 Sun Microsystems, Inc. shall not be used in advertising or otherwise to
00027 promote the sale, use or other dealings in this Software without prior
00028 written authorization from The Open Group and/or Sun Microsystems,
00029 Inc., as applicable.
00030 
00031 
00032 X Window System is a trademark of The Open Group
00033 
00034 OSF/1, OSF/Motif and Motif are registered trademarks, and OSF, the OSF
00035 logo, LBX, X Window System, and Xinerama are trademarks of the Open
00036 Group. All other trademarks and registered trademarks mentioned herein
00037 are the property of their respective owners. No right, title or
00038 interest in or to any trademark, service mark, logo or trade name of
00039 Sun Microsystems, Inc. or its licensors is granted.
00040 
00041 */
00042 
00043 #ifdef HAVE_CONFIG_H
00044 #include <config.h>
00045 #endif
00046 
00047 #include <stdio.h>
00048 #include <stdlib.h>
00049 #include <sys/types.h>
00050 #include <unistd.h>
00051 #include <sys/wait.h>
00052 #include <signal.h>
00053 
00054 #include <string.h>
00055 #include <errno.h>
00056 #include <syslog.h>
00057 #include "SharedData.h"
00058 
00059 #ifdef SunOS
00060 #include <locale.h>
00061 
00062 #ifdef DELAYED_START
00063 #include <kstat.h>
00064 #include <sys/utsname.h>
00065 #include <sys/sysinfo.h>
00066 #include <locale.h>
00067 
00068 #define HARDLIMIT 120
00069 #define DESKTOPINTERVAL 16
00070 #define INITINTERVAL 30
00071 #define MININTERVAL 2
00072 
/*
 * Report whether an X11 socket/pipe directory is present.
 *
 * Probes the two well-known directories in order and stops at the first
 * one that exists.
 *
 * Returns 1 if either path exists, 0 otherwise.
 */
static int
is_desktop_up(){
    /* May be Solaris only */
    static const char *const x11_paths[] = {
        "/tmp/.X11-unix",
        "/tmp/.X11-pipe"
    };
    int k;

    for (k = 0; k < 2; k++) {
        if (access(x11_paths[k], F_OK) == 0)
            return 1;
    }
    return 0;
}
00080 
/*
 * Allocate a zero-filled buffer and store its address in *p.
 *
 * p          - in/out: receives the new buffer; when free_first is set,
 *              the buffer it currently points to is released first.
 * size       - number of bytes requested.  A request for 0 bytes still
 *              yields a valid (1-byte) buffer, because malloc(0)/calloc(0)
 *              may return NULL on a perfectly healthy system and that must
 *              not be mistaken for an out-of-memory condition.
 * free_first - non-zero to free the previous *p before allocating.
 *
 * Never returns on failure: logs "malloc failed" to syslog and exits
 * with status 1 (this also covers a negative size).
 */
static void
zalloc(void **p, int size, int free_first){
    void *fresh = NULL;

    if (free_first) {
       free(*p);            /* free(NULL) is a no-op, no guard needed */
       *p = NULL;
    }

    if (size >= 0)
       fresh = calloc(1, size > 0 ? (size_t)size : 1);

    if (fresh == NULL){
      syslog(LOG_ERR, "malloc failed");
      exit(1);
    }
    *p = fresh;
}
00091 
00092 static int
00093 is_load_or_cpu_low(int loadlowwater, int cpulowwater, int *newinterval){
00094     int interval = *newinterval ;
00095     static int initialized = 0 ;
00096     static kstat_ctl_t *kc ;
00097     static kstat_t *ksp ;
00098     static kstat_t *system_misc_ksp ;
00099     static kstat_t *cpu_stat_0_ksp ;
00100     static kstat_named_t *avenrun_1min_knp ;
00101     static int ncpus = 0;
00102     static kstat_t **cpu_stat_list = 0;
00103     static cpu_sysinfo_t cpu_sysinfo_all;
00104     static cpu_vminfo_t     cpu_vminfo_all;
00105     int idle, pgin ;
00106     static int b[10]; /* buffer */
00107 
00108     int i, j;
00109     cpu_stat_t cs;
00110     ulong *np, *tp;
00111 
00112     if(!initialized){
00113        initialized = 1 ;
00114        if ((kc = kstat_open()) == NULL){
00115            syslog(LOG_WARNING, "kstat_open() fails\n");
00116            return 0;
00117        }
00118 
00119        if((system_misc_ksp = kstat_lookup(kc, "unix", 0, "system_misc")) == 0){
00120          syslog(LOG_NOTICE, "kstat lookup: unix, system_misc\n");
00121          return 0;
00122        }
00123        if(kstat_read(kc, system_misc_ksp, NULL) == -1){
00124          syslog(LOG_NOTICE, "kstat read: system_misc\n");
00125          return 0;
00126        }
00127        if((avenrun_1min_knp = kstat_data_lookup(system_misc_ksp,
00128                                            "avenrun_1min"))== 0){
00129          syslog(LOG_NOTICE, "kstat lookup: unix, system_misc avenrun_1min\n");
00130          return 0;
00131        }
00132 
00133        for (ksp = kc->kc_chain; ksp; ksp = ksp->ks_next)
00134               if (strncmp(ksp->ks_name, "cpu_stat", 8) == 0)
00135                      ncpus++;
00136 
00137        zalloc((void **)&cpu_stat_list, ncpus * sizeof (kstat_t *), 1);
00138 
00139        ncpus = 0;
00140        for (ksp = kc->kc_chain; ksp; ksp = ksp->ks_next)
00141               if (strncmp(ksp->ks_name, "cpu_stat", 8) == 0 &&
00142                     kstat_read(kc, ksp, NULL) != -1)
00143                      cpu_stat_list[ncpus++] = ksp;
00144 
00145        if (ncpus == 0){
00146          syslog(LOG_ERR, "can't find any cpu statistics\n");
00147          exit(1);
00148        }
00149 
00150 #ifdef DEBUG
00151 printf("#CPU: %d\n", ncpus);
00152 #endif
00153        
00154     } /* end initialized */
00155 
00156     memset((void *)&cpu_sysinfo_all, 0, sizeof (cpu_sysinfo_all));
00157     memset((void *)&cpu_vminfo_all, 0, sizeof (cpu_vminfo_all));
00158 
00159     if(kstat_read(kc, system_misc_ksp, NULL) == -1){
00160       syslog(LOG_NOTICE, "kstat read: system_misc\n");
00161       return 0;
00162     }
00163     if((avenrun_1min_knp = kstat_data_lookup(system_misc_ksp,
00164                                         "avenrun_1min"))== 0){
00165       syslog(LOG_NOTICE, "kstat lookup: unix, system_misc avenrun_1min\n");
00166       return 0;
00167     }
00168     b[0] = avenrun_1min_knp->value.ul ;
00169 
00170     for (i = 0; i < ncpus; i++) {
00171               if (kstat_read(kc, cpu_stat_list[i], (void *)&cs) == -1)
00172                      return (1);
00173               np = (ulong *)&cpu_sysinfo_all;
00174               tp = (ulong *)&cs.cpu_sysinfo;
00175               for (j = 0; j < sizeof (cpu_sysinfo_t); j += sizeof (ulong_t))
00176                      *np++ += *tp++;
00177               np = (ulong *)&cpu_vminfo_all;
00178               tp = (ulong *)&cs.cpu_vminfo;
00179               for (j = 0; j < sizeof (cpu_vminfo_t); j += sizeof (ulong_t))
00180                      *np++ += *tp++;
00181        }
00182 #ifdef DEBUG
00183 printf("usr+nice: %3d sys %3d idle %3d pgin %d\n",
00184        (((cpu_sysinfo_all.cpu[CPU_USER] - b[1])*100/interval)/100)+
00185        (((cpu_sysinfo_all.cpu[CPU_WAIT] - b[2])*100/interval)/100),
00186        ((cpu_sysinfo_all.cpu[CPU_KERNEL] - b[3])*100/interval)/100,
00187        ((cpu_sysinfo_all.cpu[CPU_IDLE] - b[4])*100/interval)/100,
00188        ((cpu_vminfo_all.pgpgin - b[5])*100/interval)/100
00189        );
00190 #endif
00191     idle = ((cpu_sysinfo_all.cpu[CPU_IDLE] - b[4])*100/interval)/100 ;
00192     pgin = ((cpu_vminfo_all.pgpgin - b[5])*100/interval)/100 ;
00193 
00194     if (idle > 25 && (idle - b[4])>10)
00195        *newinterval = (interval+MININTERVAL)/2 ;
00196 
00197     b[1] = cpu_sysinfo_all.cpu[CPU_USER];
00198     b[2] = cpu_sysinfo_all.cpu[CPU_WAIT] ;
00199     b[3] = cpu_sysinfo_all.cpu[CPU_KERNEL];
00200     b[4] = cpu_sysinfo_all.cpu[CPU_IDLE];
00201     b[5] = cpu_vminfo_all.pgpgin;
00202 
00203     /* why pgin? */
00204     if(idle > 90 /* && pgin < 20 */){
00205        *newinterval = 1 ;
00206        return 1 ;
00207     }
00208     return 0 ;
00209 }
00210 
00211 #endif /* DELAYED_START */
00212 #endif /* SunOS */
00213 
00214 static void clean_up(int);  /* SIGTERM/SIGINT handler installed by main(); defined after main() */
00215 
00216 int iiimd_pid=0;  /* result of the latest fork() in main(); read by clean_up() */
00217 
00218 int
00219 main(argc, argv)
00220     int              argc;
00221     char      **argv;
00222 {
00223     const char *iiimdpath=IIIMPATHIIIMD ; /* SUNWiiimf Only */
00224     pid_t pgrp;
00225     void (*disp)(int);
00226     char **new_argv, **pp;
00227     int newargc = 1, i, retry_on_error = 4, retrycount;
00228 
00229 #ifdef SunOS
00230     
00231 #ifdef DELAYED_START
00232     int hardlimit = HARDLIMIT  ; /* Experimental: 2 minutes */
00233 #endif /* DELAYED_START */
00234     int interval          ;  /* Experimental: 2 sec */
00235     int salt = 1 ;         /* salt should reflect perf measurement */
00236     int loadlowwater = 1 ; /* Experimental */
00237     int cpulowwater = 20 ; /* 20% */
00238     static int desktop_is_up ;
00239     static int confirm = 0 ;
00240     int use_syslog;
00241     char * message_locale;
00242 
00243     setlocale(LC_ALL, "");
00244 
00245     use_syslog = 0;
00246     message_locale = NULL;
00247     /* not enough */
00248     for (i = argc - 1, pp = argv + 1; 0 < i; --i, pp++) {
00249        if (0 == strcmp(*pp, "-syslog")) {
00250            use_syslog = 1;
00251        } else if (0 == strcmp(*pp, "-message_locale")) {
00252            if (1 == i) break;
00253            --i;
00254            pp++;
00255            message_locale = *pp;
00256        } else if (0 == strcmp(*pp, "-retryonerror")) {
00257            if (1 == i) break;
00258            --i;
00259            pp++;
00260            retry_on_error = atoi(*pp);
00261        } else if (0 == strcmp(*pp, "-h") ||
00262                  0 == strcmp(*pp, "-help") ||
00263                  0 == strcmp(*pp, "--help")) {
00264            fprintf(stderr, "Usage: %s [-retryonerror NUM]\n", argv[0]);
00265            exit(1);
00266        } else {
00267            /* unknown options found. stop parsing option for iiimd here */
00268            break;
00269        }
00270     }
00271     newargc = argc - i;
00272 
00273     openlog("iiimd", LOG_PID, LOG_USER);
00274 
00275     /* BugId : 4281734. No need to have delayed start now.
00276        iiim_server does not require X connection and will be started only
00277        in asian locales.
00278     */
00279 #ifdef DELAYED_START
00280     if (desktop_is_up = is_desktop_up()){
00281        /* Must not be from bootup time */
00282        interval = MININTERVAL ;
00283     } else {
00284        interval = INITINTERVAL ;
00285     }
00286     
00287     for(; hardlimit > 0; hardlimit -= interval){ /* delayed start loop */
00288 
00289 #ifdef DEBUG
00290 printf("%3d | ", salt*HARDLIMIT - hardlimit);
00291 #endif
00292 
00293        if (is_load_or_cpu_low(loadlowwater, cpulowwater, &interval)){
00294            confirm++ ;
00295        } else {
00296            confirm = 0 ;
00297        }
00298         if (confirm > 0)
00299            break ;
00300 
00301         if(!desktop_is_up){
00302            if (is_desktop_up()){
00303               desktop_is_up = 1 ; 
00304               /* Causes too much delay - 4264139 
00305               interval += DESKTOPINTERVAL ;
00306               */
00307 #ifdef DEBUG
00308 printf("Desktop is up now\n");
00309 #endif
00310            }
00311        }
00312         if(interval > MININTERVAL){
00313            interval=(interval+MININTERVAL)/2;
00314        }
00315        sleep(interval);
00316 
00317 #ifdef DEBUG
00318         fflush(stdout);
00319 #endif
00320 
00321     }
00322 
00323 #endif  /* DELAYED_START */
00324 
00325 #ifdef DEBUG
00326 printf("Go though\n");
00327 #endif
00328 
00329 #else
00330     for (i = argc - 1, pp = argv + 1; 0 < i; --i, pp++) {
00331        if (0 == strcmp(*pp, "-retryonerror")) {
00332            if (1 == i) break;
00333            --i;
00334            pp++;
00335            retry_on_error = atoi(*pp);
00336        } else if (0 == strcmp(*pp, "-h") ||
00337                  0 == strcmp(*pp, "-help") ||
00338                  0 == strcmp(*pp, "--help")) {
00339            fprintf(stderr, "Usage: %s [-retryonerror NUM]\n", argv[0]);
00340            exit(1);
00341        } else {
00342            /* unknown options found. stop parsing option for iiimd here */
00343            break;
00344        }
00345     }
00346     newargc = argc - i;
00347 
00348     openlog("iiimd", LOG_PID, LOG_USER);
00349 
00350 #endif /* SunOS */
00351 #ifdef SunOS
00352     sigset(SIGTERM, clean_up);
00353     sigset(SIGINT, clean_up);
00354 #else
00355     signal(SIGTERM, clean_up);
00356     signal(SIGINT, clean_up);
00357 #endif
00358 
00359     pgrp = setsid();
00360     if ((pid_t)(-1) == pgrp)
00361       fprintf (stderr,"cannot set session id");
00362 
00363     /* Create new argv */
00364     new_argv = (char**) malloc(sizeof(char*) * (argc + 2));
00365     if (!new_argv) {
00366       syslog(LOG_ERR, "malloc failed");
00367       exit(1);
00368     }
00369     new_argv[0]="iiimd";
00370     new_argv[1] = "-nodaemon";
00371     if (argc > 0)
00372       memcpy(new_argv + 2, argv + newargc, sizeof(char*) * argc - newargc);
00373     else
00374       new_argv[2]= NULL;
00375 
00376     retrycount = retry_on_error;
00377 
00378     for(;;){
00379 #ifdef DEBUG
00380         fflush(stdout);
00381 #endif
00382        if((iiimd_pid = fork()) == 0){
00383            /* XXX uid should be changed to nobody */
00384             execv(iiimdpath, new_argv);
00385            syslog(LOG_ERR, "execv iiimd failed\n");
00386             exit(17);
00387        } else if(iiimd_pid < 0) {
00388            syslog(LOG_WARNING, "iiimd watchdog: fork was failed\n");
00389            sleep(60);
00390        } else {
00391             int status ;
00392             waitpid(iiimd_pid, &status, 0) ;
00393            
00394            if (WIFSIGNALED(status)) {
00395               switch(WTERMSIG(status)) {
00396                 case SIGTERM :
00397                 case SIGKILL : /* there must be a reason */
00398                     exit (0);
00399                     break ;
00400                 case SIGCHLD :
00401                     kill(iiimd_pid, SIGTERM);
00402                     exit(1);       
00403                 case SIGSEGV:
00404                     syslog(LOG_WARNING, "iiimd watchdog: iiimd segfaulted");
00405                     retrycount--;
00406                     break;
00407                 case SIGABRT:
00408                     syslog(LOG_WARNING, "iiimd watchdog: iiimd aborted");
00409                     retrycount--;
00410                     break;
00411                 default:
00412                    /* 
00413                     * 1999/09/28
00414                     * SIGTERM seems more appropriate,
00415                     * but orphan iiimd's do not
00416                     * stop with SIGTERM.
00417                     */
00418 #ifdef SunOS
00419                     disp = sigset(SIGUSR1, SIG_IGN);
00420                     kill(-pgrp, SIGUSR1);
00421                     sigset(SIGUSR1, disp);
00422 #else /* !SunOS */
00423                     disp = signal(SIGUSR1, SIG_IGN);
00424                     kill(-pgrp, SIGUSR1);
00425                     signal(SIGUSR1, disp);
00426 #endif /* !SunOS */
00427                     break ;
00428               }
00429               if (retrycount < 0) {
00430                   syslog(LOG_ERR, "give up trying to run iiimd.\n");
00431                   kill(iiimd_pid, SIGTERM);
00432                   exit(1);
00433               }
00434               sleep(1);
00435            } else if (WIFEXITED(status)) {
00436               switch(WEXITSTATUS(status)) {
00437                 case 100: /* unknown options was given */
00438                     syslog(LOG_ERR, "unrecognized options was given to iiimd.\n");
00439                     retrycount = 0;
00440                 case 17 : /* failed to execv() */
00441                 case 255: /* seriously not continued working */
00442                 case NOLEIF_EXITCODE :   /* Kill the server */
00443                 case NORUN_EXITCODE :   /* fix for 4297357 */
00444                     retrycount--;
00445                     if (retrycount < 0) {
00446                        syslog(LOG_ERR, "give up trying to run iiimd.\n");
00447                        kill(iiimd_pid, SIGTERM);
00448                        exit(1);    
00449                     }
00450                     sleep(1);
00451                     break;
00452                 default:
00453                     retrycount = retry_on_error;
00454                     break ;
00455               }
00456            }
00457        }
00458     }
00459 }
00460 
00461 void clean_up(int unused){
00462     kill(iiimd_pid, SIGTERM);
00463     exit(1);
00464 }