Back to index

plt-scheme  4.2.1
pthread_stop_world.c
Go to the documentation of this file.
00001 #include "private/pthread_support.h"
00002 
00003 #if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
00004      && !defined(GC_WIN32_THREADS) && !defined(GC_DARWIN_THREADS)
00005 
00006 #include <signal.h>
00007 #include <semaphore.h>
00008 #include <errno.h>
00009 #include <unistd.h>
00010 #include <sys/time.h>
00011 #ifndef HPUX
00012 # include <sys/select.h>
00013   /* Doesn't exist on HP/UX 11.11. */
00014 #endif
00015 
00016 #if DEBUG_THREADS
00017 
/* NSIG serves as the exclusive upper bound when iterating over signal */
/* numbers.  If the system headers do not supply it, derive it from    */
/* whichever related macro is available; otherwise stop the build with */
/* an explicit diagnostic.                                             */
#ifndef NSIG
# if defined(MAXSIG)
#  define NSIG (MAXSIG+1)
# elif defined(_NSIG)
#  define NSIG _NSIG
# elif defined(__SIGRTMAX)
#  define NSIG (__SIGRTMAX+1)
# else
#  error "Cannot determine NSIG; please define it for this platform"
# endif
#endif
00029 
00030 void GC_print_sig_mask()
00031 {
00032     sigset_t blocked;
00033     int i;
00034 
00035     if (pthread_sigmask(SIG_BLOCK, NULL, &blocked) != 0)
00036        ABORT("pthread_sigmask");
00037     GC_printf0("Blocked: ");
00038     for (i = 1; i < NSIG; i++) {
00039         if (sigismember(&blocked, i)) { GC_printf1("%ld ",(long) i); }
00040     }
00041     GC_printf0("\n");
00042 }
00043 
00044 #endif
00045 
00046 /* Remove the signals that we want to allow in thread stopping        */
00047 /* handler from a set.                                         */
00048 void GC_remove_allowed_signals(sigset_t *set)
00049 {
00050 #   ifdef NO_SIGNALS
00051       if (sigdelset(set, SIGINT) != 0
00052          || sigdelset(set, SIGQUIT) != 0
00053          || sigdelset(set, SIGABRT) != 0
00054          || sigdelset(set, SIGTERM) != 0) {
00055         ABORT("sigdelset() failed");
00056       }
00057 #   endif
00058 
00059 #   ifdef MPROTECT_VDB
00060       /* Handlers write to the thread structure, which is in the heap,       */
00061       /* and hence can trigger a protection fault.                    */
00062       if (sigdelset(set, SIGSEGV) != 0
00063 #        ifdef SIGBUS
00064            || sigdelset(set, SIGBUS) != 0
00065 #        endif
00066          ) {
00067         ABORT("sigdelset() failed");
00068       }
00069 #   endif
00070 }
00071 
00072 static sigset_t suspend_handler_mask;
00073 
00074 volatile sig_atomic_t GC_stop_count;
00075                      /* Incremented at the beginning of GC_stop_world. */
00076 
00077 volatile sig_atomic_t GC_world_is_stopped = FALSE;
00078                      /* FALSE ==> it is safe for threads to restart, i.e. */
00079                      /* they will see another suspend signal before they  */
00080                      /* are expected to stop (unless they have voluntarily */
00081                      /* stopped).                                   */
00082 
00083 void GC_brief_async_signal_safe_sleep()
00084 {
00085     struct timeval tv;
00086     tv.tv_sec = 0;
00087     tv.tv_usec = 1000 * TIME_LIMIT / 2;
00088     select(0, 0, 0, 0, &tv);
00089 }
00090 
00091 #ifdef GC_OSF1_THREADS
00092   GC_bool GC_retry_signals = TRUE;
00093 #else
00094   GC_bool GC_retry_signals = FALSE;
00095 #endif
00096 
00097 /*
00098  * We use signals to stop threads during GC.
00099  * 
00100  * Suspended threads wait in signal handler for SIG_THR_RESTART.
00101  * That's more portable than semaphores or condition variables.
00102  * (We do use sem_post from a signal handler, but that should be portable.)
00103  *
00104  * The thread suspension signal SIG_SUSPEND is now defined in gc_priv.h.
00105  * Note that we can't just stop a thread; we need it to save its stack
00106  * pointer(s) and acknowledge.
00107  */
00108 
/* Signal sent to wake threads waiting in the suspend handler.  It may */
/* be predefined by the build.  Otherwise a real-time signal is used   */
/* for the HP-UX, OSF/1 and NetBSD thread configurations, and SIGXCPU  */
/* everywhere else.  The arithmetic forms are parenthesized so that    */
/* the macro expands safely inside any larger expression.              */
#ifndef SIG_THR_RESTART
#  if defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS) || defined(GC_NETBSD_THREADS)
#    ifdef _SIGRTMIN
#      define SIG_THR_RESTART (_SIGRTMIN + 5)
#    else
#      define SIG_THR_RESTART (SIGRTMIN + 5)
#    endif
#  else
#   define SIG_THR_RESTART SIGXCPU
#  endif
#endif
00120 
00121 sem_t GC_suspend_ack_sem;
00122 
00123 #ifdef GC_NETBSD_THREADS
00124 # define GC_NETBSD_THREADS_WORKAROUND
00125   /* It seems to be necessary to wait until threads have restarted.   */
00126   /* But it is unclear why that is the case.                          */
00127   sem_t GC_restart_ack_sem;
00128 #endif
00129 
00130 void GC_suspend_handler_inner(ptr_t sig_arg);
00131 
00132 #if defined(IA64) || defined(HP_PA)
00133 extern void GC_with_callee_saves_pushed();
00134 
00135 void GC_suspend_handler(int sig)
00136 {
00137   int old_errno = errno;
00138   GC_with_callee_saves_pushed(GC_suspend_handler_inner, (ptr_t)(word)sig);
00139   errno = old_errno;
00140 }
00141 
00142 #else
00143 /* We believe that in all other cases the full context is already     */
00144 /* in the signal handler frame.                                       */
00145 void GC_suspend_handler(int sig)
00146 {
00147   int old_errno = errno;
00148   GC_suspend_handler_inner((ptr_t)(word)sig);
00149   errno = old_errno;
00150 }
00151 #endif
00152 
00153 void GC_suspend_handler_inner(ptr_t sig_arg)
00154 {
00155     int sig = (int)(word)sig_arg;
00156     int dummy;
00157     pthread_t my_thread = pthread_self();
00158     GC_thread me;
00159 #   ifdef PARALLEL_MARK
00160        word my_mark_no = GC_mark_no;
00161        /* Marker can't proceed until we acknowledge.  Thus this is    */
00162        /* guaranteed to be the mark_no correspending to our           */
00163        /* suspension, i.e. the marker can't have incremented it yet.  */
00164 #   endif
00165     word my_stop_count = GC_stop_count;
00166 
00167     if (sig != SIG_SUSPEND) ABORT("Bad signal in suspend_handler");
00168 
00169 #if DEBUG_THREADS
00170     GC_printf1("Suspending 0x%lx\n", my_thread);
00171 #endif
00172 
00173     me = GC_lookup_thread(my_thread);
00174     /* The lookup here is safe, since I'm doing this on behalf  */
00175     /* of a thread which holds the allocation lock in order    */
00176     /* to stop the world.  Thus concurrent modification of the */
00177     /* data structure is impossible.                           */
00178     if (me -> stop_info.last_stop_count == my_stop_count) {
00179        /* Duplicate signal.  OK if we are retrying.     */
00180        if (!GC_retry_signals) {
00181            WARN("Duplicate suspend signal in thread %lx\n",
00182                pthread_self());
00183        }
00184        return;
00185     }
00186 #   ifdef SPARC
00187        me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
00188 #   else
00189        me -> stop_info.stack_ptr = (ptr_t)(&dummy);
00190 #   endif
00191 #   ifdef IA64
00192        me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack();
00193 #   endif
00194 
00195     /* Tell the thread that wants to stop the world that this   */
00196     /* thread has been stopped.  Note that sem_post() is       */
00197     /* the only async-signal-safe primitive in LinuxThreads.    */
00198     sem_post(&GC_suspend_ack_sem);
00199     me -> stop_info.last_stop_count = my_stop_count;
00200 
00201     /* Wait until that thread tells us to restart by sending    */
00202     /* this thread a SIG_THR_RESTART signal.                   */
00203     /* SIG_THR_RESTART should be masked at this point.  Thus there    */
00204     /* is no race.                                      */
00205     /* We do not continue until we receive a SIG_THR_RESTART,  */
00206     /* but we do not take that as authoritative.  (We may be   */
00207     /* accidentally restarted by one of the user signals we    */
00208     /* don't block.)  After we receive the signal, we use a    */
00209     /* primitive and expensive mechanism to wait until it's    */
00210     /* really safe to proceed.  Under normal circumstances,    */
00211     /* this code should not be executed.                */
00212     sigsuspend(&suspend_handler_mask);        /* Wait for signal */
00213     while (GC_world_is_stopped && GC_stop_count == my_stop_count) {
00214         GC_brief_async_signal_safe_sleep();
00215 #       if DEBUG_THREADS
00216          GC_err_printf0("Sleeping in signal handler");
00217 #       endif
00218     }
00219     /* If the RESTART signal gets lost, we can still lose.  That should be  */
00220     /* less likely than losing the SUSPEND signal, since we don't do much   */
00221     /* between the sem_post and sigsuspend.                               */
00222     /* We'd need more handshaking to work around that.                    */
00223     /* Simply dropping the sigsuspend call should be safe, but is unlikely  */
00224     /* to be efficient.                                                   */
00225 
00226 #if DEBUG_THREADS
00227     GC_printf1("Continuing 0x%lx\n", my_thread);
00228 #endif
00229 }
00230 
00231 void GC_restart_handler(int sig)
00232 {
00233     pthread_t my_thread = pthread_self();
00234 
00235     if (sig != SIG_THR_RESTART) ABORT("Bad signal in suspend_handler");
00236 
00237 #ifdef GC_NETBSD_THREADS_WORKAROUND
00238     sem_post(&GC_restart_ack_sem);
00239 #endif
00240 
00241     /*
00242     ** Note: even if we don't do anything useful here,
00243     ** it would still be necessary to have a signal handler,
00244     ** rather than ignoring the signals, otherwise
00245     ** the signals will not be delivered at all, and
00246     ** will thus not interrupt the sigsuspend() above.
00247     */
00248 
00249 #if DEBUG_THREADS
00250     GC_printf1("In GC_restart_handler for 0x%lx\n", pthread_self());
00251 #endif
00252 }
00253 
00254 # ifdef IA64
00255 #   define IF_IA64(x) x
00256 # else
00257 #   define IF_IA64(x)
00258 # endif
00259 /* We hold allocation lock.  Should do exactly the right thing if the */
00260 /* world is stopped.  Should not fail if it isn't.                    */
00261 void GC_push_all_stacks()
00262 {
00263     GC_bool found_me = FALSE;
00264     int i;
00265     GC_thread p;
00266     ptr_t lo, hi;
00267     /* On IA64, we also need to scan the register backing store. */
00268     IF_IA64(ptr_t bs_lo; ptr_t bs_hi;)
00269     pthread_t me = pthread_self();
00270     
00271     if (!GC_thr_initialized) GC_thr_init();
00272     #if DEBUG_THREADS
00273         GC_printf1("Pushing stacks from thread 0x%lx\n", (unsigned long) me);
00274     #endif
00275     for (i = 0; i < THREAD_TABLE_SZ; i++) {
00276       for (p = GC_threads[i]; p != 0; p = p -> next) {
00277         if (p -> flags & FINISHED) continue;
00278         if (pthread_equal(p -> id, me)) {
00279 #          ifdef SPARC
00280                lo = (ptr_t)GC_save_regs_in_stack();
00281 #          else
00282                lo = GC_approx_sp();
00283 #           endif
00284            found_me = TRUE;
00285            IF_IA64(bs_hi = (ptr_t)GC_save_regs_in_stack();)
00286        } else {
00287            lo = p -> stop_info.stack_ptr;
00288            IF_IA64(bs_hi = p -> backing_store_ptr;)
00289        }
00290         if ((p -> flags & MAIN_THREAD) == 0) {
00291            hi = p -> stack_end;
00292            IF_IA64(bs_lo = p -> backing_store_end);
00293         } else {
00294             /* The original stack. */
00295             hi = GC_stackbottom;
00296            IF_IA64(bs_lo = BACKING_STORE_BASE;)
00297         }
00298         #if DEBUG_THREADS
00299             GC_printf3("Stack for thread 0x%lx = [%lx,%lx)\n",
00300                (unsigned long) p -> id,
00301               (unsigned long) lo, (unsigned long) hi);
00302         #endif
00303        if (0 == lo) ABORT("GC_push_all_stacks: sp not set!\n");
00304 #       ifdef STACK_GROWS_UP
00305          /* We got them backwards! */
00306           GC_push_all_stack(hi, lo);
00307 #       else
00308           GC_push_all_stack(lo, hi);
00309 #      endif
00310 #      ifdef IA64
00311 #         if DEBUG_THREADS
00312             GC_printf3("Reg stack for thread 0x%lx = [%lx,%lx)\n",
00313                (unsigned long) p -> id,
00314               (unsigned long) bs_lo, (unsigned long) bs_hi);
00315 #        endif
00316           if (pthread_equal(p -> id, me)) {
00317            /* FIXME:  This may add an unbounded number of entries,    */
00318            /* and hence overflow the mark stack, which is bad.        */
00319            GC_push_all_eager(bs_lo, bs_hi);
00320          } else {
00321            GC_push_all_stack(bs_lo, bs_hi);
00322          }
00323 #      endif
00324       }
00325     }
00326     if (!found_me && !GC_in_thread_creation)
00327       ABORT("Collecting from unknown thread.");
00328 }
00329 
00330 /* There seems to be a very rare thread stopping problem.  To help us  */
00331 /* debug that, we save the ids of the stopping thread. */
00332 pthread_t GC_stopping_thread;
00333 int GC_stopping_pid;
00334 
00335 /* We hold the allocation lock.  Suspend all threads that might       */
00336 /* still be running.  Return the number of suspend signals that       */
00337 /* were sent. */
00338 int GC_suspend_all()
00339 {
00340     int n_live_threads = 0;
00341     int i;
00342     GC_thread p;
00343     int result;
00344     pthread_t my_thread = pthread_self();
00345     
00346     GC_stopping_thread = my_thread;    /* debugging only.      */
00347     GC_stopping_pid = getpid();                /* debugging only.      */
00348     for (i = 0; i < THREAD_TABLE_SZ; i++) {
00349       for (p = GC_threads[i]; p != 0; p = p -> next) {
00350         if (p -> id != my_thread) {
00351             if (p -> flags & FINISHED) continue;
00352             if (p -> stop_info.last_stop_count == GC_stop_count) continue;
00353            if (p -> thread_blocked) /* Will wait */ continue;
00354             n_live_threads++;
00355            #if DEBUG_THREADS
00356              GC_printf1("Sending suspend signal to 0x%lx\n", p -> id);
00357            #endif
00358         
00359         result = pthread_kill(p -> id, SIG_SUSPEND);
00360            switch(result) {
00361                 case ESRCH:
00362                     /* Not really there anymore.  Possible? */
00363                     n_live_threads--;
00364                     break;
00365                 case 0:
00366                     break;
00367                 default:
00368                     ABORT("pthread_kill failed");
00369             }
00370         }
00371       }
00372     }
00373     return n_live_threads;
00374 }
00375 
00376 /* Caller holds allocation lock.   */
00377 void GC_stop_world()
00378 {
00379     int i;
00380     int n_live_threads;
00381     int code;
00382 
00383     #if DEBUG_THREADS
00384     GC_printf1("Stopping the world from 0x%lx\n", pthread_self());
00385     #endif
00386        
00387     /* Make sure all free list construction has stopped before we start. */
00388     /* No new construction can start, since free list construction is */
00389     /* required to acquire and release the GC lock before it starts,  */
00390     /* and we have the lock.                                          */
00391 #   ifdef PARALLEL_MARK
00392       GC_acquire_mark_lock();
00393       GC_ASSERT(GC_fl_builder_count == 0);
00394       /* We should have previously waited for it to become zero. */
00395 #   endif /* PARALLEL_MARK */
00396     ++GC_stop_count;
00397     GC_world_is_stopped = TRUE;
00398     n_live_threads = GC_suspend_all();
00399 
00400       if (GC_retry_signals) {
00401          unsigned long wait_usecs = 0;  /* Total wait since retry.    */
00402 #        define WAIT_UNIT 3000
00403 #        define RETRY_INTERVAL 100000
00404          for (;;) {
00405              int ack_count;
00406 
00407              sem_getvalue(&GC_suspend_ack_sem, &ack_count);
00408              if (ack_count == n_live_threads) break;
00409              if (wait_usecs > RETRY_INTERVAL) {
00410                 int newly_sent = GC_suspend_all();
00411 
00412 #                 ifdef CONDPRINT
00413                   if (GC_print_stats) {
00414                     GC_printf1("Resent %ld signals after timeout\n",
00415                              newly_sent);
00416                   }
00417 #                 endif
00418                 sem_getvalue(&GC_suspend_ack_sem, &ack_count);
00419                 if (newly_sent < n_live_threads - ack_count) {
00420                     WARN("Lost some threads during GC_stop_world?!\n",0);
00421                     n_live_threads = ack_count + newly_sent;
00422                 }
00423                 wait_usecs = 0;
00424              }
00425              usleep(WAIT_UNIT);
00426              wait_usecs += WAIT_UNIT;
00427          }
00428       }
00429     for (i = 0; i < n_live_threads; i++) {
00430          while (0 != (code = sem_wait(&GC_suspend_ack_sem))) {
00431              if (errno != EINTR) {
00432                 GC_err_printf1("Sem_wait returned %ld\n", (unsigned long)code);
00433                 ABORT("sem_wait for handler failed");
00434              }
00435          }
00436     }
00437 #   ifdef PARALLEL_MARK
00438       GC_release_mark_lock();
00439 #   endif
00440     #if DEBUG_THREADS
00441       GC_printf1("World stopped from 0x%lx\n", pthread_self());
00442     #endif
00443     GC_stopping_thread = 0;  /* debugging only */
00444 }
00445 
00446 /* Caller holds allocation lock, and has held it continuously since   */
00447 /* the world stopped.                                                 */
00448 void GC_start_world()
00449 {
00450     pthread_t my_thread = pthread_self();
00451     register int i;
00452     register GC_thread p;
00453     register int n_live_threads = 0;
00454     register int result;
00455 #ifdef GC_NETBSD_THREADS_WORKAROUND
00456     int code;
00457 #endif
00458 
00459 #   if DEBUG_THREADS
00460       GC_printf0("World starting\n");
00461 #   endif
00462 
00463     GC_world_is_stopped = FALSE;
00464     for (i = 0; i < THREAD_TABLE_SZ; i++) {
00465       for (p = GC_threads[i]; p != 0; p = p -> next) {
00466         if (p -> id != my_thread) {
00467             if (p -> flags & FINISHED) continue;
00468            if (p -> thread_blocked) continue;
00469             n_live_threads++;
00470            #if DEBUG_THREADS
00471              GC_printf1("Sending restart signal to 0x%lx\n", p -> id);
00472            #endif
00473             result = pthread_kill(p -> id, SIG_THR_RESTART);
00474            switch(result) {
00475                 case ESRCH:
00476                     /* Not really there anymore.  Possible? */
00477                     n_live_threads--;
00478                     break;
00479                 case 0:
00480                     break;
00481                 default:
00482                     ABORT("pthread_kill failed");
00483             }
00484         }
00485       }
00486     }
00487 #ifdef GC_NETBSD_THREADS_WORKAROUND
00488     for (i = 0; i < n_live_threads; i++)
00489        while (0 != (code = sem_wait(&GC_restart_ack_sem)))
00490            if (errno != EINTR) {
00491               GC_err_printf1("sem_wait() returned %ld\n", (unsigned long)code);
00492               ABORT("sem_wait() for restart handler failed");
00493            }
00494 #endif
00495     #if DEBUG_THREADS
00496       GC_printf0("World started\n");
00497     #endif
00498 }
00499 
00500 void GC_stop_init() {
00501     struct sigaction act;
00502     
00503     if (sem_init(&GC_suspend_ack_sem, 0, 0) != 0)
00504         ABORT("sem_init failed");
00505 #ifdef GC_NETBSD_THREADS_WORKAROUND
00506     if (sem_init(&GC_restart_ack_sem, 0, 0) != 0)
00507        ABORT("sem_init failed");
00508 #endif
00509 
00510     act.sa_flags = SA_RESTART;
00511     if (sigfillset(&act.sa_mask) != 0) {
00512        ABORT("sigfillset() failed");
00513     }
00514     GC_remove_allowed_signals(&act.sa_mask);
00515     /* SIG_THR_RESTART is set in the resulting mask.           */
00516     /* It is unmasked by the handler when necessary.           */
00517     act.sa_handler = GC_suspend_handler;
00518     if (sigaction(SIG_SUSPEND, &act, NULL) != 0) {
00519        ABORT("Cannot set SIG_SUSPEND handler");
00520     }
00521 
00522     act.sa_handler = GC_restart_handler;
00523     if (sigaction(SIG_THR_RESTART, &act, NULL) != 0) {
00524        ABORT("Cannot set SIG_THR_RESTART handler");
00525     }
00526 
00527     /* Inititialize suspend_handler_mask. It excludes SIG_THR_RESTART. */
00528       if (sigfillset(&suspend_handler_mask) != 0) ABORT("sigfillset() failed");
00529       GC_remove_allowed_signals(&suspend_handler_mask);
00530       if (sigdelset(&suspend_handler_mask, SIG_THR_RESTART) != 0)
00531          ABORT("sigdelset() failed");
00532 
00533     /* Check for GC_RETRY_SIGNALS. */
00534       if (0 != GETENV("GC_RETRY_SIGNALS")) {
00535          GC_retry_signals = TRUE;
00536       }
00537       if (0 != GETENV("GC_NO_RETRY_SIGNALS")) {
00538          GC_retry_signals = FALSE;
00539       }
00540 #     ifdef CONDPRINT
00541           if (GC_print_stats && GC_retry_signals) {
00542               GC_printf0("Will retry suspend signal if necessary.\n");
00543          }
00544 #     endif
00545 }
00546 
00547 #endif