plt-scheme 4.2.1
pthread_support.c
00001 /* 
00002  * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
00003  * Copyright (c) 1996 by Silicon Graphics.  All rights reserved.
00004  * Copyright (c) 1998 by Fergus Henderson.  All rights reserved.
00005  * Copyright (c) 2000-2004 by Hewlett-Packard Company.  All rights reserved.
00006  *
00007  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
00008  * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
00009  *
00010  * Permission is hereby granted to use or copy this program
00011  * for any purpose,  provided the above notices are retained on all copies.
00012  * Permission to modify the code and to distribute modified code is granted,
00013  * provided the above notices are retained, and a notice that the code was
00014  * modified is included with the above copyright notice.
00015  */
00016 /*
00017  * Support code for LinuxThreads, the clone()-based kernel
00018  * thread package for Linux which is included in libc6.
00019  *
00020  * This code relies on implementation details of LinuxThreads,
00021  * (i.e. properties not guaranteed by the Pthread standard),
00022  * though this version now does less of that than the other Pthreads
00023  * support code.
00024  *
00025  * Note that there is a lot of code duplication between linux_threads.c
00026  * and thread support for some of the other Posix platforms; any changes
00027  * made here may need to be reflected there too.
00028  */
00029  /* DG/UX ix86 support <takis@xfree86.org> */
00030 /*
00031  * Linux_threads.c now also includes some code to support HPUX and
00032  * OSF1 (Compaq Tru64 Unix, really).  The OSF1 support is based on Eric Benson's
00033  * patch.
00034  *
00035  * Eric also suggested an alternate basis for a lock implementation in
00036  * his code:
00037  * + #elif defined(OSF1)
00038  * +    unsigned long GC_allocate_lock = 0;
00039  * +    msemaphore GC_allocate_semaphore;
00040  * + #  define GC_TRY_LOCK() \
00041  * +    ((msem_lock(&GC_allocate_semaphore, MSEM_IF_NOWAIT) == 0) \
00042  * +     ? (GC_allocate_lock = 1) \
00043  * +     : 0)
00044  * + #  define GC_LOCK_TAKEN GC_allocate_lock
00045  */
00046 
00047 /*#define DEBUG_THREADS 1*/
00048 /*#define GC_ASSERTIONS*/
00049 
00050 # include "private/pthread_support.h"
00051 
00052 # if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
00053      && !defined(GC_WIN32_THREADS)
00054 
00055 # if defined(GC_HPUX_THREADS) && !defined(USE_PTHREAD_SPECIFIC) \
00056      && !defined(USE_COMPILER_TLS)
00057 #   ifdef __GNUC__
00058 #     define USE_PTHREAD_SPECIFIC
00059       /* Empirically, as of gcc 3.3, USE_COMPILER_TLS doesn't work.   */
00060 #   else
00061 #     define USE_COMPILER_TLS
00062 #   endif
00063 # endif
00064 
00065 # if defined USE_HPUX_TLS
00066     --> Macro replaced by USE_COMPILER_TLS
00067 # endif
00068 
00069 # if (defined(GC_DGUX386_THREADS) || defined(GC_OSF1_THREADS) || \
00070       defined(GC_DARWIN_THREADS) || defined(GC_AIX_THREADS) || \
00071       defined(GC_NETBSD_THREADS))                       \
00072       && !defined(USE_PTHREAD_SPECIFIC)
00073 #   define USE_PTHREAD_SPECIFIC
00074 # endif
00075 
00076 # if defined(GC_DGUX386_THREADS) && !defined(_POSIX4A_DRAFT10_SOURCE)
00077 #   define _POSIX4A_DRAFT10_SOURCE 1
00078 # endif
00079 
00080 # if defined(GC_DGUX386_THREADS) && !defined(_USING_POSIX4A_DRAFT10)
00081 #   define _USING_POSIX4A_DRAFT10 1
00082 # endif
00083 
00084 # ifdef THREAD_LOCAL_ALLOC
00085 #   if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_COMPILER_TLS)
00086 #     include "private/specific.h"
00087 #   endif
00088 #   if defined(USE_PTHREAD_SPECIFIC)
00089 #     define GC_getspecific pthread_getspecific
00090 #     define GC_setspecific pthread_setspecific
00091 #     define GC_key_create pthread_key_create
00092       typedef pthread_key_t GC_key_t;
00093 #   endif
00094 #   if defined(USE_COMPILER_TLS)
00095 #     define GC_getspecific(x) (x)
00096 #     define GC_setspecific(key, v) ((key) = (v), 0)
00097 #     define GC_key_create(key, d) 0
00098       typedef void * GC_key_t;
00099 #   endif
00100 # endif
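
/* Illustrative note (not compiled): the abstraction above expands as        */
/* follows.  Under USE_PTHREAD_SPECIFIC,                                     */
/*    GC_key_create(&GC_thread_key, 0) -> pthread_key_create(&GC_thread_key, 0) */
/*    GC_setspecific(GC_thread_key, p) -> pthread_setspecific(GC_thread_key, p) */
/*    GC_getspecific(GC_thread_key)    -> pthread_getspecific(GC_thread_key)    */
/* Under USE_COMPILER_TLS, GC_thread_key (declared __thread further below)   */
/* is itself the per-thread slot, so                                          */
/*    GC_key_create(&GC_thread_key, 0) -> 0                                  */
/*    GC_setspecific(GC_thread_key, p) -> (GC_thread_key = p, 0)             */
/*    GC_getspecific(GC_thread_key)    -> GC_thread_key                      */
/* where p stands for a hypothetical GC_thread value.                        */
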
00101 # include <stdlib.h>
00102 # include <pthread.h>
00103 # include <sched.h>
00104 # include <time.h>
00105 # include <errno.h>
00106 # include <unistd.h>
00107 # include <sys/mman.h>
00108 # include <sys/time.h>
00109 # include <sys/types.h>
00110 # include <sys/stat.h>
00111 # include <fcntl.h>
00112 # include <signal.h>
00113 
00114 #if defined(GC_DARWIN_THREADS)
00115 # include "private/darwin_semaphore.h"
00116 #else
00117 # include <semaphore.h>
00118 #endif /* !GC_DARWIN_THREADS */
00119 
00120 #if defined(GC_DARWIN_THREADS) || defined(GC_FREEBSD_THREADS)
00121 # include <sys/sysctl.h>
00122 #endif /* GC_DARWIN_THREADS */
00123 
00124 #if defined(GC_NETBSD_THREADS)
00125 # include <sys/param.h>
00126 # include <sys/sysctl.h>
00127 #endif /* GC_NETBSD_THREADS */
00128 
00129 #if defined(GC_DGUX386_THREADS)
00130 # include <sys/dg_sys_info.h>
00131 # include <sys/_int_psem.h>
00132   /* sem_t is an unsigned int in DG/UX */
00133   typedef unsigned int  sem_t;
00134 #endif /* GC_DGUX386_THREADS */
00135 
00136 #ifndef __GNUC__
00137 #   define __inline__
00138 #endif
00139 
00140 #ifdef GC_USE_LD_WRAP
00141 #   define WRAP_FUNC(f) __wrap_##f
00142 #   define REAL_FUNC(f) __real_##f
00143 #else
00144 #   define WRAP_FUNC(f) GC_##f
00145 #   if !defined(GC_DGUX386_THREADS)
00146 #     define REAL_FUNC(f) f
00147 #   else /* GC_DGUX386_THREADS */
00148 #     define REAL_FUNC(f) __d10_##f
00149 #   endif /* GC_DGUX386_THREADS */
00150 #   undef pthread_create
00151 #   if !defined(GC_DARWIN_THREADS)
00152 #     undef pthread_sigmask
00153 #   endif
00154 #   undef pthread_join
00155 #   undef pthread_detach
00156 #   if defined(GC_OSF1_THREADS) && defined(_PTHREAD_USE_MANGLED_NAMES_) \
00157        && !defined(_PTHREAD_USE_PTDNAM_)
00158 /* Restore the original mangled names on Tru64 UNIX.  */
00159 #     define pthread_create __pthread_create
00160 #     define pthread_join __pthread_join
00161 #     define pthread_detach __pthread_detach
00162 #   endif
00163 #endif
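
/* Illustrative note (not compiled): with GC_USE_LD_WRAP, a definition such  */
/* as                                                                        */
/*      int WRAP_FUNC(pthread_create)(...)   i.e.  int __wrap_pthread_create(...) */
/* together with calls through REAL_FUNC(pthread_create), i.e.               */
/* __real_pthread_create, is combined with the GNU ld option                 */
/*      -Wl,--wrap,pthread_create     (and likewise for the other wrapped    */
/*      functions)                                                           */
/* so that the client's ordinary pthread_create calls are routed to the      */
/* wrapper, while __real_pthread_create still reaches the real library       */
/* routine.  Without GC_USE_LD_WRAP the wrapper is named GC_pthread_create   */
/* and clients normally reach it through macro redefinitions in the          */
/* collector's gc.h headers.                                                 */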
00164 
00165 void GC_thr_init();
00166 
00167 static GC_bool parallel_initialized = FALSE;
00168 
00169 void GC_init_parallel();
00170 
00171 # if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
00172 
00173 /* We don't really support thread-local allocation with DBG_HDRS_ALL */
00174 
00175 #ifdef USE_COMPILER_TLS
00176   __thread
00177 #endif
00178 GC_key_t GC_thread_key;
00179 
00180 static GC_bool keys_initialized;
00181 
00182 /* Recover the contents of the freelist array fl into the global one gfl.*/
00183 /* Note that the indexing scheme differs, in that gfl has finer size  */
00184 /* resolution, even if not all entries are used.               */
00185 /* We hold the allocator lock.                                        */
00186 static void return_freelists(ptr_t *fl, ptr_t *gfl)
00187 {
00188     int i;
00189     ptr_t q, *qptr;
00190     size_t nwords;
00191 
00192     for (i = 1; i < NFREELISTS; ++i) {
00193        nwords = i * (GRANULARITY/sizeof(word));
00194         qptr = fl + i;      
00195        q = *qptr;
00196        if ((word)q >= HBLKSIZE) {
00197          if (gfl[nwords] == 0) {
00198            gfl[nwords] = q;
00199          } else {
00200            /* Concatenate: */
00201            for (; (word)q >= HBLKSIZE; qptr = &(obj_link(q)), q = *qptr);
00202            GC_ASSERT(0 == q);
00203            *qptr = gfl[nwords];
00204            gfl[nwords] = fl[i];
00205          }
00206        }
00207        /* Clear fl[i], since the thread structure may hang around.    */
00208        /* Do it in a way that is likely to trap if we access it.      */
00209        fl[i] = (ptr_t)HBLKSIZE;
00210     }
00211 }
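
/* Worked example of the index translation above (the constants are          */
/* configuration dependent; 16-byte granules and 4-byte words are assumed    */
/* only for illustration): fl[i] holds objects of i granules, i.e. 16*i      */
/* bytes = 4*i words, so fl[3] (48-byte objects) is appended to gfl[12],     */
/* and the finer-grained gfl entries in between may simply remain empty.     */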
00212 
00213 /* We statically allocate a single "size 0" object. It is linked to   */
00214 /* itself, and is thus repeatedly reused for all size 0 allocation    */
00215 /* requests.  (Size 0 gcj allocation requests are incorrect, and      */
00216 /* we arrange for those to fault asap.)                               */
00217 static ptr_t size_zero_object = (ptr_t)(&size_zero_object);
00218 
00219 /* Each thread structure must be initialized.    */
00220 /* This call must be made from the new thread.   */
00221 /* Caller holds allocation lock.          */
00222 void GC_init_thread_local(GC_thread p)
00223 {
00224     int i;
00225 
00226     if (!keys_initialized) {
00227        if (0 != GC_key_create(&GC_thread_key, 0)) {
00228            ABORT("Failed to create key for local allocator");
00229         }
00230        keys_initialized = TRUE;
00231     }
00232     if (0 != GC_setspecific(GC_thread_key, p)) {
00233        ABORT("Failed to set thread specific allocation pointers");
00234     }
00235     for (i = 1; i < NFREELISTS; ++i) {
00236        p -> ptrfree_freelists[i] = (ptr_t)1;
00237        p -> normal_freelists[i] = (ptr_t)1;
00238 #      ifdef GC_GCJ_SUPPORT
00239          p -> gcj_freelists[i] = (ptr_t)1;
00240 #      endif
00241     }   
00242     /* Set up the size 0 free lists.      */
00243     p -> ptrfree_freelists[0] = (ptr_t)(&size_zero_object);
00244     p -> normal_freelists[0] = (ptr_t)(&size_zero_object);
00245 #   ifdef GC_GCJ_SUPPORT
00246         p -> gcj_freelists[0] = (ptr_t)(-1);
00247 #   endif
00248 }
00249 
00250 #ifdef GC_GCJ_SUPPORT
00251   extern ptr_t * GC_gcjobjfreelist;
00252 #endif
00253 
00254 /* We hold the allocator lock.     */
00255 void GC_destroy_thread_local(GC_thread p)
00256 {
00257     /* We currently only do this from the thread itself or from       */
00258     /* the fork handler for a child process.                   */
00259 #   ifndef HANDLE_FORK
00260       GC_ASSERT(GC_getspecific(GC_thread_key) == (void *)p);
00261 #   endif
00262     return_freelists(p -> ptrfree_freelists, GC_aobjfreelist);
00263     return_freelists(p -> normal_freelists, GC_objfreelist);
00264 #   ifdef GC_GCJ_SUPPORT
00265        return_freelists(p -> gcj_freelists, GC_gcjobjfreelist);
00266 #   endif
00267 }
00268 
00269 extern GC_PTR GC_generic_malloc_many();
00270 
00271 GC_PTR GC_local_malloc(size_t bytes)
00272 {
00273     if (EXPECT(!SMALL_ENOUGH(bytes),0)) {
00274         return(GC_malloc(bytes));
00275     } else {
00276        int index = INDEX_FROM_BYTES(bytes);
00277        ptr_t * my_fl;
00278        ptr_t my_entry;
00279 #      if defined(REDIRECT_MALLOC) && !defined(USE_PTHREAD_SPECIFIC)
00280        GC_key_t k = GC_thread_key;
00281 #      endif
00282        void * tsd;
00283 
00284 #      if defined(REDIRECT_MALLOC) && !defined(USE_PTHREAD_SPECIFIC)
00285            if (EXPECT(0 == k, 0)) {
00286               /* This can happen if we get called when the world is   */
00287               /* being initialized.  Whether we can actually complete */
00288               /* the initialization then is unclear.                  */
00289               GC_init_parallel();
00290               k = GC_thread_key;
00291            }
00292 #      endif
00293        tsd = GC_getspecific(GC_thread_key);
00294 #      ifdef GC_ASSERTIONS
00295          LOCK();
00296          GC_ASSERT(tsd == (void *)GC_lookup_thread(pthread_self()));
00297          UNLOCK();
00298 #      endif
00299        my_fl = ((GC_thread)tsd) -> normal_freelists + index;
00300        my_entry = *my_fl;
00301        if (EXPECT((word)my_entry >= HBLKSIZE, 1)) {
00302            ptr_t next = obj_link(my_entry);
00303            GC_PTR result = (GC_PTR)my_entry;
00304            *my_fl = next;
00305            obj_link(my_entry) = 0;
00306            PREFETCH_FOR_WRITE(next);
00307            return result;
00308        } else if ((word)my_entry - 1 < DIRECT_GRANULES) {
00309            *my_fl = my_entry + index + 1;
00310             return GC_malloc(bytes);
00311        } else {
00312            GC_generic_malloc_many(BYTES_FROM_INDEX(index), NORMAL, my_fl);
00313            if (*my_fl == 0) return GC_oom_fn(bytes);
00314            return GC_local_malloc(bytes);
00315        }
00316     }
00317 }
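
/* Sketch of the free-list entry encoding relied on above (an interpretation */
/* of the code, for illustration):                                           */
/*   1 .. DIRECT_GRANULES            -- a small counter: the first few       */
/*                                      requests of this size go straight to */
/*                                      GC_malloc, bumping the counter.      */
/*   DIRECT_GRANULES+1 .. HBLKSIZE-1 -- the counter is exhausted; a real     */
/*                                      local free list is built with        */
/*                                      GC_generic_malloc_many.              */
/*   >= HBLKSIZE                     -- a genuine pointer to a chain of      */
/*                                      objects linked through obj_link().   */
/*                                      (The teardown value HBLKSIZE set by  */
/*                                      return_freelists falls here and      */
/*                                      traps on first use, by design.)      */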
00318 
00319 GC_PTR GC_local_malloc_atomic(size_t bytes)
00320 {
00321     if (EXPECT(!SMALL_ENOUGH(bytes), 0)) {
00322         return(GC_malloc_atomic(bytes));
00323     } else {
00324        int index = INDEX_FROM_BYTES(bytes);
00325        ptr_t * my_fl = ((GC_thread)GC_getspecific(GC_thread_key))
00326                       -> ptrfree_freelists + index;
00327        ptr_t my_entry = *my_fl;
00328     
00329        if (EXPECT((word)my_entry >= HBLKSIZE, 1)) {
00330            GC_PTR result = (GC_PTR)my_entry;
00331            *my_fl = obj_link(my_entry);
00332            return result;
00333        } else if ((word)my_entry - 1 < DIRECT_GRANULES) {
00334            *my_fl = my_entry + index + 1;
00335         return GC_malloc_atomic(bytes);
00336        } else {
00337            GC_generic_malloc_many(BYTES_FROM_INDEX(index), PTRFREE, my_fl);
00338            /* *my_fl is updated while the collector is excluded;      */
00339            /* the free list is always visible to the collector as     */
00340            /* such.                                            */
00341            if (*my_fl == 0) return GC_oom_fn(bytes);
00342            return GC_local_malloc_atomic(bytes);
00343        }
00344     }
00345 }
00346 
00347 #ifdef GC_GCJ_SUPPORT
00348 
00349 #include "include/gc_gcj.h"
00350 
00351 #ifdef GC_ASSERTIONS
00352   extern GC_bool GC_gcj_malloc_initialized;
00353 #endif
00354 
00355 extern int GC_gcj_kind;
00356 
00357 GC_PTR GC_local_gcj_malloc(size_t bytes,
00358                         void * ptr_to_struct_containing_descr)
00359 {
00360     GC_ASSERT(GC_gcj_malloc_initialized);
00361     if (EXPECT(!SMALL_ENOUGH(bytes), 0)) {
00362         return GC_gcj_malloc(bytes, ptr_to_struct_containing_descr);
00363     } else {
00364        int index = INDEX_FROM_BYTES(bytes);
00365        ptr_t * my_fl = ((GC_thread)GC_getspecific(GC_thread_key))
00366                        -> gcj_freelists + index;
00367        ptr_t my_entry = *my_fl;
00368        if (EXPECT((word)my_entry >= HBLKSIZE, 1)) {
00369            GC_PTR result = (GC_PTR)my_entry;
00370            GC_ASSERT(!GC_incremental);
00371            /* We assert that any concurrent marker will stop us.      */
00372            /* Thus it is impossible for a mark procedure to see the   */
00373            /* allocation of the next object, but to see this object   */
00374            /* still containing a free list pointer.  Otherwise the    */
00375            /* marker might find a random "mark descriptor".           */
00376            *(volatile ptr_t *)my_fl = obj_link(my_entry);
00377            /* We must update the freelist before we store the pointer.       */
00378            /* Otherwise a GC at this point would see a corrupted      */
00379            /* free list.                                       */
00380            /* A memory barrier is probably never needed, since the    */
00381            /* action of stopping this thread will cause prior writes  */
00382            /* to complete.                                     */
00383            GC_ASSERT(((void * volatile *)result)[1] == 0); 
00384            *(void * volatile *)result = ptr_to_struct_containing_descr; 
00385            return result;
00386        } else if ((word)my_entry - 1 < DIRECT_GRANULES) {
00387            if (!GC_incremental) *my_fl = my_entry + index + 1;
00388               /* In the incremental case, we always have to take this */
00389               /* path.  Thus we leave the counter alone.              */
00390             return GC_gcj_malloc(bytes, ptr_to_struct_containing_descr);
00391        } else {
00392            GC_generic_malloc_many(BYTES_FROM_INDEX(index), GC_gcj_kind, my_fl);
00393            if (*my_fl == 0) return GC_oom_fn(bytes);
00394            return GC_local_gcj_malloc(bytes, ptr_to_struct_containing_descr);
00395        }
00396     }
00397 }
00398 
00399 #endif /* GC_GCJ_SUPPORT */
00400 
00401 # else  /* !THREAD_LOCAL_ALLOC  && !DBG_HDRS_ALL */
00402 
00403 #   define GC_destroy_thread_local(t)
00404 
00405 # endif /* !THREAD_LOCAL_ALLOC */
00406 
00407 #if 0
00408 /*
00409 To make sure that we're using LinuxThreads and not some other thread
00410 package, we generate a dummy reference to `pthread_kill_other_threads_np'
00411 (was `__pthread_initial_thread_bos' but that disappeared),
00412 which is a symbol defined in LinuxThreads, but (hopefully) not in other
00413 thread packages.
00414 
00415 We no longer do this, since this code is now portable enough that it might
00416 actually work for something else.
00417 */
00418 void (*dummy_var_to_force_linux_threads)() = pthread_kill_other_threads_np;
00419 #endif /* 0 */
00420 
00421 long GC_nprocs = 1;  /* Number of processors.  We may not have */
00422                      /* access to all of them, but this is as good    */
00423                      /* a guess as any ...                            */
00424 
00425 #ifdef PARALLEL_MARK
00426 
00427 # ifndef MAX_MARKERS
00428 #   define MAX_MARKERS 16
00429 # endif
00430 
00431 static ptr_t marker_sp[MAX_MARKERS] = {0};
00432 
00433 void * GC_mark_thread(void * id)
00434 {
00435   word my_mark_no = 0;
00436 
00437   marker_sp[(word)id] = GC_approx_sp();
00438   for (;; ++my_mark_no) {
00439     /* GC_mark_no is passed only to allow GC_help_marker to terminate */
00440     /* promptly.  This would be important if it were called from the signal */
00441     /* handler or from the GC lock acquisition code.  Under Linux, it's      */
00442     /* not safe to call it from a signal handler, since it uses mutexes      */
00443     /* and condition variables.  Since it is called only here, the    */
00444     /* argument is unnecessary.                                       */
00445     if (my_mark_no < GC_mark_no || my_mark_no > GC_mark_no + 2) {
00446        /* resynchronize if we get far off, e.g. because GC_mark_no    */
00447        /* wrapped.                                             */
00448        my_mark_no = GC_mark_no;
00449     }
00450 #   ifdef DEBUG_THREADS
00451        GC_printf1("Starting mark helper for mark number %ld\n", my_mark_no);
00452 #   endif
00453     GC_help_marker(my_mark_no);
00454   }
00455 }
00456 
00457 extern long GC_markers;            /* Number of mark threads we would */
00458                             /* like to have.  Includes the            */
00459                             /* initiating thread.                     */
00460 
00461 pthread_t GC_mark_threads[MAX_MARKERS];
00462 
00463 #define PTHREAD_CREATE REAL_FUNC(pthread_create)
00464 
00465 static void start_mark_threads()
00466 {
00467     unsigned i;
00468     pthread_attr_t attr;
00469 
00470     if (GC_markers > MAX_MARKERS) {
00471        WARN("Limiting number of mark threads\n", 0);
00472        GC_markers = MAX_MARKERS;
00473     }
00474     if (0 != pthread_attr_init(&attr)) ABORT("pthread_attr_init failed");
00475        
00476     if (0 != pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED))
00477        ABORT("pthread_attr_setdetachstate failed");
00478 
00479 #   if defined(HPUX) || defined(GC_DGUX386_THREADS)
00480       /* Default stack size is usually too small: fix it. */
00481       /* Otherwise marker threads or GC may run out of    */
00482       /* space.                                           */
00483 #     define MIN_STACK_SIZE (8*HBLKSIZE*sizeof(word))
00484       {
00485        size_t old_size;
00486        int code;
00487 
00488         if (pthread_attr_getstacksize(&attr, &old_size) != 0)
00489          ABORT("pthread_attr_getstacksize failed\n");
00490        if (old_size < MIN_STACK_SIZE) {
00491          if (pthread_attr_setstacksize(&attr, MIN_STACK_SIZE) != 0)
00492                 ABORT("pthread_attr_setstacksize failed\n");
00493        }
00494       }
00495 #   endif /* HPUX || GC_DGUX386_THREADS */
00496 #   ifdef CONDPRINT
00497       if (GC_print_stats) {
00498        GC_printf1("Starting %ld marker threads\n", GC_markers - 1);
00499       }
00500 #   endif
00501     for (i = 0; i < GC_markers - 1; ++i) {
00502       if (0 != PTHREAD_CREATE(GC_mark_threads + i, &attr,
00503                            GC_mark_thread, (void *)(word)i)) {
00504        WARN("Marker thread creation failed, errno = %ld.\n", errno);
00505       }
00506     }
00507 }
00508 
00509 #else  /* !PARALLEL_MARK */
00510 
00511 static __inline__ void start_mark_threads()
00512 {
00513 }
00514 
00515 #endif /* !PARALLEL_MARK */
00516 
00517 GC_bool GC_thr_initialized = FALSE;
00518 
00519 volatile GC_thread GC_threads[THREAD_TABLE_SZ];
00520 
00521 void GC_push_thread_structures GC_PROTO((void))
00522 {
00523     GC_push_all((ptr_t)(GC_threads), (ptr_t)(GC_threads)+sizeof(GC_threads));
00524 #   if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
00525       GC_push_all((ptr_t)(&GC_thread_key),
00526          (ptr_t)(&GC_thread_key)+sizeof(&GC_thread_key));
00527 #   endif
00528 }
00529 
00530 #ifdef THREAD_LOCAL_ALLOC
00531 /* We must explicitly mark ptrfree and gcj free lists, since the free        */
00532 /* list links wouldn't otherwise be found.  We also set them in the   */
00533 /* normal free lists, since that involves touching less memory than if       */
00534 /* we scanned them normally.                                          */
00535 void GC_mark_thread_local_free_lists(void)
00536 {
00537     int i, j;
00538     GC_thread p;
00539     ptr_t q;
00540     
00541     for (i = 0; i < THREAD_TABLE_SZ; ++i) {
00542       for (p = GC_threads[i]; 0 != p; p = p -> next) {
00543        for (j = 1; j < NFREELISTS; ++j) {
00544          q = p -> ptrfree_freelists[j];
00545          if ((word)q > HBLKSIZE) GC_set_fl_marks(q);
00546          q = p -> normal_freelists[j];
00547          if ((word)q > HBLKSIZE) GC_set_fl_marks(q);
00548 #        ifdef GC_GCJ_SUPPORT
00549            q = p -> gcj_freelists[j];
00550            if ((word)q > HBLKSIZE) GC_set_fl_marks(q);
00551 #        endif /* GC_GCJ_SUPPORT */
00552        }
00553       }
00554     }
00555 }
00556 #endif /* THREAD_LOCAL_ALLOC */
00557 
00558 static struct GC_Thread_Rep first_thread;
00559 
00560 /* Add a thread to GC_threads.  We assume it wasn't already there.    */
00561 /* Caller holds allocation lock.                               */
00562 GC_thread GC_new_thread(pthread_t id)
00563 {
00564     int hv = ((word)id) % THREAD_TABLE_SZ;
00565     GC_thread result;
00566     static GC_bool first_thread_used = FALSE;
00567     
00568     if (!first_thread_used) {
00569        result = &first_thread;
00570        first_thread_used = TRUE;
00571     } else {
00572         result = (struct GC_Thread_Rep *)
00573                GC_INTERNAL_MALLOC(sizeof(struct GC_Thread_Rep), NORMAL);
00574     }
00575     if (result == 0) return(0);
00576     result -> id = id;
00577     result -> next = GC_threads[hv];
00578     GC_threads[hv] = result;
00579     GC_ASSERT(result -> flags == 0 && result -> thread_blocked == 0);
00580     return(result);
00581 }
00582 
00583 /* Delete a thread from GC_threads.  We assume it is there.    */
00584 /* (The code intentionally traps if it wasn't.)                */
00585 /* Caller holds allocation lock.                        */
00586 void GC_delete_thread(pthread_t id)
00587 {
00588     int hv = ((word)id) % THREAD_TABLE_SZ;
00589     register GC_thread p = GC_threads[hv];
00590     register GC_thread prev = 0;
00591     
00592     while (!pthread_equal(p -> id, id)) {
00593         prev = p;
00594         p = p -> next;
00595     }
00596     if (prev == 0) {
00597         GC_threads[hv] = p -> next;
00598     } else {
00599         prev -> next = p -> next;
00600     }
00601     GC_INTERNAL_FREE(p);
00602 }
00603 
00604 /* If a thread has been joined, but we have not yet            */
00605 /* been notified, then there may be more than one thread       */
00606 /* in the table with the same pthread id.               */
00607 /* This is OK, but we need a way to delete a specific one.     */
00608 void GC_delete_gc_thread(pthread_t id, GC_thread gc_id)
00609 {
00610     int hv = ((word)id) % THREAD_TABLE_SZ;
00611     register GC_thread p = GC_threads[hv];
00612     register GC_thread prev = 0;
00613 
00614     while (p != gc_id) {
00615         prev = p;
00616         p = p -> next;
00617     }
00618     if (prev == 0) {
00619         GC_threads[hv] = p -> next;
00620     } else {
00621         prev -> next = p -> next;
00622     }
00623     GC_INTERNAL_FREE(p);
00624 }
00625 
00626 /* Return a GC_thread corresponding to a given pthread_t.      */
00627 /* Returns 0 if it's not there.                                */
00628 /* Caller holds  allocation lock or otherwise inhibits         */
00629 /* updates.                                             */
00630 /* If there is more than one thread with the given id we       */
00631 /* return the most recent one.                                 */
00632 GC_thread GC_lookup_thread(pthread_t id)
00633 {
00634     int hv = ((word)id) % THREAD_TABLE_SZ;
00635     register GC_thread p = GC_threads[hv];
00636     
00637     while (p != 0 && !pthread_equal(p -> id, id)) p = p -> next;
00638     return(p);
00639 }
00640 
00641 #ifdef HANDLE_FORK
00642 /* Remove all entries from the GC_threads table, except the    */
00643 /* one for the current thread.  We need to do this in the child       */
00644 /* process after a fork(), since only the current thread       */
00645 /* survives in the child.                               */
00646 void GC_remove_all_threads_but_me(void)
00647 {
00648     pthread_t self = pthread_self();
00649     int hv;
00650     GC_thread p, next, me;
00651 
00652     for (hv = 0; hv < THREAD_TABLE_SZ; ++hv) {
00653       me = 0;
00654       for (p = GC_threads[hv]; 0 != p; p = next) {
00655        next = p -> next;
00656        if (p -> id == self) {
00657          me = p;
00658          p -> next = 0;
00659        } else {
00660 #        ifdef THREAD_LOCAL_ALLOC
00661            if (!(p -> flags & FINISHED)) {
00662              GC_destroy_thread_local(p);
00663            }
00664 #        endif /* THREAD_LOCAL_ALLOC */
00665          if (p != &first_thread) GC_INTERNAL_FREE(p);
00666        }
00667       }
00668       GC_threads[hv] = me;
00669     }
00670 }
00671 #endif /* HANDLE_FORK */
00672 
00673 #ifdef USE_PROC_FOR_LIBRARIES
00674 int GC_segment_is_thread_stack(ptr_t lo, ptr_t hi)
00675 {
00676     int i;
00677     GC_thread p;
00678     
00679 #   ifdef PARALLEL_MARK
00680       for (i = 0; i < GC_markers; ++i) {
00681        if (marker_sp[i] > lo && marker_sp[i] < hi) return 1;
00682       }
00683 #   endif
00684     for (i = 0; i < THREAD_TABLE_SZ; i++) {
00685       for (p = GC_threads[i]; p != 0; p = p -> next) {
00686        if (0 != p -> stack_end) {
00687 #        ifdef STACK_GROWS_UP
00688             if (p -> stack_end >= lo && p -> stack_end < hi) return 1;
00689 #        else /* STACK_GROWS_DOWN */
00690             if (p -> stack_end > lo && p -> stack_end <= hi) return 1;
00691 #        endif
00692        }
00693       }
00694     }
00695     return 0;
00696 }
00697 #endif /* USE_PROC_FOR_LIBRARIES */
00698 
00699 #ifdef GC_LINUX_THREADS
00700 /* Return the number of processors, or a value <= 0 if it cannot be determined. */
00701 int GC_get_nprocs()
00702 {
00703     /* Should be "return sysconf(_SC_NPROCESSORS_ONLN);" but that     */
00704     /* appears to be buggy in many cases.                      */
00705     /* We look for lines "cpu<n>" in /proc/stat.               */
00706 #   define STAT_BUF_SIZE 4096
00707 #   define STAT_READ read
00708        /* If read is wrapped, this may need to be redefined to call   */
00709        /* the real one.                                        */
00710     char stat_buf[STAT_BUF_SIZE];
00711     int f;
00712     word result = 1;
00713        /* Some old kernels only have a single "cpu nnnn ..."   */
00714        /* entry in /proc/stat.  We identify those as           */
00715        /* uniprocessors.                                */
00716     size_t i, len = 0;
00717 
00718     f = open("/proc/stat", O_RDONLY);
00719     if (f < 0 || (len = STAT_READ(f, stat_buf, STAT_BUF_SIZE)) < 100) {
00720        WARN("Couldn't read /proc/stat\n", 0);
00721        return -1;
00722     }
00723     for (i = 0; i < len - 100; ++i) {
00724         if (stat_buf[i] == '\n' && stat_buf[i+1] == 'c'
00725            && stat_buf[i+2] == 'p' && stat_buf[i+3] == 'u') {
00726            int cpu_no = atoi(stat_buf + i + 4);
00727            if (cpu_no >= result) result = cpu_no + 1;
00728        }
00729     }
00730     close(f);
00731     return result;
00732 }
00733 #endif /* GC_LINUX_THREADS */
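
/* For reference, /proc/stat on an SMP Linux system begins roughly like      */
/* (counts abbreviated):                                                      */
/*      cpu  84282 0 9412 2293621 ...                                         */
/*      cpu0 42137 0 4780 1146785 ...                                         */
/*      cpu1 42145 0 4632 1146836 ...                                         */
/* The scan above only matches lines preceded by a newline, so the leading    */
/* aggregate "cpu " line (the first line of the file) is never counted; the   */
/* per-CPU "cpu<n>" lines yield the largest <n> + 1, and a kernel that emits  */
/* no per-CPU lines is reported as a uniprocessor.                            */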
00734 
00735 /* We hold the GC lock.  Wait until an in-progress GC has finished.   */
00736 /* Repeatedly RELEASES GC LOCK in order to wait.               */
00737 /* If wait_for_all is true, then we exit with the GC lock held and no */
00738 /* collection in progress; otherwise we just wait for the current GC  */
00739 /* to finish.                                                  */
00740 extern GC_bool GC_collection_in_progress();
00741 void GC_wait_for_gc_completion(GC_bool wait_for_all)
00742 {
00743     if (GC_incremental && GC_collection_in_progress()) {
00744        int old_gc_no = GC_gc_no;
00745 
00746        /* Make sure that no part of our stack is still on the mark stack, */
00747        /* since it's about to be unmapped.                               */
00748        while (GC_incremental && GC_collection_in_progress()
00749               && (wait_for_all || old_gc_no == GC_gc_no)) {
00750            ENTER_GC();
00751            GC_in_thread_creation = TRUE;
00752             GC_collect_a_little_inner(1);
00753            GC_in_thread_creation = FALSE;
00754            EXIT_GC();
00755            UNLOCK();
00756            sched_yield();
00757            LOCK();
00758        }
00759     }
00760 }
00761 
00762 #ifdef HANDLE_FORK
00763 /* Procedures called before and after a fork.  The goal here is to make */
00764 /* it safe to call GC_malloc() in a forked child.  It's unclear that is      */
00765 /* attainable, since the single UNIX spec seems to imply that one     */
00766 /* should only call async-signal-safe functions, and we probably can't       */
00767 /* quite guarantee that.  But we give it our best shot.  (That same   */
00768 /* spec also implies that it's not safe to call the system malloc     */
00769 /* between fork() and exec().  Thus we're doing no worse than it.)    */
00770 
00771 /* Called before a fork()          */
00772 void GC_fork_prepare_proc(void)
00773 {
00774     /* Acquire all relevant locks, so that after releasing the locks  */
00775     /* the child will see a consistent state in which monitor         */
00776     /* invariants hold.      Unfortunately, we can't acquire libc locks      */
00777     /* we might need, and there seems to be no guarantee that libc    */
00778     /* must install a suitable fork handler.                          */
00779     /* Wait for an ongoing GC to finish, since we can't finish it in  */
00780     /* the (one remaining thread in) the child.                       */
00781       LOCK();
00782 #     if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
00783         GC_wait_for_reclaim();
00784 #     endif
00785       GC_wait_for_gc_completion(TRUE);
00786 #     if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
00787         GC_acquire_mark_lock();
00788 #     endif
00789 }
00790 
00791 /* Called in parent after a fork() */
00792 void GC_fork_parent_proc(void)
00793 {
00794 #   if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
00795       GC_release_mark_lock();
00796 #   endif
00797     UNLOCK();
00798 }
00799 
00800 /* Called in child after a fork()  */
00801 void GC_fork_child_proc(void)
00802 {
00803     /* Clean up the thread table, so that just our thread is left. */
00804 #   if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
00805       GC_release_mark_lock();
00806 #   endif
00807     GC_remove_all_threads_but_me();
00808 #   ifdef PARALLEL_MARK
00809       /* Turn off parallel marking in the child, since we are probably       */
00810       /* just going to exec, and we would have to restart mark threads.      */
00811         GC_markers = 1;
00812         GC_parallel = FALSE;
00813 #   endif /* PARALLEL_MARK */
00814     UNLOCK();
00815 }
00816 #endif /* HANDLE_FORK */
00817 
00818 #if defined(GC_DGUX386_THREADS)
00819 /* Return the number of processors, or a value <= 0 if it cannot be determined. */
00820 int GC_get_nprocs()
00821 {
00822     /* <takis@XFree86.Org> */
00823     int numCpus;
00824     struct dg_sys_info_pm_info pm_sysinfo;
00825     int status =0;
00826 
00827     status = dg_sys_info((long int *) &pm_sysinfo,
00828        DG_SYS_INFO_PM_INFO_TYPE, DG_SYS_INFO_PM_CURRENT_VERSION);
00829     if (status < 0)
00830        /* set -1 for error */
00831        numCpus = -1;
00832     else
00833       /* Active CPUs */
00834       numCpus = pm_sysinfo.idle_vp_count;
00835 
00836 #  ifdef DEBUG_THREADS
00837     GC_printf1("Number of active CPUs in this system: %d\n", numCpus);
00838 #  endif
00839     return(numCpus);
00840 }
00841 #endif /* GC_DGUX386_THREADS */
00842 
00843 #if defined(GC_NETBSD_THREADS)
00844 static int get_ncpu(void)
00845 {
00846     int mib[] = {CTL_HW,HW_NCPU};
00847     int res;
00848     size_t len = sizeof(res);
00849 
00850     sysctl(mib, sizeof(mib)/sizeof(int), &res, &len, NULL, 0);
00851     return res;
00852 }
00853 #endif /* GC_NETBSD_THREADS */
00854 
00855 /* We hold the allocation lock.    */
00856 void GC_thr_init()
00857 {
00858 #   ifndef GC_DARWIN_THREADS
00859       int dummy;
00860 #   endif
00861     GC_thread t;
00862 
00863     if (GC_thr_initialized) return;
00864     GC_thr_initialized = TRUE;
00865     
00866 #   ifdef HANDLE_FORK
00867       /* Prepare for a possible fork.     */
00868         pthread_atfork(GC_fork_prepare_proc, GC_fork_parent_proc,
00869                      GC_fork_child_proc);
00870 #   endif /* HANDLE_FORK */
00871     /* Add the initial thread, so we can stop it.       */
00872       t = GC_new_thread(pthread_self());
00873 #     ifdef GC_DARWIN_THREADS
00874          t -> stop_info.mach_thread = mach_thread_self();
00875 #     else
00876          t -> stop_info.stack_ptr = (ptr_t)(&dummy);
00877 #     endif
00878       t -> flags = DETACHED | MAIN_THREAD;
00879 
00880     GC_stop_init();
00881 
00882     /* Set GC_nprocs.  */
00883       {
00884        char * nprocs_string = GETENV("GC_NPROCS");
00885        GC_nprocs = -1;
00886        if (nprocs_string != NULL) GC_nprocs = atoi(nprocs_string);
00887       }
00888       if (GC_nprocs <= 0) {
00889 #       if defined(GC_HPUX_THREADS)
00890          GC_nprocs = pthread_num_processors_np();
00891 #       endif
00892 #      if defined(GC_OSF1_THREADS) || defined(GC_AIX_THREADS)
00893          GC_nprocs = sysconf(_SC_NPROCESSORS_ONLN);
00894          if (GC_nprocs <= 0) GC_nprocs = 1;
00895 #      endif
00896 #       if defined(GC_IRIX_THREADS)
00897          GC_nprocs = sysconf(_SC_NPROC_ONLN);
00898          if (GC_nprocs <= 0) GC_nprocs = 1;
00899 #       endif
00900 #       if defined(GC_NETBSD_THREADS)
00901          GC_nprocs = get_ncpu();
00902 #       endif
00903 #       if defined(GC_DARWIN_THREADS) || defined(GC_FREEBSD_THREADS)
00904          int ncpus = 1;
00905          size_t len = sizeof(ncpus);
00906          sysctl((int[2]) {CTL_HW, HW_NCPU}, 2, &ncpus, &len, NULL, 0);
00907          GC_nprocs = ncpus;
00908 #       endif
00909 #      if defined(GC_LINUX_THREADS) || defined(GC_DGUX386_THREADS)
00910           GC_nprocs = GC_get_nprocs();
00911 #      endif
00912       }
00913       if (GC_nprocs <= 0) {
00914        WARN("GC_get_nprocs() returned %ld\n", GC_nprocs);
00915        GC_nprocs = 2;
00916 #      ifdef PARALLEL_MARK
00917          GC_markers = 1;
00918 #      endif
00919       } else {
00920 #      ifdef PARALLEL_MARK
00921           {
00922            char * markers_string = GETENV("GC_MARKERS");
00923            if (markers_string != NULL) {
00924              GC_markers = atoi(markers_string);
00925            } else {
00926              GC_markers = GC_nprocs;
00927            }
00928           }
00929 #      endif
00930       }
00931 #   ifdef PARALLEL_MARK
00932 #     ifdef CONDPRINT
00933         if (GC_print_stats) {
00934           GC_printf2("Number of processors = %ld, "
00935                "number of marker threads = %ld\n", GC_nprocs, GC_markers);
00936        }
00937 #     endif
00938       if (GC_markers == 1) {
00939        GC_parallel = FALSE;
00940 #      ifdef CONDPRINT
00941          if (GC_print_stats) {
00942            GC_printf0("Single marker thread, turning off parallel marking\n");
00943          }
00944 #      endif
00945       } else {
00946        GC_parallel = TRUE;
00947        /* Disable true incremental collection, but generational is OK.       */
00948        GC_time_limit = GC_TIME_UNLIMITED;
00949       }
00950       /* If we are using a parallel marker, actually start helper threads.  */
00951         if (GC_parallel) start_mark_threads();
00952 #   endif
00953 }
00954 
00955 
00956 /* Perform all initializations, including those that    */
00957 /* may require allocation.                       */
00958 /* Called without allocation lock.               */
00959 /* Must be called before a second thread is created.    */
00961 void GC_init_parallel()
00962 {
00963     if (parallel_initialized) return;
00964     parallel_initialized = TRUE;
00965 
00966     /* GC_init() calls us back, so set flag first.      */
00967     if (!GC_is_initialized) GC_init();
00968     /* Initialize thread local free lists if used.      */
00969 #   if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
00970       LOCK();
00971       GC_init_thread_local(GC_lookup_thread(pthread_self()));
00972       UNLOCK();
00973 #   endif
00974 }
00975 
00976 
00977 #if !defined(GC_DARWIN_THREADS)
00978 int WRAP_FUNC(pthread_sigmask)(int how, const sigset_t *set, sigset_t *oset)
00979 {
00980     sigset_t fudged_set;
00981     
00982     if (set != NULL && (how == SIG_BLOCK || how == SIG_SETMASK)) {
00983         fudged_set = *set;
00984         sigdelset(&fudged_set, SIG_SUSPEND);
00985         set = &fudged_set;
00986     }
00987     return(REAL_FUNC(pthread_sigmask)(how, set, oset));
00988 }
00989 #endif /* !GC_DARWIN_THREADS */
00990 
00991 /* Wrappers for functions that are likely to block for an appreciable */
00992 /* length of time.  Must be called in pairs, if at all.               */
00993 /* Nothing much beyond the system call itself should be executed      */
00994 /* between these.                                              */
00995 
00996 void GC_start_blocking(void) {
00997 #   define SP_SLOP 128
00998     GC_thread me;
00999     LOCK();
01000     me = GC_lookup_thread(pthread_self());
01001     GC_ASSERT(!(me -> thread_blocked));
01002 #   ifdef SPARC
01003        me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
01004 #   else
01005 #   ifndef GC_DARWIN_THREADS
01006        me -> stop_info.stack_ptr = (ptr_t)GC_approx_sp();
01007 #   endif
01008 #   endif
01009 #   ifdef IA64
01010        me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack() + SP_SLOP;
01011 #   endif
01012     /* Add some slop to the stack pointer, since the wrapped call may        */
01013     /* end up pushing more callee-save registers.                     */
01014 #   ifndef GC_DARWIN_THREADS
01015 #   ifdef STACK_GROWS_UP
01016        me -> stop_info.stack_ptr += SP_SLOP;
01017 #   else
01018        me -> stop_info.stack_ptr -= SP_SLOP;
01019 #   endif
01020 #   endif
01021     me -> thread_blocked = TRUE;
01022     UNLOCK();
01023 }
01024 
01025 void GC_end_blocking(void) {
01026     GC_thread me;
01027     LOCK();   /* This will block if the world is stopped.      */
01028     me = GC_lookup_thread(pthread_self());
01029     GC_ASSERT(me -> thread_blocked);
01030     me -> thread_blocked = FALSE;
01031     UNLOCK();
01032 }
01033     
01034 #if defined(GC_DGUX386_THREADS)
01035 #define __d10_sleep sleep
01036 #endif /* GC_DGUX386_THREADS */
01037 
01038 /* A wrapper for the standard C sleep function   */
01039 int WRAP_FUNC(sleep) (unsigned int seconds)
01040 {
01041     int result;
01042 
01043     GC_start_blocking();
01044     result = REAL_FUNC(sleep)(seconds);
01045     GC_end_blocking();
01046     return result;
01047 }
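
/* Illustrative sketch (not compiled): any other call that can block for an   */
/* appreciable time could be bracketed the same way.  The wrapped function    */
/* below is hypothetical; only GC_start_blocking/GC_end_blocking come from    */
/* this file.                                                                  */
#if 0
ssize_t WRAP_FUNC(blocking_read)(int fd, void *buf, size_t nbytes)
{
    ssize_t result;

    GC_start_blocking();   /* Record our stack pointer; a GC may proceed      */
                           /* without signalling us while we are blocked.     */
    result = REAL_FUNC(blocking_read)(fd, buf, nbytes);
    GC_end_blocking();     /* Blocks here if the world is currently stopped.  */
    return result;
}
#endif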
01048 
01049 struct start_info {
01050     void *(*start_routine)(void *);
01051     void *arg;
01052     word flags;
01053     sem_t registered;       /* 1 ==> in our thread table, but  */
01054                             /* parent hasn't yet noticed.             */
01055 };
01056 
01057 /* Called at thread exit.                        */
01058 /* Never called for main thread.  That's OK, since it   */
01059 /* results in at most a tiny one-time leak.  And        */
01060 /* linuxthreads doesn't reclaim the main thread's       */
01061 /* resources or id anyway.                       */
01062 void GC_thread_exit_proc(void *arg)
01063 {
01064     GC_thread me;
01065 
01066     LOCK();
01067     me = GC_lookup_thread(pthread_self());
01068     GC_destroy_thread_local(me);
01069     if (me -> flags & DETACHED) {
01070        GC_delete_thread(pthread_self());
01071     } else {
01072        me -> flags |= FINISHED;
01073     }
01074 #   if defined(THREAD_LOCAL_ALLOC) && !defined(USE_PTHREAD_SPECIFIC) \
01075        && !defined(USE_COMPILER_TLS) && !defined(DBG_HDRS_ALL)
01076       GC_remove_specific(GC_thread_key);
01077 #   endif
01078     /* The following may run the GC from "nonexistent" thread. */
01079     GC_wait_for_gc_completion(FALSE);
01080     UNLOCK();
01081 }
01082 
01083 int WRAP_FUNC(pthread_join)(pthread_t thread, void **retval)
01084 {
01085     int result;
01086     GC_thread thread_gc_id;
01087     
01088     LOCK();
01089     thread_gc_id = GC_lookup_thread(thread);
01090     /* This is guaranteed to be the intended one, since the thread id */
01091     /* can't have been recycled by pthreads.                          */
01092     UNLOCK();
01093     result = REAL_FUNC(pthread_join)(thread, retval);
01094 # if defined (GC_FREEBSD_THREADS)
01095     /* On FreeBSD, the wrapped pthread_join() sometimes returns (what
01096        appears to be) a spurious EINTR which caused the test and real code
01097        to gratuitously fail.  Having looked at system pthread library source
01098        code, I see how this return code may be generated.  In one path of
01099        code, pthread_join() just returns the errno setting of the thread
01100        being joined.  This does not match the POSIX specification or the
01101        local man pages thus I have taken the liberty to catch this one
01102        spurious return value properly conditionalized on GC_FREEBSD_THREADS. */
01103     if (result == EINTR) result = 0;
01104 # endif
01105     if (result == 0) {
01106         LOCK();
01107         /* Here the pthread thread id may have been recycled. */
01108         GC_delete_gc_thread(thread, thread_gc_id);
01109         UNLOCK();
01110     }
01111     return result;
01112 }
01113 
01114 int
01115 WRAP_FUNC(pthread_detach)(pthread_t thread)
01116 {
01117     int result;
01118     GC_thread thread_gc_id;
01119     
01120     LOCK();
01121     thread_gc_id = GC_lookup_thread(thread);
01122     UNLOCK();
01123     result = REAL_FUNC(pthread_detach)(thread);
01124     if (result == 0) {
01125       LOCK();
01126       thread_gc_id -> flags |= DETACHED;
01127       /* Here the pthread thread id may have been recycled. */
01128       if (thread_gc_id -> flags & FINISHED) {
01129         GC_delete_gc_thread(thread, thread_gc_id);
01130       }
01131       UNLOCK();
01132     }
01133     return result;
01134 }
01135 
01136 GC_bool GC_in_thread_creation = FALSE;
01137 
01138 void * GC_start_routine(void * arg)
01139 {
01140     int dummy;
01141     struct start_info * si = arg;
01142     void * result;
01143     GC_thread me;
01144     pthread_t my_pthread;
01145     void *(*start)(void *);
01146     void *start_arg;
01147 
01148     my_pthread = pthread_self();
01149 #   ifdef DEBUG_THREADS
01150         GC_printf1("Starting thread 0x%lx\n", my_pthread);
01151         GC_printf1("pid = %ld\n", (long) getpid());
01152         GC_printf1("sp = 0x%lx\n", (long) &arg);
01153 #   endif
01154     LOCK();
01155     GC_in_thread_creation = TRUE;
01156     me = GC_new_thread(my_pthread);
01157     GC_in_thread_creation = FALSE;
01158 #ifdef GC_DARWIN_THREADS
01159     me -> stop_info.mach_thread = mach_thread_self();
01160 #else
01161     me -> stop_info.stack_ptr = 0;
01162 #endif
01163     me -> flags = si -> flags;
01164     /* me -> stack_end = GC_linux_stack_base(); -- currently (11/99)  */
01165     /* doesn't work because the stack base in /proc/self/stat is the  */
01166     /* one for the main thread.  There is a strong argument that that's      */
01167     /* a kernel bug, but a pervasive one.                      */
01168 #   ifdef STACK_GROWS_DOWN
01169       me -> stack_end = (ptr_t)(((word)(&dummy) + (GC_page_size - 1))
01170                               & ~(GC_page_size - 1));
01171 #        ifndef GC_DARWIN_THREADS
01172         me -> stop_info.stack_ptr = me -> stack_end - 0x10;
01173 #        endif
01174        /* Needs to be plausible, since an asynchronous stack mark     */
01175        /* should not crash.                                    */
01176 #   else
01177       me -> stack_end = (ptr_t)((word)(&dummy) & ~(GC_page_size - 1));
01178       me -> stop_info.stack_ptr = me -> stack_end + 0x10;
01179 #   endif
01180     /* This is dubious, since we may be more than a page into the stack, */
01181     /* and hence skip some of it, though it's not clear that matters.  */
01182 #   ifdef IA64
01183       me -> backing_store_end = (ptr_t)
01184                      (GC_save_regs_in_stack() & ~(GC_page_size - 1));
01185       /* This is also < 100% convincing.  We should also read this    */
01186       /* from /proc, but the hook to do so isn't there yet.           */
01187 #   endif /* IA64 */
01188     UNLOCK();
01189     start = si -> start_routine;
01190 #   ifdef DEBUG_THREADS
01191        GC_printf1("start_routine = 0x%lx\n", start);
01192 #   endif
01193     start_arg = si -> arg;
01194     sem_post(&(si -> registered)); /* Last action on si.       */
01195                                    /* OK to deallocate. */
01196     pthread_cleanup_push(GC_thread_exit_proc, 0);
01197 #   if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
01198        LOCK();
01199         GC_init_thread_local(me);
01200        UNLOCK();
01201 #   endif
01202     result = (*start)(start_arg);
01203 #ifdef DEBUG_THREADS
01204         GC_printf1("Finishing thread 0x%x\n", pthread_self());
01205 #endif
01206     me -> status = result;
01207     pthread_cleanup_pop(1);
01208     /* Cleanup acquires lock, ensuring that we can't exit             */
01209     /* while a collection that thinks we're alive is trying to stop     */
01210     /* us.                                                     */
01211     return(result);
01212 }
01213 
01214 int
01215 WRAP_FUNC(pthread_create)(pthread_t *new_thread,
01216                 const pthread_attr_t *attr,
01217                   void *(*start_routine)(void *), void *arg)
01218 {
01219     int result;
01220     int detachstate;
01221     word my_flags = 0;
01222     struct start_info * si; 
01223        /* This is otherwise saved only in an area mmapped by the thread */
01224        /* library, which isn't visible to the collector.               */
01225  
01226     /* We resist the temptation to muck with the stack size here,     */
01227     /* even if the default is unreasonably small.  That's the client's       */
01228     /* responsibility.                                                */
01229 
01230     LOCK();
01231     si = (struct start_info *)GC_INTERNAL_MALLOC(sizeof(struct start_info),
01232                                            NORMAL);
01233     UNLOCK();
01234     if (!parallel_initialized) GC_init_parallel();
01235     if (0 == si) return(ENOMEM);
01236     sem_init(&(si -> registered), 0, 0);
01237     si -> start_routine = start_routine;
01238     si -> arg = arg;
01239     LOCK();
01240     if (!GC_thr_initialized) GC_thr_init();
01241 #   ifdef GC_ASSERTIONS
01242       {
01243        size_t stack_size;
01244        if (NULL == attr) {
01245           pthread_attr_t my_attr;
01246           pthread_attr_init(&my_attr);
01247           pthread_attr_getstacksize(&my_attr, &stack_size);
01248        } else {
01249           pthread_attr_getstacksize(attr, &stack_size);
01250        }
01251 #       ifdef PARALLEL_MARK
01252          GC_ASSERT(stack_size >= (8*HBLKSIZE*sizeof(word)));
01253 #       else
01254           /* FreeBSD-5.3/Alpha: default pthread stack is 64K,  */
01255          /* HBLKSIZE=8192, sizeof(word)=8               */
01256          GC_ASSERT(stack_size >= 65536);
01257 #       endif
01258        /* Our threads may need to do some work for the GC.     */
01259        /* Ridiculously small threads won't work, and they      */
01260        /* probably wouldn't work anyway.                */
01261       }
01262 #   endif
01263     if (NULL == attr) {
01264        detachstate = PTHREAD_CREATE_JOINABLE;
01265     } else { 
01266         pthread_attr_getdetachstate(attr, &detachstate);
01267     }
01268     if (PTHREAD_CREATE_DETACHED == detachstate) my_flags |= DETACHED;
01269     si -> flags = my_flags;
01270     UNLOCK();
01271 #   ifdef DEBUG_THREADS
01272         GC_printf1("About to start new thread from thread 0x%X\n",
01273                  pthread_self());
01274 #   endif
01275 
01276     result = REAL_FUNC(pthread_create)(new_thread, attr, GC_start_routine, si);
01277 
01278 #   ifdef DEBUG_THREADS
01279         GC_printf1("Started thread 0x%X\n", *new_thread);
01280 #   endif
01281     /* Wait until child has been added to the thread table.           */
01282     /* This also ensures that we hold onto si until the child is done */
01283     /* with it.  Thus it doesn't matter whether it is otherwise              */
01284     /* visible to the collector.                               */
01285     if (0 == result) {
01286        while (0 != sem_wait(&(si -> registered))) {
01287             if (EINTR != errno) ABORT("sem_wait failed");
01288        }
01289     }
01290     sem_destroy(&(si -> registered));
01291     LOCK();
01292     GC_INTERNAL_FREE(si);
01293     UNLOCK();
01294 
01295     return(result);
01296 }
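
/* Hypothetical client-side view (not compiled): in a program built against   */
/* this collector, an ordinary pthread_create call normally resolves to the   */
/* wrapper above, either through the --wrap link option or through macro      */
/* redefinitions in the collector's gc.h (GC_THREADS, or the platform macro   */
/* such as GC_LINUX_THREADS in older releases, must be defined first).        */
#if 0
#define GC_THREADS
#include "gc.h"
#include <pthread.h>

static void * worker(void * arg)
{
    /* Safe: the thread was added to GC_threads before pthread_create         */
    /* returned in the parent, and GC_start_routine has already recorded      */
    /* this thread's stack limits.                                            */
    return GC_MALLOC(1024);
}

static int start_worker(pthread_t *t)
{
    return pthread_create(t, NULL, worker, NULL);  /* resolves to the wrapper */
}
#endif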
01297 
01298 #ifdef GENERIC_COMPARE_AND_SWAP
01299   pthread_mutex_t GC_compare_and_swap_lock = PTHREAD_MUTEX_INITIALIZER;
01300 
01301   GC_bool GC_compare_and_exchange(volatile GC_word *addr,
01302                                GC_word old, GC_word new_val)
01303   {
01304     GC_bool result;
01305     pthread_mutex_lock(&GC_compare_and_swap_lock);
01306     if (*addr == old) {
01307       *addr = new_val;
01308       result = TRUE;
01309     } else {
01310       result = FALSE;
01311     }
01312     pthread_mutex_unlock(&GC_compare_and_swap_lock);
01313     return result;
01314   }
01315   
01316   GC_word GC_atomic_add(volatile GC_word *addr, GC_word how_much)
01317   {
01318     GC_word old;
01319     pthread_mutex_lock(&GC_compare_and_swap_lock);
01320     old = *addr;
01321     *addr = old + how_much;
01322     pthread_mutex_unlock(&GC_compare_and_swap_lock);
01323     return old;
01324   }
01325 
01326 #endif /* GENERIC_COMPARE_AND_SWAP */
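
/* Illustrative sketch (not compiled): the usual retry-loop idiom built on    */
/* GC_compare_and_exchange.  GC_atomic_add above achieves the same effect     */
/* under the lock; a caller relying only on the CAS primitive would write:    */
#if 0
GC_word increment_counter(volatile GC_word *counter)
{
    GC_word old;

    do {
        old = *counter;
    } while (!GC_compare_and_exchange(counter, old, old + 1));
    return old;    /* value before the increment, as with GC_atomic_add */
}
#endif
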
01327 /* Spend a few cycles in a way that can't introduce contention with   */
01328 /* other threads.                                              */
01329 void GC_pause()
01330 {
01331     int i;
01332 #   if !defined(__GNUC__) || defined(__INTEL_COMPILER)
01333       volatile word dummy = 0;
01334 #   endif
01335 
01336     for (i = 0; i < 10; ++i) { 
01337 #     if defined(__GNUC__) && !defined(__INTEL_COMPILER)
01338         __asm__ __volatile__ (" " : : : "memory");
01339 #     else
01340        /* Something that's unlikely to be optimized away. */
01341        GC_noop(++dummy);
01342 #     endif
01343     }
01344 }
01345     
01346 #define SPIN_MAX 128 /* Maximum number of calls to GC_pause before    */
01347                      /* we give up.                            */
01348 
01349 VOLATILE GC_bool GC_collecting = 0;
01350                      /* A hint that we're in the collector and       */
01351                         /* holding the allocation lock for an           */
01352                         /* extended period.                             */
01353 
01354 #if !defined(USE_SPIN_LOCK) || defined(PARALLEL_MARK)
01355 /* If we don't want to use the below spinlock implementation, either  */
01356 /* because we don't have a GC_test_and_set implementation, or because        */
01357 /* we don't want to risk sleeping, we can still try spinning on       */
01358 /* pthread_mutex_trylock for a while.  This appears to be very        */
01359 /* beneficial in many cases.                                          */
01360 /* I suspect that under high contention this is nearly always better  */
01361 /* than the spin lock.  But it's a bit slower on a uniprocessor.      */
01362 /* Hence we still default to the spin lock.                           */
01363 /* This is also used to acquire the mark lock for the parallel        */
01364 /* marker.                                                     */
01365 
01366 /* Here we use a strict exponential backoff scheme.  I don't know     */
01367 /* whether that's better or worse than the above.  We eventually      */
01368 /* yield by calling pthread_mutex_lock(); it never makes sense to     */
01369 /* explicitly sleep.                                           */
01370 
01371 #define LOCK_STATS
01372 #ifdef LOCK_STATS
01373   unsigned long GC_spin_count = 0;
01374   unsigned long GC_block_count = 0;
01375   unsigned long GC_unlocked_count = 0;
01376 #endif
01377 
01378 void GC_generic_lock(pthread_mutex_t * lock)
01379 {
01380 #ifndef NO_PTHREAD_TRYLOCK
01381     unsigned pause_length = 1;
01382     unsigned i;
01383     
01384     if (0 == pthread_mutex_trylock(lock)) {
01385 #       ifdef LOCK_STATS
01386            ++GC_unlocked_count;
01387 #       endif
01388        return;
01389     }
01390     for (; pause_length <= SPIN_MAX; pause_length <<= 1) {
01391        for (i = 0; i < pause_length; ++i) {
01392            GC_pause();
01393        }
01394         switch(pthread_mutex_trylock(lock)) {
01395            case 0:
01396 #             ifdef LOCK_STATS
01397                   ++GC_spin_count;
01398 #             endif
01399               return;
01400            case EBUSY:
01401               break;
01402            default:
01403               ABORT("Unexpected error from pthread_mutex_trylock");
01404         }
01405     }
01406 #endif /* !NO_PTHREAD_TRYLOCK */
01407 #   ifdef LOCK_STATS
01408        ++GC_block_count;
01409 #   endif
01410     pthread_mutex_lock(lock);
01411 }
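
/* With SPIN_MAX == 128 the trylock above is retried after pause runs of      */
/* 1, 2, 4, ..., 128 calls to GC_pause (8 rounds, 255 pauses in all) before   */
/* the thread finally blocks in pthread_mutex_lock.                           */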
01412 
01413 #endif /* !USE_SPIN_LOCK || PARALLEL_MARK */
01414 
01415 #if defined(USE_SPIN_LOCK)
01416 
01417 /* Reasonably fast spin locks.  Basically the same implementation */
01418 /* as STL alloc.h.  This isn't really the right way to do this,   */
01419 /* but until the POSIX scheduling mess gets straightened out ...  */
01420 
01421 volatile unsigned int GC_allocate_lock = 0;
01422 
01423 
01424 void GC_lock()
01425 {
01426 #   define low_spin_max 30  /* spin cycles if we suspect uniprocessor */
01427 #   define high_spin_max SPIN_MAX /* spin cycles for multiprocessor */
01428     static unsigned spin_max = low_spin_max;
01429     unsigned my_spin_max;
01430     static unsigned last_spins = 0;
01431     unsigned my_last_spins;
01432     int i;
01433 
01434     if (!GC_test_and_set(&GC_allocate_lock)) {
01435         return;
01436     }
01437     my_spin_max = spin_max;
01438     my_last_spins = last_spins;
01439     for (i = 0; i < my_spin_max; i++) {
01440         if (GC_collecting || GC_nprocs == 1) goto yield;
01441         if (i < my_last_spins/2 || GC_allocate_lock) {
01442             GC_pause();
01443             continue;
01444         }
01445         if (!GC_test_and_set(&GC_allocate_lock)) {
01446            /*
01447              * got it!
01448              * Spinning worked.  Thus we're probably not being scheduled
01449              * against the other process with which we were contending.
01450              * Thus it makes sense to spin longer the next time.
01451             */
01452             last_spins = i;
01453             spin_max = high_spin_max;
01454             return;
01455         }
01456     }
01457     /* We are probably being scheduled against the other process.  Sleep. */
01458     spin_max = low_spin_max;
01459 yield:
01460     for (i = 0;; ++i) {
01461         if (!GC_test_and_set(&GC_allocate_lock)) {
01462             return;
01463         }
01464 #       define SLEEP_THRESHOLD 12
01465               /* Under Linux very short sleeps tend to wait until     */
01466               /* the current time quantum expires.  On old Linux      */
01467               /* kernels nanosleep(<= 2ms) just spins.               */
01468               /* (Under 2.4, this happens only for real-time          */
01469               /* processes.)  We want to minimize both behaviors      */
01470               /* here.                                         */
01471         if (i < SLEEP_THRESHOLD) {
01472             sched_yield();
01473        } else {
01474            struct timespec ts;
01475        
01476            if (i > 24) i = 24;
01477                      /* Don't wait for more than about 15msecs, even  */
01478                      /* under extreme contention.                     */
01479            ts.tv_sec = 0;
01480            ts.tv_nsec = 1 << i;
01481            nanosleep(&ts, 0);
01482        }
01483     }
01484 }
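
/* The sleeps in the yield path above grow as (1 << i) nanoseconds; with i    */
/* capped at 24 the longest single sleep is 2^24 ns, i.e. roughly 16.8 msec,  */
/* which is what the "about 15msecs" comment refers to.                       */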
01485 
01486 #else  /* !USE_SPIN_LOCK */
01487 void GC_lock()
01488 {
01489 #ifndef NO_PTHREAD_TRYLOCK
01490     if (1 == GC_nprocs || GC_collecting) {
01491        pthread_mutex_lock(&GC_allocate_ml);
01492     } else {
01493         GC_generic_lock(&GC_allocate_ml);
01494     }
01495 #else  /* !NO_PTHREAD_TRYLOCK */
01496     pthread_mutex_lock(&GC_allocate_ml);
01497 #endif /* !NO_PTHREAD_TRYLOCK */
01498 }
01499 
01500 #endif /* !USE_SPIN_LOCK */
01501 
01502 #if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
01503 
01504 #ifdef GC_ASSERTIONS
01505   pthread_t GC_mark_lock_holder = NO_THREAD;
01506 #endif
01507 
01508 #if 0
01509   /* Ugly workaround for a linux threads bug in the final versions      */
01510   /* of glibc2.1.  Pthread_mutex_trylock sets the mutex owner           */
01511   /* field even when it fails to acquire the mutex.  This causes        */
01512   /* pthread_cond_wait to die.  Remove for glibc2.2.                    */
01513   /* According to the man page, we should use                           */
01514   /* PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP, but that isn't actually   */
01515   /* defined.                                                           */
01516   static pthread_mutex_t mark_mutex =
01517         {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, {0, 0}};
01518 #else
01519   static pthread_mutex_t mark_mutex = PTHREAD_MUTEX_INITIALIZER;
01520 #endif
01521 
01522 static pthread_cond_t builder_cv = PTHREAD_COND_INITIALIZER;
01523 
01524 void GC_acquire_mark_lock()
01525 {
01526 /*
01527     if (pthread_mutex_lock(&mark_mutex) != 0) {
01528        ABORT("pthread_mutex_lock failed");
01529     }
01530 */
01531     GC_generic_lock(&mark_mutex);
01532 #   ifdef GC_ASSERTIONS
01533        GC_mark_lock_holder = pthread_self();
01534 #   endif
01535 }
01536 
01537 void GC_release_mark_lock()
01538 {
01539     GC_ASSERT(GC_mark_lock_holder == pthread_self());
01540 #   ifdef GC_ASSERTIONS
01541        GC_mark_lock_holder = NO_THREAD;
01542 #   endif
01543     if (pthread_mutex_unlock(&mark_mutex) != 0) {
01544        ABORT("pthread_mutex_unlock failed");
01545     }
01546 }
01547 
01548 /* Collector must wait for freelist builders for 2 reasons:           */
01549 /* 1) Mark bits may still be getting examined without lock.           */
01550 /* 2) Partial free lists referenced only by locals may not be scanned        */
01551 /*    correctly, e.g. if they contain "pointer-free" objects, since the      */
01552 /*    free-list link may be ignored.                                  */
01553 void GC_wait_builder()
01554 {
01555     GC_ASSERT(GC_mark_lock_holder == pthread_self());
01556 #   ifdef GC_ASSERTIONS
01557        GC_mark_lock_holder = NO_THREAD;
01558 #   endif
01559     if (pthread_cond_wait(&builder_cv, &mark_mutex) != 0) {
01560        ABORT("pthread_cond_wait failed");
01561     }
01562     GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
01563 #   ifdef GC_ASSERTIONS
01564        GC_mark_lock_holder = pthread_self();
01565 #   endif
01566 }
01567 
01568 void GC_wait_for_reclaim()
01569 {
01570     GC_acquire_mark_lock();
01571     while (GC_fl_builder_count > 0) {
01572        GC_wait_builder();
01573     }
01574     GC_release_mark_lock();
01575 }
01576 
01577 void GC_notify_all_builder()
01578 {
01579     GC_ASSERT(GC_mark_lock_holder == pthread_self());
01580     if (pthread_cond_broadcast(&builder_cv) != 0) {
01581        ABORT("pthread_cond_broadcast failed");
01582     }
01583 }
01584 
01585 #endif /* PARALLEL_MARK || THREAD_LOCAL_ALLOC */
01586 
01587 #ifdef PARALLEL_MARK
01588 
01589 static pthread_cond_t mark_cv = PTHREAD_COND_INITIALIZER;
01590 
01591 void GC_wait_marker()
01592 {
01593     GC_ASSERT(GC_mark_lock_holder == pthread_self());
01594 #   ifdef GC_ASSERTIONS
01595        GC_mark_lock_holder = NO_THREAD;
01596 #   endif
01597     if (pthread_cond_wait(&mark_cv, &mark_mutex) != 0) {
01598        ABORT("pthread_cond_wait failed");
01599     }
01600     GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
01601 #   ifdef GC_ASSERTIONS
01602        GC_mark_lock_holder = pthread_self();
01603 #   endif
01604 }
01605 
01606 void GC_notify_all_marker()
01607 {
01608     if (pthread_cond_broadcast(&mark_cv) != 0) {
01609        ABORT("pthread_cond_broadcast failed");
01610     }
01611 }
01612 
01613 #endif /* PARALLEL_MARK */
01614 
01615 # endif /* GC_LINUX_THREADS and friends */
01616