
glibc 2.9: manager.c

00001 /* Linuxthreads - a simple clone()-based implementation of Posix        */
00002 /* threads for Linux.                                                   */
00003 /* Copyright (C) 1996 Xavier Leroy (Xavier.Leroy@inria.fr)              */
00004 /*                                                                      */
00005 /* This program is free software; you can redistribute it and/or        */
00006 /* modify it under the terms of the GNU Library General Public License  */
00007 /* as published by the Free Software Foundation; either version 2       */
00008 /* of the License, or (at your option) any later version.               */
00009 /*                                                                      */
00010 /* This program is distributed in the hope that it will be useful,      */
00011 /* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
00012 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
00013 /* GNU Library General Public License for more details.                 */
00014 
00015 /* The "thread manager" thread: manages creation and termination of threads */
00016 
00017 #include <assert.h>
00018 #include <errno.h>
00019 #include <sched.h>
00020 #include <stddef.h>
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024 #include <unistd.h>
00025 #include <sys/poll.h>    /* for poll */
00026 #include <sys/mman.h>    /* for mmap */
00027 #include <sys/param.h>
00028 #include <sys/time.h>
00029 #include <sys/wait.h>    /* for waitpid macros */
00030 #include <locale.h>      /* for __uselocale */
00031 #include <resolv.h>      /* for __resp */
00032 
00033 #include <ldsodefs.h>
00034 #include "pthread.h"
00035 #include "internals.h"
00036 #include "spinlock.h"
00037 #include "restart.h"
00038 #include "semaphore.h"
00039 #include <not-cancel.h>
00040 
00041 /* For debugging purposes put the maximum number of threads in a variable.  */
00042 const int __linuxthreads_pthread_threads_max = PTHREAD_THREADS_MAX;
00043 
00044 #ifndef THREAD_SELF
00045 /* Indicate whether at least one thread has a user-defined stack (if 1),
00046    or if all threads have stacks supplied by LinuxThreads (if 0). */
00047 int __pthread_nonstandard_stacks;
00048 #endif
00049 
00050 /* Number of active entries in __pthread_handles (used by gdb) */
00051 volatile int __pthread_handles_num = 2;
00052 
00053 /* Whether to use additional debugger actions for thread creation
00054    (set to 1 by gdb) */
00055 volatile int __pthread_threads_debug;
00056 
00057 /* Globally enabled events.  */
00058 volatile td_thr_events_t __pthread_threads_events;
00059 
00060 /* Pointer to thread descriptor with last event.  */
00061 volatile pthread_descr __pthread_last_event;
00062 
00063 static pthread_descr manager_thread;
00064 
00065 /* Mapping from stack segment to thread descriptor. */
00066 /* Stack segment numbers are also indices into the __pthread_handles array. */
00067 /* Stack segment number 0 is reserved for the initial thread. */
00068 
00069 #if FLOATING_STACKS
00070 # define thread_segment(seg) NULL
00071 #else
00072 static inline pthread_descr thread_segment(int seg)
00073 {
00074 # ifdef _STACK_GROWS_UP
00075   return (pthread_descr)(THREAD_STACK_START_ADDRESS + (seg - 1) * STACK_SIZE)
00076          + 1;
00077 # else
00078   return (pthread_descr)(THREAD_STACK_START_ADDRESS - (seg - 1) * STACK_SIZE)
00079          - 1;
00080 # endif
00081 }
00082 #endif
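
/* Illustrative note, not part of the original source: with fixed stacks
   and downward growth, thread_segment(seg) returns a descriptor sitting
   one pthread_descr below THREAD_STACK_START_ADDRESS - (seg - 1) *
   STACK_SIZE; the thread's stack then grows down beneath it.  Assuming,
   purely for illustration, STACK_SIZE = 2 MB and a start address of
   0xc0000000, segment 2 maps to the descriptor just below 0xbfe00000.
   This is why a segment number alone identifies a thread's memory, with
   no allocation table needed.  */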
00083 
00084 /* Flag set in signal handler to record child termination */
00085 
00086 static volatile int terminated_children;
00087 
00088 /* Flag set when the initial thread is blocked on pthread_exit waiting
00089    for all other threads to terminate */
00090 
00091 static int main_thread_exiting;
00092 
00093 /* Counter used to generate unique thread identifier.
00094    Thread identifier is pthread_threads_counter + segment. */
00095 
00096 static pthread_t pthread_threads_counter;
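
/* Illustrative arithmetic, not part of the original source: as
   pthread_handle_create below shows, the counter is bumped by
   PTHREAD_THREADS_MAX on every successful creation, and the identifier
   is counter + segment.  If PTHREAD_THREADS_MAX were 1024 (an assumed
   value, purely for illustration), a thread placed in segment 5 after
   three counter bumps would get id 3 * 1024 + 5.  Hence
   id % PTHREAD_THREADS_MAX recovers the segment, and a reused segment
   still yields a fresh, unique thread id.  */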
00097 
00098 /* Forward declarations */
00099 
00100 static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
00101                                  void * (*start_routine)(void *), void *arg,
00102                                  sigset_t *mask, int father_pid,
00103                                  int report_events,
00104                                  td_thr_events_t *event_maskp);
00105 static void pthread_handle_free(pthread_t th_id);
00106 static void pthread_handle_exit(pthread_descr issuing_thread, int exitcode)
00107      __attribute__ ((noreturn));
00108 static void pthread_reap_children(void);
00109 static void pthread_kill_all_threads(int sig, int main_thread_also);
00110 static void pthread_for_each_thread(void *arg,
00111     void (*fn)(void *, pthread_descr));
00112 
00113 /* The server thread managing requests for thread creation and termination */
00114 
00115 int
00116 __attribute__ ((noreturn))
00117 __pthread_manager(void *arg)
00118 {
00119   pthread_descr self = manager_thread = arg;
00120   int reqfd = __pthread_manager_reader;
00121   struct pollfd ufd;
00122   sigset_t manager_mask;
00123   int n;
00124   struct pthread_request request;
00125 
00126   /* If we have special thread_self processing, initialize it.  */
00127 #ifdef INIT_THREAD_SELF
00128   INIT_THREAD_SELF(self, 1);
00129 #endif
00130 #if !(USE_TLS && HAVE___THREAD)
00131   /* Set the error variable.  */
00132   self->p_errnop = &self->p_errno;
00133   self->p_h_errnop = &self->p_h_errno;
00134 #endif
00135   /* Block all signals except __pthread_sig_cancel and SIGTRAP */
00136   sigfillset(&manager_mask);
00137   sigdelset(&manager_mask, __pthread_sig_cancel); /* for thread termination */
00138   sigdelset(&manager_mask, SIGTRAP);            /* for debugging purposes */
00139   if (__pthread_threads_debug && __pthread_sig_debug > 0)
00140     sigdelset(&manager_mask, __pthread_sig_debug);
00141   sigprocmask(SIG_SETMASK, &manager_mask, NULL);
00142   /* Raise our priority to match that of main thread */
00143   __pthread_manager_adjust_prio(__pthread_main_thread->p_priority);
00144   /* Synchronize debugging of the thread manager */
00145   n = TEMP_FAILURE_RETRY(read_not_cancel(reqfd, (char *)&request,
00146                                     sizeof(request)));
00147   ASSERT(n == sizeof(request) && request.req_kind == REQ_DEBUG);
00148   ufd.fd = reqfd;
00149   ufd.events = POLLIN;
00150   /* Enter server loop */
00151   while(1) {
00152     n = __poll(&ufd, 1, 2000);
00153 
00154     /* Check for termination of the main thread */
00155     if (getppid() == 1) {
00156       pthread_kill_all_threads(SIGKILL, 0);
00157       _exit(0);
00158     }
00159     /* Check for dead children */
00160     if (terminated_children) {
00161       terminated_children = 0;
00162       pthread_reap_children();
00163     }
00164     /* Read and execute request */
00165     if (n == 1 && (ufd.revents & POLLIN)) {
00166       n = TEMP_FAILURE_RETRY(read_not_cancel(reqfd, (char *)&request,
00167                                         sizeof(request)));
00168 #ifdef DEBUG
00169       if (n < 0) {
00170        char d[64];
00171        write(STDERR_FILENO, d, snprintf(d, sizeof(d), "*** read err %m\n"));
00172       } else if (n != sizeof(request)) {
00173        write(STDERR_FILENO, "*** short read in manager\n", 26);
00174       }
00175 #endif
00176 
00177       switch(request.req_kind) {
00178       case REQ_CREATE:
00179         request.req_thread->p_retcode =
00180           pthread_handle_create((pthread_t *) &request.req_thread->p_retval,
00181                                 request.req_args.create.attr,
00182                                 request.req_args.create.fn,
00183                                 request.req_args.create.arg,
00184                                 &request.req_args.create.mask,
00185                                 request.req_thread->p_pid,
00186                                 request.req_thread->p_report_events,
00187                                 &request.req_thread->p_eventbuf.eventmask);
00188         restart(request.req_thread);
00189         break;
00190       case REQ_FREE:
00191         pthread_handle_free(request.req_args.free.thread_id);
00192         break;
00193       case REQ_PROCESS_EXIT:
00194         pthread_handle_exit(request.req_thread,
00195                             request.req_args.exit.code);
00196         /* NOTREACHED */
00197         break;
00198       case REQ_MAIN_THREAD_EXIT:
00199         main_thread_exiting = 1;
00200         /* Reap children in case all other threads died and the signal
00201            handler went off before we set main_thread_exiting to 1, and
00202            therefore did not do REQ_KICK. */
00203         pthread_reap_children();
00204
00205         if (__pthread_main_thread->p_nextlive == __pthread_main_thread) {
00206           restart(__pthread_main_thread);
00207           /* The main thread will now call exit() which will trigger an
00208              __on_exit handler, which in turn will send REQ_PROCESS_EXIT
00209              to the thread manager.  That is how the manager eventually
00210              terminates from this loop. */
00211         }
00212         break;
00213       case REQ_POST:
00214         __new_sem_post(request.req_args.post);
00215         break;
00216       case REQ_DEBUG:
00217         /* Make gdb aware of the new thread; gdb will restart the
00218            new thread once it is ready to handle it. */
00219         if (__pthread_threads_debug && __pthread_sig_debug > 0)
00220           raise(__pthread_sig_debug);
00221         break;
00222       case REQ_KICK:
00223         /* This is just a prod to get the manager to reap some
00224            threads right away, avoiding a potential delay at shutdown. */
00225         break;
00226       case REQ_FOR_EACH_THREAD:
00227         pthread_for_each_thread(request.req_args.for_each.arg,
00228                                 request.req_args.for_each.fn);
00229         restart(request.req_thread);
00230         break;
00231       }
00232     }
00233   }
00234 }
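
/* Illustrative sketch, not part of the original file: the sending side of
   the request pipe drained above.  A client thread fills in a
   struct pthread_request, writes it to __pthread_manager_request, and
   suspends itself until the manager calls restart() on it (see the
   REQ_CREATE case above).  The real sender lives elsewhere in
   LinuxThreads (pthread.c); this fragment only shows the shape of the
   protocol and is therefore compiled out.  */
#if 0
static int example_request_create(pthread_descr self,
                                  const pthread_attr_t *attr,
                                  void * (*fn)(void *), void *arg)
{
  struct pthread_request request;

  request.req_thread = self;          /* whom the manager will restart() */
  request.req_kind = REQ_CREATE;
  request.req_args.create.attr = attr;
  request.req_args.create.fn = fn;
  request.req_args.create.arg = arg;
  /* Pass our current signal mask so the child inherits it rather than
     the manager's mask.  */
  sigprocmask(SIG_SETMASK, NULL, &request.req_args.create.mask);
  TEMP_FAILURE_RETRY(write_not_cancel(__pthread_manager_request,
                                      (char *) &request, sizeof(request)));
  suspend(self);                      /* woken by restart() above */
  return THREAD_GETMEM(self, p_retcode);
}
#endif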
00235 
00236 int __pthread_manager_event(void *arg)
00237 {
00238   pthread_descr self = arg;
00239   /* If we have special thread_self processing, initialize it.  */
00240 #ifdef INIT_THREAD_SELF
00241   INIT_THREAD_SELF(self, 1);
00242 #endif
00243 
00244   /* Get the lock the manager will free once all is correctly set up.  */
00245   __pthread_lock (THREAD_GETMEM(self, p_lock), NULL);
00246   /* Free it immediately.  */
00247   __pthread_unlock (THREAD_GETMEM(self, p_lock));
00248 
00249   return __pthread_manager(arg);
00250 }
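
/* Note, not part of the original file: the lock/unlock pair above is a
   startup handshake.  The creator locks the new thread's p_lock before
   calling clone (see the report_events path in pthread_handle_create
   below), so a child entering __pthread_manager_event or
   pthread_start_thread_event blocks on its own p_lock until the creator
   has recorded the TD_CREATE event and the pid.  Releasing the lock
   right away is fine; only the ordering matters.  */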
00251 
00252 /* Process creation */
00253 
00254 static int
00255 __attribute__ ((noreturn))
00256 pthread_start_thread(void *arg)
00257 {
00258   pthread_descr self = (pthread_descr) arg;
00259   struct pthread_request request;
00260   void * outcome;
00261 #if HP_TIMING_AVAIL
00262   hp_timing_t tmpclock;
00263 #endif
00264   /* Initialize special thread_self processing, if any.  */
00265 #ifdef INIT_THREAD_SELF
00266   INIT_THREAD_SELF(self, self->p_nr);
00267 #endif
00268 #if HP_TIMING_AVAIL
00269   HP_TIMING_NOW (tmpclock);
00270   THREAD_SETMEM (self, p_cpuclock_offset, tmpclock);
00271 #endif
00272   /* Make sure our pid field is initialized, just in case we get here
00273      before our father has initialized it. */
00274   THREAD_SETMEM(self, p_pid, __getpid());
00275   /* Initial signal mask is that of the creating thread. (Otherwise,
00276      we'd just inherit the mask of the thread manager.) */
00277   sigprocmask(SIG_SETMASK, &self->p_start_args.mask, NULL);
00278   /* Set the scheduling policy and priority for the new thread, if needed */
00279   if (THREAD_GETMEM(self, p_start_args.schedpolicy) >= 0)
00280     /* Explicit scheduling attributes were provided: apply them */
00281     __sched_setscheduler(THREAD_GETMEM(self, p_pid),
00282                       THREAD_GETMEM(self, p_start_args.schedpolicy),
00283                          &self->p_start_args.schedparam);
00284   else if (manager_thread->p_priority > 0)
00285     /* Default scheduling required, but thread manager runs in realtime
00286        scheduling: switch new thread to SCHED_OTHER policy */
00287     {
00288       struct sched_param default_params;
00289       default_params.sched_priority = 0;
00290       __sched_setscheduler(THREAD_GETMEM(self, p_pid),
00291                            SCHED_OTHER, &default_params);
00292     }
00293 #if !(USE_TLS && HAVE___THREAD)
00294   /* Initialize thread-locale current locale to point to the global one.
00295      With __thread support, the variable's initializer takes care of this.  */
00296   __uselocale (LC_GLOBAL_LOCALE);
00297 #else
00298   /* Initialize __resp.  */
00299   __resp = &self->p_res;
00300 #endif
00301   /* Make gdb aware of new thread */
00302   if (__pthread_threads_debug && __pthread_sig_debug > 0) {
00303     request.req_thread = self;
00304     request.req_kind = REQ_DEBUG;
00305     TEMP_FAILURE_RETRY(write_not_cancel(__pthread_manager_request,
00306                                    (char *) &request, sizeof(request)));
00307     suspend(self);
00308   }
00309   /* Run the thread code */
00310   outcome = self->p_start_args.start_routine(THREAD_GETMEM(self,
00311                                                     p_start_args.arg));
00312   /* Exit with the given return value */
00313   __pthread_do_exit(outcome, CURRENT_STACK_FRAME);
00314 }
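
/* Rationale, not part of the original file: the SCHED_OTHER fallback
   above is needed because a cloned thread starts with the scheduling
   policy of its creator, and its creator here is the manager thread.
   When __pthread_manager_adjust_prio has boosted the manager to
   SCHED_FIFO, a thread created with default attributes would otherwise
   silently inherit realtime scheduling.  */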
00315 
00316 static int
00317 __attribute__ ((noreturn))
00318 pthread_start_thread_event(void *arg)
00319 {
00320   pthread_descr self = (pthread_descr) arg;
00321 
00322 #ifdef INIT_THREAD_SELF
00323   INIT_THREAD_SELF(self, self->p_nr);
00324 #endif
00325   /* Make sure our pid field is initialized, just in case we get here
00326      before our father has initialized it. */
00327   THREAD_SETMEM(self, p_pid, __getpid());
00328   /* Get the lock the manager will free once all is correctly set up.  */
00329   __pthread_lock (THREAD_GETMEM(self, p_lock), NULL);
00330   /* Free it immediately.  */
00331   __pthread_unlock (THREAD_GETMEM(self, p_lock));
00332 
00333   /* Continue with the real function.  */
00334   pthread_start_thread (arg);
00335 }
00336 
00337 #if defined USE_TLS && !FLOATING_STACKS
00338 # error "TLS can only work with floating stacks"
00339 #endif
00340 
00341 static int pthread_allocate_stack(const pthread_attr_t *attr,
00342                                   pthread_descr default_new_thread,
00343                                   int pagesize,
00344                                   char ** out_new_thread,
00345                                   char ** out_new_thread_bottom,
00346                                   char ** out_guardaddr,
00347                                   size_t * out_guardsize,
00348                                   size_t * out_stacksize)
00349 {
00350   pthread_descr new_thread;
00351   char * new_thread_bottom;
00352   char * guardaddr;
00353   size_t stacksize, guardsize;
00354 
00355 #ifdef USE_TLS
00356   /* TLS cannot work with fixed thread descriptor addresses.  */
00357   assert (default_new_thread == NULL);
00358 #endif
00359 
00360   if (attr != NULL && attr->__stackaddr_set)
00361     {
00362 #ifdef _STACK_GROWS_UP
00363       /* The user provided a stack. */
00364 # ifdef USE_TLS
00365       /* This value is not needed.  */
00366       new_thread = (pthread_descr) attr->__stackaddr;
00367       new_thread_bottom = (char *) new_thread;
00368 # else
00369       new_thread = (pthread_descr) attr->__stackaddr;
00370       new_thread_bottom = (char *) (new_thread + 1);
00371 # endif
00372       guardaddr = attr->__stackaddr + attr->__stacksize;
00373       guardsize = 0;
00374 #else
00375       /* The user provided a stack.  For now we interpret the supplied
00376         address as 1 + the highest addr. in the stack segment.  If a
00377         separate register stack is needed, we place it at the low end
00378         of the segment, relying on the associated stacksize to
00379         determine the low end of the segment.  This differs from many
00380         (but not all) other pthreads implementations.  The intent is
00381         that on machines with a single stack growing toward higher
00382         addresses, stackaddr would be the lowest address in the stack
00383         segment, so that it is consistently close to the initial sp
00384         value. */
00385 # ifdef USE_TLS
00386       new_thread = (pthread_descr) attr->__stackaddr;
00387 # else
00388       new_thread =
00389         (pthread_descr) ((long)(attr->__stackaddr) & -sizeof(void *)) - 1;
00390 # endif
00391       new_thread_bottom = (char *) attr->__stackaddr - attr->__stacksize;
00392       guardaddr = new_thread_bottom;
00393       guardsize = 0;
00394 #endif
00395 #ifndef THREAD_SELF
00396       __pthread_nonstandard_stacks = 1;
00397 #endif
00398 #ifndef USE_TLS
00399       /* Clear the thread data structure.  */
00400       memset (new_thread, '\0', sizeof (*new_thread));
00401 #endif
00402       stacksize = attr->__stacksize;
00403     }
00404   else
00405     {
00406 #ifdef NEED_SEPARATE_REGISTER_STACK
00407       const size_t granularity = 2 * pagesize;
00408       /* Try to make stacksize/2 a multiple of pagesize */
00409 #else
00410       const size_t granularity = pagesize;
00411 #endif
00412       void *map_addr;
00413 
00414       /* Allocate space for stack and thread descriptor at default address */
00415 #if FLOATING_STACKS
00416       if (attr != NULL)
00417        {
00418          guardsize = page_roundup (attr->__guardsize, granularity);
00419          stacksize = __pthread_max_stacksize - guardsize;
00420          stacksize = MIN (stacksize,
00421                         page_roundup (attr->__stacksize, granularity));
00422        }
00423       else
00424        {
00425          guardsize = granularity;
00426          stacksize = __pthread_max_stacksize - guardsize;
00427        }
00428 
00429       map_addr = mmap(NULL, stacksize + guardsize,
00430                     PROT_READ | PROT_WRITE | PROT_EXEC,
00431                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
00432       if (map_addr == MAP_FAILED)
00433         /* No more memory available.  */
00434         return -1;
00435 
00436 # ifdef NEED_SEPARATE_REGISTER_STACK
00437       guardaddr = map_addr + stacksize / 2;
00438       if (guardsize > 0)
00439        mprotect (guardaddr, guardsize, PROT_NONE);
00440 
00441       new_thread_bottom = (char *) map_addr;
00442 #  ifdef USE_TLS
00443       new_thread = ((pthread_descr) (new_thread_bottom + stacksize
00444                                  + guardsize));
00445 #  else
00446       new_thread = ((pthread_descr) (new_thread_bottom + stacksize
00447                                  + guardsize)) - 1;
00448 #  endif
00449 # elif _STACK_GROWS_DOWN
00450       guardaddr = map_addr;
00451       if (guardsize > 0)
00452        mprotect (guardaddr, guardsize, PROT_NONE);
00453 
00454       new_thread_bottom = (char *) map_addr + guardsize;
00455 #  ifdef USE_TLS
00456       new_thread = ((pthread_descr) (new_thread_bottom + stacksize));
00457 #  else
00458       new_thread = ((pthread_descr) (new_thread_bottom + stacksize)) - 1;
00459 #  endif
00460 # elif _STACK_GROWS_UP
00461       guardaddr = map_addr + stacksize;
00462       if (guardsize > 0)
00463        mprotect (guardaddr, guardsize, PROT_NONE);
00464 
00465       new_thread = (pthread_descr) map_addr;
00466 #  ifdef USE_TLS
00467       new_thread_bottom = (char *) new_thread;
00468 #  else
00469       new_thread_bottom = (char *) (new_thread + 1);
00470 #  endif
00471 # else
00472 #  error You must define a stack direction
00473 # endif /* Stack direction */
00474 #else /* !FLOATING_STACKS */
00475       void *res_addr;
00476 
00477       if (attr != NULL)
00478        {
00479          guardsize = page_roundup (attr->__guardsize, granularity);
00480          stacksize = STACK_SIZE - guardsize;
00481          stacksize = MIN (stacksize,
00482                         page_roundup (attr->__stacksize, granularity));
00483        }
00484       else
00485        {
00486          guardsize = granularity;
00487          stacksize = STACK_SIZE - granularity;
00488        }
00489 
00490 # ifdef NEED_SEPARATE_REGISTER_STACK
00491       new_thread = default_new_thread;
00492       new_thread_bottom = (char *) (new_thread + 1) - stacksize - guardsize;
00493       /* Includes guard area, unlike the normal case.  Use the bottom
00494        end of the segment as backing store for the register stack.
00495        Needed on IA64.  In this case, we also map the entire stack at
00496        once.  According to David Mosberger, that's cheaper.  It also
00497        avoids the risk of intermittent failures due to other mappings
00498        in the same region.  The cost is that we might be able to map
00499        slightly fewer stacks.  */
00500 
00501       /* First the main stack: */
00502       map_addr = (caddr_t)((char *)(new_thread + 1) - stacksize / 2);
00503       res_addr = mmap(map_addr, stacksize / 2,
00504                     PROT_READ | PROT_WRITE | PROT_EXEC,
00505                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
00506       if (res_addr != map_addr)
00507        {
00508          /* Bad luck, this segment is already mapped. */
00509          if (res_addr != MAP_FAILED)
00510            munmap(res_addr, stacksize / 2);
00511          return -1;
00512        }
00513       /* Then the register stack:  */
00514       map_addr = (caddr_t)new_thread_bottom;
00515       res_addr = mmap(map_addr, stacksize/2,
00516                     PROT_READ | PROT_WRITE | PROT_EXEC,
00517                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
00518       if (res_addr != map_addr)
00519        {
00520          if (res_addr != MAP_FAILED)
00521            munmap(res_addr, stacksize / 2);
00522          munmap((caddr_t)((char *)(new_thread + 1) - stacksize/2),
00523                stacksize/2);
00524          return -1;
00525        }
00526 
00527       guardaddr = new_thread_bottom + stacksize/2;
00528       /* We leave the guard area in the middle unmapped.       */
00529 # else  /* !NEED_SEPARATE_REGISTER_STACK */
00530 #  ifdef _STACK_GROWS_DOWN
00531       new_thread = default_new_thread;
00532       new_thread_bottom = (char *) (new_thread + 1) - stacksize;
00533       map_addr = new_thread_bottom - guardsize;
00534       res_addr = mmap(map_addr, stacksize + guardsize,
00535                     PROT_READ | PROT_WRITE | PROT_EXEC,
00536                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
00537       if (res_addr != map_addr)
00538        {
00539          /* Bad luck, this segment is already mapped. */
00540          if (res_addr != MAP_FAILED)
00541            munmap (res_addr, stacksize + guardsize);
00542          return -1;
00543        }
00544 
00545       /* We managed to get a stack.  Protect the guard area pages if
00546          necessary.  */
00547       guardaddr = map_addr;
00548       if (guardsize > 0)
00549        mprotect (guardaddr, guardsize, PROT_NONE);
00550 #  else
00551       /* The thread description goes at the bottom of this area, and
00552          the stack starts directly above it.  */
00553
00554       new_thread = (pthread_descr)((unsigned long)default_new_thread &~ (STACK_SIZE - 1));
00555       map_addr = mmap(new_thread, stacksize + guardsize,
00556                     PROT_READ | PROT_WRITE | PROT_EXEC,
00557                     MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
00558       if (map_addr == MAP_FAILED)
00559          return -1;
00560 
00561       new_thread_bottom = map_addr + sizeof(*new_thread);
00562       guardaddr = map_addr + stacksize;
00563       if (guardsize > 0)
00564          mprotect (guardaddr, guardsize, PROT_NONE);
00565 
00566 #  endif /* stack direction */
00567 # endif  /* !NEED_SEPARATE_REGISTER_STACK */
00568 #endif   /* !FLOATING_STACKS */
00569     }
00570   *out_new_thread = (char *) new_thread;
00571   *out_new_thread_bottom = new_thread_bottom;
00572   *out_guardaddr = guardaddr;
00573   *out_guardsize = guardsize;
00574 #ifdef NEED_SEPARATE_REGISTER_STACK
00575   *out_stacksize = stacksize / 2;
00576 #else
00577   *out_stacksize = stacksize;
00578 #endif
00579   return 0;
00580 }
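
/* Illustrative sketch, not part of the original file: the essence of the
   FLOATING_STACKS branch above, reduced to a standalone fragment.  Map
   an anonymous region of stacksize + guardsize bytes, then revoke all
   access to the guard pages so that a stack overflow faults immediately
   instead of silently corrupting an adjacent mapping.  Assumes a
   downward-growing stack and the <sys/mman.h> already included above;
   compiled out.  */
#if 0
static char *example_stack_with_guard(size_t stacksize, size_t guardsize)
{
  char *map_addr = mmap(NULL, stacksize + guardsize,
                        PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (map_addr == MAP_FAILED)
    return NULL;                       /* no memory, as in the code above */
  if (guardsize > 0)
    mprotect(map_addr, guardsize, PROT_NONE); /* guard at the low end */
  return map_addr + guardsize;         /* bottom of the usable stack */
}
#endif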
00581 
00582 static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr,
00583                                  void * (*start_routine)(void *), void *arg,
00584                                  sigset_t * mask, int father_pid,
00585                                  int report_events,
00586                                  td_thr_events_t *event_maskp)
00587 {
00588   size_t sseg;
00589   int pid;
00590   pthread_descr new_thread;
00591   char *stack_addr;
00592   char * new_thread_bottom;
00593   pthread_t new_thread_id;
00594   char *guardaddr = NULL;
00595   size_t guardsize = 0, stksize = 0;
00596   int pagesize = __getpagesize();
00597   int saved_errno = 0;
00598 
00599 #ifdef USE_TLS
00600   new_thread = _dl_allocate_tls (NULL);
00601   if (new_thread == NULL)
00602     return EAGAIN;
00603 # if TLS_DTV_AT_TP
00604   /* pthread_descr is below TP.  */
00605   new_thread = (pthread_descr) ((char *) new_thread - TLS_PRE_TCB_SIZE);
00606 # endif
00607 #else
00608   /* Prevent warnings.  */
00609   new_thread = NULL;
00610 #endif
00611 
00612   /* First check whether we have to change the policy and if yes, whether
00613      we can do this.  Normally this should be done by examining the
00614      return value of the __sched_setscheduler call in pthread_start_thread
00615      but this is hard to implement.  FIXME  */
00616   if (attr != NULL && attr->__schedpolicy != SCHED_OTHER && geteuid () != 0)
00617     return EPERM;
00618   /* Find a free segment for the thread, and allocate a stack if needed */
00619   for (sseg = 2; ; sseg++)
00620     {
00621       if (sseg >= PTHREAD_THREADS_MAX)
00622        {
00623 #ifdef USE_TLS
00624 # if TLS_DTV_AT_TP
00625          new_thread = (pthread_descr) ((char *) new_thread + TLS_PRE_TCB_SIZE);
00626 # endif
00627          _dl_deallocate_tls (new_thread, true);
00628 #endif
00629          return EAGAIN;
00630        }
00631       if (__pthread_handles[sseg].h_descr != NULL)
00632        continue;
00633       if (pthread_allocate_stack(attr, thread_segment(sseg),
00634                              pagesize, &stack_addr, &new_thread_bottom,
00635                                  &guardaddr, &guardsize, &stksize) == 0)
00636        {
00637 #ifdef USE_TLS
00638          new_thread->p_stackaddr = stack_addr;
00639 #else
00640          new_thread = (pthread_descr) stack_addr;
00641 #endif
00642          break;
00643        }
00644     }
00645   __pthread_handles_num++;
00646   /* Allocate new thread identifier */
00647   pthread_threads_counter += PTHREAD_THREADS_MAX;
00648   new_thread_id = sseg + pthread_threads_counter;
00649   /* Initialize the thread descriptor.  Elements which have to be
00650      initialized to zero already have this value.  */
00651 #if !defined USE_TLS || !TLS_DTV_AT_TP
00652   new_thread->p_header.data.tcb = new_thread;
00653   new_thread->p_header.data.self = new_thread;
00654 #endif
00655 #if TLS_MULTIPLE_THREADS_IN_TCB || !defined USE_TLS || !TLS_DTV_AT_TP
00656   new_thread->p_multiple_threads = 1;
00657 #endif
00658   new_thread->p_tid = new_thread_id;
00659   new_thread->p_lock = &(__pthread_handles[sseg].h_lock);
00660   new_thread->p_cancelstate = PTHREAD_CANCEL_ENABLE;
00661   new_thread->p_canceltype = PTHREAD_CANCEL_DEFERRED;
00662 #if !(USE_TLS && HAVE___THREAD)
00663   new_thread->p_errnop = &new_thread->p_errno;
00664   new_thread->p_h_errnop = &new_thread->p_h_errno;
00665   new_thread->p_resp = &new_thread->p_res;
00666 #endif
00667   new_thread->p_guardaddr = guardaddr;
00668   new_thread->p_guardsize = guardsize;
00669   new_thread->p_nr = sseg;
00670   new_thread->p_inheritsched = attr ? attr->__inheritsched : 0;
00671   new_thread->p_alloca_cutoff = stksize / 4 > __MAX_ALLOCA_CUTOFF
00672                              ? __MAX_ALLOCA_CUTOFF : stksize / 4;
00673 
00674   /* Copy the stack guard canary.  */
00675 #ifdef THREAD_COPY_STACK_GUARD
00676   THREAD_COPY_STACK_GUARD (new_thread);
00677 #endif
00678 
00679   /* Copy the pointer guard value.  */
00680 #ifdef THREAD_COPY_POINTER_GUARD
00681   THREAD_COPY_POINTER_GUARD (new_thread);
00682 #endif
00683 
00684   /* Initialize the thread handle */
00685   __pthread_init_lock(&__pthread_handles[sseg].h_lock);
00686   __pthread_handles[sseg].h_descr = new_thread;
00687   __pthread_handles[sseg].h_bottom = new_thread_bottom;
00688   /* Determine scheduling parameters for the thread */
00689   new_thread->p_start_args.schedpolicy = -1;
00690   if (attr != NULL) {
00691     new_thread->p_detached = attr->__detachstate;
00692     new_thread->p_userstack = attr->__stackaddr_set;
00693 
00694     switch(attr->__inheritsched) {
00695     case PTHREAD_EXPLICIT_SCHED:
00696       new_thread->p_start_args.schedpolicy = attr->__schedpolicy;
00697       memcpy (&new_thread->p_start_args.schedparam, &attr->__schedparam,
00698              sizeof (struct sched_param));
00699       break;
00700     case PTHREAD_INHERIT_SCHED:
00701       new_thread->p_start_args.schedpolicy = __sched_getscheduler(father_pid);
00702       __sched_getparam(father_pid, &new_thread->p_start_args.schedparam);
00703       break;
00704     }
00705     new_thread->p_priority =
00706       new_thread->p_start_args.schedparam.sched_priority;
00707   }
00708   /* Finish setting up arguments to pthread_start_thread */
00709   new_thread->p_start_args.start_routine = start_routine;
00710   new_thread->p_start_args.arg = arg;
00711   new_thread->p_start_args.mask = *mask;
00712   /* Make the new thread ID available already now.  If any of the later
00713      functions fail we return an error value and the caller must not use
00714      the stored thread ID.  */
00715   *thread = new_thread_id;
00716   /* Raise priority of thread manager if needed */
00717   __pthread_manager_adjust_prio(new_thread->p_priority);
00718   /* Do the cloning.  We have to use two different functions depending
00719      on whether we are debugging or not.  */
00720   pid = 0;    /* Note that the thread can never have PID zero.  */
00721   if (report_events)
00722     {
00723       /* See whether the TD_CREATE event bit is set in any of the
00724          masks.  */
00725       int idx = __td_eventword (TD_CREATE);
00726       uint32_t mask = __td_eventmask (TD_CREATE);
00727 
00728       if ((mask & (__pthread_threads_events.event_bits[idx]
00729                  | event_maskp->event_bits[idx])) != 0)
00730        {
00731          /* Lock the mutex the child will use now so that it will stop.  */
00732          __pthread_lock(new_thread->p_lock, NULL);
00733 
00734          /* We have to report this event.  */
00735 #ifdef NEED_SEPARATE_REGISTER_STACK
00736          /* Perhaps this version should be used on all platforms. But
00737           this requires that __clone2 be uniformly supported
00738           everywhere.
00739 
00740           And there is some argument for changing the __clone2
00741           interface to pass sp and bsp instead, making it more IA64
00742           specific, but allowing stacks to grow outward from each
00743           other, to get less paging and fewer mmaps.  */
00744          pid = __clone2(pthread_start_thread_event,
00745                         (void **)new_thread_bottom,
00746                         (char *)stack_addr - new_thread_bottom,
00747                         CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
00748                         CLONE_SYSVSEM | __pthread_sig_cancel, new_thread);
00749 #elif _STACK_GROWS_UP
00750          pid = __clone(pthread_start_thread_event, (void *) new_thread_bottom,
00751                      CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_SYSVSEM |
00752                      __pthread_sig_cancel, new_thread);
00753 #else
00754          pid = __clone(pthread_start_thread_event, stack_addr,
00755                      CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_SYSVSEM |
00756                      __pthread_sig_cancel, new_thread);
00757 #endif
00758          saved_errno = errno;
00759          if (pid != -1)
00760            {
00761              /* Now fill in the information about the new thread in
00762                 the newly created thread's data structure.  We cannot let
00763                 the new thread do this since we don't know whether it was
00764                 already scheduled by the time we send the event.  */
00765              new_thread->p_eventbuf.eventdata = new_thread;
00766              new_thread->p_eventbuf.eventnum = TD_CREATE;
00767              __pthread_last_event = new_thread;
00768 
00769              /* We have to set the PID here since the callback function
00770                in the debug library will need it and we cannot guarantee
00771                the child got scheduled before the debugger.  */
00772              new_thread->p_pid = pid;
00773 
00774              /* Now call the function which signals the event.  */
00775              __linuxthreads_create_event ();
00776 
00777              /* Now restart the thread.  */
00778              __pthread_unlock(new_thread->p_lock);
00779            }
00780        }
00781     }
00782   if (pid == 0)
00783     {
00784 #ifdef NEED_SEPARATE_REGISTER_STACK
00785       pid = __clone2(pthread_start_thread,
00786                      (void **)new_thread_bottom,
00787                      (char *)stack_addr - new_thread_bottom,
00788                      CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
00789                      CLONE_SYSVSEM | __pthread_sig_cancel, new_thread);
00790 #elif _STACK_GROWS_UP
00791       pid = __clone(pthread_start_thread, (void *) new_thread_bottom,
00792                   CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_SYSVSEM |
00793                   __pthread_sig_cancel, new_thread);
00794 #else
00795       pid = __clone(pthread_start_thread, stack_addr,
00796                   CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_SYSVSEM |
00797                   __pthread_sig_cancel, new_thread);
00798 #endif /* !NEED_SEPARATE_REGISTER_STACK */
00799       saved_errno = errno;
00800     }
00801   /* Check if cloning succeeded */
00802   if (pid == -1) {
00803     /* Free the stack if we allocated it */
00804     if (attr == NULL || !attr->__stackaddr_set)
00805       {
00806 #ifdef NEED_SEPARATE_REGISTER_STACK
00807        size_t stacksize = ((char *)(new_thread->p_guardaddr)
00808                          - new_thread_bottom);
00809        munmap((caddr_t)new_thread_bottom,
00810               2 * stacksize + new_thread->p_guardsize);
00811 #elif _STACK_GROWS_UP
00812 # ifdef USE_TLS
00813        size_t stacksize = guardaddr - stack_addr;
00814        munmap(stack_addr, stacksize + guardsize);
00815 # else
00816        size_t stacksize = guardaddr - (char *)new_thread;
00817        munmap(new_thread, stacksize + guardsize);
00818 # endif
00819 #else
00820 # ifdef USE_TLS
00821        size_t stacksize = stack_addr - new_thread_bottom;
00822 # else
00823        size_t stacksize = (char *)(new_thread+1) - new_thread_bottom;
00824 # endif
00825        munmap(new_thread_bottom - guardsize, guardsize + stacksize);
00826 #endif
00827       }
00828 #ifdef USE_TLS
00829 # if TLS_DTV_AT_TP
00830     new_thread = (pthread_descr) ((char *) new_thread + TLS_PRE_TCB_SIZE);
00831 # endif
00832     _dl_deallocate_tls (new_thread, true);
00833 #endif
00834     __pthread_handles[sseg].h_descr = NULL;
00835     __pthread_handles[sseg].h_bottom = NULL;
00836     __pthread_handles_num--;
00837     return saved_errno;
00838   }
00839   /* Insert new thread in doubly linked list of active threads */
00840   new_thread->p_prevlive = __pthread_main_thread;
00841   new_thread->p_nextlive = __pthread_main_thread->p_nextlive;
00842   __pthread_main_thread->p_nextlive->p_prevlive = new_thread;
00843   __pthread_main_thread->p_nextlive = new_thread;
00844   /* Set pid field of the new thread, in case we get there before the
00845      child starts. */
00846   new_thread->p_pid = pid;
00847   return 0;
00848 }
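
/* Note on the clone flags used above, not part of the original source:
   CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_SYSVSEM
   shares the address space, filesystem attributes, file descriptors,
   signal handlers and SysV semaphore undo values between the threads.
   The low bits carry __pthread_sig_cancel instead of SIGCHLD as the
   child's exit signal, which is why dead threads are noticed by
   __pthread_manager_sighandler and reaped with waitpid(..., __WCLONE)
   below rather than by an ordinary SIGCHLD handler.  */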
00849 
00850 
00851 /* Try to free the resources of a thread when requested by pthread_join
00852    or pthread_detach on a terminated thread. */
00853 
00854 static void pthread_free(pthread_descr th)
00855 {
00856   pthread_handle handle;
00857   pthread_readlock_info *iter, *next;
00858 
00859   ASSERT(th->p_exited);
00860   /* Make the handle invalid */
00861   handle =  thread_handle(th->p_tid);
00862   __pthread_lock(&handle->h_lock, NULL);
00863   handle->h_descr = NULL;
00864   handle->h_bottom = (char *)(-1L);
00865   __pthread_unlock(&handle->h_lock);
00866 #ifdef FREE_THREAD
00867   FREE_THREAD(th, th->p_nr);
00868 #endif
00869   /* One fewer thread in __pthread_handles */
00870   __pthread_handles_num--;
00871 
00872   /* Destroy read lock list, and list of free read lock structures.
00873      If the former is not empty, it means the thread exited while
00874      holding read locks! */
00875 
00876   for (iter = th->p_readlock_list; iter != NULL; iter = next)
00877     {
00878       next = iter->pr_next;
00879       free(iter);
00880     }
00881 
00882   for (iter = th->p_readlock_free; iter != NULL; iter = next)
00883     {
00884       next = iter->pr_next;
00885       free(iter);
00886     }
00887 
00888   /* Nothing to free if the thread used a user-supplied stack */
00889   if (!th->p_userstack)
00890     {
00891       size_t guardsize = th->p_guardsize;
00892       /* Free the stack and thread descriptor area */
00893       char *guardaddr = th->p_guardaddr;
00894 #ifdef _STACK_GROWS_UP
00895 # ifdef USE_TLS
00896       size_t stacksize = guardaddr - th->p_stackaddr;
00897       guardaddr = th->p_stackaddr;
00898 # else
00899       size_t stacksize = guardaddr - (char *)th;
00900       guardaddr = (char *)th;
00901 # endif
00902 #else
00903       /* Guardaddr is always set, even if guardsize is 0.  This allows
00904         us to compute everything else.  */
00905 # ifdef USE_TLS
00906       size_t stacksize = th->p_stackaddr - guardaddr - guardsize;
00907 # else
00908       size_t stacksize = (char *)(th+1) - guardaddr - guardsize;
00909 # endif
00910 # ifdef NEED_SEPARATE_REGISTER_STACK
00911       /* Take account of the register stack, which is below guardaddr.  */
00912       guardaddr -= stacksize;
00913       stacksize *= 2;
00914 # endif
00915 #endif
00916       /* Unmap the stack.  */
00917       munmap(guardaddr, stacksize + guardsize);
00918 
00919     }
00920 
00921 #ifdef USE_TLS
00922 # if TLS_DTV_AT_TP
00923   th = (pthread_descr) ((char *) th + TLS_PRE_TCB_SIZE);
00924 # endif
00925   _dl_deallocate_tls (th, true);
00926 #endif
00927 }
00928 
00929 /* Handle threads that have exited */
00930 
00931 static void pthread_exited(pid_t pid)
00932 {
00933   pthread_descr th;
00934   int detached;
00935   /* Find thread with that pid */
00936   for (th = __pthread_main_thread->p_nextlive;
00937        th != __pthread_main_thread;
00938        th = th->p_nextlive) {
00939     if (th->p_pid == pid) {
00940       /* Remove thread from list of active threads */
00941       th->p_nextlive->p_prevlive = th->p_prevlive;
00942       th->p_prevlive->p_nextlive = th->p_nextlive;
00943       /* Mark thread as exited, and if detached, free its resources */
00944       __pthread_lock(th->p_lock, NULL);
00945       th->p_exited = 1;
00946       /* If we have to signal this event do it now.  */
00947       if (th->p_report_events)
00948        {
00949          /* See whether TD_REAP is in any of the masks.  */
00950          int idx = __td_eventword (TD_REAP);
00951          uint32_t mask = __td_eventmask (TD_REAP);
00952 
00953          if ((mask & (__pthread_threads_events.event_bits[idx]
00954                      | th->p_eventbuf.eventmask.event_bits[idx])) != 0)
00955            {
00956              /* Yes, we have to signal the reaping.  */
00957              th->p_eventbuf.eventnum = TD_REAP;
00958              th->p_eventbuf.eventdata = th;
00959              __pthread_last_event = th;
00960 
00961              /* Now call the function to signal the event.  */
00962              __linuxthreads_reap_event();
00963            }
00964        }
00965       detached = th->p_detached;
00966       __pthread_unlock(th->p_lock);
00967       if (detached)
00968        pthread_free(th);
00969       break;
00970     }
00971   }
00972   /* If all threads have exited and the main thread is pending on a
00973      pthread_exit, wake up the main thread and terminate ourselves. */
00974   if (main_thread_exiting &&
00975       __pthread_main_thread->p_nextlive == __pthread_main_thread) {
00976     restart(__pthread_main_thread);
00977     /* Same logic as REQ_MAIN_THREAD_EXIT. */
00978   }
00979 }
00980 
00981 static void pthread_reap_children(void)
00982 {
00983   pid_t pid;
00984   int status;
00985 
00986   while ((pid = waitpid_not_cancel(-1, &status, WNOHANG | __WCLONE)) > 0) {
00987     pthread_exited(pid);
00988     if (WIFSIGNALED(status)) {
00989       /* If a thread died due to a signal, send the same signal to
00990          all other threads, including the main thread. */
00991       pthread_kill_all_threads(WTERMSIG(status), 1);
00992       _exit(0);
00993     }
00994   }
00995 }
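
/* Note, not part of the original source: __WCLONE is required because
   the threads were cloned with __pthread_sig_cancel, not SIGCHLD, as
   their exit signal, and waitpid only reports such clone children when
   __WCLONE is set.  WNOHANG keeps the manager loop from blocking here
   when no thread has exited yet.  */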
00996 
00997 /* Try to free the resources of a thread when requested by pthread_join
00998    or pthread_detach on a terminated thread. */
00999 
01000 static void pthread_handle_free(pthread_t th_id)
01001 {
01002   pthread_handle handle = thread_handle(th_id);
01003   pthread_descr th;
01004 
01005   __pthread_lock(&handle->h_lock, NULL);
01006   if (nonexisting_handle(handle, th_id)) {
01007     /* pthread_reap_children has deallocated the thread already,
01008        nothing needs to be done */
01009     __pthread_unlock(&handle->h_lock);
01010     return;
01011   }
01012   th = handle->h_descr;
01013   if (th->p_exited) {
01014     __pthread_unlock(&handle->h_lock);
01015     pthread_free(th);
01016   } else {
01017     /* The Unix process of the thread is still running.
01018        Mark the thread as detached so that the thread manager will
01019        deallocate its resources when the Unix process exits. */
01020     th->p_detached = 1;
01021     __pthread_unlock(&handle->h_lock);
01022   }
01023 }
01024 
01025 /* Send a signal to all running threads */
01026 
01027 static void pthread_kill_all_threads(int sig, int main_thread_also)
01028 {
01029   pthread_descr th;
01030   for (th = __pthread_main_thread->p_nextlive;
01031        th != __pthread_main_thread;
01032        th = th->p_nextlive) {
01033     kill(th->p_pid, sig);
01034   }
01035   if (main_thread_also) {
01036     kill(__pthread_main_thread->p_pid, sig);
01037   }
01038 }
01039 
01040 static void pthread_for_each_thread(void *arg,
01041     void (*fn)(void *, pthread_descr))
01042 {
01043   pthread_descr th;
01044 
01045   for (th = __pthread_main_thread->p_nextlive;
01046        th != __pthread_main_thread;
01047        th = th->p_nextlive) {
01048     fn(arg, th);
01049   }
01050 
01051   fn(arg, __pthread_main_thread);
01052 }
01053 
01054 /* Process-wide exit() */
01055 
01056 static void pthread_handle_exit(pthread_descr issuing_thread, int exitcode)
01057 {
01058   pthread_descr th;
01059   __pthread_exit_requested = 1;
01060   __pthread_exit_code = exitcode;
01061   /* A forced asynchronous cancellation follows.  Make sure we won't
01062      get stuck later in the main thread with a system lock being held
01063      by one of the cancelled threads.  Ideally one would use the same
01064      code as in pthread_atfork(), but we can't distinguish system and
01065      user handlers there.  */
01066   __flockfilelist();
01067   /* Send the CANCEL signal to all running threads, including the main
01068      thread, but excluding the thread from which the exit request originated
01069      (that thread must complete the exit, e.g. calling atexit functions
01070      and flushing stdio buffers). */
01071   for (th = issuing_thread->p_nextlive;
01072        th != issuing_thread;
01073        th = th->p_nextlive) {
01074     kill(th->p_pid, __pthread_sig_cancel);
01075   }
01076   /* Now, wait for all these threads, so that they don't become zombies
01077      and their times are properly added to the thread manager's times. */
01078   for (th = issuing_thread->p_nextlive;
01079        th != issuing_thread;
01080        th = th->p_nextlive) {
01081     waitpid(th->p_pid, NULL, __WCLONE);
01082   }
01083   __fresetlockfiles();
01084   restart(issuing_thread);
01085   _exit(0);
01086 }
01087 
01088 /* Handler for __pthread_sig_cancel in thread manager thread */
01089 
01090 void __pthread_manager_sighandler(int sig)
01091 {
01092   int kick_manager = terminated_children == 0 && main_thread_exiting;
01093   terminated_children = 1;
01094 
01095   /* If the main thread is terminating, kick the thread manager loop
01096      each time some threads terminate. This eliminates a two second
01097      shutdown delay caused by the thread manager sleeping in the
01098      call to __poll(). Instead, the thread manager is kicked into
01099      action, reaps the outstanding threads and resumes the main thread
01100      so that it can complete the shutdown. */
01101 
01102   if (kick_manager) {
01103     struct pthread_request request;
01104     request.req_thread = 0;
01105     request.req_kind = REQ_KICK;
01106     TEMP_FAILURE_RETRY(write_not_cancel(__pthread_manager_request,
01107                                    (char *) &request, sizeof(request)));
01108   }
01109 }
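
/* Note, not part of the original source: the handler above deliberately
   does no reaping itself.  It only sets a flag and, during shutdown,
   writes a REQ_KICK request down the manager's own pipe; write() is
   async-signal-safe, so this is the classic self-pipe pattern for
   waking a poll() loop from a signal handler.  */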
01110 
01111 /* Adjust the priority of the thread manager so that it always runs at a
01112    priority higher than that of all threads */
01113 
01114 void __pthread_manager_adjust_prio(int thread_prio)
01115 {
01116   struct sched_param param;
01117 
01118   if (thread_prio <= manager_thread->p_priority) return;
01119   param.sched_priority =
01120     thread_prio < __sched_get_priority_max(SCHED_FIFO)
01121     ? thread_prio + 1 : thread_prio;
01122   __sched_setscheduler(manager_thread->p_pid, SCHED_FIFO, &param);
01123   manager_thread->p_priority = thread_prio;
01124 }
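
/* Worked example for the priority bump above, not part of the original
   source: if a client thread runs at SCHED_FIFO priority 10 and
   sched_get_priority_max(SCHED_FIFO) is 99, the manager moves to
   priority 11, so it can always preempt the threads it serves.  A
   thread already at 99 leaves the manager at 99, the best that can be
   done.  */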