
glibc 2.9
allocatestack.c
00001 /* Copyright (C) 2002,2003,2004,2005,2006,2007 Free Software Foundation, Inc.
00002    This file is part of the GNU C Library.
00003    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
00004 
00005    The GNU C Library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Lesser General Public
00007    License as published by the Free Software Foundation; either
00008    version 2.1 of the License, or (at your option) any later version.
00009 
00010    The GNU C Library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Lesser General Public License for more details.
00014 
00015    You should have received a copy of the GNU Lesser General Public
00016    License along with the GNU C Library; if not, write to the Free
00017    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
00018    02111-1307 USA.  */
00019 
00020 #include <assert.h>
00021 #include <errno.h>
00022 #include <signal.h>
00023 #include <stdint.h>
00024 #include <string.h>
00025 #include <unistd.h>
00026 #include <sys/mman.h>
00027 #include <sys/param.h>
00028 #include <dl-sysdep.h>
00029 #include <tls.h>
00030 #include <lowlevellock.h>
00031 #include <kernel-features.h>
00032 
00033 
00034 #ifndef NEED_SEPARATE_REGISTER_STACK
00035 
00036 /* Most architectures have exactly one stack pointer.  Some have more.  */
00037 # define STACK_VARIABLES void *stackaddr = NULL
00038 
00039 /* How to pass the values to the 'create_thread' function.  */
00040 # define STACK_VARIABLES_ARGS stackaddr
00041 
00042 /* How to declare the function which gets these parameters.  */
00043 # define STACK_VARIABLES_PARMS void *stackaddr
00044 
00045 /* How to declare allocate_stack.  */
00046 # define ALLOCATE_STACK_PARMS void **stack
00047 
00048 /* This is how the function is called.  We do it this way to allow
00049    other variants of the function to have more parameters.  */
00050 # define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
00051 
00052 #else
00053 
00054 /* We need two stacks.  The kernel will place them but we have to tell
00055    the kernel about the size of the reserved address space.  */
00056 # define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
00057 
00058 /* How to pass the values to the 'create_thread' function.  */
00059 # define STACK_VARIABLES_ARGS stackaddr, stacksize
00060 
00061 /* How to declare the function which gets these parameters.  */
00062 # define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
00063 
00064 /* How to declare allocate_stack.  */
00065 # define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
00066 
00067 /* This is how the function is called.  We do it this way to allow
00068    other variants of the function to have more parameters.  */
00069 # define ALLOCATE_STACK(attr, pd) \
00070   allocate_stack (attr, pd, &stackaddr, &stacksize)
00071 
00072 #endif
00073 
00074 
00075 /* Default alignment of stack.  */
00076 #ifndef STACK_ALIGN
00077 # define STACK_ALIGN __alignof__ (long double)
00078 #endif
00079 
00080 /* Default value for minimal stack size after allocating thread
00081    descriptor and guard.  */
00082 #ifndef MINIMAL_REST_STACK
00083 # define MINIMAL_REST_STACK 4096
00084 #endif
00085 
00086 
00087 /* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
00088    a stack.  Use it when possible.  */
00089 #ifndef MAP_STACK
00090 # define MAP_STACK 0
00091 #endif
00092 
00093 /* This yields the pointer that the TLS support code treats as the thread pointer.  */
00094 #if TLS_TCB_AT_TP
00095 # define TLS_TPADJ(pd) (pd)
00096 #elif TLS_DTV_AT_TP
00097 # define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
00098 #endif
00099 
00100 /* Cache handling for stacks which are not yet freed.  */
00101 
00102 /* Maximum size in bytes of the stack cache.  */
00103 static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default.  */
00104 static size_t stack_cache_actsize;
00105 
00106 /* Low-level lock protecting the stack cache, the cache size, and the lists below.  */
00107 static int stack_cache_lock = LLL_LOCK_INITIALIZER;
00108 
00109 /* List of cached stacks which are not currently in use.  */
00110 static LIST_HEAD (stack_cache);
00111 
00112 /* List of the stacks in use.  */
00113 static LIST_HEAD (stack_used);
00114 
00115 /* List of the threads with user provided stacks in use.  No need to
00116    initialize this, since it's done in __pthread_initialize_minimal.  */
00117 list_t __stack_user __attribute__ ((nocommon));
00118 hidden_data_def (__stack_user)
00119 
00120 #if COLORING_INCREMENT != 0
00121 /* Number of threads created.  */
00122 static unsigned int nptl_ncreated;
00123 #endif
00124 
00125 
00126 /* Check whether the stack is still used or not.  */
00127 #define FREE_P(descr) ((descr)->tid <= 0)
00128 
00129 
00130 /* We create a doubly linked list of all cache entries.  Doubly linked
00131    because this allows removing entries from the end.  */
00132 
00133 
00134 /* Get a stack from the cache.  We have to match by size since
00135    some blocks might be too small or far too large.  */
00136 static struct pthread *
00137 get_cached_stack (size_t *sizep, void **memp)
00138 {
00139   size_t size = *sizep;
00140   struct pthread *result = NULL;
00141   list_t *entry;
00142 
00143   lll_lock (stack_cache_lock, LLL_PRIVATE);
00144 
00145   /* Search the cache for a matching entry.  We search for the
00146      smallest stack which has at least the required size.  Note that
00147      in normal situations the size of all allocated stacks is the
00148      same.  At the very least there are only a few different sizes.
00149      Therefore this loop will exit early most of the time with an
00150      exact match.  */
00151   list_for_each (entry, &stack_cache)
00152     {
00153       struct pthread *curr;
00154 
00155       curr = list_entry (entry, struct pthread, list);
00156       if (FREE_P (curr) && curr->stackblock_size >= size)
00157        {
00158          if (curr->stackblock_size == size)
00159            {
00160              result = curr;
00161              break;
00162            }
00163 
00164          if (result == NULL
00165              || result->stackblock_size > curr->stackblock_size)
00166            result = curr;
00167        }
00168     }
00169 
00170   if (__builtin_expect (result == NULL, 0)
00171       /* Make sure the size difference is not too excessive.  If it is,
00172         we do not use the block.  */
00173       || __builtin_expect (result->stackblock_size > 4 * size, 0))
00174     {
00175       /* Release the lock.  */
00176       lll_unlock (stack_cache_lock, LLL_PRIVATE);
00177 
00178       return NULL;
00179     }
00180 
00181   /* Dequeue the entry.  */
00182   list_del (&result->list);
00183 
00184   /* And add to the list of stacks in use.  */
00185   list_add (&result->list, &stack_used);
00186 
00187   /* And decrease the cache size.  */
00188   stack_cache_actsize -= result->stackblock_size;
00189 
00190   /* Release the lock early.  */
00191   lll_unlock (stack_cache_lock, LLL_PRIVATE);
00192 
00193   /* Report size and location of the stack to the caller.  */
00194   *sizep = result->stackblock_size;
00195   *memp = result->stackblock;
00196 
00197   /* Cancellation handling is back to the default.  */
00198   result->cancelhandling = 0;
00199   result->cleanup = NULL;
00200 
00201   /* No pending event.  */
00202   result->nextevent = NULL;
00203 
00204   /* Clear the DTV.  */
00205   dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
00206   memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
00207 
00208   /* Re-initialize the TLS.  */
00209   _dl_allocate_tls_init (TLS_TPADJ (result));
00210 
00211   return result;
00212 }
00213 
00214 
00215 /* Free stacks until cache size is lower than LIMIT.  */
00216 static void
00217 free_stacks (size_t limit)
00218 {
00219   /* We reduce the size of the cache.  Remove the last entries until
00220      the size is below the limit.  */
00221   list_t *entry;
00222   list_t *prev;
00223 
00224   /* Search from the end of the list.  */
00225   list_for_each_prev_safe (entry, prev, &stack_cache)
00226     {
00227       struct pthread *curr;
00228 
00229       curr = list_entry (entry, struct pthread, list);
00230       if (FREE_P (curr))
00231        {
00232          /* Unlink the block.  */
00233          list_del (entry);
00234 
00235          /* Account for the freed memory.  */
00236          stack_cache_actsize -= curr->stackblock_size;
00237 
00238          /* Free the memory associated with the ELF TLS.  */
00239          _dl_deallocate_tls (TLS_TPADJ (curr), false);
00240 
00241          /* Remove this block.  This should never fail.  If it does
00242             something is really wrong.  */
00243          if (munmap (curr->stackblock, curr->stackblock_size) != 0)
00244            abort ();
00245 
00246          /* Maybe we have freed enough.  */
00247          if (stack_cache_actsize <= limit)
00248            break;
00249        }
00250     }
00251 }
00252 
00253 
00254 /* Add a stack which is not used anymore to the stack cache.  Must be
00255    called with the cache lock held.  */
00256 static inline void
00257 __attribute ((always_inline))
00258 queue_stack (struct pthread *stack)
00259 {
00260   /* We unconditionally add the stack to the list.  The memory may
00261      still be in use but it will not be reused until the kernel marks
00262      the stack as not used anymore.  */
00263   list_add (&stack->list, &stack_cache);
00264 
00265   stack_cache_actsize += stack->stackblock_size;
00266   if (__builtin_expect (stack_cache_actsize > stack_cache_maxsize, 0))
00267     free_stacks (stack_cache_maxsize);
00268 }
00269 
00270 
00271 /* This function is called indirectly from the freeres code in libc.  */
00272 void
00273 __free_stack_cache (void)
00274 {
00275   free_stacks (0);
00276 }
00277 
00278 
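/* Grant read, write, and execute permission to the usable part of the
   stack described by PD (the guard area is skipped).  Returns zero on
   success, or the errno value if mprotect fails.  */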
00279 static int
00280 internal_function
00281 change_stack_perm (struct pthread *pd
00282 #ifdef NEED_SEPARATE_REGISTER_STACK
00283                  , size_t pagemask
00284 #endif
00285                  )
00286 {
00287 #ifdef NEED_SEPARATE_REGISTER_STACK
00288   void *stack = (pd->stackblock
00289                + (((((pd->stackblock_size - pd->guardsize) / 2)
00290                     & pagemask) + pd->guardsize) & pagemask));
00291   size_t len = pd->stackblock + pd->stackblock_size - stack;
00292 #elif _STACK_GROWS_DOWN
00293   void *stack = pd->stackblock + pd->guardsize;
00294   size_t len = pd->stackblock_size - pd->guardsize;
00295 #elif _STACK_GROWS_UP
00296   void *stack = pd->stackblock;
00297   size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
00298 #else
00299 # error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
00300 #endif
00301   if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
00302     return errno;
00303 
00304   return 0;
00305 }
00306 
00307 
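/* Set up the memory for a new thread: either take the caller-supplied
   stack from ATTR, reuse a cached stack of sufficient size, or mmap a
   fresh block.  The thread descriptor is placed at the end of the
   stack block and returned through PDP; the usable stack pointer (and,
   with NEED_SEPARATE_REGISTER_STACK, the size) is returned through the
   ALLOCATE_STACK_PARMS output parameters.  Callers invoke it through
   the ALLOCATE_STACK macro defined above.  Returns zero on success or
   an errno value such as EINVAL or EAGAIN.  */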
00308 static int
00309 allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
00310               ALLOCATE_STACK_PARMS)
00311 {
00312   struct pthread *pd;
00313   size_t size;
00314   size_t pagesize_m1 = __getpagesize () - 1;
00315   void *stacktop;
00316 
00317   assert (attr != NULL);
00318   assert (powerof2 (pagesize_m1 + 1));
00319   assert (TCB_ALIGNMENT >= STACK_ALIGN);
00320 
00321   /* Get the stack size from the attribute if it is set.  Otherwise we
00322      use the default we determined at start time.  */
00323   size = attr->stacksize ?: __default_stacksize;
00324 
00325   /* Get memory for the stack.  */
00326   if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
00327     {
00328       uintptr_t adj;
00329 
00330       /* If the user also specified the size of the stack make sure it
00331         is large enough.  */
00332       if (attr->stacksize != 0
00333          && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
00334        return EINVAL;
00335 
00336       /* Adjust stack size for alignment of the TLS block.  */
00337 #if TLS_TCB_AT_TP
00338       adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
00339            & __static_tls_align_m1;
00340       assert (size > adj + TLS_TCB_SIZE);
00341 #elif TLS_DTV_AT_TP
00342       adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
00343            & __static_tls_align_m1;
00344       assert (size > adj);
00345 #endif
00346 
00347       /* The user provided some memory.  Let's hope it matches the
00348         size...  We do not allocate guard pages if the user provided
00349         the stack.  It is the user's responsibility to do this if it
00350         is wanted.  */
00351 #if TLS_TCB_AT_TP
00352       pd = (struct pthread *) ((uintptr_t) attr->stackaddr
00353                             - TLS_TCB_SIZE - adj);
00354 #elif TLS_DTV_AT_TP
00355       pd = (struct pthread *) (((uintptr_t) attr->stackaddr
00356                              - __static_tls_size - adj)
00357                             - TLS_PRE_TCB_SIZE);
00358 #endif
00359 
00360       /* The user provided stack memory needs to be cleared.  */
00361       memset (pd, '\0', sizeof (struct pthread));
00362 
00363       /* The first TSD block is included in the TCB.  */
00364       pd->specific[0] = pd->specific_1stblock;
00365 
00366       /* Remember the stack-related values.  */
00367       pd->stackblock = (char *) attr->stackaddr - size;
00368       pd->stackblock_size = size;
00369 
00370       /* This is a user-provided stack.  It will not be queued in the
00371         stack cache nor will the memory (except the TLS memory) be freed.  */
00372       pd->user_stack = true;
00373 
00374       /* This is at least the second thread.  */
00375       pd->header.multiple_threads = 1;
00376 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
00377       __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
00378 #endif
00379 
00380 #ifndef __ASSUME_PRIVATE_FUTEX
00381       /* The thread must know when private futexes are supported.  */
00382       pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
00383                                           header.private_futex);
00384 #endif
00385 
00386 #ifdef NEED_DL_SYSINFO
00387       /* Copy the sysinfo value from the parent.  */
00388       THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
00389 #endif
00390 
00391       /* The process ID is also the same as that of the caller.  */
00392       pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
00393 
00394       /* Allocate the DTV for this thread.  */
00395       if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
00396        {
00397          /* Something went wrong.  */
00398          assert (errno == ENOMEM);
00399          return EAGAIN;
00400        }
00401 
00402 
00403       /* Prepare to modify global data.  */
00404       lll_lock (stack_cache_lock, LLL_PRIVATE);
00405 
00406       /* And add to the list of stacks in use.  */
00407       list_add (&pd->list, &__stack_user);
00408 
00409       lll_unlock (stack_cache_lock, LLL_PRIVATE);
00410     }
00411   else
00412     {
00413       /* Allocate some anonymous memory.  If possible use the cache.  */
00414       size_t guardsize;
00415       size_t reqsize;
00416       void *mem;
00417       const int prot = (PROT_READ | PROT_WRITE
00418                      | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
00419 
00420 #if COLORING_INCREMENT != 0
00421       /* Add one more page for stack coloring.  Don't do it for stacks
00422         with 16 times pagesize or larger.  This might just cause
00423         unnecessary misalignment.  */
00424       if (size <= 16 * pagesize_m1)
00425        size += pagesize_m1 + 1;
00426 #endif
00427 
00428       /* Adjust the stack size for alignment.  */
00429       size &= ~__static_tls_align_m1;
00430       assert (size != 0);
00431 
00432       /* Make sure the size of the stack is enough for the guard and
00433         possibly the thread descriptor.  */
00434       guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
00435       if (__builtin_expect (size < ((guardsize + __static_tls_size
00436                                  + MINIMAL_REST_STACK + pagesize_m1)
00437                                 & ~pagesize_m1),
00438                          0))
00439        /* The stack is too small (or the guard too large).  */
00440        return EINVAL;
00441 
00442       /* Try to get a stack from the cache.  */
00443       reqsize = size;
00444       pd = get_cached_stack (&size, &mem);
00445       if (pd == NULL)
00446        {
00447          /* To avoid aliasing effects on a larger scale than pages we
00448             adjust the allocated stack size if necessary.  This way
00449             allocations directly following each other will not have
00450             aliasing problems.  */
00451 #if MULTI_PAGE_ALIASING != 0
00452          if ((size % MULTI_PAGE_ALIASING) == 0)
00453            size += pagesize_m1 + 1;
00454 #endif
00455 
00456          mem = mmap (NULL, size, prot,
00457                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
00458 
00459          if (__builtin_expect (mem == MAP_FAILED, 0))
00460            {
00461              if (errno == ENOMEM)
00462               __set_errno (EAGAIN);
00463 
00464               return errno;
00465            }
00466 
00467          /* SIZE is guaranteed to be greater than zero.
00468             So we can never get a null pointer back from mmap.  */
00469          assert (mem != NULL);
00470 
00471 #if COLORING_INCREMENT != 0
00472          /* Atomically increment NCREATED.  */
00473          unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
00474 
00475          /* We choose the offset for coloring by incrementing it for
00476             every new thread by a fixed amount.  The offset is used
00477             modulo the page size.  Even if coloring would be better
00478             relative to higher alignment values it makes no sense to
00479             do it since the mmap() interface does not allow us to
00480             specify any alignment for the returned memory block.  */
00481          size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
00482 
00483          /* Make sure the coloring offset does not disturb the alignment
00484             of the TCB and static TLS block.  */
00485          if (__builtin_expect ((coloring & __static_tls_align_m1) != 0, 0))
00486            coloring = (((coloring + __static_tls_align_m1)
00487                       & ~(__static_tls_align_m1))
00488                      & ~pagesize_m1);
00489 #else
00490          /* Unless specified we do not make any adjustments.  */
00491 # define coloring 0
00492 #endif
00493 
00494          /* Place the thread descriptor at the end of the stack.  */
00495 #if TLS_TCB_AT_TP
00496          pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
00497 #elif TLS_DTV_AT_TP
00498          pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
00499                                 - __static_tls_size)
00500                                 & ~__static_tls_align_m1)
00501                                - TLS_PRE_TCB_SIZE);
00502 #endif
00503 
00504          /* Remember the stack-related values.  */
00505          pd->stackblock = mem;
00506          pd->stackblock_size = size;
00507 
00508          /* We allocated the first block of the thread-specific data array.
00509             This address will not change for the lifetime of this
00510             descriptor.  */
00511          pd->specific[0] = pd->specific_1stblock;
00512 
00513          /* This is at least the second thread.  */
00514          pd->header.multiple_threads = 1;
00515 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
00516          __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
00517 #endif
00518 
00519 #ifndef __ASSUME_PRIVATE_FUTEX
00520          /* The thread must know when private futexes are supported.  */
00521          pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
00522                                                     header.private_futex);
00523 #endif
00524 
00525 #ifdef NEED_DL_SYSINFO
00526          /* Copy the sysinfo value from the parent.  */
00527          THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
00528 #endif
00529 
00530          /* The process ID is also the same as that of the caller.  */
00531          pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
00532 
00533          /* Allocate the DTV for this thread.  */
00534          if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
00535            {
00536              /* Something went wrong.  */
00537              assert (errno == ENOMEM);
00538 
00539              /* Free the stack memory we just allocated.  */
00540              (void) munmap (mem, size);
00541 
00542              return EAGAIN;
00543            }
00544 
00545 
00546          /* Prepare to modify global data.  */
00547          lll_lock (stack_cache_lock, LLL_PRIVATE);
00548 
00549          /* And add to the list of stacks in use.  */
00550          list_add (&pd->list, &stack_used);
00551 
00552          lll_unlock (stack_cache_lock, LLL_PRIVATE);
00553 
00554 
00555          /* There might have been a race.  Another thread might have
00556             caused the stacks to get exec permission while this new
00557             stack was prepared.  Detect if this was possible and
00558             change the permission if necessary.  */
00559          if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
00560                             && (prot & PROT_EXEC) == 0, 0))
00561            {
00562              int err = change_stack_perm (pd
00563 #ifdef NEED_SEPARATE_REGISTER_STACK
00564                                       , ~pagesize_m1
00565 #endif
00566                                       );
00567              if (err != 0)
00568               {
00569                 /* Free the stack memory we just allocated.  */
00570                 (void) munmap (mem, size);
00571 
00572                 return err;
00573               }
00574            }
00575 
00576 
00577          /* Note that all of the stack and the thread descriptor are
00578             zeroed.  This means we do not have to initialize fields
00579             with initial value zero.  This is specifically true for
00580             the 'tid' field which is always set back to zero once the
00581             stack is not used anymore and for the 'guardsize' field
00582             which will be read next.  */
00583        }
00584 
00585       /* Create or resize the guard area if necessary.  */
00586       if (__builtin_expect (guardsize > pd->guardsize, 0))
00587        {
00588 #ifdef NEED_SEPARATE_REGISTER_STACK
00589          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
00590 #elif _STACK_GROWS_DOWN
00591          char *guard = mem;
00592 #elif _STACK_GROWS_UP
00593          char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
00594 #endif
00595          if (mprotect (guard, guardsize, PROT_NONE) != 0)
00596            {
00597              int err;
00598            mprot_error:
00599              err = errno;
00600 
00601              lll_lock (stack_cache_lock, LLL_PRIVATE);
00602 
00603              /* Remove the thread from the list.  */
00604              list_del (&pd->list);
00605 
00606              lll_unlock (stack_cache_lock, LLL_PRIVATE);
00607 
00608              /* Get rid of the TLS block we allocated.  */
00609              _dl_deallocate_tls (TLS_TPADJ (pd), false);
00610 
00611              /* Free the stack memory regardless of whether the size
00612                of the cache is over the limit or not.  If this piece
00613                of memory caused problems we had better not use it
00614                anymore.  We also ignore possible errors; there
00615                is nothing we could do.  */
00616              (void) munmap (mem, size);
00617 
00618              return err;
00619            }
00620 
00621          pd->guardsize = guardsize;
00622        }
00623       else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
00624                              0))
00625        {
00626          /* The old guard area is too large.  */
00627 
00628 #ifdef NEED_SEPARATE_REGISTER_STACK
00629          char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
00630          char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);
00631 
00632          if (oldguard < guard
00633              && mprotect (oldguard, guard - oldguard, prot) != 0)
00634            goto mprot_error;
00635 
00636          if (mprotect (guard + guardsize,
00637                      oldguard + pd->guardsize - guard - guardsize,
00638                      prot) != 0)
00639            goto mprot_error;
00640 #elif _STACK_GROWS_DOWN
00641          if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
00642                      prot) != 0)
00643            goto mprot_error;
00644 #elif _STACK_GROWS_UP
00645          if (mprotect ((char *) pd - pd->guardsize,
00646                      pd->guardsize - guardsize, prot) != 0)
00647            goto mprot_error;
00648 #endif
00649 
00650          pd->guardsize = guardsize;
00651        }
00652       /* The pthread_getattr_np() calls need to be passed the size
00653         requested in the attribute, regardless of how large the
00654         actually used guard size is.  */
00655       pd->reported_guardsize = guardsize;
00656     }
00657 
00658   /* Initialize the lock.  We have to do this unconditionally since the
00659      stillborn thread could be canceled while the lock is taken.  */
00660   pd->lock = LLL_LOCK_INITIALIZER;
00661 
00662   /* The robust mutex lists also need to be initialized
00663      unconditionally because the cleanup for the previous stack owner
00664      might have happened in the kernel.  */
00665   pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
00666                               - offsetof (pthread_mutex_t,
00667                                          __data.__list.__next));
00668   pd->robust_head.list_op_pending = NULL;
00669 #ifdef __PTHREAD_MUTEX_HAVE_PREV
00670   pd->robust_prev = &pd->robust_head;
00671 #endif
00672   pd->robust_head.list = &pd->robust_head;
00673 
00674   /* We place the thread descriptor at the end of the stack.  */
00675   *pdp = pd;
00676 
00677 #if TLS_TCB_AT_TP
00678   /* The stack begins before the TCB and the static TLS block.  */
00679   stacktop = ((char *) (pd + 1) - __static_tls_size);
00680 #elif TLS_DTV_AT_TP
00681   stacktop = (char *) (pd - 1);
00682 #endif
00683 
00684 #ifdef NEED_SEPARATE_REGISTER_STACK
00685   *stack = pd->stackblock;
00686   *stacksize = stacktop - *stack;
00687 #elif _STACK_GROWS_DOWN
00688   *stack = stacktop;
00689 #elif _STACK_GROWS_UP
00690   *stack = pd->stackblock;
00691   assert (*stack > 0);
00692 #endif
00693 
00694   return 0;
00695 }
00696 
00697 
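/* Give the stack of the terminated thread PD back: stacks we allocated
   are queued in the stack cache (and possibly munmapped later by
   free_stacks); for user provided stacks only the TLS memory is
   released.  */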
00698 void
00699 internal_function
00700 __deallocate_stack (struct pthread *pd)
00701 {
00702   lll_lock (stack_cache_lock, LLL_PRIVATE);
00703 
00704   /* Remove the thread from the list it is on, either the list of
00705      threads with user provided stacks or the list of stacks in use.  */
00706   list_del (&pd->list);
00707 
00708   /* Not much to do.  Just free the mmap()ed memory.  Note that we do
00709      not reset the 'used' flag in the 'tid' field.  This is done by
00710      the kernel.  If no thread has been created yet this field is
00711      still zero.  */
00712   if (__builtin_expect (! pd->user_stack, 1))
00713     (void) queue_stack (pd);
00714   else
00715     /* Free the memory associated with the ELF TLS.  */
00716     _dl_deallocate_tls (TLS_TPADJ (pd), false);
00717 
00718   lll_unlock (stack_cache_lock, LLL_PRIVATE);
00719 }
00720 
00721 
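/* Make all thread stacks executable: first the main thread's stack via
   _dl_make_stack_executable, then every stack on the used and cached
   lists.  Returns zero on success or an errno value.  */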
00722 int
00723 internal_function
00724 __make_stacks_executable (void **stack_endp)
00725 {
00726   /* First the main thread's stack.  */
00727   int err = _dl_make_stack_executable (stack_endp);
00728   if (err != 0)
00729     return err;
00730 
00731 #ifdef NEED_SEPARATE_REGISTER_STACK
00732   const size_t pagemask = ~(__getpagesize () - 1);
00733 #endif
00734 
00735   lll_lock (stack_cache_lock, LLL_PRIVATE);
00736 
00737   list_t *runp;
00738   list_for_each (runp, &stack_used)
00739     {
00740       err = change_stack_perm (list_entry (runp, struct pthread, list)
00741 #ifdef NEED_SEPARATE_REGISTER_STACK
00742                             , pagemask
00743 #endif
00744                             );
00745       if (err != 0)
00746        break;
00747     }
00748 
00749   /* Also change the permission for the currently unused stacks.  This
00750      might be wasted time but better spend it here than adding a check
00751      in the fast path.  */
00752   if (err == 0)
00753     list_for_each (runp, &stack_cache)
00754       {
00755        err = change_stack_perm (list_entry (runp, struct pthread, list)
00756 #ifdef NEED_SEPARATE_REGISTER_STACK
00757                              , pagemask
00758 #endif
00759                              );
00760        if (err != 0)
00761          break;
00762       }
00763 
00764   lll_unlock (stack_cache_lock, LLL_PRIVATE);
00765 
00766   return err;
00767 }
00768 
00769 
00770 /* In case of a fork() call the memory allocation in the child will be
00771    the same but only one thread is running.  All stacks except that of
00772    the one running thread are not used anymore.  We have to recycle
00773    them.  */
00774 void
00775 __reclaim_stacks (void)
00776 {
00777   struct pthread *self = (struct pthread *) THREAD_SELF;
00778 
00779   /* No locking necessary.  The calling thread is the only one left running.  */
00780 
00781   /* Mark all stacks except the still running one as free.  */
00782   list_t *runp;
00783   list_for_each (runp, &stack_used)
00784     {
00785       struct pthread *curp = list_entry (runp, struct pthread, list);
00786       if (curp != self)
00787        {
00788          /* This marks the stack as free.  */
00789          curp->tid = 0;
00790 
00791          /* The PID field must be initialized for the new process.  */
00792          curp->pid = self->pid;
00793 
00794          /* Account for the size of the stack.  */
00795          stack_cache_actsize += curp->stackblock_size;
00796 
00797          if (curp->specific_used)
00798            {
00799              /* Clear the thread-specific data.  */
00800              memset (curp->specific_1stblock, '\0',
00801                     sizeof (curp->specific_1stblock));
00802 
00803              curp->specific_used = false;
00804 
00805              for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
00806               if (curp->specific[cnt] != NULL)
00807                 {
00808                   memset (curp->specific[cnt], '\0',
00809                          sizeof (curp->specific_1stblock));
00810 
00811                   /* We have allocated the block which we do not
00812                      free here so re-set the bit.  */
00813                   curp->specific_used = true;
00814                 }
00815            }
00816        }
00817     }
00818 
00819   /* Reset the PIDs in any cached stacks.  */
00820   list_for_each (runp, &stack_cache)
00821     {
00822       struct pthread *curp = list_entry (runp, struct pthread, list);
00823       curp->pid = self->pid;
00824     }
00825 
00826   /* Add the stacks of all running threads to the cache.  */
00827   list_splice (&stack_used, &stack_cache);
00828 
00829   /* Remove the entry for the current thread from the cache list
00830      and add it to the list of running threads.  Which of the two
00831      lists it goes on is decided by the user_stack flag.  */
00832   list_del (&self->list);
00833 
00834   /* Re-initialize the lists for all the threads.  */
00835   INIT_LIST_HEAD (&stack_used);
00836   INIT_LIST_HEAD (&__stack_user);
00837 
00838   if (__builtin_expect (THREAD_GETMEM (self, user_stack), 0))
00839     list_add (&self->list, &__stack_user);
00840   else
00841     list_add (&self->list, &stack_used);
00842 
00843   /* There is one thread running.  */
00844   __nptl_nthreads = 1;
00845 
00846   /* Initialize the lock.  */
00847   stack_cache_lock = LLL_LOCK_INITIALIZER;
00848 }
00849 
00850 
00851 #if HP_TIMING_AVAIL
00852 # undef __find_thread_by_id
00853 /* Find a thread given the thread ID.  */
00854 attribute_hidden
00855 struct pthread *
00856 __find_thread_by_id (pid_t tid)
00857 {
00858   struct pthread *result = NULL;
00859 
00860   lll_lock (stack_cache_lock, LLL_PRIVATE);
00861 
00862   /* Iterate over the list with system-allocated threads first.  */
00863   list_t *runp;
00864   list_for_each (runp, &stack_used)
00865     {
00866       struct pthread *curp;
00867 
00868       curp = list_entry (runp, struct pthread, list);
00869 
00870       if (curp->tid == tid)
00871        {
00872          result = curp;
00873          goto out;
00874        }
00875     }
00876 
00877   /* Now the list with threads using user-allocated stacks.  */
00878   list_for_each (runp, &__stack_user)
00879     {
00880       struct pthread *curp;
00881 
00882       curp = list_entry (runp, struct pthread, list);
00883 
00884       if (curp->tid == tid)
00885        {
00886          result = curp;
00887          goto out;
00888        }
00889     }
00890 
00891  out:
00892   lll_unlock (stack_cache_lock, LLL_PRIVATE);
00893 
00894   return result;
00895 }
00896 #endif
00897 
00898 
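/* Tell the thread T to apply the set*id() change described by CMDP:
   for a joinable thread the SETXID bit is set in its cancellation word
   first (nothing is done if it is already exiting), then the SIGSETXID
   signal is sent and the counter in CMDP is incremented if the signal
   was delivered successfully.  */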
00899 static void
00900 internal_function
00901 setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
00902 {
00903   if (! IS_DETACHED (t))
00904     {
00905       int ch;
00906       do
00907        {
00908          ch = t->cancelhandling;
00909 
00910          /* If the thread is exiting right now, ignore it.  */
00911          if ((ch & EXITING_BITMASK) != 0)
00912            return;
00913        }
00914       while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
00915                                              ch | SETXID_BITMASK, ch));
00916     }
00917 
00918   int val;
00919   INTERNAL_SYSCALL_DECL (err);
00920 #if __ASSUME_TGKILL
00921   val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
00922                        t->tid, SIGSETXID);
00923 #else
00924 # ifdef __NR_tgkill
00925   val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
00926                        t->tid, SIGSETXID);
00927   if (INTERNAL_SYSCALL_ERROR_P (val, err)
00928       && INTERNAL_SYSCALL_ERRNO (val, err) == ENOSYS)
00929 # endif
00930     val = INTERNAL_SYSCALL (tkill, err, 2, t->tid, SIGSETXID);
00931 #endif
00932 
00933   if (!INTERNAL_SYSCALL_ERROR_P (val, err))
00934     atomic_increment (&cmdp->cntr);
00935 }
00936 
00937 
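/* Perform a setuid()/setgid()-style ID change process-wide: signal
   every other known thread to execute the system call described by
   CMDP, wait until all of them have done so, and finally execute the
   system call in the calling thread as well.  Returns the result of
   the system call, or -1 with errno set.  */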
00938 int
00939 attribute_hidden
00940 __nptl_setxid (struct xid_command *cmdp)
00941 {
00942   int result;
00943   lll_lock (stack_cache_lock, LLL_PRIVATE);
00944 
00945   __xidcmd = cmdp;
00946   cmdp->cntr = 0;
00947 
00948   struct pthread *self = THREAD_SELF;
00949 
00950   /* Iterate over the list with system-allocated threads first.  */
00951   list_t *runp;
00952   list_for_each (runp, &stack_used)
00953     {
00954       struct pthread *t = list_entry (runp, struct pthread, list);
00955       if (t == self)
00956        continue;
00957 
00958       setxid_signal_thread (cmdp, t);
00959     }
00960 
00961   /* Now the list with threads using user-allocated stacks.  */
00962   list_for_each (runp, &__stack_user)
00963     {
00964       struct pthread *t = list_entry (runp, struct pthread, list);
00965       if (t == self)
00966        continue;
00967 
00968       setxid_signal_thread (cmdp, t);
00969     }
00970 
00971   int cur = cmdp->cntr;
00972   while (cur != 0)
00973     {
00974       lll_futex_wait (&cmdp->cntr, cur, LLL_PRIVATE);
00975       cur = cmdp->cntr;
00976     }
00977 
00978   /* This must be last, otherwise the current thread might not have
00979      permission to send the SIGSETXID signal to the other threads.  */
00980   INTERNAL_SYSCALL_DECL (err);
00981   result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
00982                              cmdp->id[0], cmdp->id[1], cmdp->id[2]);
00983   if (INTERNAL_SYSCALL_ERROR_P (result, err))
00984     {
00985       __set_errno (INTERNAL_SYSCALL_ERRNO (result, err));
00986       result = -1;
00987     }
00988 
00989   lll_unlock (stack_cache_lock, LLL_PRIVATE);
00990   return result;
00991 }
00992 
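/* Initialize the static TLS block of the module MAP in the thread
   described by CURP: point the thread's DTV entry for the module at
   the block and copy in the TLS initialization image, zero-filling
   the rest.  */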
00993 static inline void __attribute__((always_inline))
00994 init_one_static_tls (struct pthread *curp, struct link_map *map)
00995 {
00996   dtv_t *dtv = GET_DTV (TLS_TPADJ (curp));
00997 # if TLS_TCB_AT_TP
00998   void *dest = (char *) curp - map->l_tls_offset;
00999 # elif TLS_DTV_AT_TP
01000   void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
01001 # else
01002 #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
01003 # endif
01004 
01005   /* Fill in the DTV slot so that a later LD/GD access will find it.  */
01006   dtv[map->l_tls_modid].pointer.val = dest;
01007   dtv[map->l_tls_modid].pointer.is_static = true;
01008 
01009   /* Initialize the memory.  */
01010   memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
01011          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
01012 }
01013 
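/* Initialize the static TLS block of the module MAP in every thread
   currently known, whether its stack was allocated by us or provided
   by the user.  */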
01014 void
01015 attribute_hidden
01016 __pthread_init_static_tls (struct link_map *map)
01017 {
01018   lll_lock (stack_cache_lock, LLL_PRIVATE);
01019 
01020   /* Iterate over the list with system-allocated threads first.  */
01021   list_t *runp;
01022   list_for_each (runp, &stack_used)
01023     init_one_static_tls (list_entry (runp, struct pthread, list), map);
01024 
01025   /* Now the list with threads using user-allocated stacks.  */
01026   list_for_each (runp, &__stack_user)
01027     init_one_static_tls (list_entry (runp, struct pthread, list), map);
01028 
01029   lll_unlock (stack_cache_lock, LLL_PRIVATE);
01030 }
01031 
01032 
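/* Wait until no other thread is using the global scope anymore: for
   every thread whose gscope flag is in use, switch the flag to the
   WAIT state and block on a futex until that thread resets it.  */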
01033 void
01034 attribute_hidden
01035 __wait_lookup_done (void)
01036 {
01037   lll_lock (stack_cache_lock, LLL_PRIVATE);
01038 
01039   struct pthread *self = THREAD_SELF;
01040 
01041   /* Iterate over the list with system-allocated threads first.  */
01042   list_t *runp;
01043   list_for_each (runp, &stack_used)
01044     {
01045       struct pthread *t = list_entry (runp, struct pthread, list);
01046       if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
01047        continue;
01048 
01049       int *const gscope_flagp = &t->header.gscope_flag;
01050 
01051       /* We have to wait until this thread is done with the global
01052         scope.  First tell the thread that we are waiting and
01053         possibly have to be woken.  */
01054       if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
01055                                           THREAD_GSCOPE_FLAG_WAIT,
01056                                           THREAD_GSCOPE_FLAG_USED))
01057        continue;
01058 
01059       do
01060        lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
01061       while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
01062     }
01063 
01064   /* Now the list with threads using user-allocated stacks.  */
01065   list_for_each (runp, &__stack_user)
01066     {
01067       struct pthread *t = list_entry (runp, struct pthread, list);
01068       if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
01069        continue;
01070 
01071       int *const gscope_flagp = &t->header.gscope_flag;
01072 
01073       /* We have to wait until this thread is done with the global
01074         scope.  First tell the thread that we are waiting and
01075         possibly have to be woken.  */
01076       if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
01077                                           THREAD_GSCOPE_FLAG_WAIT,
01078                                           THREAD_GSCOPE_FLAG_USED))
01079        continue;
01080 
01081       do
01082        lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
01083       while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
01084     }
01085 
01086   lll_unlock (stack_cache_lock, LLL_PRIVATE);
01087 }