
tor  0.2.3.18-rc
mempool.c
/* Copyright (c) 2007-2012, The Tor Project, Inc. */
/* See LICENSE for licensing information */
#if 1
/* Tor dependencies */
#include "orconfig.h"
#endif

#include <stdlib.h>
#include <string.h>
#include "torint.h"
#define MEMPOOL_PRIVATE
#include "mempool.h"

/* OVERVIEW:
 *
 *     This is an implementation of memory pools for Tor cells.  It may be
 *     useful for you too.
 *
 *     Generally, a memory pool is an allocation strategy optimized for large
 *     numbers of identically-sized objects.  Rather than the elaborate arena
 *     and coalescing strategies you need to get good performance for a
 *     general-purpose malloc(), pools use a series of large memory "chunks",
 *     each of which is carved into a bunch of smaller "items" or
 *     "allocations".
 *
 *     To get decent performance, you need to:
 *        - Minimize the number of times you hit the underlying allocator.
 *        - Try to keep accesses as local in memory as possible.
 *        - Try to keep the common case fast.
 *
 *     Our implementation uses three lists of chunks per pool.  Each chunk can
 *     be either "full" (no more room for items); "empty" (no items); or
 *     "used" (not full, not empty).  There are independent doubly-linked
 *     lists for each state.
 *
 * CREDIT:
 *
 *     I wrote this after looking at 3 or 4 other pooling allocators, but
 *     without copying.  The strategy this most resembles (which is funny,
 *     since that's the one I looked at longest ago) is the pool allocator
 *     underlying Python's obmalloc code.  Major differences from obmalloc's
 *     pools are:
 *       - We don't even try to be threadsafe.
 *       - We only handle objects of one size.
 *       - Our list of empty chunks is doubly-linked, not singly-linked.
 *         (This could change pretty easily; it's only doubly-linked for
 *         consistency.)
 *       - We keep a list of full chunks (so we can have a "nuke everything"
 *         function).  Obmalloc's pools leave full chunks to float unanchored.
 *
 * LIMITATIONS:
 *   - Not even slightly threadsafe.
 *   - Likes to have lots of items per chunk.
 *   - One pointer overhead per allocated thing.  (The alternative is
 *     something like glib's use of an RB-tree to keep track of what
 *     chunk any given piece of memory is in.)
 *   - Only aligns allocated things to void* level: redefine ALIGNMENT_TYPE
 *     if you need doubles.
 *   - Could probably be optimized a bit; the representation contains
 *     a bit more info than it really needs to have.
 */

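/* EXAMPLE:
 *
 *     A minimal usage sketch of the API defined below ("cell_t" here is a
 *     hypothetical stand-in for whatever fixed-size item type you pool):
 *
 *         mp_pool_t *pool = mp_pool_new(sizeof(cell_t), 64*1024);
 *         cell_t *c = mp_pool_get(pool);     // grab one item
 *         // ... use c ...
 *         mp_pool_release(c);                // hand it back to its chunk
 *         mp_pool_clean(pool, 0, 1);         // free surplus empty chunks
 *         mp_pool_destroy(pool);             // free the pool and all chunks
 */
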
#if 1
/* Tor dependencies */
#include "util.h"
#include "compat.h"
#include "torlog.h"
#define ALLOC(x) tor_malloc(x)
#define FREE(x) tor_free(x)
#define ASSERT(x) tor_assert(x)
#undef ALLOC_CAN_RETURN_NULL
#define TOR
//#define ALLOC_ROUNDUP(p) tor_malloc_roundup(p)
/* End Tor dependencies */
#else
/* If you're not building this as part of Tor, you'll want to define the
 * following macros.  For now, these should do as defaults.
 */
#include <assert.h>
#define PREDICT_UNLIKELY(x) (x)
#define PREDICT_LIKELY(x) (x)
#define ALLOC(x) malloc(x)
#define FREE(x) free(x)
#define STRUCT_OFFSET(tp, member)                       \
  ((off_t) (((char*)&((tp*)0)->member)-(char*)0))
#define ASSERT(x) assert(x)
#define ALLOC_CAN_RETURN_NULL
#endif

/* Tuning parameters */

/** Largest type that returned items need to be aligned to.  Redefine as
 * "double" if items must be safe to use for doubles (see LIMITATIONS). */
#define ALIGNMENT_TYPE void *

/** Increment by which we align allocated items. */
#define ALIGNMENT sizeof(ALIGNMENT_TYPE)

/** Largest chunk size (in bytes) that we are willing to allocate. */
#define MAX_CHUNK (8*(1L<<20))

/** Smallest chunk size (in bytes) that we are willing to allocate. */
#define MIN_CHUNK 4096

typedef struct mp_allocated_t mp_allocated_t;
typedef struct mp_chunk_t mp_chunk_t;

/** Holds a single allocated item, as carved out of a chunk. */
struct mp_allocated_t {
  /** The chunk that this item was allocated in.  This adds one pointer of
   * overhead to every item; it is what lets mp_pool_release() find the
   * right chunk without any lookup structure. */
  mp_chunk_t *in_chunk;
  union {
    /** If this item is free, the next free item on its chunk's freelist. */
    mp_allocated_t *next_free;
    /** If this item is in use, the memory that the user actually gets. */
    char mem[1];
    /** Extra union member to force the alignment of mem. */
    ALIGNMENT_TYPE _dummy;
  } u;
};

/** Value held in a chunk's magic field while the chunk is valid; used to
 * detect memory corruption and stale pointers. */
#define MP_CHUNK_MAGIC 0x09870123

/** A chunk of memory, allocated with ALLOC() and carved into items. */
struct mp_chunk_t {
  unsigned long magic; /**< Equal to MP_CHUNK_MAGIC if this chunk is valid. */
  mp_chunk_t *next; /**< Next chunk on the list this chunk is currently on. */
  mp_chunk_t *prev; /**< Previous chunk on that list. */
  mp_pool_t *pool; /**< The pool that this chunk belongs to. */
  /** First free item on this chunk's freelist, or NULL if no released items
   * are waiting for reuse.  Note that memory at and beyond next_mem has
   * never been carved into items and is not on this list. */
  mp_allocated_t *first_free;
  int n_allocated; /**< Number of currently allocated items in this chunk. */
  int capacity; /**< Largest number of items that can fit in this chunk. */
  size_t mem_size; /**< Number of usable bytes in mem. */
  char *next_mem; /**< Pointer into the part of mem not yet carved up. */
  char mem[FLEXIBLE_ARRAY_MEMBER]; /**< Storage for the items themselves. */
};

/** Number of bytes needed at the start of each chunk, before its items. */
#define CHUNK_OVERHEAD STRUCT_OFFSET(mp_chunk_t, mem[0])

/** Given an mp_allocated_t, return a pointer to the memory that the user
 * receives. */
#define A2M(a) (&(a)->u.mem)

/** Given a pointer to user memory as returned by A2M(), return a pointer to
 * the mp_allocated_t that holds it. */
#define M2A(p) ( ((char*)p) - STRUCT_OFFSET(mp_allocated_t, u.mem) )

#ifdef ALLOC_CAN_RETURN_NULL
/** If the result of an ALLOC() call is NULL, return NULL from the current
 * function.  When ALLOC() cannot fail, this expands to nothing. */
#define CHECK_ALLOC(x)                           \
  if (PREDICT_UNLIKELY(!x)) { return NULL; }
#else
#define CHECK_ALLOC(x)
#endif

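/** Helper: allocate and return a new, empty chunk for <b>pool</b>, with room
 * for pool->new_chunk_capacity items. */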
static mp_chunk_t *
mp_chunk_new(mp_pool_t *pool)
{
  size_t sz = pool->new_chunk_capacity * pool->item_alloc_size;
#ifdef ALLOC_ROUNDUP
  size_t alloc_size = CHUNK_OVERHEAD + sz;
  mp_chunk_t *chunk = ALLOC_ROUNDUP(&alloc_size);
#else
  mp_chunk_t *chunk = ALLOC(CHUNK_OVERHEAD + sz);
#endif
#ifdef MEMPOOL_STATS
  ++pool->total_chunks_allocated;
#endif
  CHECK_ALLOC(chunk);
  memset(chunk, 0, sizeof(mp_chunk_t)); /* Doesn't clear the whole thing. */
  chunk->magic = MP_CHUNK_MAGIC;
#ifdef ALLOC_ROUNDUP
  chunk->mem_size = alloc_size - CHUNK_OVERHEAD;
  chunk->capacity = chunk->mem_size / pool->item_alloc_size;
#else
  chunk->capacity = pool->new_chunk_capacity;
  chunk->mem_size = sz;
#endif
  chunk->next_mem = chunk->mem;
  chunk->pool = pool;
  return chunk;
}

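/** Helper: put <b>chunk</b>, whose prev pointer must be NULL and which must
 * not be on any list, at the front of <b>pool</b>'s used-chunk list. */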
static INLINE void
add_newly_used_chunk_to_used_list(mp_pool_t *pool, mp_chunk_t *chunk)
{
  chunk->next = pool->used_chunks;
  if (chunk->next)
    chunk->next->prev = chunk;
  pool->used_chunks = chunk;
  ASSERT(!chunk->prev);
}

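/** Return a newly allocated item from <b>pool</b>, with room for at least
 * the item_size that was passed to mp_pool_new(). */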
void *
mp_pool_get(mp_pool_t *pool)
{
  mp_chunk_t *chunk;
  mp_allocated_t *allocated;

  if (PREDICT_LIKELY(pool->used_chunks != NULL)) {
    /* Common case: there is some chunk that is neither full nor empty.  Use
     * that one. (We can't use the full ones, obviously, and we should fill
     * up the used ones before we start on any empty ones.) */
    chunk = pool->used_chunks;

  } else if (pool->empty_chunks) {
    /* We have no used chunks, but we have an empty chunk that we haven't
     * freed yet: use that.  (We pull from the front of the list, which should
     * get us the most recently emptied chunk.) */
    chunk = pool->empty_chunks;

    /* Remove the chunk from the empty list. */
    pool->empty_chunks = chunk->next;
    if (chunk->next)
      chunk->next->prev = NULL;

    /* Put the chunk on the 'used' list. */
    add_newly_used_chunk_to_used_list(pool, chunk);

    ASSERT(!chunk->prev);
    --pool->n_empty_chunks;
    if (pool->n_empty_chunks < pool->min_empty_chunks)
      pool->min_empty_chunks = pool->n_empty_chunks;
  } else {
    /* We have no used or empty chunks: allocate a new chunk. */
    chunk = mp_chunk_new(pool);
    CHECK_ALLOC(chunk);

    /* Add the new chunk to the used list. */
    add_newly_used_chunk_to_used_list(pool, chunk);
  }

  ASSERT(chunk->n_allocated < chunk->capacity);

  if (chunk->first_free) {
    /* If there's anything on the chunk's freelist, unlink it and use it. */
    allocated = chunk->first_free;
    chunk->first_free = allocated->u.next_free;
    allocated->u.next_free = NULL; /* For debugging; not really needed. */
    ASSERT(allocated->in_chunk == chunk);
  } else {
    /* Otherwise, the chunk had better have some free space left on it. */
    ASSERT(chunk->next_mem + pool->item_alloc_size <=
           chunk->mem + chunk->mem_size);

    /* Good, it did.  Let's carve off a bit of that free space, and use
     * that. */
    allocated = (void*)chunk->next_mem;
    chunk->next_mem += pool->item_alloc_size;
    allocated->in_chunk = chunk;
    allocated->u.next_free = NULL; /* For debugging; not really needed. */
  }

  ++chunk->n_allocated;
#ifdef MEMPOOL_STATS
  ++pool->total_items_allocated;
#endif

  if (PREDICT_UNLIKELY(chunk->n_allocated == chunk->capacity)) {
    /* This chunk just became full. */
    ASSERT(chunk == pool->used_chunks);
    ASSERT(chunk->prev == NULL);

    /* Take it off the used list. */
    pool->used_chunks = chunk->next;
    if (chunk->next)
      chunk->next->prev = NULL;

    /* Put it on the full list. */
    chunk->next = pool->full_chunks;
    if (chunk->next)
      chunk->next->prev = chunk;
    pool->full_chunks = chunk;
  }
  /* And return the memory portion of the mp_allocated_t. */
  return A2M(allocated);
}

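/** Return <b>item</b>, which was allocated with mp_pool_get(), to its memory
 * pool so that it can be reused. */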
void
mp_pool_release(void *item)
{
  mp_allocated_t *allocated = (void*) M2A(item);
  mp_chunk_t *chunk = allocated->in_chunk;

  ASSERT(chunk);
  ASSERT(chunk->magic == MP_CHUNK_MAGIC);
  ASSERT(chunk->n_allocated > 0);

  allocated->u.next_free = chunk->first_free;
  chunk->first_free = allocated;

  if (PREDICT_UNLIKELY(chunk->n_allocated == chunk->capacity)) {
    /* This chunk was full and is about to be used. */
    mp_pool_t *pool = chunk->pool;
    /* unlink from the full list  */
    if (chunk->prev)
      chunk->prev->next = chunk->next;
    if (chunk->next)
      chunk->next->prev = chunk->prev;
    if (chunk == pool->full_chunks)
      pool->full_chunks = chunk->next;

    /* link to the used list. */
    chunk->next = pool->used_chunks;
    chunk->prev = NULL;
    if (chunk->next)
      chunk->next->prev = chunk;
    pool->used_chunks = chunk;
  } else if (PREDICT_UNLIKELY(chunk->n_allocated == 1)) {
    /* This was used and is about to be empty. */
    mp_pool_t *pool = chunk->pool;

    /* Unlink from the used list */
    if (chunk->prev)
      chunk->prev->next = chunk->next;
    if (chunk->next)
      chunk->next->prev = chunk->prev;
    if (chunk == pool->used_chunks)
      pool->used_chunks = chunk->next;

    /* Link to the empty list */
    chunk->next = pool->empty_chunks;
    chunk->prev = NULL;
    if (chunk->next)
      chunk->next->prev = chunk;
    pool->empty_chunks = chunk;

    /* Reset the guts of this chunk to defragment it, in case it gets
     * used again. */
    chunk->first_free = NULL;
    chunk->next_mem = chunk->mem;

    ++pool->n_empty_chunks;
  }
  --chunk->n_allocated;
}

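/** Allocate a new memory pool to hold items of size <b>item_size</b>, using
 * chunks of approximately <b>chunk_capacity</b> bytes each. */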
mp_pool_t *
mp_pool_new(size_t item_size, size_t chunk_capacity)
{
  mp_pool_t *pool;
  size_t alloc_size, new_chunk_cap;

  tor_assert(item_size < SIZE_T_CEILING);
  tor_assert(chunk_capacity < SIZE_T_CEILING);
  tor_assert(SIZE_T_CEILING / item_size > chunk_capacity);

  pool = ALLOC(sizeof(mp_pool_t));
  CHECK_ALLOC(pool);
  memset(pool, 0, sizeof(mp_pool_t));

  /* First, we figure out how much space to allow per item.  We'll want to
   * make sure we have enough for the overhead plus the item size. */
  alloc_size = (size_t)(STRUCT_OFFSET(mp_allocated_t, u.mem) + item_size);
  /* If the item_size is less than sizeof(next_free), we need to make
   * the allocation bigger. */
  if (alloc_size < sizeof(mp_allocated_t))
    alloc_size = sizeof(mp_allocated_t);

  /* If we're not an even multiple of ALIGNMENT, round up. */
  if (alloc_size % ALIGNMENT) {
    alloc_size = alloc_size + ALIGNMENT - (alloc_size % ALIGNMENT);
  }
  if (alloc_size < ALIGNMENT)
    alloc_size = ALIGNMENT;
  ASSERT((alloc_size % ALIGNMENT) == 0);

  /* Now we figure out how many items fit in each chunk.  We need to fit at
   * least 2 items per chunk. No chunk can be more than MAX_CHUNK bytes long,
   * or less than MIN_CHUNK. */
  if (chunk_capacity > MAX_CHUNK)
    chunk_capacity = MAX_CHUNK;
  /* Try to be around a power of 2 in size, since that's what allocators like
   * handing out. 512K-1 byte is a lot better than 512K+1 byte. */
  chunk_capacity = (size_t) round_to_power_of_2(chunk_capacity);
  while (chunk_capacity < alloc_size * 2 + CHUNK_OVERHEAD)
    chunk_capacity *= 2;
  if (chunk_capacity < MIN_CHUNK)
    chunk_capacity = MIN_CHUNK;

  new_chunk_cap = (chunk_capacity-CHUNK_OVERHEAD) / alloc_size;
  tor_assert(new_chunk_cap < INT_MAX);
  pool->new_chunk_capacity = (int)new_chunk_cap;

  pool->item_alloc_size = alloc_size;

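  /* Worked example (figures are illustrative; they assume a 64-bit build
   * where sizeof(void*) == 8 and CHUNK_OVERHEAD is a few dozen bytes): for
   * item_size == 512 and chunk_capacity == 64*1024, alloc_size becomes
   * 8 + 512 = 520 bytes (already a multiple of ALIGNMENT), the chunk size
   * stays at the 65536-byte power of two, and new_chunk_capacity comes out
   * to (65536 - CHUNK_OVERHEAD) / 520, i.e. roughly 125 items per chunk. */
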
  log_debug(LD_MM, "Capacity is %lu, item size is %lu, alloc size is %lu",
            (unsigned long)pool->new_chunk_capacity,
            (unsigned long)pool->item_alloc_size,
            (unsigned long)(pool->new_chunk_capacity*pool->item_alloc_size));

  return pool;
}

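/** Helper for qsort: order chunk pointers so that chunks with more allocated
 * items sort first. */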
static int
mp_pool_sort_used_chunks_helper(const void *_a, const void *_b)
{
  mp_chunk_t *a = *(mp_chunk_t**)_a;
  mp_chunk_t *b = *(mp_chunk_t**)_b;
  return b->n_allocated - a->n_allocated;
}

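/** Sort the used chunks in <b>pool</b> into decreasing order of allocated
 * items, so that new allocations land in the fullest chunks and the emptier
 * ones get a chance to drain completely and be freed. */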
static void
mp_pool_sort_used_chunks(mp_pool_t *pool)
{
  int i, n=0, inverted=0;
  mp_chunk_t **chunks, *chunk;
  for (chunk = pool->used_chunks; chunk; chunk = chunk->next) {
    ++n;
    if (chunk->next && chunk->next->n_allocated > chunk->n_allocated)
      ++inverted;
  }
  if (!inverted)
    return;
  //printf("Sort %d/%d\n",inverted,n);
  chunks = ALLOC(sizeof(mp_chunk_t *)*n);
#ifdef ALLOC_CAN_RETURN_NULL
  if (PREDICT_UNLIKELY(!chunks)) return;
#endif
  for (i=0,chunk = pool->used_chunks; chunk; chunk = chunk->next)
    chunks[i++] = chunk;
  qsort(chunks, n, sizeof(mp_chunk_t *), mp_pool_sort_used_chunks_helper);
  pool->used_chunks = chunks[0];
  chunks[0]->prev = NULL;
  for (i=1;i<n;++i) {
    chunks[i-1]->next = chunks[i];
    chunks[i]->prev = chunks[i-1];
  }
  chunks[n-1]->next = NULL;
  FREE(chunks);
  mp_pool_assert_ok(pool);
}

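/** If there are more than <b>n_to_keep</b> empty chunks in <b>pool</b>, free
 * the excess ones that have been empty the longest.  If
 * <b>keep_recently_used</b> is true, do not free any chunk that has become
 * empty since the last call to this function. */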
void
mp_pool_clean(mp_pool_t *pool, int n_to_keep, int keep_recently_used)
{
  mp_chunk_t *chunk, **first_to_free;

  mp_pool_sort_used_chunks(pool);
  ASSERT(n_to_keep >= 0);

  if (keep_recently_used) {
    int n_recently_used = pool->n_empty_chunks - pool->min_empty_chunks;
    if (n_to_keep < n_recently_used)
      n_to_keep = n_recently_used;
  }

  ASSERT(n_to_keep >= 0);

  first_to_free = &pool->empty_chunks;
  while (*first_to_free && n_to_keep > 0) {
    first_to_free = &(*first_to_free)->next;
    --n_to_keep;
  }
  if (!*first_to_free) {
    pool->min_empty_chunks = pool->n_empty_chunks;
    return;
  }

  chunk = *first_to_free;
  while (chunk) {
    mp_chunk_t *next = chunk->next;
    chunk->magic = 0xdeadbeef;
    FREE(chunk);
#ifdef MEMPOOL_STATS
    ++pool->total_chunks_freed;
#endif
    --pool->n_empty_chunks;
    chunk = next;
  }

  pool->min_empty_chunks = pool->n_empty_chunks;
  *first_to_free = NULL;
}

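/** Helper: free every chunk on the linked list starting at <b>chunk</b>. */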
static void
destroy_chunks(mp_chunk_t *chunk)
{
  mp_chunk_t *next;
  while (chunk) {
    chunk->magic = 0xd3adb33f;
    next = chunk->next;
    FREE(chunk);
    chunk = next;
  }
}

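/** Free all space held in <b>pool</b>.  This makes every pointer returned
 * from mp_pool_get(<b>pool</b>) invalid. */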
void
mp_pool_destroy(mp_pool_t *pool)
{
  destroy_chunks(pool->empty_chunks);
  destroy_chunks(pool->used_chunks);
  destroy_chunks(pool->full_chunks);
  memset(pool, 0xe0, sizeof(mp_pool_t));
  FREE(pool);
}

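/** Helper: check the consistency of every chunk on the list starting at
 * <b>chunk</b>; each must belong to <b>pool</b> and be empty or full as
 * indicated by <b>empty</b> and <b>full</b>.  Return the number of chunks
 * on the list. */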
static int
assert_chunks_ok(mp_pool_t *pool, mp_chunk_t *chunk, int empty, int full)
{
  mp_allocated_t *allocated;
  int n = 0;
  if (chunk)
    ASSERT(chunk->prev == NULL);

  while (chunk) {
    n++;
    ASSERT(chunk->magic == MP_CHUNK_MAGIC);
    ASSERT(chunk->pool == pool);
    for (allocated = chunk->first_free; allocated;
         allocated = allocated->u.next_free) {
      ASSERT(allocated->in_chunk == chunk);
    }
    if (empty)
      ASSERT(chunk->n_allocated == 0);
    else if (full)
      ASSERT(chunk->n_allocated == chunk->capacity);
    else
      ASSERT(chunk->n_allocated > 0 && chunk->n_allocated < chunk->capacity);

    ASSERT(chunk->capacity == pool->new_chunk_capacity);

    ASSERT(chunk->mem_size ==
           pool->new_chunk_capacity * pool->item_alloc_size);

    ASSERT(chunk->next_mem >= chunk->mem &&
           chunk->next_mem <= chunk->mem + chunk->mem_size);

    if (chunk->next)
      ASSERT(chunk->next->prev == chunk);

    chunk = chunk->next;
  }
  return n;
}

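/** Fail with an assertion if <b>pool</b> is not internally consistent. */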
void
mp_pool_assert_ok(mp_pool_t *pool)
{
  int n_empty;

  n_empty = assert_chunks_ok(pool, pool->empty_chunks, 1, 0);
  assert_chunks_ok(pool, pool->full_chunks, 0, 1);
  assert_chunks_ok(pool, pool->used_chunks, 0, 0);

  ASSERT(pool->n_empty_chunks == n_empty);
}

#ifdef TOR

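/** Dump information about <b>pool</b>'s memory usage to the Tor log at level
 * <b>severity</b>. */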
/*FFFF uses Tor logging functions. */
void
mp_pool_log_status(mp_pool_t *pool, int severity)
{
  uint64_t bytes_used = 0;
  uint64_t bytes_allocated = 0;
  uint64_t bu = 0, ba = 0;
  mp_chunk_t *chunk;
  int n_full = 0, n_used = 0;

  ASSERT(pool);

  for (chunk = pool->empty_chunks; chunk; chunk = chunk->next) {
    bytes_allocated += chunk->mem_size;
  }
  log_fn(severity, LD_MM, U64_FORMAT" bytes in %d empty chunks",
         U64_PRINTF_ARG(bytes_allocated), pool->n_empty_chunks);
  for (chunk = pool->used_chunks; chunk; chunk = chunk->next) {
    ++n_used;
    bu += chunk->n_allocated * pool->item_alloc_size;
    ba += chunk->mem_size;
    log_fn(severity, LD_MM, "   used chunk: %d items allocated",
           chunk->n_allocated);
  }
  log_fn(severity, LD_MM, U64_FORMAT"/"U64_FORMAT
         " bytes in %d partially full chunks",
         U64_PRINTF_ARG(bu), U64_PRINTF_ARG(ba), n_used);
  bytes_used += bu;
  bytes_allocated += ba;
  bu = ba = 0;
  for (chunk = pool->full_chunks; chunk; chunk = chunk->next) {
    ++n_full;
    bu += chunk->n_allocated * pool->item_alloc_size;
    ba += chunk->mem_size;
  }
  log_fn(severity, LD_MM, U64_FORMAT"/"U64_FORMAT
         " bytes in %d full chunks",
         U64_PRINTF_ARG(bu), U64_PRINTF_ARG(ba), n_full);
  bytes_used += bu;
  bytes_allocated += ba;

  log_fn(severity, LD_MM, "Total: "U64_FORMAT"/"U64_FORMAT" bytes allocated "
         "for cell pools are full.",
         U64_PRINTF_ARG(bytes_used), U64_PRINTF_ARG(bytes_allocated));

#ifdef MEMPOOL_STATS
  log_fn(severity, LD_MM, U64_FORMAT" cell allocations ever; "
         U64_FORMAT" chunk allocations ever; "
         U64_FORMAT" chunk frees ever.",
         U64_PRINTF_ARG(pool->total_items_allocated),
         U64_PRINTF_ARG(pool->total_chunks_allocated),
         U64_PRINTF_ARG(pool->total_chunks_freed));
#endif
}
#endif