Back to index

numactl  2.0.8~rc4
Classes | Defines | Enumerations | Functions | Variables
numademo.c File Reference
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/time.h>
#include "numa.h"

Go to the source code of this file.

Classes

union  node
struct  node.__unnamed__

Defines

#define _GNU_SOURCE   1
#define FRACT_NODES   8
#define FRACT_MASKS   32
#define LOOPS   10
#define H(t)   (((double)msize) / ((double)t))
#define D3   delim,delim,delim

Enumerations

enum  { CACHELINESIZE = 64 }
enum  test {
  MEMSET = 0, MEMCPY, FORWARD, BACKWARD,
  STREAM, RANDOM2, PTRCHASE
}

Functions

static void clearcache (void *a, unsigned size)
void output (char *title, char *result)
static int cmp_node (const void *ap, const void *bp)
void ** ptrchase_init (unsigned char *mem)
static unsigned long long timerfold (struct timeval *tv)
void memtest (char *name, unsigned char *mem)
int popcnt (unsigned long val)
void test (enum test type)
void usage (void)
long memsize (char *s)
int main (int ac, char **av)

Variables

int fract_nodes
unsigned long msize
enum test thistest
char * delim = " "
int force
int regression_testing = 0
char * testname []
int max_node

Class Documentation

union node

Definition at line 111 of file numademo.c.

Collaboration diagram for node:
Class Members
struct node __unnamed__
union node * next
struct node.__unnamed__

Definition at line 113 of file numademo.c.

Class Members
unsigned nexti
unsigned val

Define Documentation

#define _GNU_SOURCE   1

Definition at line 20 of file numademo.c.

#define D3   delim,delim,delim
#define FRACT_MASKS   32

Definition at line 39 of file numademo.c.

#define FRACT_NODES   8

Definition at line 38 of file numademo.c.

#define H(t)   (((double)msize) / ((double)t))
#define LOOPS   10

Definition at line 149 of file numademo.c.


Enumeration Type Documentation

anonymous enum
Enumerator:
CACHELINESIZE 

Definition at line 45 of file numademo.c.

     {
       CACHELINESIZE = 64,
};
enum test
Enumerator:
MEMSET 
MEMCPY 
FORWARD 
BACKWARD 
STREAM 
RANDOM2 
PTRCHASE 

Definition at line 49 of file numademo.c.


Function Documentation

static void clearcache ( void *  a,
unsigned  size 
) [inline, static]

Definition at line 36 of file numademo.c.

{ /* Empty in this listing: both arguments are ignored and nothing is flushed. */ }
static int cmp_node ( const void *  ap,
const void *  bp 
) [static]

Definition at line 119 of file numademo.c.

{
       /*
        * qsort() comparator: orders union node elements by ascending val.
        * Returns a negative, zero or positive value.
        *
        * val is unsigned, so "a->val - b->val" wraps instead of going
        * negative and then depends on the unsigned-to-int conversion;
        * compare explicitly to get a well-defined sign for every input.
        */
       const union node *a = ap;
       const union node *b = bp;

       return (a->val > b->val) - (a->val < b->val);
}

Here is the caller graph for this function:

int main ( int  ac,
char **  av 
)

Definition at line 464 of file numademo.c.

{
       /*
        * Entry point.  Consumes leading "-x" option flags, reads the
        * memory size from the next argument, then runs either the
        * built-in test sequence (no further arguments) or every test
        * named on the command line.
        *
        * Returns 0 on completion; usage() and the error paths call exit(1).
        */
       int simple_tests = 0;

       /* Each option shifts av by one and shrinks ac to match. */
       while (av[1] && av[1][0] == '-') {
              ac--;
              switch (av[1][1]) {
              case 'c':
                     delim = ",";
                     break;
              case 'f':
                     force = 1;
                     break;
              case 'S':
                     simple_tests = 1;
                     break;
              case 'e':
                     numa_exit_on_error = 1;
                     numa_exit_on_warn = 1;
                     break;
              case 't':
                     regression_testing = 1;
                     break;
              default:
                     usage();
                     break;
              }
              ++av;
       }

       if (!av[1])
              usage();

       if (numa_available() < 0) {
              printf("your system does not support the numa API.\n");
              if (!force)
                     exit(1);
       }

       max_node = numa_max_node();
       printf("%d nodes available\n", max_node+1);
       fract_nodes = ((max_node/8)*2) + FRACT_NODES;

       if (max_node <= 2)
              regression_testing = 0; /* set -t auto-off for small systems */

       msize = memsize(av[1]);

       if (!msize)
              usage();

#ifdef HAVE_STREAM_LIB
       stream_setmem(msize);
#endif

       if (av[2] == NULL) {
              test(MEMSET);
              test(MEMCPY);
              if (simple_tests) {
                     test(FORWARD);
                     test(BACKWARD);
              }
#ifdef HAVE_MT
              test(RANDOM2);
#endif
#ifdef HAVE_STREAM_LIB
              test(STREAM);
#endif
              if (msize >= sizeof(union node)) {
                     test(PTRCHASE);
              } else {
                     /* sizeof yields size_t; cast so the argument matches %lu */
                     fprintf(stderr, "You must set msize at least %lu bytes for ptrchase test.\n",
                            (unsigned long)sizeof(union node));
                     exit(1);
              }
       } else {
              int k;
              for (k = 2; k < ac; k++) {
                     int i;
                     int found = 0;
                     for (i = 0; testname[i]; i++) {
                            if (!strcmp(testname[i],av[k])) {
                                   /* the index into testname doubles as the enum test value */
                                   test((enum test)i);
                                   found = 1;
                                   break;
                            }
                     }
                     if (!found) {
                            fprintf(stderr,"unknown test `%s'\n", av[k]);
                            usage();
                     }
              }
       }
       return 0;
}

Here is the call graph for this function:

long memsize ( char *  s)

Definition at line 452 of file numademo.c.

{
       /*
        * Convert a size string to a byte count.  The number is parsed
        * by strtoul() with base 0 (so decimal, octal and hex prefixes
        * work) and may be followed by a K, M or G suffix in either
        * case, each step multiplying by 1024.  Any other trailing
        * character leaves the number unscaled.
        */
       char *end;
       long length = strtoul(s,&end,0);

       /* <ctype.h> functions need an unsigned char value; a negative
          plain char would be undefined behaviour. */
       switch (toupper((unsigned char)*end)) {
       case 'G': length *= 1024;  /*FALL THROUGH*/
       case 'M': length *= 1024;  /*FALL THROUGH*/
       case 'K': length *= 1024; break;
       default: break;
       }
       return length;
}

Here is the caller graph for this function:

void memtest ( char *  name,
unsigned char *  mem 
)

Definition at line 151 of file numademo.c.

{
       /*
        * Run the benchmark selected by the global thistest on the
        * msize-byte buffer mem, print one result line, and release the
        * buffer with numa_free().
        *
        * name: label for the allocation policy; becomes part of the title.
        * mem:  buffer to exercise.  NULL is reported on stderr and the
        *       function returns without printing a result.
        *
        * The kernel runs LOOPS+1 times; the first pass only faults the
        * pages in and is excluded from the statistics.
        */
       long k;
       struct timeval start, end, res;
       unsigned long long max, min, sum, r;
       int i;
       char title[128], result[128];

       if (!mem) {
              fprintf(stderr,
              "Failed to allocate %lu bytes of memory. Test \"%s\" exits.\n",
                     msize, name);
              return;
       }

#ifdef HAVE_STREAM_LIB
       if (thistest == STREAM) {
              do_stream(name, mem);
              goto out;
       }
#endif

       max = 0;
       /* ULL so that every bit is set even where long is narrower
          than long long. */
       min = ~0ULL;
       sum = 0;

       /*
        * Note:  0th pass allocates the pages, don't measure
        */
       for (i = 0; i < LOOPS+1; i++) {
              clearcache(mem, msize);
              switch (thistest) {
              case PTRCHASE:
              {
                     void **ptr;
                     ptr = ptrchase_init(mem);
                     gettimeofday(&start,NULL);
                     while (*ptr)
                            ptr = (void **)*ptr;
                     gettimeofday(&end,NULL);
                     /* Side effect to trick the optimizer */
                     *ptr = "bla";
                     break;
              }

              case MEMSET:
                     gettimeofday(&start,NULL);
                     memset(mem, 0xff, msize);
                     gettimeofday(&end,NULL);
                     break;

              case MEMCPY:
                     gettimeofday(&start,NULL);
                     memcpy(mem, mem + msize/2, msize/2);
                     gettimeofday(&end,NULL);
                     break;

              case FORWARD:
                     /* simple kernel to just fetch cachelines and write them back.
                        will trigger hardware prefetch */
                     gettimeofday(&start,NULL);
                     for (k = 0; k < msize; k+=CACHELINESIZE)
                            mem[k]++;
                     gettimeofday(&end,NULL);
                     break;

              case BACKWARD:
                     gettimeofday(&start,NULL);
                     for (k = msize-5; k > 0; k-=CACHELINESIZE)
                            mem[k]--;
                     gettimeofday(&end,NULL);
                     break;

#ifdef HAVE_MT
              case RANDOM2:
              {
                     unsigned * __restrict m = (unsigned *)mem;
                     /* named nwords so it does not shadow the outer max */
                     unsigned nwords = msize / sizeof(unsigned);
                     unsigned mask;

                     mt_init();
                     mask = 1;
                     while (mask < nwords)
                            mask = (mask << 1) | 1;
                     /*
                      * There's no guarantee all memory is touched, but
                      * we assume (hope) that the distribution of the MT
                      * is good enough to touch most.
                      */
                     gettimeofday(&start,NULL);
                     for (k = 0; k < nwords; k++) {
                            unsigned idx = mt_random() & mask;
                            if (idx >= nwords)
                                   idx -= nwords;
                            m[idx]++;
                     }
                     gettimeofday(&end,NULL);
                     break;
              }
#endif

              default:
                     /*
                      * No kernel is compiled in for this test, so start
                      * and end were never written; timing them would
                      * read uninitialized data.  The numeric value is
                      * printed because testname[] may have no entry
                      * for it.
                      */
                     fprintf(stderr,
                            "Test %d is not supported by this build; skipping \"%s\".\n",
                            (int)thistest, name);
                     goto out;
              }

              if (!i)
                     continue;  /* don't count allocation pass */

              timersub(&end, &start, &res);
              r = timerfold(&res);
              if (r > max) max = r;
              if (r < min) min = r;
              sum += r;
       }

       /* name can be far longer than title (callers build it in a
          512-byte buffer), so bound the write. */
       snprintf(title, sizeof(title), "%s%s%s", name, delim, testname[thistest]);
#define H(t) (((double)msize) / ((double)t))
#define D3 delim,delim,delim
       /* Times are in microseconds, so bytes/time reads as MB/s.  The
          shortest time gives the highest rate, hence Max <- min. */
       snprintf(result, sizeof(result),
              "Avg%s%.2f%sMB/s%sMax%s%.2f%sMB/s%sMin%s%.2f%sMB/s",
              delim,
              H(sum/LOOPS),
              D3,
              H(min),
              D3,
              H(max),
              delim);
#undef H
#undef D3
       output(title,result);

 out:
       /* Just to make sure that when we switch CPUs that the old guy
          doesn't still keep it around. */
       clearcache(mem, msize);

       numa_free(mem, msize);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void output ( char *  title,
char *  result 
)

Definition at line 78 of file numademo.c.

{
       /*
        * Print one result line.  A whitespace delimiter selects the
        * human-readable layout with the title padded to 42 columns;
        * any other delimiter is printed between title and result.
        */
       if (isspace(delim[0])) {
              printf("%-42s%s\n", title, result);
              return;
       }

       printf("%s%s%s\n", title,delim, result);
}

Here is the caller graph for this function:

int popcnt ( unsigned long  val)

Definition at line 289 of file numademo.c.

{
       /*
        * Count the bits set in val.
        *
        * The value itself is shifted down one bit per step, so the
        * shift count is always 1.  Shifting by a growing index would
        * reach the full width of unsigned long when the top bit is
        * set, which is undefined behaviour.
        */
       int cnt = 0;

       while (val) {
              cnt += (int)(val & 1UL);
              val >>= 1;
       }
       return cnt;
}

Here is the caller graph for this function:

void** ptrchase_init ( unsigned char *  mem)

Definition at line 126 of file numademo.c.

{
       /*
        * Lay out a pointer chain over mem for the pointer-chasing test.
        *
        * Step 1: give every node a pseudo-random key (fixed seed, so
        *         the layout repeats between runs) and the index of its
        *         successor in the original order.
        * Step 2: sort the nodes by key, which scatters them.
        * Step 3: replace each successor index with a pointer into the
        *         sorted array, or NULL once the index falls outside it.
        *
        * Returns the first node of the array, cast to void **.
        */
       union node *const table = (union node *)mem;
       const long count = msize / sizeof(union node);
       long idx;

       srand(1234);
       idx = 0;
       while (idx < count) {
              table[idx].val = rand();
              table[idx].nexti = idx + 1;
              idx++;
       }

       qsort(table, count, sizeof(union node), cmp_node);

       for (idx = 0; idx < count; idx++) {
              union node *cur = table + idx;

              /* nexti shares storage with next, so it is read before
                 the pointer is stored. */
              if (cur->nexti >= count)
                     cur->next = NULL;
              else
                     cur->next = &table[cur->nexti];
       }
       return (void **)table;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void test ( enum test  type)

Definition at line 302 of file numademo.c.

{
       /*
        * Run benchmark `type` under a series of NUMA allocation
        * policies, calling memtest() once per policy: default, local,
        * interleaved, per-node, interleaved subsets, preferred node,
        * manual interleave masks, and finally per-CPU-node placement
        * combined with membind / localalloc / preferred settings.
        *
        * With regression_testing set, only every fract_nodes-th node
        * and a small sample of the subset masks are visited.
        */
       unsigned long mask;
       unsigned long mask_end;
       int i, k;
       int len, n;
       char buf[512];
       struct bitmask *nodes;
       /* bits in an unsigned long, assuming 8-bit bytes */
       const int ulbits = (int)(8 * sizeof(unsigned long));

       nodes = numa_allocate_nodemask();
       thistest = type;

       if (regression_testing) {
              printf("\nTest %s doing 1 of %d nodes and 1 of %d masks.\n",
                     testname[thistest], fract_nodes, FRACT_MASKS);
       }

       memtest("memory with no policy", numa_alloc(msize));
       memtest("local memory", numa_alloc_local(msize));

       memtest("memory interleaved on all nodes", numa_alloc_interleaved(msize));
       for (i = 0; i <= max_node; i++) {
              if (regression_testing && (i % fract_nodes)) {
              /* for regression testing (-t) do only every eighth node */
                     continue;
              }
              snprintf(buf, sizeof(buf), "memory on node %d", i);
              memtest(buf, numa_alloc_onnode(msize, i));
       }

       /*
        * mask_end is one past the last subset mask.  Shifting 1UL by
        * the full width of the type is undefined, so when there are at
        * least that many nodes use 0, which the loop reaches when mask
        * wraps around.
        */
       mask_end = (max_node + 1 < ulbits) ? (1UL << (max_node + 1)) : 0;

       for (mask = 1, i = 0; mask != mask_end; mask++, i++) {
              if (popcnt(mask) == 1)
                     continue;
              if (regression_testing && (i > 50)) {
                     break;
              }
              if (regression_testing && (i % FRACT_MASKS)) {
              /* for regression testing (-t)
                     do only every 32nd mask permutation */
                     continue;
              }
              numa_bitmask_clearall(nodes);

              len = snprintf(buf, sizeof(buf), "memory interleaved on");
              /* One pass both sets the node bit and appends the node
                 number to the label; k stays below the type width so
                 the shift is always defined. */
              for (k = 0; k <= max_node && k < ulbits; k++) {
                     if (!((mask >> k) & 1))
                            continue;
                     numa_bitmask_setbit(nodes, k);
                     n = snprintf(buf + len, sizeof(buf) - len, " %d", k);
                     if (n > 0)
                            len += n;
                     /* on truncation keep len inside buf so later
                        appends are harmless no-ops */
                     if (len >= (int)sizeof(buf))
                            len = (int)sizeof(buf) - 1;
              }
              memtest(buf, numa_alloc_interleaved_subset(msize, nodes));
       }

       for (i = 0; i <= max_node; i++) {
              if (regression_testing && (i % fract_nodes)) {
              /* for regression testing (-t) do only every eighth node */
                     continue;
              }
              printf("setting preferred node to %d\n", i);
              numa_set_preferred(i);
              memtest("memory without policy", numa_alloc(msize));
       }

       numa_set_interleave_mask(numa_all_nodes_ptr);
       memtest("manual interleaving to all nodes", numa_alloc(msize));

       if (max_node > 0) {
              numa_bitmask_clearall(nodes);
              numa_bitmask_setbit(nodes, 0);
              numa_bitmask_setbit(nodes, 1);
              numa_set_interleave_mask(nodes);
              memtest("manual interleaving on node 0/1", numa_alloc(msize));
              printf("current interleave node %d\n", numa_get_interleave_node());
       }

       numa_set_interleave_mask(numa_no_nodes_ptr);

       /* The mask allocated at the top is reused from here on; every
          use below clears it first, so a second allocation would only
          leak the first one. */

       for (i = 0; i <= max_node; i++) {
              int oldhn = numa_preferred();

              if (regression_testing && (i % fract_nodes)) {
              /* for regression testing (-t) do only every eighth node */
                     continue;
              }
              numa_run_on_node(i);
              printf("running on node %d, preferred node %d\n",i, oldhn);

              memtest("local memory", numa_alloc_local(msize));

              memtest("memory interleaved on all nodes",
                     numa_alloc_interleaved(msize));

              if (max_node >= 1) {
                     numa_bitmask_clearall(nodes);
                     numa_bitmask_setbit(nodes, 0);
                     numa_bitmask_setbit(nodes, 1);
                     memtest("memory interleaved on node 0/1",
                            numa_alloc_interleaved_subset(msize, nodes));
              }

              for (k = 0; k <= max_node; k++) {
                     if (k == i)
                            continue;
                     if (regression_testing && (k % fract_nodes)) {
                     /* for regression testing (-t)
                            do only every eighth node */
                            continue;
                     }
                     snprintf(buf, sizeof(buf), "alloc on node %d", k);
                     numa_bitmask_clearall(nodes);
                     numa_bitmask_setbit(nodes, k);
                     numa_set_membind(nodes);
                     memtest(buf, numa_alloc(msize));
                     numa_set_membind(numa_all_nodes_ptr);
              }

              numa_set_localalloc();
              memtest("local allocation", numa_alloc(msize));

              numa_set_preferred((i+1) % (1+max_node));
              memtest("setting wrong preferred node", numa_alloc(msize));
              numa_set_preferred(i);
              memtest("setting correct preferred node", numa_alloc(msize));
              numa_set_preferred(-1);
              if (!delim[0])
                     printf("\n\n\n");
       }

       /* numa_run_on_node_mask is not tested */

       numa_free_nodemask(nodes);
}

Here is the call graph for this function:

static unsigned long long timerfold ( struct timeval *  tv) [inline, static]

Definition at line 144 of file numademo.c.

{
       /* Fold a struct timeval into a single count of microseconds. */
       unsigned long long usecs = tv->tv_sec * 1000000ULL;

       usecs += tv->tv_usec;
       return usecs;
}

Here is the caller graph for this function:

void usage ( void  )

Definition at line 438 of file numademo.c.

{
       /*
        * Print the command-line synopsis followed by every name in
        * the NULL-terminated testname table, then exit with status 1.
        * Does not return.
        */
       char **entry;

       fputs("usage: numademo [-S] [-f] [-c] [-e] [-t] msize[kmg] {tests}\nNo tests means run all.\n", stdout);
       fputs("-c output CSV data. -f run even without NUMA API. -S run stupid tests. -e exit on error\n", stdout);
       fputs("-t regression test; do not run all node combinations\n", stdout);
       fputs("valid tests:", stdout);
       for (entry = testname; *entry; entry++)
              printf(" %s", *entry);
       fputs("\n", stdout);
       exit(1);
}

Variable Documentation

char* delim = " "

Definition at line 59 of file numademo.c.

int force

Definition at line 60 of file numademo.c.

int fract_nodes

Definition at line 40 of file numademo.c.

int max_node

Definition at line 300 of file numademo.c.

unsigned long msize

Definition at line 42 of file numademo.c.

int regression_testing = 0

Definition at line 61 of file numademo.c.

char* testname[]
Initial value:
 {
       /*
        * One name per enum test value, in enum order, NULL-terminated.
        * The table is indexed with enum test values (testname[thistest])
        * and its indices are passed to test() as enum test values, so
        * no entry may be left out: a gap shifts "ptrchase" onto STREAM
        * and makes testname[PTRCHASE] read past the end of the array.
        * NOTE(review): "stream" and "random2" are therefore listed even
        * in builds without those kernels - confirm the desired handling.
        */
       "memset",
       "memcpy",
       "forward",
       "backward",
       "stream",
       "random2",
       "ptrchase",
       NULL,
}

Definition at line 63 of file numademo.c.

enum test thistest