
numactl  2.0.8~rc4
libnuma.c
00001 /* Simple NUMA library.
00002    Copyright (C) 2003,2004,2005,2008 Andi Kleen,SuSE Labs and
00003    Cliff Wickman,SGI.
00004 
00005    libnuma is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Lesser General Public
00007    License as published by the Free Software Foundation; version
00008    2.1.
00009 
00010    libnuma is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Lesser General Public License for more details.
00014 
00015    You should find a copy of v2.1 of the GNU Lesser General Public License
00016    somewhere on your Linux system; if not, write to the Free Software 
00017    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
00018 
00019    All calls are undefined when numa_available returns an error. */
00020 #define _GNU_SOURCE 1
00021 #include <stdlib.h>
00022 #include <stdio.h>
00023 #include <unistd.h>
00024 #include <string.h>
00025 #include <sched.h>
00026 #include <dirent.h>
00027 #include <errno.h>
00028 #include <stdarg.h>
00029 #include <ctype.h>
00030 
00031 #include <sys/mman.h>
00032 #include <limits.h>
00033 
00034 #include "numa.h"
00035 #include "numaif.h"
00036 #include "numaint.h"
00037 #include "util.h"
00038 #include "affinity.h"
00039 
00040 #define WEAK __attribute__((weak))
00041 
00042 #define CPU_BUFFER_SIZE 4096     /* This limits you to 32768 CPUs */
00043 
00044 /* these are the old (version 1) masks */
00045 nodemask_t numa_no_nodes;
00046 nodemask_t numa_all_nodes;
00047 /* these are now the default bitmask (pointers to) (version 2) */
00048 struct bitmask *numa_no_nodes_ptr = NULL;
00049 struct bitmask *numa_all_nodes_ptr = NULL;
00050 struct bitmask *numa_all_cpus_ptr = NULL;
00051 /* I would prefer to use symbol versioning to create v1 and v2 versions
00052    of numa_no_nodes and numa_all_nodes, but the loader does not correctly
00053    handle versioning of BSS versus small data items */
00054 
00055 struct bitmask *numa_nodes_ptr = NULL;
00056 static struct bitmask *numa_memnode_ptr = NULL;
00057 static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES];
00058 struct bitmask **node_cpu_mask_v2;
00059 
00060 WEAK void numa_error(char *where);
00061 
00062 #ifdef __thread
00063 #warning "not threadsafe"
00064 #endif
00065 
00066 static __thread int bind_policy = MPOL_BIND; 
00067 static __thread unsigned int mbind_flags = 0;
00068 static int sizes_set=0;
00069 static int maxconfigurednode = -1;
00070 static int maxconfiguredcpu = -1;
00071 static int numprocnode = -1;
00072 static int numproccpu = -1;
00073 static int nodemask_sz = 0;
00074 static int cpumask_sz = 0;
00075 
00076 int numa_exit_on_error = 0;
00077 int numa_exit_on_warn = 0;
00078 static void set_sizes(void);
00079 
00080 /*
00081  * There are two special functions, _init(void) and _fini(void), which
00082  * are called automatically by the dynamic loader whenever a library is loaded.
00083  *
00084  * The v1 library depends upon nodemask_t's of all nodes and no nodes.
00085  */
00086 void __attribute__((constructor))
00087 numa_init(void)
00088 {
00089        int max,i;
00090 
00091        if (sizes_set)
00092               return;
00093 
00094        set_sizes();
00095        /* numa_all_nodes should represent existing nodes on this system */
00096         max = numa_num_configured_nodes();
00097         for (i = 0; i < max; i++)
00098                 nodemask_set_compat((nodemask_t *)&numa_all_nodes, i);
00099        memset(&numa_no_nodes, 0, sizeof(numa_no_nodes));
00100 }
00101 
00102 #define FREE_AND_ZERO(x) if (x) {  \
00103               numa_bitmask_free(x);       \
00104               x = NULL;            \
00105        }
00106 
00107 void __attribute__((destructor))
00108 numa_fini(void)
00109 {
00110        FREE_AND_ZERO(numa_all_cpus_ptr);
00111        FREE_AND_ZERO(numa_all_nodes_ptr);
00112        FREE_AND_ZERO(numa_no_nodes_ptr);
00113        FREE_AND_ZERO(numa_memnode_ptr);
00114        FREE_AND_ZERO(numa_nodes_ptr);
00115 }
00116 
00117 /*
00118  * The following bitmask declarations, bitmask_*() routines, and associated
00119  * _setbit() and _getbit() routines are:
00120  * Copyright (c) 2004-2007 Silicon Graphics, Inc. (SGI) All rights reserved.
00121  * SGI publishes it under the terms of the GNU General Public License, v2,
00122  * as published by the Free Software Foundation.
00123  */
00124 static unsigned int
00125 _getbit(const struct bitmask *bmp, unsigned int n)
00126 {
00127        if (n < bmp->size)
00128               return (bmp->maskp[n/bitsperlong] >> (n % bitsperlong)) & 1;
00129        else
00130               return 0;
00131 }
00132 
00133 static void
00134 _setbit(struct bitmask *bmp, unsigned int n, unsigned int v)
00135 {
00136        if (n < bmp->size) {
00137               if (v)
00138                      bmp->maskp[n/bitsperlong] |= 1UL << (n % bitsperlong);
00139               else
00140                      bmp->maskp[n/bitsperlong] &= ~(1UL << (n % bitsperlong));
00141        }
00142 }
00143 
00144 int
00145 numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int i)
00146 {
00147        return _getbit(bmp, i);
00148 }
00149 
00150 struct bitmask *
00151 numa_bitmask_setall(struct bitmask *bmp)
00152 {
00153        unsigned int i;
00154        for (i = 0; i < bmp->size; i++)
00155               _setbit(bmp, i, 1);
00156        return bmp;
00157 }
00158 
00159 struct bitmask *
00160 numa_bitmask_clearall(struct bitmask *bmp)
00161 {
00162        unsigned int i;
00163        for (i = 0; i < bmp->size; i++)
00164               _setbit(bmp, i, 0);
00165        return bmp;
00166 }
00167 
00168 struct bitmask *
00169 numa_bitmask_setbit(struct bitmask *bmp, unsigned int i)
00170 {
00171        _setbit(bmp, i, 1);
00172        return bmp;
00173 }
00174 
00175 struct bitmask *
00176 numa_bitmask_clearbit(struct bitmask *bmp, unsigned int i)
00177 {
00178        _setbit(bmp, i, 0);
00179        return bmp;
00180 }
00181 
00182 unsigned int
00183 numa_bitmask_nbytes(struct bitmask *bmp)
00184 {
00185        return longsperbits(bmp->size) * sizeof(unsigned long);
00186 }
00187 
00188 /* where n is the number of bits in the map */
00189 /* This function should not exit on failure, but right now we cannot really
00190    recover from this. */
00191 struct bitmask *
00192 numa_bitmask_alloc(unsigned int n)
00193 {
00194        struct bitmask *bmp;
00195 
00196        if (n < 1) {
00197               numa_error("request to allocate mask for invalid number; abort\n");
00198               exit(1);
00199        }
00200        bmp = malloc(sizeof(*bmp));
00201        if (bmp == 0)
00202               goto oom;
00203        bmp->size = n;
00204        bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long));
00205        if (bmp->maskp == 0) {
00206               free(bmp);
00207               goto oom;
00208        }
00209        return bmp;
00210 
00211 oom:
00212        numa_error("Out of memory allocating bitmask");
00213        exit(1);
00214 }
00215 
00216 void
00217 numa_bitmask_free(struct bitmask *bmp)
00218 {
00219        if (bmp == 0)
00220               return;
00221        free(bmp->maskp);
00222        bmp->maskp = (unsigned long *)0xdeadcdef;  /* double free tripwire */
00223        free(bmp);
00224        return;
00225 }
00226 
00227 /* True if two bitmasks are equal */
00228 int
00229 numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask *bmp2)
00230 {
00231        unsigned int i;
00232        for (i = 0; i < bmp1->size || i < bmp2->size; i++)
00233               if (_getbit(bmp1, i) != _getbit(bmp2, i))
00234                      return 0;
00235        return 1;
00236 }
00237 
00238 /* Hamming Weight: number of set bits */
00239 unsigned int numa_bitmask_weight(const struct bitmask *bmp)
00240 {
00241        unsigned int i;
00242        unsigned int w = 0;
00243        for (i = 0; i < bmp->size; i++)
00244               if (_getbit(bmp, i))
00245                      w++;
00246        return w;
00247 }
00248 
00249 /* *****end of bitmask_  routines ************ */
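
/*
 * Illustrative sketch (not part of the original libnuma.c): how a caller
 * might exercise the bitmask routines above.  It relies only on the public
 * numa_bitmask_*() API and on the <stdio.h> already included at the top of
 * this file; the mask width of 64 bits is arbitrary.
 */
static void example_bitmask_usage(void)
{
        struct bitmask *bmp = numa_bitmask_alloc(64);

        numa_bitmask_setbit(bmp, 0);            /* set bits 0 and 5 */
        numa_bitmask_setbit(bmp, 5);
        numa_bitmask_clearbit(bmp, 0);          /* clear bit 0 again */

        printf("bit 5 set: %d, weight: %u, bytes: %u\n",
               numa_bitmask_isbitset(bmp, 5),
               numa_bitmask_weight(bmp),
               numa_bitmask_nbytes(bmp));

        numa_bitmask_free(bmp);
}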
00250 
00251 /* The next two can be overridden by the application for different error handling */
00252 WEAK void numa_error(char *where) 
00253 { 
00254        int olde = errno;
00255        perror(where); 
00256        if (numa_exit_on_error)
00257               exit(1); 
00258        errno = olde;
00259 } 
00260 
00261 WEAK void numa_warn(int num, char *fmt, ...) 
00262 { 
00263        static unsigned warned;
00264        va_list ap;
00265        int olde = errno;
00266        
00267        /* Give each warning only once */
00268        if ((1<<num) & warned)
00269               return; 
00270        warned |= (1<<num); 
00271 
00272        va_start(ap,fmt);
00273        fprintf(stderr, "libnuma: Warning: ");
00274        vfprintf(stderr, fmt, ap);
00275        fputc('\n', stderr);
00276        va_end(ap);
00277 
00278        errno = olde;
00279 } 
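
/*
 * Illustrative sketch (not part of the original libnuma.c): numa_error()
 * and numa_warn() are weak symbols, so an application can override them by
 * providing non-weak definitions with the same signatures.  The block is
 * guarded with #if 0 here because such definitions belong in the
 * application, not in this file.
 */
#if 0
void numa_error(char *where)
{
        fprintf(stderr, "libnuma failure in %s: %s\n", where, strerror(errno));
        exit(1);
}

void numa_warn(int num, char *fmt, ...)
{
        /* silently ignore library warnings */
        (void)num;
        (void)fmt;
}
#endif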
00280 
00281 static void setpol(int policy, struct bitmask *bmp)
00282 { 
00283        if (set_mempolicy(policy, bmp->maskp, bmp->size + 1) < 0)
00284               numa_error("set_mempolicy");
00285 } 
00286 
00287 static void getpol(int *oldpolicy, struct bitmask *bmp)
00288 { 
00289        if (get_mempolicy(oldpolicy, bmp->maskp, bmp->size + 1, 0, 0) < 0)
00290               numa_error("get_mempolicy");
00291 } 
00292 
00293 static void dombind(void *mem, size_t size, int pol, struct bitmask *bmp)
00294 { 
00295        if (mbind(mem, size, pol, bmp ? bmp->maskp : NULL, bmp ? bmp->size + 1 : 0,
00296                 mbind_flags) < 0)
00297               numa_error("mbind"); 
00298 } 
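
/*
 * Illustrative sketch (not part of the original libnuma.c): the helpers
 * above follow libnuma's convention of passing bmp->size + 1 (the mask
 * width in bits, plus one) as the maxnode argument of the policy syscalls.
 * Calling the raw set_mempolicy() wrapper from <numaif.h> directly with the
 * same convention looks like this; the node numbers are arbitrary and the
 * sketch assumes at least two nodes exist.
 */
static void example_raw_set_mempolicy(void)
{
        unsigned long nodemask = (1UL << 0) | (1UL << 1);   /* nodes 0 and 1 */

        if (set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask) * 8 + 1) < 0)
                perror("set_mempolicy");
}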
00299 
00300 /* (undocumented) */
00301 /* gives the wrong answer for hugetlbfs mappings. */
00302 int numa_pagesize(void)
00303 { 
00304        static int pagesize;
00305        if (pagesize > 0) 
00306               return pagesize;
00307        pagesize = getpagesize();
00308        return pagesize;
00309 } 
00310 
00311 make_internal_alias(numa_pagesize);
00312 
00313 /*
00314  * Find nodes (numa_nodes_ptr), nodes with memory (numa_memnode_ptr)
00315  * and the highest numbered existing node (maxconfigurednode).
00316  */
00317 static void
00318 set_configured_nodes(void)
00319 {
00320        DIR *d;
00321        struct dirent *de;
00322        long long freep;
00323 
00324        numa_memnode_ptr = numa_allocate_nodemask();
00325        numa_nodes_ptr = numa_allocate_nodemask();
00326 
00327        d = opendir("/sys/devices/system/node");
00328        if (!d) {
00329               maxconfigurednode = 0;
00330        } else {
00331               while ((de = readdir(d)) != NULL) {
00332                      int nd;
00333                      if (strncmp(de->d_name, "node", 4))
00334                             continue;
00335                      nd = strtoul(de->d_name+4, NULL, 0);
00336                      numa_bitmask_setbit(numa_nodes_ptr, nd);
00337                      if (numa_node_size64(nd, &freep) > 0)
00338                             numa_bitmask_setbit(numa_memnode_ptr, nd);
00339                      if (maxconfigurednode < nd)
00340                             maxconfigurednode = nd;
00341               }
00342               closedir(d);
00343        }
00344 }
00345 
00346 /*
00347  * Convert the string length of an ascii hex mask to the number
00348  * of bits represented by that mask.
00349  */
00350 static int s2nbits(const char *s)
00351 {
00352        return strlen(s) * 32 / 9;
00353 }
00354 
00355 /* Is string 'pre' a prefix of string 's'? */
00356 static int strprefix(const char *s, const char *pre)
00357 {
00358        return strncmp(s, pre, strlen(pre)) == 0;
00359 }
00360 
00361 static const char *mask_size_file = "/proc/self/status";
00362 static const char *nodemask_prefix = "Mems_allowed:\t";
00363 /*
00364  * (do this the way Paul Jackson's libcpuset does it)
00365  * The nodemask values in /proc/self/status are in an
00366  * ascii format that uses 9 characters for each 32 bits of mask.
00367  * (this could also be used to find the cpumask size)
00368  */
00369 static void
00370 set_nodemask_size(void)
00371 {
00372        FILE *fp;
00373        char *buf = NULL;
00374        size_t bufsize = 0;
00375 
00376        if ((fp = fopen(mask_size_file, "r")) == NULL)
00377               goto done;
00378 
00379        while (getline(&buf, &bufsize, fp) > 0) {
00380               if (strprefix(buf, nodemask_prefix)) {
00381                      nodemask_sz = s2nbits(buf + strlen(nodemask_prefix));
00382                      break;
00383               }
00384        }
00385        free(buf);
00386        fclose(fp);
00387 done:
00388        if (nodemask_sz == 0) {/* fall back on error */
00389               int pol;
00390               unsigned long *mask = NULL;
00391               nodemask_sz = 16;
00392               do {
00393                      nodemask_sz <<= 1;
00394                      mask = realloc(mask, nodemask_sz / 8);
00395                      if (!mask)
00396                             return;
00397               } while (get_mempolicy(&pol, mask, nodemask_sz + 1, 0, 0) < 0 && errno == EINVAL &&
00398                             nodemask_sz < 4096*8);
00399               free(mask);
00400        }
00401 }
00402 
00403 /*
00404  * Read a mask consisting of a sequence of hexadecimal longs separated by
00405  * commas. Order them correctly and return the number of bits set.
00406  */
00407 static int
00408 read_mask(char *s, struct bitmask *bmp)
00409 {
00410        char *end = s;
00411        int tmplen = (bmp->size + bitsperint - 1) / bitsperint;
00412        unsigned int tmp[tmplen];
00413        unsigned int *start = tmp;
00414        unsigned int i, n = 0, m = 0;
00415 
00416        if (!s)
00417               return 0;     /* shouldn't happen */
00418 
00419        i = strtoul(s, &end, 16);
00420 
00421        /* Skip leading zeros */
00422        while (!i && *end++ == ',') {
00423               i = strtoul(end, &end, 16);
00424        }
00425 
00426        if (!i)
00427               /* End of string. No mask */
00428               return -1;
00429 
00430        start[n++] = i;
00431        /* Read sequence of ints */
00432        while (*end++ == ',') {
00433               /* check for buffer overflow before storing the next int */
00434               if (n >= tmplen)
00435                      return -1;
00436 
00437               i = strtoul(end, &end, 16);
00438               start[n++] = i;
00439        }
00440 
00441        /*
00442         * Invert sequence of ints if necessary since the first int
00443         * is the highest and we put it first because we read it first.
00444         */
00445        while (n) {
00446               int w;
00447               unsigned long x = 0;
00448               /* read into long values in an endian-safe way */
00449               for (w = 0; n && w < bitsperlong; w += bitsperint)
00450                      x |= ((unsigned long)start[n-- - 1] << w);
00451 
00452               bmp->maskp[m++] = x;
00453        }
00454        /*
00455         * Return the number of bits set
00456         */
00457        return numa_bitmask_weight(bmp);
00458 }
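
/*
 * Illustrative sketch (not part of the original libnuma.c): read_mask()
 * consumes the comma-separated hex format used by the Cpus_allowed and
 * Mems_allowed lines in /proc/self/status.  A hand-fed example; the string
 * and the 64-bit mask width are made up for illustration.
 */
static void example_read_mask(void)
{
        struct bitmask *bmp = numa_bitmask_alloc(64);
        char line[] = "00000000,0000000f";      /* cpus/nodes 0-3 */

        if (read_mask(line, bmp) == 4)
                printf("bits 0-3 are set, as expected\n");
        numa_bitmask_free(bmp);
}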
00459 
00460 /*
00461  * Read a process's constraints in terms of nodes and cpus from
00462  * /proc/self/status.
00463  */
00464 static void
00465 set_task_constraints(void)
00466 {
00467        int hicpu = sysconf(_SC_NPROCESSORS_CONF)-1;
00468        int i;
00469        char *buffer = NULL;
00470        size_t buflen = 0;
00471        FILE *f;
00472 
00473        numa_all_cpus_ptr = numa_allocate_cpumask();
00474        numa_all_nodes_ptr = numa_allocate_nodemask();
00475        numa_no_nodes_ptr = numa_allocate_nodemask();
00476 
00477        f = fopen(mask_size_file, "r");
00478        if (!f) {
00479               //numa_warn(W_cpumap, "Cannot parse %s", mask_size_file);
00480               return;
00481        }
00482 
00483        while (getline(&buffer, &buflen, f) > 0) {
00484               /* mask starts after [last] tab */
00485               char  *mask = strrchr(buffer,'\t') + 1;
00486 
00487               if (strncmp(buffer,"Cpus_allowed:",13) == 0)
00488                      numproccpu = read_mask(mask, numa_all_cpus_ptr);
00489 
00490               if (strncmp(buffer,"Mems_allowed:",13) == 0) {
00491                      numprocnode = read_mask(mask, numa_all_nodes_ptr);
00492               }
00493        }
00494        fclose(f);
00495        free(buffer);
00496 
00497        /*
00498         * Cpus_allowed in the kernel can be defined to all f's,
00499         * i.e. it may be a superset of the actually available processors.
00500         * As such, let's reduce numproccpu to the number of actually
00501         * available cpus.
00502         */
00503        if (numproccpu <= 0) {
00504               for (i = 0; i <= hicpu; i++)
00505                      numa_bitmask_setbit(numa_all_cpus_ptr, i);
00506               numproccpu = hicpu+1;
00507        }
00508 
00509        if (numproccpu > hicpu+1) {
00510               numproccpu = hicpu+1;
00511               for (i=hicpu+1; i<numa_all_cpus_ptr->size; i++) {
00512                      numa_bitmask_clearbit(numa_all_cpus_ptr, i);
00513               }
00514        }
00515 
00516        if (numprocnode <= 0) {
00517               for (i = 0; i <= maxconfigurednode; i++)
00518                      numa_bitmask_setbit(numa_all_nodes_ptr, i);
00519               numprocnode = maxconfigurednode + 1;
00520        }
00521 
00522        return;
00523 }
00524 
00525 /*
00526  * Find the highest cpu number possible (in other words the size
00527  * of a kernel cpumask_t (in bits) - 1)
00528  */
00529 static void
00530 set_numa_max_cpu(void)
00531 {
00532        int len = 4096;
00533        int n;
00534        int olde = errno;
00535        struct bitmask *buffer;
00536 
00537        do {
00538               buffer = numa_bitmask_alloc(len);
00539               n = numa_sched_getaffinity_v2_int(0, buffer);
00540               /* on success, returns size of kernel cpumask_t, in bytes */
00541               if (n < 0 && errno == EINVAL) {
00542                      if (len >= 1024*1024)
00543                             break;
00544                      len *= 2;
00545                      numa_bitmask_free(buffer);
00546                      continue;
00547               }
00548        } while (n < 0);
00549        numa_bitmask_free(buffer);
00550        errno = olde;
00551        cpumask_sz = n*8;
00552 }
00553 
00554 /*
00555  * get the total (configured) number of cpus - both online and offline
00556  */
00557 static void
00558 set_configured_cpus(void)
00559 {
00560        char          *dirnamep = "/sys/devices/system/cpu";
00561        struct dirent *dirent;
00562        DIR           *dir;
00563        dir = opendir(dirnamep);
00564 
00565        if (dir == NULL) {
00566               /* fall back to using the online cpu count */
00567               maxconfiguredcpu = sysconf(_SC_NPROCESSORS_CONF) - 1;
00568               return;
00569        }
00570        while ((dirent = readdir(dir)) != 0) {
00571               if (dirent->d_type == DT_DIR
00572                   && !strncmp("cpu", dirent->d_name, 3)) {
00573                      long cpu = strtol(dirent->d_name + 3, NULL, 10);
00574 
00575                      if (cpu < INT_MAX && cpu > maxconfiguredcpu)
00576                             maxconfiguredcpu = cpu;
00577               }
00578        }
00579        closedir(dir);
00580        if (maxconfiguredcpu < 0) {
00581               /* fall back to using the online cpu count */
00582               maxconfiguredcpu = sysconf(_SC_NPROCESSORS_CONF) - 1;
00583        }
00584 }
00585 
00586 /*
00587  * Initialize all the sizes.
00588  */
00589 static void
00590 set_sizes(void)
00591 {
00592        sizes_set++;
00593        set_nodemask_size(); /* size of kernel nodemask_t */
00594        set_configured_nodes();     /* configured nodes listed in /sys */
00595        set_numa_max_cpu();  /* size of kernel cpumask_t */
00596        set_configured_cpus();      /* cpus listed in /sys/devices/system/cpu */
00597        set_task_constraints(); /* cpus and nodes for current task */
00598 }
00599 
00600 int
00601 numa_num_configured_nodes(void)
00602 {
00603        /*
00604        * NOTE: this function's behavior matches the documentation (ie: it
00605        * returns a count of nodes with memory) despite the poor function
00606        * naming.  We also cannot use the similarly poorly named
00607        * numa_all_nodes_ptr as it only tracks nodes with memory from which
00608        * the calling process can allocate.  Think sparse nodes, memory-less
00609        * nodes, cpusets...
00610        */
00611        int memnodecount=0, i;
00612 
00613        for (i=0; i <= maxconfigurednode; i++) {
00614               if (numa_bitmask_isbitset(numa_memnode_ptr, i))
00615                      memnodecount++;
00616        }
00617        return memnodecount;
00618 }
00619 
00620 int
00621 numa_num_configured_cpus(void)
00622 {
00623 
00624        return maxconfiguredcpu+1;
00625 }
00626 
00627 int
00628 numa_num_possible_nodes(void)
00629 {
00630        return nodemask_sz;
00631 }
00632 
00633 int
00634 numa_num_possible_cpus(void)
00635 {
00636        return cpumask_sz;
00637 }
00638 
00639 int
00640 numa_num_task_nodes(void)
00641 {
00642        return numprocnode;
00643 }
00644 
00645 /*
00646  * for backward compatibility
00647  */
00648 int
00649 numa_num_thread_nodes(void)
00650 {
00651        return numa_num_task_nodes();
00652 }
00653 
00654 int
00655 numa_num_task_cpus(void)
00656 {
00657        return numproccpu;
00658 }
00659 
00660 /*
00661  * for backward compatibility
00662  */
00663 int
00664 numa_num_thread_cpus(void)
00665 {
00666        return numa_num_task_cpus();
00667 }
00668 
00669 /*
00670  * Return the number of the highest node in this running system.
00671  */
00672 int
00673 numa_max_node(void)
00674 {
00675        return maxconfigurednode;
00676 }
00677 
00678 make_internal_alias(numa_max_node);
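
/*
 * Illustrative sketch (not part of the original libnuma.c): how a caller
 * might report the sizes gathered above.  Assumes numa_available() has
 * already returned 0.
 */
static void example_print_topology(void)
{
        printf("configured nodes: %d (highest node id %d)\n",
               numa_num_configured_nodes(), numa_max_node());
        printf("configured cpus:  %d\n", numa_num_configured_cpus());
        printf("possible nodes:   %d, possible cpus: %d\n",
               numa_num_possible_nodes(), numa_num_possible_cpus());
        printf("task may use %d cpus on %d nodes\n",
               numa_num_task_cpus(), numa_num_task_nodes());
}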
00679 
00680 /*
00681  * Return the number of the highest possible node in a system,
00682  * which for v1 is the size of a numa.h nodemask_t (in bits) - 1,
00683  * but for v2 is the size of a kernel nodemask_t (in bits) - 1.
00684  */
00685 int
00686 numa_max_possible_node_v1(void)
00687 {
00688        return ((sizeof(nodemask_t)*8)-1);
00689 }
00690 __asm__(".symver numa_max_possible_node_v1,numa_max_possible_node@libnuma_1.1");
00691 
00692 int
00693 numa_max_possible_node_v2(void)
00694 {
00695        return numa_num_possible_nodes()-1;
00696 }
00697 __asm__(".symver numa_max_possible_node_v2,numa_max_possible_node@@libnuma_1.2");
00698 
00699 make_internal_alias(numa_max_possible_node_v1);
00700 make_internal_alias(numa_max_possible_node_v2);
00701 
00702 /*
00703  * Allocate a bitmask for cpus, of a size large enough to
00704  * match the kernel's cpumask_t.
00705  */
00706 struct bitmask *
00707 numa_allocate_cpumask()
00708 {
00709        int ncpus = numa_num_possible_cpus();
00710 
00711        return numa_bitmask_alloc(ncpus);
00712 }
00713 
00714 /*
00715  * Allocate a bitmask the size of a libnuma nodemask_t
00716  */
00717 static struct bitmask *
00718 allocate_nodemask_v1(void)
00719 {
00720        int nnodes = numa_max_possible_node_v1_int()+1;
00721 
00722        return numa_bitmask_alloc(nnodes);
00723 }
00724 
00725 /*
00726  * Allocate a bitmask for nodes, of a size large enough to
00727  * match the kernel's nodemask_t.
00728  */
00729 struct bitmask *
00730 numa_allocate_nodemask(void)
00731 {
00732        struct bitmask *bmp;
00733        int nnodes = numa_max_possible_node_v2_int() + 1;
00734 
00735        bmp = numa_bitmask_alloc(nnodes);
00736        return bmp;
00737 }
00738 
00739 /* (cache the result?) */
00740 long long numa_node_size64(int node, long long *freep)
00741 { 
00742        size_t len = 0;
00743        char *line = NULL;
00744        long long size = -1;
00745        FILE *f; 
00746        char fn[64];
00747        int ok = 0;
00748        int required = freep ? 2 : 1; 
00749 
00750        if (freep) 
00751               *freep = -1; 
00752        sprintf(fn,"/sys/devices/system/node/node%d/meminfo", node); 
00753        f = fopen(fn, "r");
00754        if (!f)
00755               return -1; 
00756        while (getdelim(&line, &len, '\n', f) > 0) { 
00757               char *end;
00758               char *s = strcasestr(line, "kB"); 
00759               if (!s) 
00760                      continue; 
00761               --s; 
00762               while (s > line && isspace(*s))
00763                      --s;
00764               while (s > line && isdigit(*s))
00765                      --s; 
00766               if (strstr(line, "MemTotal")) { 
00767                      size = strtoull(s,&end,0) << 10; 
00768                      if (end == s) 
00769                             size = -1;
00770                      else
00771                             ok++; 
00772               }
00773               if (freep && strstr(line, "MemFree")) { 
00774                      *freep = strtoull(s,&end,0) << 10; 
00775                      if (end == s) 
00776                             *freep = -1;
00777                      else
00778                             ok++; 
00779               }
00780        } 
00781        fclose(f); 
00782        free(line);
00783        if (ok != required)
00784               numa_warn(W_badmeminfo, "Cannot parse sysfs meminfo (%d)", ok);
00785        return size;
00786 }
00787 
00788 make_internal_alias(numa_node_size64);
00789 
00790 long numa_node_size(int node, long *freep)
00791 {      
00792        long long f2; 
00793        long sz = numa_node_size64_int(node, &f2);
00794        if (freep) 
00795               *freep = f2; 
00796        return sz;    
00797 }
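
/*
 * Illustrative sketch (not part of the original libnuma.c): querying the
 * memory size of node 0 with the 64-bit variant above.
 */
static void example_node_size(void)
{
        long long freemem;
        long long total = numa_node_size64(0, &freemem);

        if (total >= 0)
                printf("node 0: %lld bytes total, %lld bytes free\n",
                       total, freemem);
}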
00798 
00799 int numa_available(void)
00800 {
00801        if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS)
00802               return -1; 
00803        return 0;
00804 } 
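
/*
 * Illustrative sketch (not part of the original libnuma.c): per the note in
 * the file header, all other calls are undefined unless this check
 * succeeds, so callers are expected to start with something like:
 */
static int example_check_numa(void)
{
        if (numa_available() < 0) {
                fprintf(stderr, "NUMA is not available on this system\n");
                return -1;
        }
        return 0;
}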
00805 
00806 void
00807 numa_interleave_memory_v1(void *mem, size_t size, const nodemask_t *mask)
00808 {
00809        struct bitmask bitmask;
00810 
00811        bitmask.size = sizeof(nodemask_t) * 8;
00812        bitmask.maskp = (unsigned long *)mask;
00813        dombind(mem, size, MPOL_INTERLEAVE, &bitmask);
00814 }
00815 __asm__(".symver numa_interleave_memory_v1,numa_interleave_memory@libnuma_1.1");
00816 
00817 void
00818 numa_interleave_memory_v2(void *mem, size_t size, struct bitmask *bmp)
00819 { 
00820        dombind(mem, size, MPOL_INTERLEAVE, bmp);
00821 } 
00822 __asm__(".symver numa_interleave_memory_v2,numa_interleave_memory@@libnuma_1.2");
00823 
00824 void numa_tonode_memory(void *mem, size_t size, int node)
00825 {
00826        struct bitmask *nodes;
00827 
00828        nodes = numa_allocate_nodemask();
00829        numa_bitmask_setbit(nodes, node);
00830        dombind(mem, size, bind_policy, nodes);
00831        numa_bitmask_free(nodes);
00832 }
00833 
00834 void
00835 numa_tonodemask_memory_v1(void *mem, size_t size, const nodemask_t *mask)
00836 {
00837        struct bitmask bitmask;
00838 
00839        bitmask.maskp = (unsigned long *)mask;
00840        bitmask.size  = sizeof(nodemask_t) * 8;
00841        dombind(mem, size,  bind_policy, &bitmask);
00842 }
00843 __asm__(".symver numa_tonodemask_memory_v1,numa_tonodemask_memory@libnuma_1.1");
00844 
00845 void
00846 numa_tonodemask_memory_v2(void *mem, size_t size, struct bitmask *bmp)
00847 {
00848        dombind(mem, size,  bind_policy, bmp);
00849 }
00850 __asm__(".symver numa_tonodemask_memory_v2,numa_tonodemask_memory@@libnuma_1.2");
00851 
00852 void numa_setlocal_memory(void *mem, size_t size)
00853 {
00854        dombind(mem, size, MPOL_PREFERRED, NULL);
00855 }
00856 
00857 void numa_police_memory(void *mem, size_t size)
00858 {
00859        int pagesize = numa_pagesize_int();
00860        unsigned long i; 
00861        for (i = 0; i < size; i += pagesize)
00862               asm volatile("" :: "r" (((volatile unsigned char *)mem)[i]));
00863 }
00864 
00865 make_internal_alias(numa_police_memory);
00866 
00867 void *numa_alloc(size_t size)
00868 {
00869        char *mem;
00870        mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
00871                  0, 0); 
00872        if (mem == (char *)-1)
00873               return NULL;
00874        numa_police_memory_int(mem, size);
00875        return mem;
00876 } 
00877 
00878 void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
00879 {
00880        char *mem;
00881        mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
00882        if (mem == (char *)-1)
00883               return NULL;
00884        /*
00885         *     The memory policy of the allocated pages is preserved by mremap(), so
00886         *     there is no need to (re)set it here. If the policy of the original
00887         *     allocation is not set, the new pages will be allocated according to the
00888         *     process' mempolicy. Trying to allocate explicitly the new pages on the
00889         *     same node as the original ones would require changing the policy of the
00890         *     newly allocated pages, which violates the numa_realloc() semantics.
00891         */ 
00892        return mem;
00893 }
00894 
00895 void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask)
00896 {
00897        char *mem;
00898        struct bitmask bitmask;
00899 
00900        mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
00901                      0, 0);
00902        if (mem == (char *)-1)
00903               return NULL;
00904        bitmask.maskp = (unsigned long *)mask;
00905        bitmask.size  = sizeof(nodemask_t) * 8;
00906        dombind(mem, size, MPOL_INTERLEAVE, &bitmask);
00907        return mem;
00908 }
00909 __asm__(".symver numa_alloc_interleaved_subset_v1,numa_alloc_interleaved_subset@libnuma_1.1");
00910 
00911 void *numa_alloc_interleaved_subset_v2(size_t size, struct bitmask *bmp)
00912 { 
00913        char *mem;    
00914 
00915        mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
00916                  0, 0); 
00917        if (mem == (char *)-1) 
00918               return NULL;
00919        dombind(mem, size, MPOL_INTERLEAVE, bmp);
00920        return mem;
00921 } 
00922 __asm__(".symver numa_alloc_interleaved_subset_v2,numa_alloc_interleaved_subset@@libnuma_1.2");
00923 
00924 make_internal_alias(numa_alloc_interleaved_subset_v1);
00925 make_internal_alias(numa_alloc_interleaved_subset_v2);
00926 
00927 void *
00928 numa_alloc_interleaved(size_t size)
00929 { 
00930        return numa_alloc_interleaved_subset_v2_int(size, numa_all_nodes_ptr);
00931 } 
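
/*
 * Illustrative sketch (not part of the original libnuma.c): allocating an
 * interleaved buffer and releasing it again.  The 16 MB size is arbitrary.
 */
static void example_alloc_interleaved(void)
{
        size_t sz = 16UL << 20;
        void *buf = numa_alloc_interleaved(sz);

        if (buf) {
                memset(buf, 0, sz);      /* touch the pages */
                numa_free(buf, sz);
        }
}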
00932 
00933 /*
00934  * given a user node mask, set memory policy to use those nodes
00935  */
00936 void
00937 numa_set_interleave_mask_v1(nodemask_t *mask)
00938 {
00939        struct bitmask *bmp;
00940        int nnodes = numa_max_possible_node_v1_int()+1;
00941 
00942        bmp = numa_bitmask_alloc(nnodes);
00943        copy_nodemask_to_bitmask(mask, bmp);
00944        if (numa_bitmask_equal(bmp, numa_no_nodes_ptr))
00945               setpol(MPOL_DEFAULT, bmp);
00946        else
00947               setpol(MPOL_INTERLEAVE, bmp);
00948        numa_bitmask_free(bmp);
00949 }
00950 
00951 __asm__(".symver numa_set_interleave_mask_v1,numa_set_interleave_mask@libnuma_1.1");
00952 
00953 void
00954 numa_set_interleave_mask_v2(struct bitmask *bmp)
00955 {
00956        if (numa_bitmask_equal(bmp, numa_no_nodes_ptr))
00957               setpol(MPOL_DEFAULT, bmp);
00958        else
00959               setpol(MPOL_INTERLEAVE, bmp);
00960 } 
00961 __asm__(".symver numa_set_interleave_mask_v2,numa_set_interleave_mask@@libnuma_1.2");
00962 
00963 nodemask_t
00964 numa_get_interleave_mask_v1(void)
00965 {
00966        int oldpolicy;
00967        struct bitmask *bmp;
00968        nodemask_t mask;
00969 
00970        bmp = allocate_nodemask_v1();
00971        getpol(&oldpolicy, bmp);
00972        if (oldpolicy == MPOL_INTERLEAVE)
00973               copy_bitmask_to_nodemask(bmp, &mask);
00974        else
00975               copy_bitmask_to_nodemask(numa_no_nodes_ptr, &mask);
00976        numa_bitmask_free(bmp);
00977        return mask;
00978 }
00979 __asm__(".symver numa_get_interleave_mask_v1,numa_get_interleave_mask@libnuma_1.1");
00980 
00981 struct bitmask *
00982 numa_get_interleave_mask_v2(void)
00983 { 
00984        int oldpolicy;
00985        struct bitmask *bmp;
00986 
00987        bmp = numa_allocate_nodemask();
00988        getpol(&oldpolicy, bmp);
00989        if (oldpolicy != MPOL_INTERLEAVE)
00990               copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp);
00991        return bmp;
00992 } 
00993 __asm__(".symver numa_get_interleave_mask_v2,numa_get_interleave_mask@@libnuma_1.2");
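
/*
 * Illustrative sketch (not part of the original libnuma.c): setting the
 * task's interleave mask to all allowed nodes and reading it back.  Inside
 * this file the v2 entry points are named directly; an external caller
 * reaches them through the unversioned numa_set_interleave_mask() and
 * numa_get_interleave_mask() names.
 */
static void example_interleave_mask(void)
{
        struct bitmask *mask;

        numa_set_interleave_mask_v2(numa_all_nodes_ptr);
        mask = numa_get_interleave_mask_v2();
        printf("interleaving over %u node(s)\n", numa_bitmask_weight(mask));
        numa_bitmask_free(mask);
}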
00994 
00995 /* (undocumented) */
00996 int numa_get_interleave_node(void)
00997 { 
00998        int nd;
00999        if (get_mempolicy(&nd, NULL, 0, 0, MPOL_F_NODE) == 0)
01000               return nd;
01001        return 0;     
01002 } 
01003 
01004 void *numa_alloc_onnode(size_t size, int node) 
01005 { 
01006        char *mem; 
01007        struct bitmask *bmp;
01008 
01009        bmp = numa_allocate_nodemask();
01010        numa_bitmask_setbit(bmp, node);
01011        mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
01012                  0, 0);  
01013        if (mem == (char *)-1)
01014               mem = NULL;
01015        else 
01016               dombind(mem, size, bind_policy, bmp);
01017        numa_bitmask_free(bmp);
01018        return mem;   
01019 } 
01020 
01021 void *numa_alloc_local(size_t size) 
01022 { 
01023        char *mem; 
01024        mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
01025                  0, 0); 
01026        if (mem == (char *)-1)
01027               mem =  NULL;
01028        else
01029               dombind(mem, size, MPOL_PREFERRED, NULL);
01030        return mem;   
01031 } 
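
/*
 * Illustrative sketch (not part of the original libnuma.c): placing one
 * buffer on node 0 and one on the local node.  The sizes and the node
 * number are arbitrary.
 */
static void example_alloc_onnode(void)
{
        size_t sz = 4UL << 20;
        void *on_node0 = numa_alloc_onnode(sz, 0);
        void *local    = numa_alloc_local(sz);

        if (on_node0)
                numa_free(on_node0, sz);
        if (local)
                numa_free(local, sz);
}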
01032 
01033 void numa_set_bind_policy(int strict) 
01034 { 
01035        if (strict) 
01036               bind_policy = MPOL_BIND; 
01037        else
01038               bind_policy = MPOL_PREFERRED;
01039 } 
01040 
01041 void
01042 numa_set_membind_v1(const nodemask_t *mask)
01043 {
01044        struct bitmask bitmask;
01045 
01046        bitmask.maskp = (unsigned long *)mask;
01047        bitmask.size  = sizeof(nodemask_t) * 8;
01048        setpol(MPOL_BIND, &bitmask);
01049 }
01050 __asm__(".symver numa_set_membind_v1,numa_set_membind@libnuma_1.1");
01051 
01052 void
01053 numa_set_membind_v2(struct bitmask *bmp)
01054 { 
01055        setpol(MPOL_BIND, bmp);
01056 } 
01057 __asm__(".symver numa_set_membind_v2,numa_set_membind@@libnuma_1.2");
01058 
01059 make_internal_alias(numa_set_membind_v2);
01060 
01061 /*
01062  * copy a bitmask map body to a numa.h nodemask_t structure
01063  */
01064 void
01065 copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *nmp)
01066 {
01067        int max, i;
01068 
01069        memset(nmp, 0, sizeof(nodemask_t));
01070         max = (sizeof(nodemask_t)*8);
01071        for (i=0; i<bmp->size; i++) {
01072               if (i >= max)
01073                      break;
01074               if (numa_bitmask_isbitset(bmp, i))
01075                      nodemask_set_compat((nodemask_t *)nmp, i);
01076        }
01077 }
01078 
01079 /*
01080  * copy a bitmask map body to another bitmask body
01081  * fill a larger destination with zeroes
01082  */
01083 void
01084 copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask *bmpto)
01085 {
01086        int bytes;
01087 
01088        if (bmpfrom->size >= bmpto->size) {
01089               memcpy(bmpto->maskp, bmpfrom->maskp, CPU_BYTES(bmpto->size));
01090        } else if (bmpfrom->size < bmpto->size) {
01091               bytes = CPU_BYTES(bmpfrom->size);
01092               memcpy(bmpto->maskp, bmpfrom->maskp, bytes);
01093               memset(((char *)bmpto->maskp)+bytes, 0,
01094                                    CPU_BYTES(bmpto->size)-bytes);
01095        }
01096 }
01097 
01098 /*
01099  * copy a numa.h nodemask_t structure to a bitmask map body
01100  */
01101 void
01102 copy_nodemask_to_bitmask(nodemask_t *nmp, struct bitmask *bmp)
01103 {
01104        int max, i;
01105 
01106        numa_bitmask_clearall(bmp);
01107         max = (sizeof(nodemask_t)*8);
01108        if (max > bmp->size)
01109               max = bmp->size;
01110        for (i=0; i<max; i++) {
01111               if (nodemask_isset_compat(nmp, i))
01112                      numa_bitmask_setbit(bmp, i);
01113        }
01114 }
01115 
01116 nodemask_t
01117 numa_get_membind_v1(void)
01118 {
01119        int oldpolicy;
01120        struct bitmask *bmp;
01121        nodemask_t nmp;
01122 
01123        bmp = allocate_nodemask_v1();
01124        getpol(&oldpolicy, bmp);
01125        if (oldpolicy == MPOL_BIND) {
01126               copy_bitmask_to_nodemask(bmp, &nmp);
01127        } else {
01128               /* copy the body of the map to numa_all_nodes */
01129               copy_bitmask_to_nodemask(bmp, &numa_all_nodes);
01130               nmp = numa_all_nodes;
01131        }
01132        numa_bitmask_free(bmp);
01133        return nmp;
01134 }
01135 __asm__(".symver numa_get_membind_v1,numa_get_membind@libnuma_1.1");
01136 
01137 struct bitmask *
01138 numa_get_membind_v2(void)
01139 {
01140        int oldpolicy;
01141        struct bitmask *bmp;
01142 
01143        bmp = numa_allocate_nodemask();
01144        getpol(&oldpolicy, bmp);
01145        if (oldpolicy != MPOL_BIND)
01146               copy_bitmask_to_bitmask(numa_all_nodes_ptr, bmp);
01147        return bmp;
01148 } 
01149 __asm__(".symver numa_get_membind_v2,numa_get_membind@@libnuma_1.2");
01150 
01151 //TODO:  do we need a v1 nodemask_t version?
01152 struct bitmask *numa_get_mems_allowed(void)
01153 {
01154        struct bitmask *bmp;
01155 
01156        /*
01157         * can change, so query on each call.
01158         */
01159        bmp = numa_allocate_nodemask();
01160        if (get_mempolicy(NULL, bmp->maskp, bmp->size + 1, 0,
01161                             MPOL_F_MEMS_ALLOWED) < 0)
01162               numa_error("get_mempolicy");
01163        return bmp;
01164 }
01165 make_internal_alias(numa_get_mems_allowed);
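
/*
 * Illustrative sketch (not part of the original libnuma.c): listing the
 * nodes the current cpuset allows this task to allocate from.
 */
static void example_mems_allowed(void)
{
        struct bitmask *allowed = numa_get_mems_allowed();
        int i;

        for (i = 0; i <= numa_max_node(); i++)
                if (numa_bitmask_isbitset(allowed, i))
                        printf("allocation allowed on node %d\n", i);
        numa_bitmask_free(allowed);
}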
01166 
01167 
01168 void numa_free(void *mem, size_t size)
01169 { 
01170        munmap(mem, size); 
01171 } 
01172 
01173 int
01174 numa_parse_bitmap_v1(char *line, unsigned long *mask, int ncpus)
01175 {
01176        int i;
01177        char *p = strchr(line, '\n');
01178        if (!p)
01179               return -1;
01180 
01181        for (i = 0; p > line;i++) {
01182               char *oldp, *endp;
01183               oldp = p;
01184               if (*p == ',')
01185                      --p;
01186               while (p > line && *p != ',')
01187                      --p;
01188               /* Eat two 32bit fields at a time to get longs */
01189               if (p > line && sizeof(unsigned long) == 8) {
01190                      oldp--;
01191                      memmove(p, p+1, oldp-p+1);
01192                      while (p > line && *p != ',')
01193                             --p;
01194               }
01195               if (*p == ',')
01196                      p++;
01197               if (i >= CPU_LONGS(ncpus))
01198                      return -1;
01199               mask[i] = strtoul(p, &endp, 16);
01200               if (endp != oldp)
01201                      return -1;
01202               p--;
01203        }
01204        return 0;
01205 }
01206 __asm__(".symver numa_parse_bitmap_v1,numa_parse_bitmap@libnuma_1.1");
01207 
01208 int
01209 numa_parse_bitmap_v2(char *line, struct bitmask *mask)
01210 {
01211        int i, ncpus;
01212        char *p = strchr(line, '\n'); 
01213        if (!p)
01214               return -1;
01215        ncpus = mask->size;
01216 
01217        for (i = 0; p > line;i++) {
01218               char *oldp, *endp; 
01219               oldp = p;
01220               if (*p == ',') 
01221                      --p;
01222               while (p > line && *p != ',')
01223                      --p;
01224               /* Eat two 32bit fields at a time to get longs */
01225               if (p > line && sizeof(unsigned long) == 8) {
01226                      oldp--;
01227                      memmove(p, p+1, oldp-p+1);
01228                      while (p > line && *p != ',')
01229                             --p;
01230               }
01231               if (*p == ',')
01232                      p++;
01233               if (i >= CPU_LONGS(ncpus))
01234                      return -1;
01235               mask->maskp[i] = strtoul(p, &endp, 16);
01236               if (endp != oldp)
01237                      return -1;
01238               p--;
01239        }
01240        return 0;
01241 }
01242 __asm__(".symver numa_parse_bitmap_v2,numa_parse_bitmap@@libnuma_1.2");
01243 
01244 void
01245 init_node_cpu_mask_v2(void)
01246 {
01247        int nnodes = numa_max_possible_node_v2_int() + 1;
01248        node_cpu_mask_v2 = calloc (nnodes, sizeof(struct bitmask *));
01249 }
01250 
01251 /* This would be better with some locking, but I don't want to make libnuma
01252    dependent on pthreads right now. The races are relatively harmless. */
01253 int
01254 numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
01255 {
01256        int err = 0;
01257        char fn[64];
01258        FILE *f;
01259        char *line = NULL;
01260        size_t len = 0;
01261        struct bitmask bitmask;
01262        int buflen_needed;
01263        unsigned long *mask;
01264        int ncpus = numa_num_possible_cpus();
01265        int maxnode = numa_max_node_int();
01266 
01267        buflen_needed = CPU_BYTES(ncpus);
01268        if ((unsigned)node > maxnode || bufferlen < buflen_needed) {
01269               errno = ERANGE;
01270               return -1;
01271        }
01272        if (bufferlen > buflen_needed)
01273               memset(buffer, 0, bufferlen);
01274        if (node_cpu_mask_v1[node]) {
01275               memcpy(buffer, node_cpu_mask_v1[node], buflen_needed);
01276               return 0;
01277        }
01278 
01279        mask = malloc(buflen_needed);
01280        if (!mask)
01281               mask = (unsigned long *)buffer;
01282        memset(mask, 0, buflen_needed);
01283 
01284        sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node);
01285        f = fopen(fn, "r");
01286        if (!f || getdelim(&line, &len, '\n', f) < 1) {
01287               numa_warn(W_nosysfs2,
01288                  "/sys not mounted or invalid. Assuming one node: %s",
01289                        strerror(errno));
01290               numa_warn(W_nosysfs2,
01291                  "(cannot open or correctly parse %s)", fn);
01292               bitmask.maskp = (unsigned long *)mask;
01293               bitmask.size  = buflen_needed * 8;
01294               numa_bitmask_setall(&bitmask);
01295               err = -1;
01296        }
01297        if (f)
01298               fclose(f);
01299 
01300        if (line && (numa_parse_bitmap_v1(line, mask, ncpus) < 0)) {
01301               numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node");
01302               bitmask.maskp = (unsigned long *)mask;
01303               bitmask.size  = buflen_needed * 8;
01304               numa_bitmask_setall(&bitmask);
01305               err = -1;
01306        }
01307 
01308        free(line);
01309        memcpy(buffer, mask, buflen_needed);
01310 
01311        /* slightly racy, see above */
01312        if (node_cpu_mask_v1[node]) {
01313               if (mask != buffer)
01314                      free(mask);
01315        } else {
01316               node_cpu_mask_v1[node] = mask;
01317        }
01318        return err;
01319 }
01320 __asm__(".symver numa_node_to_cpus_v1,numa_node_to_cpus@libnuma_1.1");
01321 
01322 /*
01323  * test whether a node has cpus
01324  */
01325 /* This would be better with some locking, but I don't want to make libnuma
01326    dependent on pthreads right now. The races are relatively harmless. */
01327 /*
01328  * deliver a bitmask of cpus representing the cpus on a given node
01329  */
01330 int
01331 numa_node_to_cpus_v2(int node, struct bitmask *buffer)
01332 {
01333        int err = 0, bufferlen;
01334        int nnodes = numa_max_node();
01335        char fn[64], *line = NULL;
01336        FILE *f; 
01337        size_t len = 0; 
01338        struct bitmask *mask;
01339 
01340        if (!node_cpu_mask_v2)
01341               init_node_cpu_mask_v2();
01342 
01343        bufferlen = numa_bitmask_nbytes(buffer);
01344        if (node > nnodes) {
01345               errno = ERANGE;
01346               return -1;
01347        }
01348        numa_bitmask_clearall(buffer);
01349 
01350        if (node_cpu_mask_v2[node]) {
01351               /* have already constructed a mask for this node */
01352               if (buffer->size < node_cpu_mask_v2[node]->size) {
01353                      numa_error("map size mismatch; abort\n");
01354                      return -1;
01355               }
01356               copy_bitmask_to_bitmask(node_cpu_mask_v2[node], buffer);
01357               return 0;
01358        }
01359 
01360        /* need a new mask for this node */
01361        mask = numa_allocate_cpumask();
01362 
01363        /* this is a kernel cpumask_t (see node_read_cpumap()) */
01364        sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node); 
01365        f = fopen(fn, "r"); 
01366        if (!f || getdelim(&line, &len, '\n', f) < 1) { 
01367               numa_warn(W_nosysfs2,
01368                  "/sys not mounted or invalid. Assuming one node: %s",
01369                        strerror(errno)); 
01370               numa_warn(W_nosysfs2,
01371                  "(cannot open or correctly parse %s)", fn);
01372               numa_bitmask_setall(mask);
01373               err = -1;
01374        } 
01375        if (f)
01376               fclose(f);
01377 
01378        if (line && (numa_parse_bitmap_v2(line, mask) < 0)) {
01379               numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node");
01380               numa_bitmask_setall(mask);
01381               err = -1;
01382        }
01383 
01384        free(line);
01385        copy_bitmask_to_bitmask(mask, buffer);
01386 
01387        /* slightly racy, see above */ 
01388        /* save the mask we created */
01389        if (node_cpu_mask_v2[node]) {
01390               /* how could this be? */
01391               if (mask != buffer)
01392                      numa_bitmask_free(mask);
01393        } else {
01394               node_cpu_mask_v2[node] = mask;
01395        } 
01396        return err; 
01397 }
01398 __asm__(".symver numa_node_to_cpus_v2,numa_node_to_cpus@@libnuma_1.2");
01399 
01400 make_internal_alias(numa_node_to_cpus_v1);
01401 make_internal_alias(numa_node_to_cpus_v2);
01402 
01403 /* report the node of the specified cpu */
01404 int numa_node_of_cpu(int cpu)
01405 {
01406        struct bitmask *bmp;
01407        int ncpus, nnodes, node, ret;
01408 
01409        ncpus = numa_num_possible_cpus();
01410        if (cpu > ncpus){
01411               errno = EINVAL;
01412               return -1;
01413        }
01414        bmp = numa_bitmask_alloc(ncpus);
01415        nnodes = numa_max_node();
01416        for (node = 0; node <= nnodes; node++){
01417               numa_node_to_cpus_v2_int(node, bmp);
01418               if (numa_bitmask_isbitset(bmp, cpu)){
01419                      ret = node;
01420                      goto end;
01421               }
01422        }
01423        ret = -1;
01424        errno = EINVAL;
01425 end:
01426        numa_bitmask_free(bmp);
01427        return ret;
01428 }
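
/*
 * Illustrative sketch (not part of the original libnuma.c): mapping each
 * configured cpu back to its node with numa_node_of_cpu() above.
 */
static void example_cpu_to_node(void)
{
        int cpu;

        for (cpu = 0; cpu < numa_num_configured_cpus(); cpu++)
                printf("cpu %d -> node %d\n", cpu, numa_node_of_cpu(cpu));
}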
01429 
01430 
01431 int
01432 numa_run_on_node_mask_v1(const nodemask_t *mask)
01433 {
01434        int ncpus = numa_num_possible_cpus();
01435        int i, k, err;
01436        unsigned long cpus[CPU_LONGS(ncpus)], nodecpus[CPU_LONGS(ncpus)];
01437        memset(cpus, 0, CPU_BYTES(ncpus));
01438        for (i = 0; i < NUMA_NUM_NODES; i++) {
01439               if (mask->n[i / BITS_PER_LONG] == 0)
01440                      continue;
01441               if (nodemask_isset_compat(mask, i)) {
01442                      if (numa_node_to_cpus_v1_int(i, nodecpus, CPU_BYTES(ncpus)) < 0) {
01443                             numa_warn(W_noderunmask,
01444                                      "Cannot read node cpumask from sysfs");
01445                             continue;
01446                      }
01447                      for (k = 0; k < CPU_LONGS(ncpus); k++)
01448                             cpus[k] |= nodecpus[k];
01449               }
01450        }
01451        err = numa_sched_setaffinity_v1(0, CPU_BYTES(ncpus), cpus);
01452 
01453        /* The sched_setaffinity API is broken because it expects
01454           the user to guess the kernel cpuset size. Do this in a
01455           brute force way. */
01456        if (err < 0 && errno == EINVAL) {
01457               int savederrno = errno;
01458               char *bigbuf;
01459               static int size = -1;
01460               if (size == -1)
01461                      size = CPU_BYTES(ncpus) * 2;
01462               bigbuf = malloc(CPU_BUFFER_SIZE);
01463               if (!bigbuf) {
01464                      errno = ENOMEM;
01465                      return -1;
01466               }
01467               errno = savederrno;
01468               while (size <= CPU_BUFFER_SIZE) {
01469                      memcpy(bigbuf, cpus, CPU_BYTES(ncpus));
01470                      memset(bigbuf + CPU_BYTES(ncpus), 0,
01471                             CPU_BUFFER_SIZE - CPU_BYTES(ncpus));
01472                      err = numa_sched_setaffinity_v1_int(0, size, (unsigned long *)bigbuf);
01473                      if (err == 0 || errno != EINVAL)
01474                             break;
01475                      size *= 2;
01476               }
01477               savederrno = errno;
01478               free(bigbuf);
01479               errno = savederrno;
01480        }
01481        return err;
01482 }
01483 __asm__(".symver numa_run_on_node_mask_v1,numa_run_on_node_mask@libnuma_1.1");
01484 
01485 /*
01486  * Given a node mask (size of a kernel nodemask_t) (probably populated by
01487  * a user argument list) set up a map of cpus (map "cpus") on those nodes.
01488  * Then set affinity to those cpus.
01489  */
01490 int
01491 numa_run_on_node_mask_v2(struct bitmask *bmp)
01492 {      
01493        int ncpus, i, k, err;
01494        struct bitmask *cpus, *nodecpus;
01495 
01496        cpus = numa_allocate_cpumask();
01497        ncpus = cpus->size;
01498        nodecpus = numa_allocate_cpumask();
01499 
01500        for (i = 0; i < bmp->size; i++) {
01501               if (bmp->maskp[i / BITS_PER_LONG] == 0)
01502                      continue;
01503               if (numa_bitmask_isbitset(bmp, i)) {
01504                      /*
01505                       * numa_all_nodes_ptr is cpuset aware; use only
01506                       * these nodes
01507                       */
01508                      if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) {
01509                             numa_warn(W_noderunmask,
01510                                    "node %d not allowed", i);
01511                             continue;
01512                      }
01513                      if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
01514                             numa_warn(W_noderunmask, 
01515                                    "Cannot read node cpumask from sysfs");
01516                             continue;
01517                      }
01518                      for (k = 0; k < CPU_LONGS(ncpus); k++)
01519                             cpus->maskp[k] |= nodecpus->maskp[k];
01520               }      
01521        }
01522        err = numa_sched_setaffinity_v2_int(0, cpus);
01523 
01524        numa_bitmask_free(cpus);
01525        numa_bitmask_free(nodecpus);
01526 
01527        /* used to have to consider that this could fail - it shouldn't now */
01528        if (err < 0) {
01529               numa_error("numa_sched_setaffinity_v2_int() failed; abort\n");
01530        }
01531 
01532        return err;
01533 } 
01534 __asm__(".symver numa_run_on_node_mask_v2,numa_run_on_node_mask@@libnuma_1.2");
01535 
01536 make_internal_alias(numa_run_on_node_mask_v2);
01537 
01538 nodemask_t
01539 numa_get_run_node_mask_v1(void)
01540 {
01541        int ncpus = numa_num_configured_cpus();
01542        int i, k;
01543        int max = numa_max_node_int();
01544        struct bitmask *bmp, *cpus, *nodecpus;
01545        nodemask_t nmp;
01546 
01547        cpus = numa_allocate_cpumask();
01548        if (numa_sched_getaffinity_v2_int(0, cpus) < 0){
01549               nmp = numa_no_nodes;
01550               goto free_cpus;
01551        }
01552 
01553        nodecpus = numa_allocate_cpumask();
01554        bmp = allocate_nodemask_v1(); /* the size of a nodemask_t */
01555        for (i = 0; i <= max; i++) {
01556               if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
01557                      /* It's possible for the node to not exist */
01558                      continue;
01559               }
01560               for (k = 0; k < CPU_LONGS(ncpus); k++) {
01561                      if (nodecpus->maskp[k] & cpus->maskp[k])
01562                             numa_bitmask_setbit(bmp, i);
01563               }
01564        }
01565        copy_bitmask_to_nodemask(bmp, &nmp);
01566        numa_bitmask_free(bmp);
01567        numa_bitmask_free(nodecpus);
01568 free_cpus:
01569        numa_bitmask_free(cpus);
01570        return nmp;
01571 }
01572 __asm__(".symver numa_get_run_node_mask_v1,numa_get_run_node_mask@libnuma_1.1");
01573 
01574 struct bitmask *
01575 numa_get_run_node_mask_v2(void)
01576 { 
01577        int i, k;
01578        int ncpus = numa_num_configured_cpus();
01579        int max = numa_max_node_int();
01580        struct bitmask *bmp, *cpus, *nodecpus;
01581 
01582 
01583        bmp = numa_allocate_cpumask();
01584        cpus = numa_allocate_cpumask();
01585        if (numa_sched_getaffinity_v2_int(0, cpus) < 0){
01586               copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp);
01587               goto free_cpus;
01588        }
01589 
01590        nodecpus = numa_allocate_cpumask();
01591        for (i = 0; i <= max; i++) {
01592               /*
01593                * numa_all_nodes_ptr is cpuset aware; show only
01594                * these nodes
01595                */
01596               if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) {
01597                      continue;
01598               }
01599               if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
01600                      /* It's possible for the node to not exist */
01601                      continue;
01602               }
01603               for (k = 0; k < CPU_LONGS(ncpus); k++) {
01604                      if (nodecpus->maskp[k] & cpus->maskp[k])
01605                             numa_bitmask_setbit(bmp, i);
01606               }
01607        }             
01608        numa_bitmask_free(nodecpus);
01609 free_cpus:
01610        numa_bitmask_free(cpus);
01611        return bmp;
01612 } 
01613 __asm__(".symver numa_get_run_node_mask_v2,numa_get_run_node_mask@@libnuma_1.2");
01614 
01615 int
01616 numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bitmask *tonodes)
01617 {
01618        int numa_num_nodes = numa_num_possible_nodes();
01619 
01620        return migrate_pages(pid, numa_num_nodes + 1, fromnodes->maskp,
01621                                                  tonodes->maskp);
01622 }
01623 
01624 int numa_move_pages(int pid, unsigned long count,
01625        void **pages, const int *nodes, int *status, int flags)
01626 {
01627        return move_pages(pid, count, pages, nodes, status, flags);
01628 }
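
/*
 * Illustrative sketch (not part of the original libnuma.c): move_pages()
 * with a NULL "nodes" argument only queries placement, so the wrapper can
 * be used to ask which node a page currently lives on.
 */
static void example_where_is_page(void *addr)
{
        void *pages[1] = { addr };
        int status[1] = { -1 };

        if (numa_move_pages(0 /* self */, 1, pages, NULL, status, 0) == 0)
                printf("page at %p is on node %d\n", addr, status[0]);
}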
01629 
01630 int numa_run_on_node(int node)
01631 { 
01632        int numa_num_nodes = numa_num_possible_nodes();
01633        int ret = -1;
01634        struct bitmask *cpus;
01635 
01636        if (node >= numa_num_nodes){
01637               errno = EINVAL;
01638               goto out;
01639        }
01640 
01641        cpus = numa_allocate_cpumask();
01642 
01643        if (node == -1)
01644               numa_bitmask_setall(cpus);
01645        else if (numa_node_to_cpus_v2_int(node, cpus) < 0){
01646               numa_warn(W_noderunmask, "Cannot read node cpumask from sysfs");
01647               goto free;
01648        }
01649 
01650        ret = numa_sched_setaffinity_v2_int(0, cpus);
01651 free:
01652        numa_bitmask_free(cpus);
01653 out:
01654        return ret;
01655 } 
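/*
 * Usage sketch (illustrative only; the helper name is hypothetical and not
 * part of libnuma).  A common pattern is to pin the calling thread to one
 * node's CPUs with numa_run_on_node() and then allocate from that node so
 * memory stays local; numa_run_on_node(-1) lifts the CPU restriction again.
 * Free the buffer with numa_free().
 *
 *     #include <numa.h>
 *
 *     static void *node0_buffer(size_t len)
 *     {
 *            if (numa_run_on_node(0) < 0)
 *                   return NULL;
 *            return numa_alloc_onnode(len, 0);
 *     }
 */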
01656 
01657 int numa_preferred(void)
01658 { 
01659        int policy;
01660        int ret;
01661        struct bitmask *bmp;
01662 
01663        bmp = numa_allocate_nodemask();
01664        getpol(&policy, bmp);
01665        if (policy == MPOL_PREFERRED || policy == MPOL_BIND) { 
01666               int i;
01667               int max = numa_num_possible_nodes();
01668               for (i = 0; i < max ; i++) 
01669                      if (numa_bitmask_isbitset(bmp, i)){
01670                             ret = i;
01671                             goto end;
01672                      }
01673        }
01674        /* could read the current CPU from /proc/self/status. Probably 
01675           not worth it. */
01676        ret = 0; /* or random one? */
01677 end:
01678        numa_bitmask_free(bmp);
01679        return ret;
01680 }
01681 
01682 void numa_set_preferred(int node)
01683 { 
01684        struct bitmask *bmp;
01685 
01686        bmp = numa_allocate_nodemask();
01687        if (node >= 0) {
01688               numa_bitmask_setbit(bmp, node);
01689               setpol(MPOL_PREFERRED, bmp);
01690        } else
01691               setpol(MPOL_DEFAULT, bmp);
01692        numa_bitmask_free(bmp);
01693 } 
01694 
01695 void numa_set_localalloc(void) 
01696 {      
01697        setpol(MPOL_DEFAULT, numa_no_nodes_ptr);
01698 } 
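/*
 * Usage sketch (illustrative only; the function name is hypothetical and
 * not part of libnuma), tying the three policy helpers above together:
 * prefer node 1 for new allocations, read the preference back with
 * numa_preferred(), then drop back to the default local policy.
 *
 *     #include <stdio.h>
 *     #include <numa.h>
 *
 *     static void prefer_node1_demo(void)
 *     {
 *            numa_set_preferred(1);
 *            printf("preferred node is now %d\n", numa_preferred());
 *            numa_set_localalloc();      /* back to MPOL_DEFAULT */
 *     }
 */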
01699 
01700 void numa_bind_v1(const nodemask_t *nodemask)
01701 {
01702        struct bitmask bitmask;
01703 
01704        bitmask.maskp = (unsigned long *)nodemask;
01705        bitmask.size  = sizeof(nodemask_t) * 8; /* struct bitmask counts bits, not bytes */
01706        numa_run_on_node_mask_v2_int(&bitmask);
01707        numa_set_membind_v2_int(&bitmask);
01708 }
01709 __asm__(".symver numa_bind_v1,numa_bind@libnuma_1.1");
01710 
01711 void numa_bind_v2(struct bitmask *bmp)
01712 {
01713        numa_run_on_node_mask_v2_int(bmp);
01714        numa_set_membind_v2_int(bmp);
01715 }
01716 __asm__(".symver numa_bind_v2,numa_bind@@libnuma_1.2");
01717 
01718 void numa_set_strict(int flag)
01719 {
01720        if (flag)
01721               mbind_flags |= MPOL_MF_STRICT;
01722        else
01723               mbind_flags &= ~MPOL_MF_STRICT;
01724 }
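/*
 * Usage sketch (illustrative only; the helper name is hypothetical and not
 * part of libnuma).  numa_set_strict() only toggles MPOL_MF_STRICT in the
 * thread-local mbind_flags; the flag takes effect on later mbind-based
 * calls such as numa_tonode_memory(), where mbind(2) then fails with EIO
 * if existing pages in the range do not conform to the requested policy.
 *
 *     #include <numa.h>
 *
 *     static void place_strictly(void *buf, size_t len, int node)
 *     {
 *            numa_set_strict(1);
 *            numa_tonode_memory(buf, len, node);
 *            numa_set_strict(0);
 *     }
 */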
01725 
01726 /*
01727  * Extract a node or processor number from the given string.
01728  * Allow a relative node / processor specification within the allowed
01729  * set if "relative" is nonzero
01730  */
01731 static unsigned long get_nr(char *s, char **end, struct bitmask *bmp, int relative)
01732 {
01733        long i, nr;
01734 
01735        if (!relative)
01736               return strtoul(s, end, 0);
01737 
01738        nr = strtoul(s, end, 0);
01739        if (s == *end)
01740               return nr;
01741        /* Find the nth set bit */
01742        for (i = 0; nr >= 0 && i <= bmp->size; i++)
01743               if (numa_bitmask_isbitset(bmp, i))
01744                      nr--;
01745        return i-1;
01746 }
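/*
 * Worked example of the relative lookup above (illustrative only, not part
 * of the original comment): with a cpuset that allows only nodes 2, 4 and 7,
 * the specification "+1" asks for the second allowed node.  The loop scans
 * the allowed-node map, decrementing nr at set bits 2 and 4, and once nr
 * drops below zero it returns i - 1 == 4, i.e. node 4.
 */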
01747 
01748 /*
01749  * numa_parse_nodestring() is called to create a node mask, given
01750  * an ASCII string such as 25 or 12-15 or 1,3,5-7 or +6-10.
01751  * (the + indicates that the numbers are cpuset-relative)
01752  *
01753  * The nodes may be specified as absolute, or relative to the current cpuset.
01754  * The list of available nodes is in a map pointed to by "numa_all_nodes_ptr",
01755  * which may represent all nodes or the nodes in the current cpuset.
01756  *
01757  * The caller must free the returned bitmask.
01758  */
01759 struct bitmask *
01760 numa_parse_nodestring(char *s)
01761 {
01762        int invert = 0, relative = 0;
01763        int conf_nodes = numa_num_configured_nodes();
01764        char *end;
01765        struct bitmask *mask;
01766 
01767        mask = numa_allocate_nodemask();
01768 
01769        if (s[0] == 0){
01770               copy_bitmask_to_bitmask(numa_no_nodes_ptr, mask);
01771               return mask; /* return freeable mask */
01772        }
01773        if (*s == '!') {
01774               invert = 1;
01775               s++;
01776        }
01777        if (*s == '+') {
01778               relative++;
01779               s++;
01780        }
01781        do {
01782               unsigned long arg;
01783               int i;
01784               if (isalpha(*s)) {
01785                      int n;
01786                      if (!strcmp(s,"all")) {
01787                             copy_bitmask_to_bitmask(numa_all_nodes_ptr,
01788                                                  mask);
01789                             s+=4;
01790                             break;
01791                      }
01792                      n = resolve_affinity(s, mask);
01793                      if (n != NO_IO_AFFINITY) {
01794                             if (n < 0)
01795                                    goto err;
01796                             s += strlen(s) + 1;
01797                             break;
01798                      }
01799               }
01800               arg = get_nr(s, &end, numa_all_nodes_ptr, relative);
01801               if (end == s) {
01802                      numa_warn(W_nodeparse, "unparseable node description `%s'\n", s);
01803                      goto err;
01804               }
01805               if (!numa_bitmask_isbitset(numa_all_nodes_ptr, arg)) {
01806                      numa_warn(W_nodeparse, "node argument %lu is out of range\n", arg);
01807                      goto err;
01808               }
01809               i = arg;
01810               numa_bitmask_setbit(mask, i);
01811               s = end;
01812               if (*s == '-') {
01813                      char *end2;
01814                      unsigned long arg2;
01815                      arg2 = get_nr(++s, &end2, numa_all_nodes_ptr, relative);
01816                      if (end2 == s) {
01817                             numa_warn(W_nodeparse, "missing node argument %s\n", s);
01818                             goto err;
01819                      }
01820                      if (!numa_bitmask_isbitset(numa_all_nodes_ptr, arg2)) {
01821                             numa_warn(W_nodeparse, "node argument %lu out of range\n", arg2);
01822                             goto err;
01823                      }
01824                      while (arg <= arg2) {
01825                             i = arg;
01826                             if (numa_bitmask_isbitset(numa_all_nodes_ptr,i))
01827                                    numa_bitmask_setbit(mask, i);
01828                             arg++;
01829                      }
01830                      s = end2;
01831               }
01832        } while (*s++ == ',');
01833        if (s[-1] != '\0')
01834               goto err;
01835        if (invert) {
01836               int i;
01837               for (i = 0; i < conf_nodes; i++) {
01838                      if (numa_bitmask_isbitset(mask, i))
01839                             numa_bitmask_clearbit(mask, i);
01840                      else
01841                             numa_bitmask_setbit(mask, i);
01842               }
01843        }
01844        return mask;
01845 
01846 err:
01847        numa_bitmask_free(mask);
01848        return NULL;
01849 }
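/*
 * Usage sketch (illustrative only; the helper name is hypothetical and not
 * part of libnuma).  Typical use of numa_parse_nodestring() is to turn a
 * command-line style node list into a bitmask and hand it to a policy call
 * such as numa_set_membind().
 *
 *     #include <numa.h>
 *
 *     static int membind_from_string(char *spec)   /* e.g. "1,3,5-7" */
 *     {
 *            struct bitmask *nodes = numa_parse_nodestring(spec);
 *
 *            if (!nodes)
 *                   return -1;   /* a numa_warn() was already issued */
 *            numa_set_membind(nodes);
 *            numa_bitmask_free(nodes);
 *            return 0;
 *     }
 */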
01850 
01851 /*
01852  * numa_parse_cpustring() is called to create a bitmask, given
01853  * an ASCII string such as 25 or 12-15 or 1,3,5-7 or +6-10.
01854  * (the + indicates that the numbers are cpuset-relative)
01855  *
01856  * The cpus may be specified as absolute, or relative to the current cpuset.
01857  * The list of available cpus for this task is in the map pointed to by
01858  * "numa_all_cpus_ptr", which may represent all cpus or the cpus in the
01859  * current cpuset.
01860  *
01861  * The caller must free the returned bitmask.
01862  */
01863 struct bitmask *
01864 numa_parse_cpustring(char *s)
01865 {
01866        int invert = 0, relative=0;
01867        int conf_cpus = numa_num_configured_cpus();
01868        char *end;
01869        struct bitmask *mask;
01870 
01871        mask = numa_allocate_cpumask();
01872 
01873        if (s[0] == 0)
01874               return mask;
01875        if (*s == '!') {
01876               invert = 1;
01877               s++;
01878        }
01879        if (*s == '+') {
01880               relative++;
01881               s++;
01882        }
01883        do {
01884               unsigned long arg;
01885               int i;
01886 
01887               if (!strcmp(s,"all")) {
01888                      copy_bitmask_to_bitmask(numa_all_cpus_ptr, mask);
01889                      s+=4;
01890                      break;
01891               }
01892               arg = get_nr(s, &end, numa_all_cpus_ptr, relative);
01893               if (end == s) {
01894                      numa_warn(W_cpuparse, "unparseable cpu description `%s'\n", s);
01895                      goto err;
01896               }
01897               if (!numa_bitmask_isbitset(numa_all_cpus_ptr, arg)) {
01898                      numa_warn(W_cpuparse, "cpu argument %s is out of range\n", s);
01899                      goto err;
01900               }
01901               i = arg;
01902               numa_bitmask_setbit(mask, i);
01903               s = end;
01904               if (*s == '-') {
01905                      char *end2;
01906                      unsigned long arg2;
01907                      int i;
01908                      arg2 = get_nr(++s, &end2, numa_all_cpus_ptr, relative);
01909                      if (end2 == s) {
01910                             numa_warn(W_cpuparse, "missing cpu argument %s\n", s);
01911                             goto err;
01912                      }
01913                      if (!numa_bitmask_isbitset(numa_all_cpus_ptr, arg2)) {
01914                             numa_warn(W_cpuparse, "cpu argument %s out of range\n", s);
01915                             goto err;
01916                      }
01917                      while (arg <= arg2) {
01918                             i = arg;
01919                             if (numa_bitmask_isbitset(numa_all_cpus_ptr, i))
01920                                    numa_bitmask_setbit(mask, i);
01921                             arg++;
01922                      }
01923                      s = end2;
01924               }
01925        } while (*s++ == ',');
01926        if (s[-1] != '\0')
01927               goto err;
01928        if (invert) {
01929               int i;
01930               for (i = 0; i < conf_cpus; i++) {
01931                      if (numa_bitmask_isbitset(mask, i))
01932                             numa_bitmask_clearbit(mask, i);
01933                      else
01934                             numa_bitmask_setbit(mask, i);
01935               }
01936        }
01937        return mask;
01938 
01939 err:
01940        numa_bitmask_free(mask);
01941        return NULL;
01942 }
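/*
 * Usage sketch (illustrative only; the helper name is hypothetical and not
 * part of libnuma).  numa_parse_cpustring() pairs naturally with
 * numa_sched_setaffinity(); here "+0-1" names the first two CPUs of the
 * current cpuset (the '+' makes the numbers cpuset-relative).
 *
 *     #include <numa.h>
 *
 *     static int run_on_first_two_cpus(void)
 *     {
 *            struct bitmask *cpus = numa_parse_cpustring("+0-1");
 *            int rc = -1;
 *
 *            if (cpus) {
 *                   rc = numa_sched_setaffinity(0, cpus);
 *                   numa_bitmask_free(cpus);
 *            }
 *            return rc;
 *     }
 */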