Back to index

numactl  2.0.8~rc4
numactl.c
Go to the documentation of this file.
00001 /* Copyright (C) 2003,2004,2005 Andi Kleen, SuSE Labs.
00002    Command line NUMA policy control.
00003 
00004    numactl is free software; you can redistribute it and/or
00005    modify it under the terms of the GNU General Public
00006    License as published by the Free Software Foundation; version
00007    2.
00008 
00009    numactl is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012    General Public License for more details.
00013 
00014    You should find a copy of v2 of the GNU General Public License somewhere
00015    on your Linux system; if not, write to the Free Software Foundation,
00016    Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
00017 #define _GNU_SOURCE
00018 #include <getopt.h>
00019 #include <errno.h>
00020 #include <stdio.h>
00021 #include <stdlib.h>
00022 #include <string.h>
00023 #include <unistd.h>
00024 #include <stdarg.h>
00025 #include <ctype.h>
00026 #include "numa.h"
00027 #include "numaif.h"
00028 #include "numaint.h"
00029 #include "util.h"
00030 #include "shm.h"
00031 
/* Exit status that main() passes to exit() when it finishes on the
   shared memory path instead of executing a command. */
int exitcode;

/* Long options understood by numactl.  The last field of every entry is
   the short option character returned by getopt_long(); the all-zero
   entry terminates the table. */
struct option opts[] = {
	{ "interleave",  required_argument, NULL, 'i' },
	{ "preferred",   required_argument, NULL, 'p' },
	{ "cpubind",     required_argument, NULL, 'c' },
	{ "cpunodebind", required_argument, NULL, 'N' },
	{ "physcpubind", required_argument, NULL, 'C' },
	{ "membind",     required_argument, NULL, 'm' },
	{ "show",        no_argument,       NULL, 's' },
	{ "localalloc",  no_argument,       NULL, 'l' },
	{ "hardware",    no_argument,       NULL, 'H' },

	{ "shm",         required_argument, NULL, 'S' },
	{ "file",        required_argument, NULL, 'f' },
	{ "offset",      required_argument, NULL, 'o' },
	{ "length",      required_argument, NULL, 'L' },
	{ "strict",      no_argument,       NULL, 't' },
	{ "shmmode",     required_argument, NULL, 'M' },
	{ "dump",        no_argument,       NULL, 'd' },
	{ "dump-nodes",  no_argument,       NULL, 'D' },
	{ "shmid",       required_argument, NULL, 'I' },
	{ "huge",        no_argument,       NULL, 'u' },
	{ "touch",       no_argument,       NULL, 'T' },
	{ "verify",      no_argument,       NULL, 'V' }, /* undocumented - for debugging */
	{ NULL, 0, NULL, 0 }
};
00059 
/* Write the command line synopsis to stderr and terminate the process
   with exit status 1.  This function does not return. */
void usage(void)
{
	/* The text contains no conversion specifications, so it can be
	   written verbatim. */
	static const char synopsis[] =
		"usage: numactl [--interleave=nodes] [--preferred=node]\n"
		"               [--physcpubind=cpus] [--cpunodebind=nodes]\n"
		"               [--membind=nodes] [--localalloc] command args ...\n"
		"       numactl [--show]\n"
		"       numactl [--hardware]\n"
		"       numactl [--length length] [--offset offset] [--shmmode shmmode]\n"
		"               [--strict]\n"
		"               [--shmid id] --shm shmkeyfile | --file tmpfsfile\n"
		"               [--huge] [--touch] \n"
		"               memory policy | --dump | --dump-nodes\n"
		"\n"
		"memory policy is --interleave, --preferred, --membind, --localalloc\n"
		"nodes is a comma delimited list of node numbers or A-B ranges or all.\n"
		"Instead of a number a node can also be:\n"
		"  netdev:DEV the node connected to network device DEV\n"
		"  file:PATH  the node the block device of path is connected to\n"
		"  ip:HOST    the node of the network device host routes through\n"
		"  block:PATH the node of block device path\n"
		"  pci:[seg:]bus:dev[:func] The node of a PCI device\n"
		"cpus is a comma delimited list of cpu numbers or A-B ranges or all\n"
		"all ranges can be inverted with !\n"
		"all numbers and ranges can be made cpuset-relative with +\n"
		"the old --cpubind argument is deprecated.\n"
		"use --cpunodebind or --physcpubind instead\n"
		"length can have g (GB), m (MB) or k (KB) suffixes\n";

	fputs(synopsis, stderr);
	exit(1);
}
00090 
/* Report a command line error, then print the usage text.
   msg is a printf-style format string followed by its arguments.  The
   formatted text is written to stderr prefixed with "numactl: " and
   followed by a newline; usage() is called afterwards, which prints the
   synopsis and exits. */
void usage_msg(char *msg, ...)
{
	va_list ap;

	va_start(ap, msg);
	fprintf(stderr, "numactl: ");
	vfprintf(stderr, msg, ap);
	va_end(ap);
	/* Finish the line on stderr, the stream the message itself went to. */
	fputc('\n', stderr);
	usage();
}
00100 
/* Print the CPU affinity of the calling process under the label
   "physcpubind".
   The affinity is read into a bitmask sized from
   numa_num_configured_cpus(); when the query fails with EINVAL the mask
   size is doubled (up to 1024*1024 bits) and the query is retried.  Any
   other failure is handed to err(), which is defined outside this file. */
void show_physcpubind(void)
{
	int ncpus = numa_num_configured_cpus();

	for (;;) {
		struct bitmask *cpubuf = numa_bitmask_alloc(ncpus);

		if (numa_sched_getaffinity(0, cpubuf) < 0) {
			if (errno == EINVAL && ncpus < 1024*1024) {
				/* Drop the undersized mask before retrying so
				   the retries do not leak. */
				numa_bitmask_free(cpubuf);
				ncpus *= 2;
				continue;
			}
			err("sched_get_affinity");
		}
		printcpumask("physcpubind", cpubuf);
		numa_bitmask_free(cpubuf);
		break;
	}
}
00121 
00122 void show(void)
00123 {
00124        unsigned long prefnode;
00125        struct bitmask *membind, *interleave, *cpubind;
00126        unsigned long cur;
00127        int policy;
00128        int numa_num_nodes = numa_num_possible_nodes();
00129        
00130        if (numa_available() < 0) {
00131               show_physcpubind();
00132               printf("No NUMA support available on this system.\n");
00133               exit(1);
00134        }
00135 
00136        cpubind = numa_get_run_node_mask();
00137 
00138        prefnode = numa_preferred();
00139        interleave = numa_get_interleave_mask();
00140        membind = numa_get_membind();
00141        cur = numa_get_interleave_node();
00142 
00143        policy = 0;
00144        if (get_mempolicy(&policy, NULL, 0, 0, 0) < 0)
00145               perror("get_mempolicy");
00146 
00147        printf("policy: %s\n", policy_name(policy));
00148               
00149        printf("preferred node: ");
00150        switch (policy) {
00151        case MPOL_PREFERRED:
00152               if (prefnode != -1) {
00153                      printf("%ld\n", prefnode);
00154                      break;
00155               }
00156               /*FALL THROUGH*/
00157        case MPOL_DEFAULT:
00158               printf("current\n");
00159               break;
00160        case MPOL_INTERLEAVE:
00161               printf("%ld (interleave next)\n",cur);
00162               break;
00163        case MPOL_BIND:
00164               printf("%d\n", find_first_bit(&membind, numa_num_nodes));
00165               break;
00166        }
00167        if (policy == MPOL_INTERLEAVE) {
00168               printmask("interleavemask", interleave);
00169               printf("interleavenode: %ld\n", cur);
00170        }
00171        show_physcpubind();
00172        printmask("cpubind", cpubind);  // for compatibility
00173        printmask("nodebind", cpubind);
00174        printmask("membind", membind);
00175 }
00176 
/* Format a memory amount given in bytes for display.
   mem: size in bytes; the all-ones value (i.e. -1) means "unknown".
   buf: caller-supplied buffer that receives the text; it must be large
        enough for "<not available>" or a decimal number followed by
        " MB".
   Returns buf.  Known sizes are shown in whole megabytes, rounded down. */
char *fmt_mem(unsigned long long mem, char *buf)
{
	if (mem == (unsigned long long)-1)
		sprintf(buf, "<not available>");
	else
		/* %llu is the standard conversion for unsigned long long. */
		sprintf(buf, "%llu MB", mem >> 20);
	return buf;
}
00185 
00186 static void print_distances(int maxnode)
00187 {
00188        int i,k;
00189 
00190        if (numa_distance(maxnode,0) == 0) {
00191               printf("No distance information available.\n");
00192               return;
00193        }
00194        printf("node distances:\n");
00195        printf("node ");
00196        for (i = 0; i <= maxnode; i++)
00197               if (numa_bitmask_isbitset(numa_nodes_ptr, i))
00198                      printf("% 3d ", i);
00199        printf("\n");
00200        for (i = 0; i <= maxnode; i++) {
00201               if (!numa_bitmask_isbitset(numa_nodes_ptr, i))
00202                      continue;
00203               printf("% 3d: ", i);
00204               for (k = 0; k <= maxnode; k++)
00205                      if (numa_bitmask_isbitset(numa_nodes_ptr, i) &&
00206                          numa_bitmask_isbitset(numa_nodes_ptr, k))
00207                             printf("% 3d ", numa_distance(i,k));
00208               printf("\n");
00209        }                    
00210 }
00211 
00212 void print_node_cpus(int node)
00213 {
00214        int i, err;
00215        struct bitmask *cpus;
00216 
00217        cpus = numa_allocate_cpumask();
00218        err = numa_node_to_cpus(node, cpus);
00219        if (err >= 0) {
00220               for (i = 0; i < cpus->size; i++)
00221                      if (numa_bitmask_isbitset(cpus, i))
00222                             printf(" %d", i);
00223        }
00224        putchar('\n');
00225 }
00226 
00227 void hardware(void)
00228 {
00229        int i;
00230        int numnodes=0;
00231        int prevnode=-1;
00232        int skip=0;
00233        int maxnode = numa_max_node();
00234 
00235        for (i=0; i<=maxnode; i++)
00236               if (numa_bitmask_isbitset(numa_nodes_ptr, i))
00237                      numnodes++;
00238        printf("available: %d nodes (", numnodes);
00239        for (i=0; i<=maxnode; i++) {
00240               if (numa_bitmask_isbitset(numa_nodes_ptr, i)) {
00241                      if (prevnode == -1) {
00242                             printf("%d", i);
00243                             prevnode=i;
00244                             continue;
00245                      }
00246 
00247                      if (i > prevnode + 1) {
00248                             if (skip) {
00249                                    printf("%d", prevnode);
00250                                    skip=0;
00251                             }
00252                             printf(",%d", i);
00253                             prevnode=i;
00254                             continue;
00255                      }
00256 
00257                      if (i == prevnode + 1) {
00258                             if (!skip) {
00259                                    printf("-");
00260                                    skip=1;
00261                             }
00262                             prevnode=i;
00263                      }
00264 
00265                      if ((i == maxnode) && skip)
00266                             printf("%d", prevnode);
00267               }
00268        }
00269        printf(")\n");
00270 
00271        for (i = 0; i <= maxnode; i++) {
00272               char buf[64];
00273               long long fr;
00274               unsigned long long sz = numa_node_size64(i, &fr);
00275               if (!numa_bitmask_isbitset(numa_nodes_ptr, i))
00276                      continue;
00277 
00278               printf("node %d cpus:", i);
00279               print_node_cpus(i);
00280               printf("node %d size: %s\n", i, fmt_mem(sz, buf));
00281               printf("node %d free: %s\n", i, fmt_mem(fr, buf));
00282        }
00283        print_distances(maxnode);
00284 }
00285 
/* If errno is non-zero, print s together with the errno description via
   perror() and exit with status 1; otherwise return normally.  Callers
   in this file clear errno before the operation they want checked. */
void checkerror(char *s)
{
	if (errno == 0)
		return;

	perror(s);
	exit(1);
}
00293 
/* Verify once that NUMA policy is supported.  On the first call
   numa_available() is queried and complain() is invoked when it reports
   failure; subsequent calls do nothing. */
void checknuma(void)
{
	static int checked;	/* becomes 1 after the first call */

	if (!checked && numa_available() < 0)
		complain("This system does not support NUMA policy");
	checked = 1;
}
00303 
/* Memory policy selected on the command line so far; -1 while no policy
   option has been processed. */
int set_policy = -1;

/* Record pol as the selected memory policy.  A second policy option is
   rejected through usage_msg(). */
void setpolicy(int pol)
{
	int already_chosen = (set_policy != -1);

	if (already_chosen)
		usage_msg("Conflicting policies");
	set_policy = pol;
}
00312 
00313 void nopolicy(void)
00314 {
00315        if (set_policy >= 0)
00316               usage_msg("specify policy after --shm/--file");
00317 }
00318 
/* Option bookkeeping updated while main() walks the command line. */
int did_cpubind = 0;	/* set by the cpu/node binding options */
int did_strict = 0;	/* set by --strict */
int do_shm = 0;		/* never set in this file */
int do_dump = 0;	/* set by --dump and --dump-nodes */
int shmattached = 0;	/* set once --shm or --file has been processed */
char *shmoption;	/* last option name passed to noshm() */

/* Reject the current option through usage_msg() when flag is non-zero.
   Used to keep CPU binding and shared memory options apart. */
void check_cpubind(int flag)
{
	/* usage_msg() terminates the line itself, so the message carries
	   no newline of its own. */
	if (flag)
		usage_msg("cannot do --cpubind on shared memory");
}
00331 
00332 void noshm(char *opt)
00333 {
00334        if (shmattached)
00335               usage_msg("%s must be before shared memory specification", opt);
00336        shmoption = opt;            
00337 }
00338 
00339 void dontshm(char *opt)
00340 {
00341        if (shmoption)
00342               usage_msg("%s shm option is not allowed before %s", shmoption, opt);
00343 }
00344 
00345 void needshm(char *opt)
00346 {
00347        if (!shmattached)
00348               usage_msg("%s must be after shared memory specification", opt);
00349 }
00350 
/* Build a getopt short option string from a long option table.
   o: table terminated by an entry whose name is NULL.
   s: output buffer; it receives a leading '+', then for every entry
      whose val is a printable character that character, followed by ':'
      when the entry takes an argument, and a terminating NUL.  The
      caller must provide room for two characters per entry plus two. */
void get_short_opts(struct option *o, char *s)
{
	const struct option *entry;
	char *out = s;

	*out++ = '+';
	for (entry = o; entry->name != NULL; entry++) {
		if (!isprint(entry->val))
			continue;
		*out++ = entry->val;
		if (entry->has_arg != 0)
			*out++ = ':';
	}
	*out = '\0';
}
00364 
00365 void check_shmbeyond(char *msg)
00366 {
00367        if (shmoffset >= shmlen) {
00368               fprintf(stderr,
00369               "numactl: region offset %#llx beyond its length %#llx at %s\n",
00370                             shmoffset, shmlen, msg);
00371               exit(1);
00372        }
00373 }
00374 
/* Parse a node list with numa_parse_nodestring(), additionally accepting
   the word "same", which re-parses the most recent node string given to
   this function.  "same" without an earlier node string is rejected
   through usage_msg().  Returns whatever numa_parse_nodestring()
   returns. */
static struct bitmask *numactl_parse_nodestring(char *s)
{
	static char *last;	/* previous explicit node string, if any */

	if (strcmp(s, "same") != 0) {
		last = s;
		return numa_parse_nodestring(s);
	}

	if (last == NULL)
		usage_msg("same needs previous node specification");
	return numa_parse_nodestring(last);
}
00388 
00389 int main(int ac, char **av)
00390 {
00391        int c, i, nnodes=0;
00392        long node=-1;
00393        char *end;
00394        char shortopts[array_len(opts)*2 + 1];
00395        struct bitmask *mask = NULL;
00396 
00397        get_short_opts(opts,shortopts);
00398        while ((c = getopt_long(ac, av, shortopts, opts, NULL)) != -1) {
00399               switch (c) {
00400               case 's': /* --show */
00401                      show();
00402                      exit(0);
00403               case 'H': /* --hardware */
00404                      nopolicy();
00405                      hardware();
00406                      exit(0);
00407               case 'i': /* --interleave */
00408                      checknuma();
00409                      mask = numactl_parse_nodestring(optarg);
00410                      if (!mask) {
00411                             printf ("<%s> is invalid\n", optarg);
00412                             usage();
00413                      }
00414 
00415                      errno = 0;
00416                      setpolicy(MPOL_INTERLEAVE);
00417                      if (shmfd >= 0)
00418                             numa_interleave_memory(shmptr, shmlen, mask);
00419                      else
00420                             numa_set_interleave_mask(mask);
00421                      checkerror("setting interleave mask");
00422                      break;
00423               case 'N': /* --cpunodebind */
00424               case 'c': /* --cpubind */
00425                      dontshm("-c/--cpubind/--cpunodebind");
00426                      checknuma();
00427                      mask = numactl_parse_nodestring(optarg);
00428                      if (!mask) {
00429                             printf ("<%s> is invalid\n", optarg);
00430                             usage();
00431                      }
00432                      errno = 0;
00433                      check_cpubind(do_shm);
00434                      did_cpubind = 1;
00435                      numa_run_on_node_mask(mask);
00436                      checkerror("sched_setaffinity");
00437                      break;
00438               case 'C': /* --physcpubind */
00439               {
00440                      struct bitmask *cpubuf;
00441                      dontshm("-C/--physcpubind");
00442                      cpubuf = numa_parse_cpustring(optarg);
00443                      if (!cpubuf) {
00444                             printf ("<%s> is invalid\n", optarg);
00445                             usage();
00446                      }
00447                      errno = 0;
00448                      check_cpubind(do_shm);
00449                      did_cpubind = 1;
00450                      numa_sched_setaffinity(0, cpubuf);
00451                      checkerror("sched_setaffinity");
00452                      free(cpubuf);
00453                      break;
00454               }
00455               case 'm': /* --membind */
00456                      checknuma();
00457                      setpolicy(MPOL_BIND);
00458                      mask = numactl_parse_nodestring(optarg);
00459                      if (!mask) {
00460                             printf ("<%s> is invalid\n", optarg);
00461                             usage();
00462                      }
00463                      errno = 0;
00464                      numa_set_bind_policy(1);
00465                      if (shmfd >= 0) {
00466                             numa_tonodemask_memory(shmptr, shmlen, mask);
00467                      } else {
00468                             numa_set_membind(mask);
00469                      }
00470                      numa_set_bind_policy(0);
00471                      checkerror("setting membind");
00472                      break;
00473               case 'p': /* --preferred */
00474                      checknuma();
00475                      setpolicy(MPOL_PREFERRED);
00476                      mask = numactl_parse_nodestring(optarg);
00477                      if (!mask) {
00478                             printf ("<%s> is invalid\n", optarg);
00479                             usage();
00480                      }
00481                      for (i=0; i<mask->size; i++) {
00482                             if (numa_bitmask_isbitset(mask, i)) {
00483                                    node = i;
00484                                    nnodes++;
00485                             }
00486                      }
00487                      if (nnodes != 1)
00488                             usage();
00489                      numa_bitmask_free(mask);
00490                      errno = 0;
00491                      numa_set_bind_policy(0);
00492                      if (shmfd >= 0)
00493                             numa_tonode_memory(shmptr, shmlen, node);
00494                      else
00495                             numa_set_preferred(node);
00496                      checkerror("setting preferred node");
00497                      break;
00498               case 'l': /* --local */
00499                      checknuma();
00500                      setpolicy(MPOL_DEFAULT);
00501                      errno = 0;
00502                      if (shmfd >= 0)
00503                             numa_setlocal_memory(shmptr, shmlen);
00504                      else
00505                             numa_set_localalloc();
00506                      checkerror("local allocation");
00507                      break;
00508               case 'S': /* --shm */
00509                      check_cpubind(did_cpubind);
00510                      nopolicy();
00511                      attach_sysvshm(optarg, "--shm");
00512                      shmattached = 1;
00513                      break;
00514               case 'f': /* --file */
00515                      check_cpubind(did_cpubind);
00516                      nopolicy();
00517                      attach_shared(optarg, "--file");
00518                      shmattached = 1;
00519                      break;
00520               case 'L': /* --length */
00521                      noshm("--length");
00522                      shmlen = memsize(optarg);
00523                      break;
00524               case 'M': /* --shmmode */
00525                      noshm("--shmmode");
00526                      shmmode = strtoul(optarg, &end, 8);
00527                      if (end == optarg || *end)
00528                             usage();
00529                      break;
00530               case 'd': /* --dump */
00531                      if (shmfd < 0)
00532                             complain(
00533                             "Cannot do --dump without shared memory.\n");
00534                      dump_shm();
00535                      do_dump = 1;
00536                      break;
00537               case 'D': /* --dump-nodes */
00538                      if (shmfd < 0)
00539                             complain(
00540                          "Cannot do --dump-nodes without shared memory.\n");
00541                      dump_shm_nodes();
00542                      do_dump = 1;
00543                      break;
00544               case 't': /* --strict */
00545                      did_strict = 1;
00546                      numa_set_strict(1);
00547                      break;
00548               case 'I': /* --shmid */
00549                      shmid = strtoul(optarg, &end, 0);
00550                      if (end == optarg || *end)
00551                             usage();
00552                      break;
00553 
00554               case 'u': /* --huge */
00555                      noshm("--huge");
00556                      shmflags |= SHM_HUGETLB;
00557                      break;
00558 
00559               case 'o':  /* --offset */
00560                      noshm("--offset");
00561                      shmoffset = memsize(optarg);
00562                      break;               
00563 
00564               case 'T': /* --touch */
00565                      needshm("--touch");
00566                      check_shmbeyond("--touch");
00567                      numa_police_memory(shmptr, shmlen);
00568                      break;
00569 
00570               case 'V': /* --verify */
00571                      needshm("--verify");
00572                      if (set_policy < 0)
00573                             complain("Need a policy first to verify");
00574                      check_shmbeyond("--verify");
00575                      numa_police_memory(shmptr, shmlen);
00576                      if (!mask)
00577                             complain("Need a mask to verify");
00578                      else
00579                             verify_shm(set_policy, mask);
00580                      break;
00581 
00582               default:
00583                      usage();
00584               }
00585        }
00586 
00587        av += optind;
00588        ac -= optind;
00589 
00590        if (shmfd >= 0) {
00591               if (*av)
00592                      usage();
00593               exit(exitcode);
00594        }
00595 
00596        if (did_strict)
00597               fprintf(stderr,
00598                      "numactl: warning. Strict flag for process ignored.\n");
00599 
00600        if (do_dump)
00601               usage_msg("cannot do --dump|--dump-shm for process");
00602 
00603        if (shmoption)
00604               usage_msg("shm related option %s for process", shmoption);
00605        
00606        if (*av == NULL)
00607               usage();
00608        execvp(*av, av);
00609        complain("execution of `%s': %s\n", av[0], strerror(errno));
00610        return 0; /* not reached */
00611 }