Back to index

numactl  2.0.8~rc4
shm.c
Go to the documentation of this file.
00001 /* Copyright (C) 2003,2004 Andi Kleen, SuSE Labs.
00002    Manage shared memory policy for numactl.
00003    The actual policy is set in numactl itself, this just sets up and maps
00004    the shared memory segments and dumps them.
00005 
00006    numactl is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU General Public
00008    License as published by the Free Software Foundation; version
00009    2.
00010 
00011    numactl is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    General Public License for more details.
00015 
00016    You should find a copy of v2 of the GNU General Public License somewhere
00017    on your Linux system; if not, write to the Free Software Foundation,
00018    Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
00019 
00020 #define _GNU_SOURCE 1
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024 #include <sys/mman.h>
00025 #include <sys/ipc.h>
00026 #include <sys/shm.h>
00027 #include <sys/fcntl.h>
00028 #include <sys/stat.h>
00029 #include <stdarg.h>
00030 #include <errno.h>
00031 #include <unistd.h>
00032 #include "numa.h"
00033 #include "numaif.h"
00034 #include "numaint.h"
00035 #include "util.h"
00036 #include "shm.h"
00037 
/* State describing the shared memory region currently being handled. */
int shmfd = -1;				/* result of shmget()/open(); -1 until attached */
long shmid = 0;				/* project id handed to ftok() */
char *shmptr = NULL;			/* address of the attached/mapped region */
unsigned long long shmlen = 0;		/* region length in bytes; 0 means "not given" */
mode_t shmmode = 0600;			/* permissions used when creating files/segments */
unsigned long long shmoffset = 0;	/* byte offset applied to the mapping */
int shmflags = 0;			/* extra flags passed to shmget() */
static int shm_pagesize = 0;		/* step used when walking the region page by page */
00046 
/* Return the huge page size in bytes.
   Scans /proc/meminfo for a "Hugepagesize: <n> kB" line and converts the
   value to bytes.  When the file cannot be opened or contains no such
   line, the regular page size from getpagesize() is returned instead. */
long huge_page_size(void)
{
	int found = 0;
	long size = 0;
	size_t len = 0;
	char *line = NULL;
	FILE *f = fopen("/proc/meminfo", "r");

	if (f != NULL) {
		while (getdelim(&line, &len, '\n', f) > 0) {
			int ps;

			if (sscanf(line, "Hugepagesize: %d kB", &ps) == 1) {
				/* Widen before scaling so the kB -> byte
				   conversion is not done in int. */
				size = (long)ps * 1024;
				found = 1;
				break;
			}
		}
		/* Always reach this cleanup: returning from inside the loop
		   would leak the getdelim() buffer and the open stream. */
		free(line);
		fclose(f);
	}
	return found ? size : getpagesize();
}
00063 
00064 static void check_region(char *opt)
00065 {
00066        if (((unsigned long)shmptr % shm_pagesize) || (shmlen % shm_pagesize)) {
00067               fprintf(stderr, "numactl: policy region not page aligned\n");
00068               exit(1);
00069        }
00070        if (!shmlen) {
00071               fprintf(stderr,
00072               "numactl: policy region length not specified before %s\n",
00073                      opt);
00074               exit(1);
00075        }
00076 }
00077 
00078 static key_t sysvkey(char *name)
00079 {
00080        int fd;
00081        key_t key = ftok(name, shmid);
00082        if (key >= 0)
00083               return key;
00084 
00085        fprintf(stderr, "numactl: Creating shm key file %s mode %04o\n",
00086               name, shmmode);
00087        fd = creat(name, shmmode);
00088        if (fd < 0)
00089               nerror("cannot create key for shm %s\n", name);  
00090        key = ftok(name, shmid);
00091        if (key < 0)
00092               nerror("cannot get key for newly created shm key file %s",
00093                      name);
00094        return key;
00095 }
00096 
00097 /* Attach a sysv style shared memory segment. */
00098 void attach_sysvshm(char *name, char *opt)
00099 {
00100        struct shmid_ds s;
00101        key_t key = sysvkey(name);
00102        
00103        shmfd = shmget(key, shmlen, shmflags);
00104        if (shmfd < 0 && errno == ENOENT) {
00105               if (shmlen == 0)
00106                      complain(
00107                      "need a --length to create a sysv shared memory segment");
00108               fprintf(stderr,
00109          "numactl: Creating shared memory segment %s id %ld mode %04o length %.fMB\n",
00110                      name, shmid, shmmode, ((double)shmlen) / (1024*1024) );
00111               shmfd = shmget(key, shmlen, IPC_CREAT|shmmode|shmflags);
00112               if (shmfd < 0)
00113                      nerror("cannot create shared memory segment");
00114        }
00115 
00116        if (shmlen == 0) {
00117               if (shmctl(shmfd, IPC_STAT, &s) < 0)
00118                      err("shmctl IPC_STAT");
00119               shmlen = s.shm_segsz;
00120        }
00121 
00122        shmptr = shmat(shmfd, NULL, SHM_RDONLY);
00123        if (shmptr == (void*)-1)
00124               err("shmat");
00125        shmptr += shmoffset;
00126 
00127        shm_pagesize = (shmflags & SHM_HUGETLB) ? huge_page_size() : getpagesize();
00128 
00129        check_region(opt);
00130 }
00131 
00132 /* Attach a shared memory file. */
00133 void attach_shared(char *name, char *opt)
00134 {
00135        struct stat64 st;
00136 
00137        shmfd = open(name, O_RDONLY);
00138        if (shmfd < 0) {
00139               errno = 0;
00140               if (shmlen == 0)
00141                       complain("need a --length to create a shared file");
00142               shmfd = open(name, O_RDWR|O_CREAT, shmmode);
00143               if (shmfd < 0)
00144                      nerror("cannot create file %s", name);
00145        }
00146        if (fstat64(shmfd, &st) < 0)
00147               err("shm stat");
00148        if (shmlen > st.st_size) {
00149               if (ftruncate64(shmfd, shmlen) < 0) {
00150                      /* XXX: we could do it by hand, but it would it
00151                         would be impossible to apply policy then.
00152                         need to fix that in the kernel. */
00153                      perror("ftruncate");
00154               }
00155        }
00156 
00157        shm_pagesize = st.st_blksize;
00158 
00159        check_region(opt);
00160        
00161        /* RED-PEN For shmlen > address space may need to map in pieces.
00162           Left for some poor 32bit soul. */
00163        shmptr = mmap64(NULL, shmlen, PROT_READ, MAP_SHARED, shmfd, shmoffset);
00164        if (shmptr == (char*)-1)
00165               err("shm mmap");                   
00166 
00167 }
00168 
00169 static void
00170 dumppol(unsigned long long start, unsigned long long end, int pol, struct bitmask *mask)
00171 {
00172        if (pol == MPOL_DEFAULT)
00173               return;
00174        printf("%016Lx-%016Lx: %s ",
00175               shmoffset+start,
00176               shmoffset+end,
00177               policy_name(pol));
00178        printmask("", mask);
00179 }
00180 
00181 /* Dump policies in a shared memory segment. */
00182 void dump_shm(void)
00183 {
00184        struct bitmask *nodes, *prevnodes;
00185        int prevpol = -1, pol;
00186        unsigned long long c, start;
00187 
00188        start = 0;
00189        if (shmlen == 0) {
00190               printf("nothing to dump\n");
00191               return;
00192        }
00193 
00194        nodes = numa_allocate_nodemask();
00195        prevnodes = numa_allocate_nodemask();
00196 
00197        for (c = 0; c < shmlen; c += shm_pagesize) {
00198               if (get_mempolicy(&pol, nodes->maskp, nodes->size, c+shmptr,
00199                                           MPOL_F_ADDR) < 0)
00200                      err("get_mempolicy on shm");
00201               if (pol == prevpol)
00202                      continue;
00203               if (prevpol != -1)
00204                      dumppol(start, c, prevpol, prevnodes);
00205               prevnodes = nodes;
00206               prevpol = pol;
00207               start = c;
00208        }
00209        dumppol(start, c, prevpol, prevnodes);
00210 }
00211 
00212 static void dumpnode(unsigned long long start, unsigned long long end, int node)
00213 {
00214        printf("%016Lx-%016Lx: %d\n", shmoffset+start, shmoffset+end, node);
00215 }
00216 
00217 /* Dump nodes in a shared memory segment. */
00218 void dump_shm_nodes(void)
00219 {
00220        int prevnode = -1, node;
00221        unsigned long long c, start;
00222 
00223        start = 0;
00224        if (shmlen == 0) {
00225               printf("nothing to dump\n");
00226               return;
00227        }
00228 
00229        for (c = 0; c < shmlen; c += shm_pagesize) {
00230               if (get_mempolicy(&node, NULL, 0, c+shmptr,
00231                                           MPOL_F_ADDR|MPOL_F_NODE) < 0)
00232                      err("get_mempolicy on shm");
00233               if (node == prevnode)
00234                      continue;
00235               if (prevnode != -1)
00236                      dumpnode(start, c, prevnode);
00237               prevnode = node;
00238               start = c;
00239        }
00240        dumpnode(start, c, prevnode);
00241 }
00242 
00243 static void vwarn(char *ptr, char *fmt, ...)
00244 {
00245        va_list ap;
00246        unsigned long off = (unsigned long)ptr - (unsigned long)shmptr;
00247        va_start(ap,fmt);
00248        printf("numactl verify %lx(%lx): ",  (unsigned long)ptr, off);
00249        vprintf(fmt, ap);
00250        va_end(ap);
00251        exitcode = 1;
00252 }
00253 
/* Return the node that follows CUR in MASK, scanning upwards and
   wrapping around after the last possible node (as reported by
   numa_num_possible_nodes()).  If CUR is the only node set, CUR itself
   is returned.  If no node is set in MASK at all, CUR is returned
   unchanged rather than searching forever. */
static unsigned interleave_next(unsigned cur, struct bitmask *mask)
{
	int numa_num_nodes = numa_num_possible_nodes();
	unsigned limit, next, tries;

	if (numa_num_nodes <= 0)
		return cur;
	limit = (unsigned)numa_num_nodes;

	/* Wrap the very first step too, so that moving on from the last
	   node considers node 0. */
	next = (cur + 1) % limit;
	for (tries = 0; tries < limit; tries++) {
		if (numa_bitmask_isbitset(mask, next))
			return next;
		next = (next + 1) % limit;
	}
	return cur;
}
00264 
00265 /* Verify policy in a shared memory segment */
00266 void verify_shm(int policy, struct bitmask *nodes)
00267 {
00268        char *p;
00269        int ilnode, node;
00270        int pol2;
00271        struct bitmask *nodes2;
00272 
00273        nodes2 = numa_allocate_nodemask();
00274        
00275        if (policy == MPOL_INTERLEAVE) {
00276               if (get_mempolicy(&ilnode, NULL, 0, shmptr,
00277                                    MPOL_F_ADDR|MPOL_F_NODE)
00278                   < 0)
00279                      err("get_mempolicy");
00280        }
00281        
00282        for (p = shmptr; p - (char *)shmptr < shmlen; p += shm_pagesize) {
00283               if (get_mempolicy(&pol2, nodes2->maskp, nodes2->size, p,
00284                                                  MPOL_F_ADDR) < 0)
00285                      err("get_mempolicy");
00286               if (pol2 != policy) {
00287                      vwarn(p, "wrong policy %s, expected %s\n",
00288                            policy_name(pol2), policy_name(policy));
00289                      return;
00290               }
00291               if (memcmp(nodes2, nodes, numa_bitmask_nbytes(nodes))) {
00292                      vwarn(p, "mismatched node mask\n");
00293                      printmask("expected", nodes);
00294                      printmask("real", nodes2);
00295               }
00296 
00297               if (get_mempolicy(&node, NULL, 0, p, MPOL_F_ADDR|MPOL_F_NODE) < 0)
00298                      err("get_mempolicy");
00299 
00300               switch (policy) {
00301               case MPOL_INTERLEAVE:
00302                      if (node < 0 || !numa_bitmask_isbitset(nodes2, node))
00303                             vwarn(p, "interleave node out of range %d\n", node);
00304                      if (node != ilnode) {
00305                             vwarn(p, "expected interleave node %d, got %d\n",
00306                                  ilnode,node);
00307                             return;
00308                      }
00309                      ilnode = interleave_next(ilnode, nodes2);
00310                      break;
00311               case MPOL_PREFERRED:
00312               case MPOL_BIND:
00313                      if (!numa_bitmask_isbitset(nodes2, node)) {
00314                             vwarn(p, "unexpected node %d\n", node);
00315                             printmask("expected", nodes2);
00316                      }      
00317                      break;
00318 
00319               case MPOL_DEFAULT:
00320                      break;
00321                      
00322               }
00323        }
00324               
00325 }