Back to index

nbd  3.2
nbd-server.c
Go to the documentation of this file.
00001 /*
00002  * Network Block Device - server
00003  *
00004  * Copyright 1996-1998 Pavel Machek, distribute under GPL
00005  *  <pavel@atrey.karlin.mff.cuni.cz>
00006  * Copyright 2001-2004 Wouter Verhelst <wouter@debian.org>
00007  * Copyright 2002 Anton Altaparmakov <aia21@cam.ac.uk>
00008  *
00009  * Version 1.0 - hopefully 64-bit-clean
00010  * Version 1.1 - merging enhancements from Josh Parsons, <josh@coombs.anu.edu.au>
00011  * Version 1.2 - autodetect size of block devices, thanx to Peter T. Breuer <ptb@it.uc3m.es>
00012  * Version 1.5 - can compile on Unix systems that don't have 64 bit integer
00013  *     type, or don't have 64 bit file offsets by defining FS_32BIT
00014  *     in compile options for nbd-server *only*. This can be done
00015  *     with make FSCHOICE=-DFS_32BIT nbd-server. (I don't have the
00016  *     original autoconf input file, or I would make it a configure
00017  *     option.) Ken Yap <ken@nlc.net.au>.
00018  * Version 1.6 - fix autodetection of block device size and really make 64 bit
00019  *     clean on 32 bit machines. Anton Altaparmakov <aia21@cam.ac.uk>
00020  * Version 2.0 - Version synchronised with client
00021  * Version 2.1 - Reap zombie client processes when they exit. Removed
00022  *     (uncommented) the _IO magic, it's no longer necessary. Wouter
00023  *     Verhelst <wouter@debian.org>
00024  * Version 2.2 - Auto switch to read-only mode (useful for floppies).
00025  * Version 2.3 - Fixed code so that Large File Support works. This
00026  *     removes the FS_32BIT compile-time directive; define
00027  *     _FILE_OFFSET_BITS=64 and _LARGEFILE_SOURCE if you used to be
00028  *     using FS_32BIT. This will allow you to use files >2GB instead of
00029  *     having to use the -m option. Wouter Verhelst <wouter@debian.org>
00030  * Version 2.4 - Added code to keep track of children, so that we can
00031  *     properly kill them from initscripts. Add a call to daemon(),
00032  *     so that processes don't think they have to wait for us, which is
00033  *     interesting for initscripts as well. Wouter Verhelst
00034  *     <wouter@debian.org>
00035  * Version 2.5 - Bugfix release: forgot to reset child_arraysize to
00036  *      zero after fork()ing, resulting in nbd-server going berserk
00037  *      when it receives a signal with at least one child open. Wouter
00038  *      Verhelst <wouter@debian.org>
00039  * 10/10/2003 - Added socket option SO_KEEPALIVE (sf.net bug 819235);
00040  *     rectified type of mainloop::size_host (sf.net bugs 814435 and
00041  *     817385); close the PID file after writing to it, so that the
00042  *     daemon can actually be found. Wouter Verhelst
00043  *     <wouter@debian.org>
00044  * 10/10/2003 - Size of the data "size_host" was wrong and so was not
00045  *     correctly put in network endianness. Many types were corrected
00046  *     (size_t and off_t instead of int).  <vspaceg@sourceforge.net>
00047  * Version 2.6 - Some code cleanup.
00048  * Version 2.7 - Better build system.
00049  * 11/02/2004 - Doxygenified the source, modularized it a bit. Needs a 
00050  *     lot more work, but this is a start. Wouter Verhelst
00051  *     <wouter@debian.org>
00052  * 16/03/2010 - Add IPv6 support.
00053  *     Kitt Tientanopajai <kitt@kitty.in.th>
00054  *     Neutron Soutmun <neo.neutron@gmail.com>
00055  *     Suriya Soutmun <darksolar@gmail.com>
00056  */
00057 
00058 /* Includes LFS defines, which defines behaviours of some of the following
00059  * headers, so must come before those */
00060 #include "lfs.h"
00061 
00062 #include <assert.h>
00063 #include <sys/types.h>
00064 #include <sys/socket.h>
00065 #include <sys/stat.h>
00066 #include <sys/select.h>
00067 #include <sys/wait.h>
00068 #ifdef HAVE_SYS_IOCTL_H
00069 #include <sys/ioctl.h>
00070 #endif
00071 #include <sys/param.h>
00072 #ifdef HAVE_SYS_MOUNT_H
00073 #include <sys/mount.h>
00074 #endif
00075 #include <signal.h>
00076 #include <errno.h>
00077 #include <netinet/tcp.h>
00078 #include <netinet/in.h>
00079 #include <netdb.h>
00080 #include <syslog.h>
00081 #include <unistd.h>
00082 #include <stdbool.h>
00083 #include <stdio.h>
00084 #include <stdlib.h>
00085 #include <string.h>
00086 #include <fcntl.h>
00087 #if HAVE_FALLOC_PH
00088 #include <linux/falloc.h>
00089 #endif
00090 #include <arpa/inet.h>
00091 #include <strings.h>
00092 #include <dirent.h>
00093 #include <unistd.h>
00094 #include <getopt.h>
00095 #include <pwd.h>
00096 #include <grp.h>
00097 #include <dirent.h>
00098 
00099 #include <glib.h>
00100 
00101 /* used in cliserv.h, so must come first */
00102 #define MY_NAME "nbd_server"
00103 #include "cliserv.h"
00104 #include "netdb-compat.h"
00105 
00106 #ifdef WITH_SDP
00107 #include <sdp_inet.h>
00108 #endif
00109 
00111 #ifndef SYSCONFDIR
00112 #define SYSCONFDIR "/etc"
00113 #endif
00114 #define CFILE SYSCONFDIR "/nbd-server/config"
00115 
00117 gchar* config_file_pos;
00118 
00120 gchar* runuser=NULL;
00122 gchar* rungroup=NULL;
00124 int glob_flags=0;
00125 
00126 /* Whether we should avoid forking */
00127 int dontfork = 0;
00128 
/*
 * Logging helpers: a syslog priority, a printf-style format and zero, one or
 * two format arguments. Without ISSERVER the priority is dropped and the
 * message goes through g_message(); with ISSERVER it is passed to syslog().
 */
#ifndef ISSERVER
#define msg2(a,b) g_message((char*)b)
#define msg3(a,b,c) g_message((char*)b,c)
#define msg4(a,b,c,d) g_message((char*)b,c,d)
#else
#define msg2(a,b) syslog(a,b)
#define msg3(a,b,c) syslog(a,b,c)
#define msg4(a,b,c,d) syslog(a,b,c,d)
#endif

/* Debug tracing: define DODBG to make DEBUG() behave like printf();
 * otherwise DEBUG() expands to nothing. */
//#define DODBG
#ifndef DODBG
#define DEBUG(...)
#else
#define DEBUG(...) printf(__VA_ARGS__)
#endif

/* Fall back to an empty version string when the build does not provide one. */
#if !defined(PACKAGE_VERSION)
#define PACKAGE_VERSION ""
#endif
00150 
/**
 * Largest positive value representable in an off_t.
 *
 * Computed in unsigned arithmetic: shifting a signed 1 into the sign bit is
 * undefined behaviour. The whole expansion is parenthesised so the macro can
 * be used inside any expression. Assumes off_t is at most 64 bits wide.
 */
#define OFFT_MAX ((off_t)((1ULL << (sizeof(off_t) * 8 - 1)) - 1))
/** Size of the line buffer used when reading the authorization file. */
#define LINELEN 256
/** One megabyte plus room for a struct nbd_reply. */
#define BUFSIZE ((1024*1024)+sizeof(struct nbd_reply))
/** NOTE(review): presumably the page size used for copy-on-write diff files — confirm. */
#define DIFFPAGESIZE 4096
/*
 * Per-export flag bits, stored in SERVER.flags. Where a configuration key or
 * command-line switch is listed, that is what sets the bit.
 */
#define F_READONLY     (1 << 0)   /* "readonly" / -r */
#define F_MULTIFILE    (1 << 1)   /* "multifile" / -m */
#define F_COPYONWRITE  (1 << 2)   /* "copyonwrite" / -c */
#define F_AUTOREADONLY (1 << 3)
#define F_SPARSE       (1 << 4)   /* "sparse_cow" */
#define F_SDP          (1 << 5)   /* "sdp" */
#define F_SYNC         (1 << 6)   /* "sync" */
#define F_FLUSH        (1 << 7)   /* "flush" */
#define F_FUA          (1 << 8)   /* "fua" */
#define F_ROTATIONAL   (1 << 9)   /* "rotational" */
#define F_TEMPORARY    (1 << 10)  /* "temporary" */
#define F_TRIM         (1 << 11)  /* "trim" */
#define F_FIXED        (1 << 12)

/* Global flag bits, stored in glob_flags. */
#define F_OLDSTYLE     (1 << 0)   /* "oldstyle", or a full command-line export */
#define F_LIST         (1 << 1)   /* "allowlist" */
00179 GHashTable *children;
00180 char pidfname[256]; 
00181 char pidftemplate[256]; 
00182 char default_authname[] = SYSCONFDIR "/nbd-server/allow"; 
00184 #define NEG_INIT     (1 << 0)
00185 #define NEG_OLD             (1 << 1)
00186 #define NEG_MODERN   (1 << 2)
00187 
00188 int modernsock=-1;     
00193 char* modern_listen;   
00194 char* modernport=NBD_DEFAULT_PORT; 
00197 bool logged_oversized=false;  
/**
 * Styles of virtualization, stored in SERVER.virtstyle. cmdline() defaults
 * to VIRT_IPLIT; the meaning of each value is implemented elsewhere.
 */
typedef enum {
	VIRT_NONE   = 0,
	VIRT_IPLIT  = 1,
	VIRT_IPHASH = 2,
	VIRT_CIDR   = 3,
} VIRT_STYLE;
00209 
00213 typedef struct {
00214        gchar* exportname;    
00215        off_t expected_size; 
00217        gchar* listenaddr;   
00218        unsigned int port;   
00219        char* authname;      
00220        int flags;           
00221        int socket;        
00222        int socket_family;   
00223        VIRT_STYLE virtstyle;
00224        uint8_t cidrlen;     
00226        gchar* prerun;            
00228        gchar* postrun;           
00230        gchar* servename;    
00231        int max_connections; 
00232        gchar* transactionlog;
00233 } SERVER;
00234 
/**
 * A file handle paired with a starting offset.
 * NOTE(review): presumably one entry per backing file of an export, with
 * startoff being that file's position inside the export — confirm.
 */
typedef struct {
	int fhandle;
	off_t startoff;
} FILE_INFO;
00242 
00243 typedef struct {
00244        off_t exportsize;    
00245        char *clientname;    
00246        char *exportname;    
00247        GArray *export;    
00250        int net;           
00251        SERVER *server;           
00252        char* difffilename;  
00253        int difffile;      
00256        u32 difffilelen;     
00257        u32 *difmap;       
00258        gboolean modern;     
00259        int transactionlogfd;
00260        int clientfeats;     
00261 } CLIENT;
00262 
/**
 * Value type of a configuration file parameter; selects how parse_cfile()
 * reads the key and stores it through PARAM.target.
 */
typedef enum {
	PARAM_INT    = 0,  /* integer */
	PARAM_INT64  = 1,  /* 64-bit integer */
	PARAM_STRING = 2,  /* string */
	PARAM_BOOL   = 3,  /* boolean */
} PARAM_TYPE;
00272 
00276 typedef struct {
00277        gchar *paramname;    
00279        gboolean required;   
00281        PARAM_TYPE ptype;    
00282        gpointer target;     
00286        gint flagval;        
00288 } PARAM;
00289 
00296 static inline const char * getcommandname(uint64_t command) {
00297        switch (command) {
00298        case NBD_CMD_READ:
00299               return "NBD_CMD_READ";
00300        case NBD_CMD_WRITE:
00301               return "NBD_CMD_WRITE";
00302        case NBD_CMD_DISC:
00303               return "NBD_CMD_DISC";
00304        case NBD_CMD_FLUSH:
00305               return "NBD_CMD_FLUSH";
00306        default:
00307               break;
00308        }
00309        return "UNKNOWN";
00310 }
00311 
00319 int authorized_client(CLIENT *opts) {
00320        const char *ERRMSG="Invalid entry '%s' in authfile '%s', so, refusing all connections.";
00321        FILE *f ;
00322        char line[LINELEN]; 
00323        char *tmp;
00324        struct in_addr addr;
00325        struct in_addr client;
00326        struct in_addr cltemp;
00327        int len;
00328 
00329        if ((f=fopen(opts->server->authname,"r"))==NULL) {
00330               msg4(LOG_INFO,"Can't open authorization file %s (%s).",
00331                    opts->server->authname,strerror(errno)) ;
00332               return 1 ; 
00333        }
00334   
00335        inet_aton(opts->clientname, &client);
00336        while (fgets(line,LINELEN,f)!=NULL) {
00337               if((tmp=strchr(line, '/'))) {
00338                      if(strlen(line)<=tmp-line) {
00339                             msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
00340                             return 0;
00341                      }
00342                      *(tmp++)=0;
00343                      if(!inet_aton(line,&addr)) {
00344                             msg4(LOG_CRIT, ERRMSG, line, opts->server->authname);
00345                             return 0;
00346                      }
00347                      len=strtol(tmp, NULL, 0);
00348                      addr.s_addr>>=32-len;
00349                      addr.s_addr<<=32-len;
00350                      memcpy(&cltemp,&client,sizeof(client));
00351                      cltemp.s_addr>>=32-len;
00352                      cltemp.s_addr<<=32-len;
00353                      if(addr.s_addr == cltemp.s_addr) {
00354                             return 1;
00355                      }
00356               }
00357               if (strncmp(line,opts->clientname,strlen(opts->clientname))==0) {
00358                      fclose(f);
00359                      return 1;
00360               }
00361        }
00362        fclose(f);
00363        return 0;
00364 }
00365 
/**
 * Read exactly len bytes from a descriptor, retrying on short reads.
 *
 * A read that fails with EAGAIN is retried. Any other failure, and end of
 * file before len bytes arrived, is reported through err().
 * NOTE(review): err() is declared outside this file and is assumed not to
 * return — confirm.
 *
 * @param f   descriptor to read from
 * @param buf destination buffer of at least len bytes
 * @param len number of bytes to read
 */
static inline void readit(int f, void *buf, size_t len) {
	char *dst = buf;
	ssize_t res;

	while (len > 0) {
		DEBUG("*");
		res = read(f, dst, len);
		if (res > 0) {
			len -= res;
			dst += res;
		} else if (res == 0) {
			/* End of file leaves errno untouched, so it must not be
			 * judged by a stale errno value. */
			err("Read failed: End of file");
		} else if (errno != EAGAIN) {
			err("Read failed: %m");
		}
	}
}
00387 
/**
 * Read and discard len bytes from a descriptor, using buf as scratch space.
 *
 * The data is pulled in pieces of at most bufsiz bytes through readit(),
 * each piece overwriting the previous one.
 *
 * @param f      descriptor to read from
 * @param buf    scratch buffer of at least bufsiz bytes
 * @param len    total number of bytes to discard
 * @param bufsiz size of buf
 */
static inline void consume(int f, void * buf, size_t len, size_t bufsiz) {
	for (size_t left = len; left > 0; ) {
		size_t chunk = bufsiz;

		if (left <= bufsiz)
			chunk = left;
		readit(f, buf, chunk);
		left -= chunk;
	}
}
00404 
00405 
/**
 * Write exactly len bytes to a descriptor, retrying on short writes.
 *
 * A write() result of zero or less is reported through err().
 *
 * @param f   descriptor to write to
 * @param buf data to send
 * @param len number of bytes to send
 */
static inline void writeit(int f, void *buf, size_t len) {
	char *src = buf;
	ssize_t sent;

	for (; len > 0; len -= sent, src += sent) {
		DEBUG("+");
		sent = write(f, src, len);
		if (sent <= 0)
			err("Send failed: %m");
	}
}
00423 
00428 void usage() {
00429        printf("This is nbd-server version " VERSION "\n");
00430        printf("Usage: [ip:|ip6@]port file_to_export [size][kKmM] [-l authorize_file] [-r] [-m] [-c] [-C configuration file] [-p PID file name] [-o section name] [-M max connections]\n"
00431               "\t-r|--read-only\t\tread only\n"
00432               "\t-m|--multi-file\t\tmultiple file\n"
00433               "\t-c|--copy-on-write\tcopy on write\n"
00434               "\t-C|--config-file\tspecify an alternate configuration file\n"
00435               "\t-l|--authorize-file\tfile with list of hosts that are allowed to\n\t\t\t\tconnect.\n"
00436               "\t-p|--pid-file\t\tspecify a filename to write our PID to\n"
00437               "\t-o|--output-config\toutput a config file section for what you\n\t\t\t\tspecified on the command line, with the\n\t\t\t\tspecified section name\n"
00438               "\t-M|--max-connections\tspecify the maximum number of opened connections\n\n"
00439               "\tif port is set to 0, stdin is used (for running from inetd).\n"
00440               "\tif file_to_export contains '%%s', it is substituted with the IP\n"
00441               "\t\taddress of the machine trying to connect\n" 
00442               "\tif ip is set, it contains the local IP address on which we're listening.\n\tif not, the server will listen on all local IP addresses\n");
00443        printf("Using configuration file %s\n", CFILE);
00444 }
00445 
00446 /* Dumps a config file section of the given SERVER*, and exits. */
00447 void dump_section(SERVER* serve, gchar* section_header) {
00448        printf("[%s]\n", section_header);
00449        printf("\texportname = %s\n", serve->exportname);
00450        printf("\tlistenaddr = %s\n", serve->listenaddr);
00451        printf("\tport = %d\n", serve->port);
00452        if(serve->flags & F_READONLY) {
00453               printf("\treadonly = true\n");
00454        }
00455        if(serve->flags & F_MULTIFILE) {
00456               printf("\tmultifile = true\n");
00457        }
00458        if(serve->flags & F_COPYONWRITE) {
00459               printf("\tcopyonwrite = true\n");
00460        }
00461        if(serve->expected_size) {
00462               printf("\tfilesize = %lld\n", (long long int)serve->expected_size);
00463        }
00464        if(serve->authname) {
00465               printf("\tauthfile = %s\n", serve->authname);
00466        }
00467        exit(EXIT_SUCCESS);
00468 }
00469 
00476 SERVER* cmdline(int argc, char *argv[]) {
00477        int i=0;
00478        int nonspecial=0;
00479        int c;
00480        struct option long_options[] = {
00481               {"read-only", no_argument, NULL, 'r'},
00482               {"multi-file", no_argument, NULL, 'm'},
00483               {"copy-on-write", no_argument, NULL, 'c'},
00484               {"dont-fork", no_argument, NULL, 'd'},
00485               {"authorize-file", required_argument, NULL, 'l'},
00486               {"config-file", required_argument, NULL, 'C'},
00487               {"pid-file", required_argument, NULL, 'p'},
00488               {"output-config", required_argument, NULL, 'o'},
00489               {"max-connection", required_argument, NULL, 'M'},
00490               {0,0,0,0}
00491        };
00492        SERVER *serve;
00493        off_t es;
00494        size_t last;
00495        char suffix;
00496        gboolean do_output=FALSE;
00497        gchar* section_header="";
00498        gchar** addr_port;
00499 
00500        if(argc==1) {
00501               return NULL;
00502        }
00503        serve=g_new0(SERVER, 1);
00504        serve->authname = g_strdup(default_authname);
00505        serve->virtstyle=VIRT_IPLIT;
00506        while((c=getopt_long(argc, argv, "-C:cdl:mo:rp:M:", long_options, &i))>=0) {
00507               switch (c) {
00508               case 1:
00509                      /* non-option argument */
00510                      switch(nonspecial++) {
00511                      case 0:
00512                             if(strchr(optarg, ':') == strrchr(optarg, ':')) {
00513                                    addr_port=g_strsplit(optarg, ":", 2);
00514 
00515                                    /* Check for "@" - maybe user using this separator
00516                                            for IPv4 address */
00517                                    if(!addr_port[1]) {
00518                                           g_strfreev(addr_port);
00519                                           addr_port=g_strsplit(optarg, "@", 2);
00520                                    }
00521                             } else {
00522                                    addr_port=g_strsplit(optarg, "@", 2);
00523                             }
00524 
00525                             if(addr_port[1]) {
00526                                    serve->port=strtol(addr_port[1], NULL, 0);
00527                                    serve->listenaddr=g_strdup(addr_port[0]);
00528                             } else {
00529                                    serve->listenaddr=NULL;
00530                                    serve->port=strtol(addr_port[0], NULL, 0);
00531                             }
00532                             g_strfreev(addr_port);
00533                             break;
00534                      case 1:
00535                             serve->exportname = g_strdup(optarg);
00536                             if(serve->exportname[0] != '/') {
00537                                    fprintf(stderr, "E: The to be exported file needs to be an absolute filename!\n");
00538                                    exit(EXIT_FAILURE);
00539                             }
00540                             break;
00541                      case 2:
00542                             last=strlen(optarg)-1;
00543                             suffix=optarg[last];
00544                             if (suffix == 'k' || suffix == 'K' ||
00545                                 suffix == 'm' || suffix == 'M')
00546                                    optarg[last] = '\0';
00547                             es = (off_t)atoll(optarg);
00548                             switch (suffix) {
00549                                    case 'm':
00550                                    case 'M':  es <<= 10;
00551                                    case 'k':
00552                                    case 'K':  es <<= 10;
00553                                    default :  break;
00554                             }
00555                             serve->expected_size = es;
00556                             break;
00557                      }
00558                      break;
00559               case 'r':
00560                      serve->flags |= F_READONLY;
00561                      break;
00562               case 'm':
00563                      serve->flags |= F_MULTIFILE;
00564                      break;
00565               case 'o':
00566                      do_output = TRUE;
00567                      section_header = g_strdup(optarg);
00568                      break;
00569               case 'p':
00570                      strncpy(pidftemplate, optarg, 256);
00571                      break;
00572               case 'c': 
00573                      serve->flags |=F_COPYONWRITE;
00574                       break;
00575               case 'd': 
00576                      dontfork = 1;
00577                       break;
00578               case 'C':
00579                      g_free(config_file_pos);
00580                      config_file_pos=g_strdup(optarg);
00581                      break;
00582               case 'l':
00583                      g_free(serve->authname);
00584                      serve->authname=g_strdup(optarg);
00585                      break;
00586               case 'M':
00587                      serve->max_connections = strtol(optarg, NULL, 0);
00588                      break;
00589               default:
00590                      usage();
00591                      exit(EXIT_FAILURE);
00592                      break;
00593               }
00594        }
00595        /* What's left: the port to export, the name of the to be exported
00596         * file, and, optionally, the size of the file, in that order. */
00597        if(nonspecial<2) {
00598               g_free(serve);
00599               serve=NULL;
00600        } else {
00601               glob_flags |= F_OLDSTYLE;
00602        }
00603        if(do_output) {
00604               if(!serve) {
00605                      g_critical("Need a complete configuration on the command line to output a config file section!");
00606                      exit(EXIT_FAILURE);
00607               }
00608               dump_section(serve, section_header);
00609        }
00610        return serve;
00611 }
00612 
/**
 * Error codes reported through GError by the configuration file parser
 * (parse_cfile() and do_cfile_dir()).
 */
typedef enum {
	CFILE_NOTFOUND          = 0,  /* the configuration file could not be opened */
	CFILE_MISSING_GENERIC   = 1,  /* the file does not start with a [generic] group */
	CFILE_KEY_MISSING       = 2,
	CFILE_VALUE_INVALID     = 3,
	CFILE_VALUE_UNSUPPORTED = 4,
	CFILE_PROGERR           = 5,
	CFILE_NO_EXPORTS        = 6,
	CFILE_INCORRECT_PORT    = 7,
	CFILE_DIR_UNKNOWN       = 8,  /* the include directory is invalid */
	CFILE_READDIR_ERR       = 9,  /* reading the include directory failed */
} CFILE_ERRORS;
00630 
00634 void remove_server(gpointer s) {
00635        SERVER *server;
00636 
00637        server=(SERVER*)s;
00638        g_free(server->exportname);
00639        if(server->authname)
00640               g_free(server->authname);
00641        if(server->listenaddr)
00642               g_free(server->listenaddr);
00643        if(server->prerun)
00644               g_free(server->prerun);
00645        if(server->postrun)
00646               g_free(server->postrun);
00647        if(server->transactionlog)
00648               g_free(server->transactionlog);
00649        g_free(server);
00650 }
00651 
00657 SERVER* dup_serve(SERVER *s) {
00658        SERVER *serve = NULL;
00659 
00660        serve=g_new0(SERVER, 1);
00661        if(serve == NULL)
00662               return NULL;
00663 
00664        if(s->exportname)
00665               serve->exportname = g_strdup(s->exportname);
00666 
00667        serve->expected_size = s->expected_size;
00668 
00669        if(s->listenaddr)
00670               serve->listenaddr = g_strdup(s->listenaddr);
00671 
00672        serve->port = s->port;
00673 
00674        if(s->authname)
00675               serve->authname = strdup(s->authname);
00676 
00677        serve->flags = s->flags;
00678        serve->socket = s->socket;
00679        serve->socket_family = s->socket_family;
00680        serve->virtstyle = s->virtstyle;
00681        serve->cidrlen = s->cidrlen;
00682 
00683        if(s->prerun)
00684               serve->prerun = g_strdup(s->prerun);
00685 
00686        if(s->postrun)
00687               serve->postrun = g_strdup(s->postrun);
00688 
00689        if(s->transactionlog)
00690               serve->transactionlog = g_strdup(s->transactionlog);
00691        
00692        if(s->servename)
00693               serve->servename = g_strdup(s->servename);
00694 
00695        serve->max_connections = s->max_connections;
00696 
00697        return serve;
00698 }
00699 
00706 int append_serve(SERVER *s, GArray *a) {
00707        SERVER *ns = NULL;
00708        struct addrinfo hints;
00709        struct addrinfo *ai = NULL;
00710        struct addrinfo *rp = NULL;
00711        char   host[NI_MAXHOST];
00712        gchar  *port = NULL;
00713        int e;
00714        int ret;
00715 
00716        if(!s) {
00717               err("Invalid parsing server");
00718               return -1;
00719        }
00720 
00721        port = g_strdup_printf("%d", s->port);
00722 
00723        memset(&hints,'\0',sizeof(hints));
00724        hints.ai_family = AF_UNSPEC;
00725        hints.ai_socktype = SOCK_STREAM;
00726        hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
00727        hints.ai_protocol = IPPROTO_TCP;
00728 
00729        e = getaddrinfo(s->listenaddr, port, &hints, &ai);
00730 
00731        if (port)
00732               g_free(port);
00733 
00734        if(e == 0) {
00735               for (rp = ai; rp != NULL; rp = rp->ai_next) {
00736                      e = getnameinfo(rp->ai_addr, rp->ai_addrlen, host, sizeof(host), NULL, 0, NI_NUMERICHOST);
00737 
00738                      if (e != 0) { // error
00739                             fprintf(stderr, "getnameinfo: %s\n", gai_strerror(e));
00740                             continue;
00741                      }
00742 
00743                      // duplicate server and set listenaddr to resolved IP address
00744                      ns = dup_serve (s);
00745                      if (ns) {
00746                             ns->listenaddr = g_strdup(host);
00747                             ns->socket_family = rp->ai_family;
00748                             g_array_append_val(a, *ns);
00749                             free(ns);
00750                             ns = NULL;
00751                      }
00752               }
00753 
00754               ret = 0;
00755        } else {
00756               fprintf(stderr, "getaddrinfo failed on listen host/address: %s (%s)\n", s->listenaddr ? s->listenaddr : "any", gai_strerror(e));
00757               ret = -1;
00758        }
00759 
00760        if (ai)
00761               freeaddrinfo(ai);
00762 
00763        return ret;
00764 }
00765 
00766 /* forward definition of parse_cfile */
00767 GArray* parse_cfile(gchar* f, bool have_global, GError** e);
00768 
00774 GArray* do_cfile_dir(gchar* dir, GError** e) {
00775        DIR* dirh = opendir(dir);
00776        GQuark errdomain = g_quark_from_string("do_cfile_dir");
00777        struct dirent* de;
00778        gchar* fname;
00779        GArray* retval = NULL;
00780        GArray* tmp;
00781        struct stat stbuf;
00782 
00783        if(!dir) {
00784               g_set_error(e, errdomain, CFILE_DIR_UNKNOWN, "Invalid directory specified: %s", strerror(errno));
00785               return NULL;
00786        }
00787        errno=0;
00788        while((de = readdir(dirh))) {
00789               int saved_errno=errno;
00790               fname = g_build_filename(dir, de->d_name, NULL);
00791               switch(de->d_type) {
00792                      case DT_UNKNOWN:
00793                             /* Filesystem doesn't return type of
00794                              * file through readdir. Run stat() on
00795                              * the file instead */
00796                             if(stat(fname, &stbuf)) {
00797                                    perror("stat");
00798                                    goto err_out;
00799                             }
00800                             if (!S_ISREG(stbuf.st_mode)) {
00801                                    goto next;
00802                             }
00803                      case DT_REG:
00804                             /* Skip unless the name ends with '.conf' */
00805                             if(strcmp((de->d_name + strlen(de->d_name) - 5), ".conf")) {
00806                                    goto next;
00807                             }
00808                             tmp = parse_cfile(fname, FALSE, e);
00809                             errno=saved_errno;
00810                             if(*e) {
00811                                    goto err_out;
00812                             }
00813                             if(!retval)
00814                                    retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
00815                             retval = g_array_append_vals(retval, tmp->data, tmp->len);
00816                             g_array_free(tmp, TRUE);
00817                      default:
00818                             break;
00819               }
00820        next:
00821               g_free(fname);
00822        }
00823        if(errno) {
00824               g_set_error(e, errdomain, CFILE_READDIR_ERR, "Error trying to read directory: %s", strerror(errno));
00825        err_out:
00826               if(retval)
00827                      g_array_free(retval, TRUE);
00828               return NULL;
00829        }
00830        return retval;
00831 }
00832 
00843 GArray* parse_cfile(gchar* f, bool have_global, GError** e) {
00844        const char* DEFAULT_ERROR = "Could not parse %s in group %s: %s";
00845        const char* MISSING_REQUIRED_ERROR = "Could not find required value %s in group %s: %s";
00846        gchar* cfdir = NULL;
00847        SERVER s;
00848        gchar *virtstyle=NULL;
00849        PARAM lp[] = {
00850               { "exportname", TRUE,       PARAM_STRING,        &(s.exportname),     0 },
00851               { "port",     TRUE,  PARAM_INT,    &(s.port),           0 },
00852               { "authfile", FALSE, PARAM_STRING, &(s.authname),              0 },
00853               { "filesize", FALSE, PARAM_OFFT,   &(s.expected_size),  0 },
00854               { "virtstyle",       FALSE, PARAM_STRING, &(virtstyle),        0 },
00855               { "prerun",   FALSE, PARAM_STRING, &(s.prerun),         0 },
00856               { "postrun",  FALSE, PARAM_STRING, &(s.postrun),        0 },
00857               { "transactionlog", FALSE, PARAM_STRING, &(s.transactionlog),  0 },
00858               { "readonly", FALSE, PARAM_BOOL,   &(s.flags),          F_READONLY },
00859               { "multifile",       FALSE, PARAM_BOOL,   &(s.flags),          F_MULTIFILE },
00860               { "copyonwrite", FALSE,     PARAM_BOOL,   &(s.flags),          F_COPYONWRITE },
00861               { "sparse_cow",      FALSE, PARAM_BOOL,   &(s.flags),          F_SPARSE },
00862               { "sdp",      FALSE, PARAM_BOOL,   &(s.flags),          F_SDP },
00863               { "sync",     FALSE,  PARAM_BOOL,  &(s.flags),          F_SYNC },
00864               { "flush",    FALSE,  PARAM_BOOL,  &(s.flags),          F_FLUSH },
00865               { "fua",      FALSE,  PARAM_BOOL,  &(s.flags),          F_FUA },
00866               { "rotational",      FALSE,  PARAM_BOOL,  &(s.flags),          F_ROTATIONAL },
00867               { "temporary",       FALSE,  PARAM_BOOL,  &(s.flags),          F_TEMPORARY },
00868               { "trim",     FALSE,  PARAM_BOOL,  &(s.flags),          F_TRIM },
00869               { "listenaddr", FALSE,  PARAM_STRING,   &(s.listenaddr),       0 },
00870               { "maxconnections", FALSE, PARAM_INT,     &(s.max_connections),       0 },
00871        };
00872        const int lp_size=sizeof(lp)/sizeof(PARAM);
00873        PARAM gp[] = {
00874               { "user",     FALSE, PARAM_STRING, &runuser,     0 },
00875               { "group",    FALSE, PARAM_STRING, &rungroup,    0 },
00876               { "oldstyle", FALSE, PARAM_BOOL,   &glob_flags,  F_OLDSTYLE },
00877               { "listenaddr", FALSE, PARAM_STRING,      &modern_listen, 0 },
00878               { "port",     FALSE, PARAM_STRING, &modernport,  0 },
00879               { "includedir", FALSE, PARAM_STRING,      &cfdir,              0 },
00880               { "allowlist",  FALSE, PARAM_BOOL, &glob_flags,  F_LIST },
00881        };
00882        PARAM* p=gp;
00883        int p_size=sizeof(gp)/sizeof(PARAM);
00884        GKeyFile *cfile;
00885        GError *err = NULL;
00886        const char *err_msg=NULL;
00887        GQuark errdomain;
00888        GArray *retval=NULL;
00889        gchar **groups;
00890        gboolean bval;
00891        gint ival;
00892        gint64 i64val;
00893        gchar* sval;
00894        gchar* startgroup;
00895        gint i;
00896        gint j;
00897 
00898        errdomain = g_quark_from_string("parse_cfile");
00899        cfile = g_key_file_new();
00900        retval = g_array_new(FALSE, TRUE, sizeof(SERVER));
00901        if(!g_key_file_load_from_file(cfile, f, G_KEY_FILE_KEEP_COMMENTS |
00902                      G_KEY_FILE_KEEP_TRANSLATIONS, &err)) {
00903               g_set_error(e, errdomain, CFILE_NOTFOUND, "Could not open config file %s.", f);
00904               g_key_file_free(cfile);
00905               return retval;
00906        }
00907        startgroup = g_key_file_get_start_group(cfile);
00908        if((!startgroup || strcmp(startgroup, "generic")) && have_global) {
00909               g_set_error(e, errdomain, CFILE_MISSING_GENERIC, "Config file does not contain the [generic] group!");
00910               g_key_file_free(cfile);
00911               return NULL;
00912        }
00913        groups = g_key_file_get_groups(cfile, NULL);
00914        for(i=0;groups[i];i++) {
00915               memset(&s, '\0', sizeof(SERVER));
00916 
00917               /* After the [generic] group or when we're parsing an include
00918                * directory, start parsing exports */
00919               if(i==1 || !have_global) {
00920                      p=lp;
00921                      p_size=lp_size;
00922                      if(!(glob_flags & F_OLDSTYLE)) {
00923                             lp[1].required = FALSE;
00924                      }
00925               } 
00926               for(j=0;j<p_size;j++) {
00927                      assert(p[j].target != NULL);
00928                      assert(p[j].ptype==PARAM_INT||p[j].ptype==PARAM_STRING||p[j].ptype==PARAM_BOOL||p[j].ptype==PARAM_INT64);
00929                      switch(p[j].ptype) {
00930                             case PARAM_INT:
00931                                    ival = g_key_file_get_integer(cfile,
00932                                                         groups[i],
00933                                                         p[j].paramname,
00934                                                         &err);
00935                                    if(!err) {
00936                                           *((gint*)p[j].target) = ival;
00937                                    }
00938                                    break;
00939                             case PARAM_INT64:
00940                                    i64val = g_key_file_get_int64(cfile,
00941                                                         groups[i],
00942                                                         p[j].paramname,
00943                                                         &err);
00944                                    if(!err) {
00945                                           *((gint64*)p[j].target) = i64val;
00946                                    }
00947                                    break;
00948                             case PARAM_STRING:
00949                                    sval = g_key_file_get_string(cfile,
00950                                                         groups[i],
00951                                                         p[j].paramname,
00952                                                         &err);
00953                                    if(!err) {
00954                                           *((gchar**)p[j].target) = sval;
00955                                    }
00956                                    break;
00957                             case PARAM_BOOL:
00958                                    bval = g_key_file_get_boolean(cfile,
00959                                                  groups[i],
00960                                                  p[j].paramname, &err);
00961                                    if(!err) {
00962                                           if(bval) {
00963                                                  *((gint*)p[j].target) |= p[j].flagval;
00964                                           } else {
00965                                                  *((gint*)p[j].target) &= ~(p[j].flagval);
00966                                           }
00967                                    }
00968                                    break;
00969                      }
00970                      if(err) {
00971                             if(err->code == G_KEY_FILE_ERROR_KEY_NOT_FOUND) {
00972                                    if(!p[j].required) {
00973                                           /* Ignore not-found error for optional values */
00974                                           g_clear_error(&err);
00975                                           continue;
00976                                    } else {
00977                                           err_msg = MISSING_REQUIRED_ERROR;
00978                                    }
00979                             } else {
00980                                    err_msg = DEFAULT_ERROR;
00981                             }
00982                             g_set_error(e, errdomain, CFILE_VALUE_INVALID, err_msg, p[j].paramname, groups[i], err->message);
00983                             g_array_free(retval, TRUE);
00984                             g_error_free(err);
00985                             g_key_file_free(cfile);
00986                             return NULL;
00987                      }
00988               }
00989               if(virtstyle) {
00990                      if(!strncmp(virtstyle, "none", 4)) {
00991                             s.virtstyle=VIRT_NONE;
00992                      } else if(!strncmp(virtstyle, "ipliteral", 9)) {
00993                             s.virtstyle=VIRT_IPLIT;
00994                      } else if(!strncmp(virtstyle, "iphash", 6)) {
00995                             s.virtstyle=VIRT_IPHASH;
00996                      } else if(!strncmp(virtstyle, "cidrhash", 8)) {
00997                             s.virtstyle=VIRT_CIDR;
00998                             if(strlen(virtstyle)<10) {
00999                                    g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s: missing length", virtstyle, groups[i]);
01000                                    g_array_free(retval, TRUE);
01001                                    g_key_file_free(cfile);
01002                                    return NULL;
01003                             }
01004                             s.cidrlen=strtol(virtstyle+8, NULL, 0);
01005                      } else {
01006                             g_set_error(e, errdomain, CFILE_VALUE_INVALID, "Invalid value %s for parameter virtstyle in group %s", virtstyle, groups[i]);
01007                             g_array_free(retval, TRUE);
01008                             g_key_file_free(cfile);
01009                             return NULL;
01010                      }
01011               } else {
01012                      s.virtstyle=VIRT_IPLIT;
01013               }
01014               if(s.port && !(glob_flags & F_OLDSTYLE)) {
01015                      g_warning("A port was specified, but oldstyle exports were not requested. This may not do what you expect.");
01016                      g_warning("Please read 'man 5 nbd-server' and search for oldstyle for more info");
01017               }
01018               /* Don't need to free this, it's not our string */
01019               virtstyle=NULL;
01020               /* Don't append values for the [generic] group */
01021               if(i>0 || !have_global) {
01022                      s.socket_family = AF_UNSPEC;
01023                      s.servename = groups[i];
01024 
01025                      append_serve(&s, retval);
01026               }
01027 #ifndef WITH_SDP
01028               if(s.flags & F_SDP) {
01029                      g_set_error(e, errdomain, CFILE_VALUE_UNSUPPORTED, "This nbd-server was built without support for SDP, yet group %s uses it", groups[i]);
01030                      g_array_free(retval, TRUE);
01031                      g_key_file_free(cfile);
01032                      return NULL;
01033               }
01034 #endif
01035        }
01036        g_key_file_free(cfile);
01037        if(cfdir) {
01038               GArray* extra = do_cfile_dir(cfdir, e);
01039               if(extra) {
01040                      retval = g_array_append_vals(retval, extra->data, extra->len);
01041                      i+=extra->len;
01042                      g_array_free(extra, TRUE);
01043               } else {
01044                      if(*e) {
01045                             g_array_free(retval, TRUE);
01046                             return NULL;
01047                      }
01048               }
01049        }
01050        if(i==1 && have_global) {
01051               g_set_error(e, errdomain, CFILE_NO_EXPORTS, "The config file does not specify any exports");
01052        }
01053        return retval;
01054 }
01055 
01061 void sigchld_handler(int s) {
01062         int status;
01063        int* i;
01064        pid_t pid;
01065 
01066        while((pid=waitpid(-1, &status, WNOHANG)) > 0) {
01067               if(WIFEXITED(status)) {
01068                      msg3(LOG_INFO, "Child exited with %d", WEXITSTATUS(status));
01069               }
01070               i=g_hash_table_lookup(children, &pid);
01071               if(!i) {
01072                      msg3(LOG_INFO, "SIGCHLD received for an unknown child with PID %ld", (long)pid);
01073               } else {
01074                      DEBUG("Removing %d from the list of children", pid);
01075                      g_hash_table_remove(children, &pid);
01076               }
01077        }
01078 }
01079 
01088 void killchild(gpointer key, gpointer value, gpointer user_data) {
01089        pid_t *pid=value;
01090 
01091        kill(*pid, SIGTERM);
01092 }
01093 
01099 void sigterm_handler(int s) {
01100        g_hash_table_foreach(children, killchild, NULL);
01101        unlink(pidfname);
01102 
01103        exit(EXIT_SUCCESS);
01104 }
01105 
01113 off_t size_autodetect(int fhandle) {
01114        off_t es;
01115        u64 bytes __attribute__((unused));
01116        struct stat stat_buf;
01117        int error;
01118 
01119 #ifdef HAVE_SYS_MOUNT_H
01120 #ifdef HAVE_SYS_IOCTL_H
01121 #ifdef BLKGETSIZE64
01122        DEBUG("looking for export size with ioctl BLKGETSIZE64\n");
01123        if (!ioctl(fhandle, BLKGETSIZE64, &bytes) && bytes) {
01124               return (off_t)bytes;
01125        }
01126 #endif /* BLKGETSIZE64 */
01127 #endif /* HAVE_SYS_IOCTL_H */
01128 #endif /* HAVE_SYS_MOUNT_H */
01129 
01130        DEBUG("looking for fhandle size with fstat\n");
01131        stat_buf.st_size = 0;
01132        error = fstat(fhandle, &stat_buf);
01133        if (!error) {
01134               /* always believe stat if a regular file as it might really
01135                * be zero length */
01136               if (S_ISREG(stat_buf.st_mode) || (stat_buf.st_size > 0))
01137                      return (off_t)stat_buf.st_size;
01138         } else {
01139                 err("fstat failed: %m");
01140         }
01141 
01142        DEBUG("looking for fhandle size with lseek SEEK_END\n");
01143        es = lseek(fhandle, (off_t)0, SEEK_END);
01144        if (es > ((off_t)0)) {
01145               return es;
01146         } else {
01147                 DEBUG("lseek failed: %d", errno==EBADF?1:(errno==ESPIPE?2:(errno==EINVAL?3:4)));
01148         }
01149 
01150        err("Could not find size of exported block device: %m");
01151        return OFFT_MAX;
01152 }
01153 
01165 int get_filepos(GArray* export, off_t a, int* fhandle, off_t* foffset, size_t* maxbytes ) {
01166        /* Negative offset not allowed */
01167        if(a < 0)
01168               return -1;
01169 
01170        /* Binary search for last file with starting offset <= a */
01171        FILE_INFO fi;
01172        int start = 0;
01173        int end = export->len - 1;
01174        while( start <= end ) {
01175               int mid = (start + end) / 2;
01176               fi = g_array_index(export, FILE_INFO, mid);
01177               if( fi.startoff < a ) {
01178                      start = mid + 1;
01179               } else if( fi.startoff > a ) {
01180                      end = mid - 1;
01181               } else {
01182                      start = end = mid;
01183                      break;
01184               }
01185        }
01186 
01187        /* end should never go negative, since first startoff is 0 and a >= 0 */
01188        assert(end >= 0);
01189 
01190        fi = g_array_index(export, FILE_INFO, end);
01191        *fhandle = fi.fhandle;
01192        *foffset = a - fi.startoff;
01193        *maxbytes = 0;
01194        if( end+1 < export->len ) {
01195               FILE_INFO fi_next = g_array_index(export, FILE_INFO, end+1);
01196               *maxbytes = fi_next.startoff - a;
01197        }
01198 
01199        return 0;
01200 }
01201 
/**
 * Position a file descriptor at an absolute offset, reporting a failure
 * through err().
 *
 * @param handle file descriptor to reposition
 * @param a absolute offset (SEEK_SET) to move to
 **/
void myseek(int handle,off_t a) {
	const off_t pos = lseek(handle, a, SEEK_SET);

	if (pos < 0) {
		err("Can not seek locally!\n");
	}
}
01215 
01227 ssize_t rawexpwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
01228        int fhandle;
01229        off_t foffset;
01230        size_t maxbytes;
01231        ssize_t retval;
01232 
01233        if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
01234               return -1;
01235        if(maxbytes && len > maxbytes)
01236               len = maxbytes;
01237 
01238        DEBUG("(WRITE to fd %d offset %llu len %u fua %d), ", fhandle, (long long unsigned)foffset, (unsigned int)len, fua);
01239 
01240        myseek(fhandle, foffset);
01241        retval = write(fhandle, buf, len);
01242        if(client->server->flags & F_SYNC) {
01243               fsync(fhandle);
01244        } else if (fua) {
01245 
01246          /* This is where we would do the following
01247           *   #ifdef USE_SYNC_FILE_RANGE
01248           * However, we don't, for the reasons set out below
01249           * by Christoph Hellwig <hch@infradead.org>
01250           *
01251           * [BEGINS] 
01252           * fdatasync is equivalent to fsync except that it does not flush
01253           * non-essential metadata (basically just timestamps in practice), but it
01254           * does flush metadata requried to find the data again, e.g. allocation
01255           * information and extent maps.  sync_file_range does nothing but flush
01256           * out pagecache content - it means you basically won't get your data
01257           * back in case of a crash if you either:
01258           * 
01259           *  a) have a volatile write cache in your disk (e.g. any normal SATA disk)
01260           *  b) are using a sparse file on a filesystem
01261           *  c) are using a fallocate-preallocated file on a filesystem
01262           *  d) use any file on a COW filesystem like btrfs
01263           * 
01264           * e.g. it only does anything useful for you if you do not have a volatile
01265           * write cache, and either use a raw block device node, or just overwrite
01266           * an already fully allocated (and not preallocated) file on a non-COW
01267           * filesystem.
01268           * [ENDS]
01269           *
01270           * What we should do is open a second FD with O_DSYNC set, then write to
01271           * that when appropriate. However, with a Linux client, every REQ_FUA
01272           * immediately follows a REQ_FLUSH, so fdatasync does not cause performance
01273           * problems.
01274           *
01275           */
01276 #if 0
01277               sync_file_range(fhandle, foffset, len,
01278                             SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE |
01279                             SYNC_FILE_RANGE_WAIT_AFTER);
01280 #else
01281               fdatasync(fhandle);
01282 #endif
01283        }
01284        return retval;
01285 }
01286 
01297 int rawexpwrite_fully(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
01298        ssize_t ret=0;
01299 
01300        while(len > 0 && (ret=rawexpwrite(a, buf, len, client, fua)) > 0 ) {
01301               a += ret;
01302               buf += ret;
01303               len -= ret;
01304        }
01305        return (ret < 0 || len != 0);
01306 }
01307 
01319 ssize_t rawexpread(off_t a, char *buf, size_t len, CLIENT *client) {
01320        int fhandle;
01321        off_t foffset;
01322        size_t maxbytes;
01323 
01324        if(get_filepos(client->export, a, &fhandle, &foffset, &maxbytes))
01325               return -1;
01326        if(maxbytes && len > maxbytes)
01327               len = maxbytes;
01328 
01329        DEBUG("(READ from fd %d offset %llu len %u), ", fhandle, (long long unsigned int)foffset, (unsigned int)len);
01330 
01331        myseek(fhandle, foffset);
01332        return read(fhandle, buf, len);
01333 }
01334 
01339 int rawexpread_fully(off_t a, char *buf, size_t len, CLIENT *client) {
01340        ssize_t ret=0;
01341 
01342        while(len > 0 && (ret=rawexpread(a, buf, len, client)) > 0 ) {
01343               a += ret;
01344               buf += ret;
01345               len -= ret;
01346        }
01347        return (ret < 0 || len != 0);
01348 }
01349 
01360 int expread(off_t a, char *buf, size_t len, CLIENT *client) {
01361        off_t rdlen, offset;
01362        off_t mapcnt, mapl, maph, pagestart;
01363 
01364        if (!(client->server->flags & F_COPYONWRITE))
01365               return(rawexpread_fully(a, buf, len, client));
01366        DEBUG("Asked to read %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
01367 
01368        mapl=a/DIFFPAGESIZE; maph=(a+len-1)/DIFFPAGESIZE;
01369 
01370        for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
01371               pagestart=mapcnt*DIFFPAGESIZE;
01372               offset=a-pagestart;
01373               rdlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
01374                      len : (size_t)DIFFPAGESIZE-offset;
01375               if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
01376                      DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
01377                             (unsigned long)(client->difmap[mapcnt]));
01378                      myseek(client->difffile, client->difmap[mapcnt]*DIFFPAGESIZE+offset);
01379                      if (read(client->difffile, buf, rdlen) != rdlen) return -1;
01380               } else { /* the block is not there */
01381                      DEBUG("Page %llu is not here, we read the original one\n",
01382                             (unsigned long long)mapcnt);
01383                      if(rawexpread_fully(a, buf, rdlen, client)) return -1;
01384               }
01385               len-=rdlen; a+=rdlen; buf+=rdlen;
01386        }
01387        return 0;
01388 }
01389 
01402 int expwrite(off_t a, char *buf, size_t len, CLIENT *client, int fua) {
01403        char pagebuf[DIFFPAGESIZE];
01404        off_t mapcnt,mapl,maph;
01405        off_t wrlen,rdlen; 
01406        off_t pagestart;
01407        off_t offset;
01408 
01409        if (!(client->server->flags & F_COPYONWRITE))
01410               return(rawexpwrite_fully(a, buf, len, client, fua)); 
01411        DEBUG("Asked to write %u bytes at %llu.\n", (unsigned int)len, (unsigned long long)a);
01412 
01413        mapl=a/DIFFPAGESIZE ; maph=(a+len-1)/DIFFPAGESIZE ;
01414 
01415        for (mapcnt=mapl;mapcnt<=maph;mapcnt++) {
01416               pagestart=mapcnt*DIFFPAGESIZE ;
01417               offset=a-pagestart ;
01418               wrlen=(0<DIFFPAGESIZE-offset && len<(size_t)(DIFFPAGESIZE-offset)) ?
01419                      len : (size_t)DIFFPAGESIZE-offset;
01420 
01421               if (client->difmap[mapcnt]!=(u32)(-1)) { /* the block is already there */
01422                      DEBUG("Page %llu is at %lu\n", (unsigned long long)mapcnt,
01423                             (unsigned long)(client->difmap[mapcnt])) ;
01424                      myseek(client->difffile,
01425                                    client->difmap[mapcnt]*DIFFPAGESIZE+offset);
01426                      if (write(client->difffile, buf, wrlen) != wrlen) return -1 ;
01427               } else { /* the block is not there */
01428                      myseek(client->difffile,client->difffilelen*DIFFPAGESIZE) ;
01429                      client->difmap[mapcnt]=(client->server->flags&F_SPARSE)?mapcnt:client->difffilelen++;
01430                      DEBUG("Page %llu is not here, we put it at %lu\n",
01431                             (unsigned long long)mapcnt,
01432                             (unsigned long)(client->difmap[mapcnt]));
01433                      rdlen=DIFFPAGESIZE ;
01434                      if (rawexpread_fully(pagestart, pagebuf, rdlen, client))
01435                             return -1;
01436                      memcpy(pagebuf+offset,buf,wrlen) ;
01437                      if (write(client->difffile, pagebuf, DIFFPAGESIZE) !=
01438                                    DIFFPAGESIZE)
01439                             return -1;
01440               }                                             
01441               len-=wrlen ; a+=wrlen ; buf+=wrlen ;
01442        }
01443        if (client->server->flags & F_SYNC) {
01444               fsync(client->difffile);
01445        } else if (fua) {
01446               /* open question: would it be cheaper to do multiple sync_file_ranges?
01447                  as we iterate through the above?
01448                */
01449               fdatasync(client->difffile);
01450        }
01451        return 0;
01452 }
01453 
01460 int expflush(CLIENT *client) {
01461        gint i;
01462 
01463         if (client->server->flags & F_COPYONWRITE) {
01464               return fsync(client->difffile);
01465        }
01466        
01467        for (i = 0; i < client->export->len; i++) {
01468               FILE_INFO fi = g_array_index(client->export, FILE_INFO, i);
01469               if (fsync(fi.fhandle) < 0)
01470                      return -1;
01471        }
01472        
01473        return 0;
01474 }
01475 
01476 /*
01477  * If the current system supports it, call fallocate() on the backend
01478  * file to resparsify stuff that isn't needed anymore (see NBD_CMD_TRIM)
01479  */
01480 int exptrim(struct nbd_request* req, CLIENT* client) {
01481 #if HAVE_FALLOC_PH
01482        FILE_INFO prev = g_array_index(client->export, FILE_INFO, 0);
01483        FILE_INFO cur = prev;
01484        int i = 1;
01485        /* We're running on a system that supports the
01486         * FALLOC_FL_PUNCH_HOLE option to re-sparsify a file */
01487        do {
01488               if(i<client->export->len) {
01489                      cur = g_array_index(client->export, FILE_INFO, i);
01490               }
01491               if(prev.startoff < req->from) {
01492                      off_t curoff = req->from - prev.startoff;
01493                      off_t curlen = cur.startoff - prev.startoff - curoff;
01494                      fallocate(prev.fhandle, FALLOC_FL_PUNCH_HOLE, curoff, curlen);
01495               }
01496               prev = cur;
01497        } while(i < client->export->len && cur.startoff < (req->from + req->len));
01498        DEBUG("Performed TRIM request from %llu to %llu", (unsigned long long) req->from, (unsigned long long) req->len);
01499 #else
01500        DEBUG("Ignoring TRIM request (not supported on current platform");
01501 #endif
01502        return 0;
01503 }
01504 
/**
 * Send one option reply to a client during option negotiation.
 *
 * The reply consists of a fixed 64-bit magic, the option being answered,
 * the reply type, the length of the payload and the payload itself. All
 * header fields are put on the wire in network byte order; this includes
 * `opt`, which callers pass in host byte order.
 *
 * @param opt the option this reply refers to, in host byte order
 * @param net socket to write the reply to
 * @param reply_type the reply type, in host byte order
 * @param datasize number of payload bytes in `data`
 * @param data payload, may be NULL when `datasize` is 0
 **/
static void send_reply(uint32_t opt, int net, uint32_t reply_type, size_t datasize, void* data) {
	uint64_t magic = htonll(0x3e889045565a9LL);
	uint32_t datsize = htonl(datasize);
	struct iovec v_data[] = {
		{ &magic, sizeof(magic) },
		{ &opt, sizeof(opt) },
		{ &reply_type, sizeof(reply_type) },
		{ &datsize, sizeof(datsize) },
		{ data, datasize },
	};

	/* The iovec entries point at the parameters themselves, so
	 * converting them here is what goes out on the wire. */
	opt = htonl(opt);
	reply_type = htonl(reply_type);

	if (writev(net, v_data, sizeof(v_data) / sizeof(v_data[0])) < 0)
		err_nonfatal("Could not send option reply: %m");
}
01518 
01519 static CLIENT* handle_export_name(uint32_t opt, int net, GArray* servers, uint32_t cflags) {
01520        uint32_t namelen;
01521        char* name;
01522        int i;
01523 
01524        if (read(net, &namelen, sizeof(namelen)) < 0)
01525               err("Negotiation failed/7: %m");
01526        namelen = ntohl(namelen);
01527        name = malloc(namelen+1);
01528        name[namelen]=0;
01529        if (read(net, name, namelen) < 0)
01530               err("Negotiation failed/8: %m");
01531        for(i=0; i<servers->len; i++) {
01532               SERVER* serve = &(g_array_index(servers, SERVER, i));
01533               if(!strcmp(serve->servename, name)) {
01534                      CLIENT* client = g_new0(CLIENT, 1);
01535                      client->server = serve;
01536                      client->exportsize = OFFT_MAX;
01537                      client->net = net;
01538                      client->modern = TRUE;
01539                      client->transactionlogfd = -1;
01540                      client->clientfeats = cflags;
01541                      free(name);
01542                      return client;
01543               }
01544        }
01545        free(name);
01546        return NULL;
01547 }
01548 
01549 static void handle_list(uint32_t opt, int net, GArray* servers, uint32_t cflags) {
01550        uint32_t len;
01551        int i;
01552        char buf[1024];
01553        char *ptr = buf + sizeof(len);
01554 
01555        if (read(net, &len, sizeof(len)) < 0)
01556               err("Negotiation failed/8: %m");
01557        len = ntohl(len);
01558        if(len) {
01559               send_reply(opt, net, NBD_REP_ERR_INVALID, 0, NULL);
01560        }
01561        if(!(glob_flags & F_LIST)) {
01562               send_reply(opt, net, NBD_REP_ERR_POLICY, 0, NULL);
01563               err_nonfatal("Client tried disallowed list option");
01564               return;
01565        }
01566        for(i=0; i<servers->len; i++) {
01567               SERVER* serve = &(g_array_index(servers, SERVER, i));
01568               len = htonl(strlen(serve->servename));
01569               memcpy(buf, &len, sizeof(len));
01570               strcpy(ptr, serve->servename);
01571               send_reply(opt, net, NBD_REP_SERVER, strlen(serve->servename)+sizeof(len), buf);
01572        }
01573        send_reply(opt, net, NBD_REP_ACK, 0, NULL);
01574 }
01575 
01581 CLIENT* negotiate(int net, CLIENT *client, GArray* servers, int phase) {
01582        char zeros[128];
01583        uint64_t size_host;
01584        uint32_t flags = NBD_FLAG_HAS_FLAGS;
01585        uint16_t smallflags = 0;
01586        uint64_t magic;
01587 
01588        memset(zeros, '\0', sizeof(zeros));
01589        assert(((phase & NEG_INIT) && (phase & NEG_MODERN)) || client);
01590        if(phase & NEG_MODERN) {
01591               smallflags |= NBD_FLAG_FIXED_NEWSTYLE;
01592        }
01593        if(phase & NEG_INIT) {
01594               /* common */
01595               if (write(net, INIT_PASSWD, 8) < 0) {
01596                      err_nonfatal("Negotiation failed/1: %m");
01597                      if(client)
01598                             exit(EXIT_FAILURE);
01599               }
01600               if(phase & NEG_MODERN) {
01601                      /* modern */
01602                      magic = htonll(opts_magic);
01603               } else {
01604                      /* oldstyle */
01605                      magic = htonll(cliserv_magic);
01606               }
01607               if (write(net, &magic, sizeof(magic)) < 0) {
01608                      err_nonfatal("Negotiation failed/2: %m");
01609                      if(phase & NEG_OLD)
01610                             exit(EXIT_FAILURE);
01611               }
01612        }
01613        if ((phase & NEG_MODERN) && (phase & NEG_INIT)) {
01614               /* modern */
01615               uint32_t cflags;
01616               uint32_t opt;
01617 
01618               if(!servers)
01619                      err("programmer error");
01620               smallflags = htons(smallflags);
01621               if (write(net, &smallflags, sizeof(uint16_t)) < 0)
01622                      err_nonfatal("Negotiation failed/3: %m");
01623               if (read(net, &cflags, sizeof(cflags)) < 0)
01624                      err_nonfatal("Negotiation failed/4: %m");
01625               cflags = htonl(cflags);
01626               do {
01627                      if (read(net, &magic, sizeof(magic)) < 0)
01628                             err_nonfatal("Negotiation failed/5: %m");
01629                      magic = ntohll(magic);
01630                      if(magic != opts_magic) {
01631                             close(net);
01632                             return NULL;
01633                      }
01634                      if (read(net, &opt, sizeof(opt)) < 0)
01635                             err_nonfatal("Negotiation failed/6: %m");
01636                      opt = ntohl(opt);
01637                      switch(opt) {
01638                      case NBD_OPT_EXPORT_NAME:
01639                             // NBD_OPT_EXPORT_NAME must be the last
01640                             // selected option, so return from here
01641                             // if that is chosen.
01642                             return handle_export_name(opt, net, servers, cflags);
01643                             break;
01644                      case NBD_OPT_LIST:
01645                             handle_list(opt, net, servers, cflags);
01646                             break;
01647                      case NBD_OPT_ABORT:
01648                             // handled below
01649                             break;
01650                      default:
01651                             send_reply(opt, net, NBD_REP_ERR_UNSUP, 0, NULL);
01652                             break;
01653                      }
01654               } while((opt != NBD_OPT_EXPORT_NAME) && (opt != NBD_OPT_ABORT));
01655               if(opt == NBD_OPT_ABORT) {
01656                      close(net);
01657                      return NULL;
01658               }
01659        }
01660        /* common */
01661        size_host = htonll((u64)(client->exportsize));
01662        if (write(net, &size_host, 8) < 0)
01663               err("Negotiation failed/9: %m");
01664        if (client->server->flags & F_READONLY)
01665               flags |= NBD_FLAG_READ_ONLY;
01666        if (client->server->flags & F_FLUSH)
01667               flags |= NBD_FLAG_SEND_FLUSH;
01668        if (client->server->flags & F_FUA)
01669               flags |= NBD_FLAG_SEND_FUA;
01670        if (client->server->flags & F_ROTATIONAL)
01671               flags |= NBD_FLAG_ROTATIONAL;
01672        if (client->server->flags & F_TRIM)
01673               flags |= NBD_FLAG_SEND_TRIM;
01674        if (phase & NEG_OLD) {
01675               /* oldstyle */
01676               flags = htonl(flags);
01677               if (write(client->net, &flags, 4) < 0)
01678                      err("Negotiation failed/10: %m");
01679        } else {
01680               /* modern */
01681               smallflags = (uint16_t)(flags & ~((uint16_t)0));
01682               smallflags = htons(smallflags);
01683               if (write(client->net, &smallflags, sizeof(smallflags)) < 0) {
01684                      err("Negotiation failed/11: %m");
01685               }
01686        }
01687        /* common */
01688        if (write(client->net, zeros, 124) < 0)
01689               err("Negotiation failed/12: %m");
01690        return NULL;
01691 }
01692 
/**
 * Write `reply` to the socket `net` and, if a transaction log is open,
 * write a copy of it there as well.
 *
 * Note that this macro refers to a variable named `client` that must be
 * in scope wherever it is expanded; it is not one of the macro's
 * parameters.
 **/
#define SEND(net,reply) { \
	writeit((net), &(reply), sizeof(reply)); \
	if (client->transactionlogfd != -1) \
		writeit(client->transactionlogfd, &(reply), sizeof(reply)); \
}

/**
 * Send `reply` to `client` with its error field set to `errcode`
 * (converted with htonl()), then reset the error field to 0 so the same
 * reply structure can be reused.
 **/
#define ERROR(client,reply,errcode) { \
	(reply).error = htonl(errcode); \
	SEND((client)->net, (reply)); \
	(reply).error = 0; \
}
01699 
01708 int mainloop(CLIENT *client) {
01709        struct nbd_request request;
01710        struct nbd_reply reply;
01711        gboolean go_on=TRUE;
01712 #ifdef DODBG
01713        int i = 0;
01714 #endif
01715        negotiate(client->net, client, NULL, client->modern ? NEG_MODERN : (NEG_OLD | NEG_INIT));
01716        DEBUG("Entering request loop!\n");
01717        reply.magic = htonl(NBD_REPLY_MAGIC);
01718        reply.error = 0;
01719        while (go_on) {
01720               char buf[BUFSIZE];
01721               char* p;
01722               size_t len;
01723               size_t currlen;
01724               size_t writelen;
01725               uint16_t command;
01726 #ifdef DODBG
01727               i++;
01728               printf("%d: ", i);
01729 #endif
01730               readit(client->net, &request, sizeof(request));
01731               if (client->transactionlogfd != -1)
01732                      writeit(client->transactionlogfd, &request, sizeof(request));
01733 
01734               request.from = ntohll(request.from);
01735               request.type = ntohl(request.type);
01736               command = request.type & NBD_CMD_MASK_COMMAND;
01737               len = ntohl(request.len);
01738 
01739               DEBUG("%s from %llu (%llu) len %d, ", getcommandname(command),
01740                             (unsigned long long)request.from,
01741                             (unsigned long long)request.from / 512, (unsigned int)len);
01742 
01743               if (request.magic != htonl(NBD_REQUEST_MAGIC))
01744                      err("Not enough magic.");
01745 
01746               memcpy(reply.handle, request.handle, sizeof(reply.handle));
01747 
01748               if ((command==NBD_CMD_WRITE) || (command==NBD_CMD_READ)) {
01749                      if ((request.from + len) > (OFFT_MAX)) {
01750                             DEBUG("[Number too large!]");
01751                             ERROR(client, reply, EINVAL);
01752                             continue;
01753                      }
01754 
01755                      if (((ssize_t)((off_t)request.from + len) > client->exportsize)) {
01756                             DEBUG("[RANGE!]");
01757                             ERROR(client, reply, EINVAL);
01758                             continue;
01759                      }
01760 
01761                      currlen = len;
01762                      if (currlen > BUFSIZE - sizeof(struct nbd_reply)) {
01763                             currlen = BUFSIZE - sizeof(struct nbd_reply);
01764                             if(!logged_oversized) {
01765                                    msg2(LOG_DEBUG, "oversized request (this is not a problem)");
01766                                    logged_oversized = true;
01767                             }
01768                      }
01769               }
01770 
01771               switch (command) {
01772 
01773               case NBD_CMD_DISC:
01774                      msg2(LOG_INFO, "Disconnect request received.");
01775                      if (client->server->flags & F_COPYONWRITE) { 
01776                             if (client->difmap) g_free(client->difmap) ;
01777                             close(client->difffile);
01778                             unlink(client->difffilename);
01779                             free(client->difffilename);
01780                      }
01781                      go_on=FALSE;
01782                      continue;
01783 
01784               case NBD_CMD_WRITE:
01785                      DEBUG("wr: net->buf, ");
01786                      while(len > 0) {
01787                             readit(client->net, buf, currlen);
01788                             DEBUG("buf->exp, ");
01789                             if ((client->server->flags & F_READONLY) ||
01790                                 (client->server->flags & F_AUTOREADONLY)) {
01791                                    DEBUG("[WRITE to READONLY!]");
01792                                    ERROR(client, reply, EPERM);
01793                                    consume(client->net, buf, len-currlen, BUFSIZE);
01794                                    continue;
01795                             }
01796                             if (expwrite(request.from, buf, currlen, client,
01797                                         request.type & NBD_CMD_FLAG_FUA)) {
01798                                    DEBUG("Write failed: %m" );
01799                                    ERROR(client, reply, errno);
01800                                    consume(client->net, buf, len-currlen, BUFSIZE);
01801                                    continue;
01802                             }
01803                             len -= currlen;
01804                             request.from += currlen;
01805                             currlen = (len < BUFSIZE) ? len : BUFSIZE;
01806                      }
01807                      SEND(client->net, reply);
01808                      DEBUG("OK!\n");
01809                      continue;
01810 
01811               case NBD_CMD_FLUSH:
01812                      DEBUG("fl: ");
01813                      if (expflush(client)) {
01814                             DEBUG("Flush failed: %m");
01815                             ERROR(client, reply, errno);
01816                             continue;
01817                      }
01818                      SEND(client->net, reply);
01819                      DEBUG("OK!\n");
01820                      continue;
01821 
01822               case NBD_CMD_READ:
01823                      DEBUG("exp->buf, ");
01824                      memcpy(buf, &reply, sizeof(struct nbd_reply));
01825                      if (client->transactionlogfd != -1)
01826                             writeit(client->transactionlogfd, &reply, sizeof(reply));
01827                      p = buf + sizeof(struct nbd_reply);
01828                      writelen = currlen + sizeof(struct nbd_reply);
01829                      while(len > 0) {
01830                             if (expread(request.from, p, currlen, client)) {
01831                                    DEBUG("Read failed: %m");
01832                                    ERROR(client, reply, errno);
01833                                    continue;
01834                             }
01835                             
01836                             DEBUG("buf->net, ");
01837                             writeit(client->net, buf, writelen);
01838                             len -= currlen;
01839                             request.from += currlen;
01840                             currlen = (len < BUFSIZE) ? len : BUFSIZE;
01841                             p = buf;
01842                             writelen = currlen;
01843                      }
01844                      DEBUG("OK!\n");
01845                      continue;
01846 
01847               case NBD_CMD_TRIM:
01848                      /* The kernel module sets discard_zeroes_data == 0,
01849                       * so it is okay to do nothing.  */
01850                      if (exptrim(&request, client)) {
01851                             DEBUG("Trim failed: %m");
01852                             ERROR(client, reply, errno);
01853                             continue;
01854                      }
01855                      SEND(client->net, reply);
01856                      continue;
01857 
01858               default:
01859                      DEBUG ("Ignoring unknown command\n");
01860                      continue;
01861               }
01862        }
01863        return 0;
01864 }
01865 
01871 void setupexport(CLIENT* client) {
01872        int i;
01873        off_t laststartoff = 0, lastsize = 0;
01874        int multifile = (client->server->flags & F_MULTIFILE);
01875        int temporary = (client->server->flags & F_TEMPORARY) && !multifile;
01876        int cancreate = (client->server->expected_size) && !multifile;
01877 
01878        client->export = g_array_new(TRUE, TRUE, sizeof(FILE_INFO));
01879 
01880        /* If multi-file, open as many files as we can.
01881         * If not, open exactly one file.
01882         * Calculate file sizes as we go to get total size. */
01883        for(i=0; ; i++) {
01884               FILE_INFO fi;
01885               gchar *tmpname;
01886               gchar* error_string;
01887 
01888               if (i)
01889                 cancreate = 0;
01890               /* if expected_size is specified, and this is the first file, we can create the file */
01891               mode_t mode = (client->server->flags & F_READONLY) ?
01892                 O_RDONLY : (O_RDWR | (cancreate?O_CREAT:0));
01893 
01894               if (temporary) {
01895                      tmpname=g_strdup_printf("%s.%d-XXXXXX", client->exportname, i);
01896                      DEBUG( "Opening %s\n", tmpname );
01897                      fi.fhandle = mkstemp(tmpname);
01898               } else {
01899                      if(multifile) {
01900                             tmpname=g_strdup_printf("%s.%d", client->exportname, i);
01901                      } else {
01902                             tmpname=g_strdup(client->exportname);
01903                      }
01904                      DEBUG( "Opening %s\n", tmpname );
01905                      fi.fhandle = open(tmpname, mode, 0x600);
01906                      if(fi.fhandle == -1 && mode == O_RDWR) {
01907                             /* Try again because maybe media was read-only */
01908                             fi.fhandle = open(tmpname, O_RDONLY);
01909                             if(fi.fhandle != -1) {
01910                                    /* Opening the base file in copyonwrite mode is
01911                                     * okay */
01912                                    if(!(client->server->flags & F_COPYONWRITE)) {
01913                                           client->server->flags |= F_AUTOREADONLY;
01914                                           client->server->flags |= F_READONLY;
01915                                    }
01916                             }
01917                      }
01918               }
01919               if(fi.fhandle == -1) {
01920                      if(multifile && i>0)
01921                             break;
01922                      error_string=g_strdup_printf(
01923                             "Could not open exported file %s: %%m",
01924                             tmpname);
01925                      err(error_string);
01926               }
01927 
01928               if (temporary)
01929                      unlink(tmpname); /* File will stick around whilst FD open */
01930 
01931               fi.startoff = laststartoff + lastsize;
01932               g_array_append_val(client->export, fi);
01933               g_free(tmpname);
01934 
01935               /* Starting offset and size of this file will be used to
01936                * calculate starting offset of next file */
01937               laststartoff = fi.startoff;
01938               lastsize = size_autodetect(fi.fhandle);
01939 
01940               /* If we created the file, it will be length zero */
01941               if (!lastsize && cancreate) {
01942                      assert(!multifile);
01943                      if(ftruncate (fi.fhandle, client->server->expected_size)<0) {
01944                             err("Could not expand file: %m");
01945                      }
01946                      lastsize = client->server->expected_size;
01947                      break; /* don't look for any more files */
01948               }
01949 
01950               if(!multifile || temporary)
01951                      break;
01952        }
01953 
01954        /* Set export size to total calculated size */
01955        client->exportsize = laststartoff + lastsize;
01956 
01957        /* Export size may be overridden */
01958        if(client->server->expected_size) {
01959               /* desired size must be <= total calculated size */
01960               if(client->server->expected_size > client->exportsize) {
01961                      err("Size of exported file is too big\n");
01962               }
01963 
01964               client->exportsize = client->server->expected_size;
01965        }
01966 
01967        msg3(LOG_INFO, "Size of exported file/device is %llu", (unsigned long long)client->exportsize);
01968        if(multifile) {
01969               msg3(LOG_INFO, "Total number of files: %d", i);
01970        }
01971 }
01972 
01973 int copyonwrite_prepare(CLIENT* client) {
01974        off_t i;
01975        if ((client->difffilename = malloc(1024))==NULL)
01976               err("Failed to allocate string for diff file name");
01977        snprintf(client->difffilename, 1024, "%s-%s-%d.diff",client->exportname,client->clientname,
01978               (int)getpid()) ;
01979        client->difffilename[1023]='\0';
01980        msg3(LOG_INFO,"About to create map and diff file %s",client->difffilename) ;
01981        client->difffile=open(client->difffilename,O_RDWR | O_CREAT | O_TRUNC,0600) ;
01982        if (client->difffile<0) err("Could not create diff file (%m)") ;
01983        if ((client->difmap=calloc(client->exportsize/DIFFPAGESIZE,sizeof(u32)))==NULL)
01984               err("Could not allocate memory") ;
01985        for (i=0;i<client->exportsize/DIFFPAGESIZE;i++) client->difmap[i]=(u32)-1 ;
01986 
01987        return 0;
01988 }
01989 
01997 int do_run(gchar* command, gchar* file) {
01998        gchar* cmd;
01999        int retval=0;
02000 
02001        if(command && *command) {
02002               cmd = g_strdup_printf(command, file);
02003               retval=system(cmd);
02004               g_free(cmd);
02005        }
02006        return retval;
02007 }
02008 
02017 void serveconnection(CLIENT *client) {
02018        if (client->server->transactionlog && (client->transactionlogfd == -1))
02019        {
02020               if (-1 == (client->transactionlogfd = open(client->server->transactionlog,
02021                                                     O_WRONLY | O_CREAT,
02022                                                     S_IRUSR | S_IWUSR)))
02023                      g_warning("Could not open transaction log %s",
02024                               client->server->transactionlog);
02025        }
02026 
02027        if(do_run(client->server->prerun, client->exportname)) {
02028               exit(EXIT_FAILURE);
02029        }
02030        setupexport(client);
02031 
02032        if (client->server->flags & F_COPYONWRITE) {
02033               copyonwrite_prepare(client);
02034        }
02035 
02036        setmysockopt(client->net);
02037 
02038        mainloop(client);
02039        do_run(client->server->postrun, client->exportname);
02040 
02041        if (-1 != client->transactionlogfd)
02042        {
02043               close(client->transactionlogfd);
02044               client->transactionlogfd = -1;
02045        }
02046 }
02047 
02060 int set_peername(int net, CLIENT *client) {
02061        struct sockaddr_storage addrin;
02062        struct sockaddr_storage netaddr;
02063        struct sockaddr_in  *netaddr4 = NULL;
02064        struct sockaddr_in6 *netaddr6 = NULL;
02065        socklen_t addrinlen = sizeof( addrin );
02066        struct addrinfo hints;
02067        struct addrinfo *ai = NULL;
02068        char peername[NI_MAXHOST];
02069        char netname[NI_MAXHOST];
02070        char *tmp = NULL;
02071        int i;
02072        int e;
02073        int shift;
02074 
02075        if (getpeername(net, (struct sockaddr *) &addrin, &addrinlen) < 0) {
02076               msg2(LOG_INFO, "getpeername failed: %m");
02077               return -1;
02078        }
02079 
02080        if((e = getnameinfo((struct sockaddr *)&addrin, addrinlen,
02081                      peername, sizeof (peername), NULL, 0, NI_NUMERICHOST))) {
02082               msg3(LOG_INFO, "getnameinfo failed: %s", gai_strerror(e));
02083               return -1;
02084        }
02085 
02086        memset(&hints, '\0', sizeof (hints));
02087        hints.ai_flags = AI_ADDRCONFIG;
02088        e = getaddrinfo(peername, NULL, &hints, &ai);
02089 
02090        if(e != 0) {
02091               msg3(LOG_INFO, "getaddrinfo failed: %s", gai_strerror(e));
02092               freeaddrinfo(ai);
02093               return -1;
02094        }
02095 
02096        switch(client->server->virtstyle) {
02097               case VIRT_NONE:
02098                      msg2(LOG_DEBUG, "virtualization is off");
02099                      client->exportname=g_strdup(client->server->exportname);
02100                      break;
02101               case VIRT_IPHASH:
02102                      msg2(LOG_DEBUG, "virtstyle iphash");
02103                      for(i=0;i<strlen(peername);i++) {
02104                             if(peername[i]=='.') {
02105                                    peername[i]='/';
02106                             }
02107                      }
02108               case VIRT_IPLIT:
02109                      msg2(LOG_DEBUG, "virststyle ipliteral");
02110                      client->exportname=g_strdup_printf(client->server->exportname, peername);
02111                      break;
02112               case VIRT_CIDR:
02113                      msg3(LOG_DEBUG, "virtstyle cidr %d", client->server->cidrlen);
02114                      memcpy(&netaddr, &addrin, addrinlen);
02115                      if(ai->ai_family == AF_INET) {
02116                             netaddr4 = (struct sockaddr_in *)&netaddr;
02117                             (netaddr4->sin_addr).s_addr>>=32-(client->server->cidrlen);
02118                             (netaddr4->sin_addr).s_addr<<=32-(client->server->cidrlen);
02119 
02120                             getnameinfo((struct sockaddr *) netaddr4, addrinlen,
02121                                                  netname, sizeof (netname), NULL, 0, NI_NUMERICHOST);
02122                             tmp=g_strdup_printf("%s/%s", netname, peername);
02123                      }else if(ai->ai_family == AF_INET6) {
02124                             netaddr6 = (struct sockaddr_in6 *)&netaddr;
02125 
02126                             shift = 128-(client->server->cidrlen);
02127                             i = 3;
02128                             while(shift >= 8) {
02129                                    ((netaddr6->sin6_addr).s6_addr[i])=0;
02130                                    shift-=8;
02131                                    i--;
02132                             }
02133                             (netaddr6->sin6_addr).s6_addr[i]>>=shift;
02134                             (netaddr6->sin6_addr).s6_addr[i]<<=shift;
02135 
02136                             getnameinfo((struct sockaddr *)netaddr6, addrinlen,
02137                                        netname, sizeof(netname), NULL, 0, NI_NUMERICHOST);
02138                             tmp=g_strdup_printf("%s/%s", netname, peername);
02139                      }
02140 
02141                      if(tmp != NULL)
02142                        client->exportname=g_strdup_printf(client->server->exportname, tmp);
02143 
02144                      break;
02145        }
02146 
02147        freeaddrinfo(ai);
02148        msg4(LOG_INFO, "connect from %s, assigned file is %s", 
02149             peername, client->exportname);
02150        client->clientname=g_strdup(peername);
02151        return 0;
02152 }
02153 
02158 void destroy_pid_t(gpointer data) {
02159        g_free(data);
02160 }
02161 
02162 static void
02163 handle_connection(GArray *servers, int net, SERVER *serve, CLIENT *client)
02164 {
02165        int sock_flags_old;
02166        int sock_flags_new;
02167 
02168        if(serve->max_connections > 0 &&
02169           g_hash_table_size(children) >= serve->max_connections) {
02170               msg2(LOG_INFO, "Max connections reached");
02171               goto handle_connection_out;
02172        }
02173        if((sock_flags_old = fcntl(net, F_GETFL, 0)) == -1) {
02174               err("fcntl F_GETFL");
02175        }
02176        sock_flags_new = sock_flags_old & ~O_NONBLOCK;
02177        if (sock_flags_new != sock_flags_old &&
02178            fcntl(net, F_SETFL, sock_flags_new) == -1) {
02179               err("fcntl F_SETFL ~O_NONBLOCK");
02180        }
02181        if(!client) {
02182               client = g_new0(CLIENT, 1);
02183               client->server=serve;
02184               client->exportsize=OFFT_MAX;
02185               client->net=net;
02186               client->transactionlogfd = -1;
02187        }
02188        if (set_peername(net, client)) {
02189               goto handle_connection_out;
02190        }
02191        if (!authorized_client(client)) {
02192               msg2(LOG_INFO,"Unauthorized client") ;
02193               goto handle_connection_out;
02194        }
02195        msg2(LOG_INFO,"Authorized client") ;
02196 
02197        if (!dontfork) {
02198               pid_t pid;
02199               int i;
02200               sigset_t newset;
02201               sigset_t oldset;
02202 
02203               sigemptyset(&newset);
02204               sigaddset(&newset, SIGCHLD);
02205               sigaddset(&newset, SIGTERM);
02206               sigprocmask(SIG_BLOCK, &newset, &oldset);
02207               if ((pid = fork()) < 0) {
02208                      msg3(LOG_INFO,"Could not fork (%s)",strerror(errno)) ;
02209                      sigprocmask(SIG_SETMASK, &oldset, NULL);
02210                      goto handle_connection_out;
02211               }
02212               if (pid > 0) { /* parent */
02213                      pid_t *pidp;
02214 
02215                      pidp = g_malloc(sizeof(pid_t));
02216                      *pidp = pid;
02217                      g_hash_table_insert(children, pidp, pidp);
02218                      sigprocmask(SIG_SETMASK, &oldset, NULL);
02219                      goto handle_connection_out;
02220               }
02221               /* child */
02222               signal(SIGCHLD, SIG_DFL);
02223               signal(SIGTERM, SIG_DFL);
02224               sigprocmask(SIG_SETMASK, &oldset, NULL);
02225 
02226               g_hash_table_destroy(children);
02227               children = NULL;
02228               for(i=0;i<servers->len;i++) {
02229                      serve=&g_array_index(servers, SERVER, i);
02230                      close(serve->socket);
02231               }
02232               /* FALSE does not free the
02233                  actual data. This is required,
02234                  because the client has a
02235                  direct reference into that
02236                  data, and otherwise we get a
02237                  segfault... */
02238               g_array_free(servers, FALSE);
02239               close(modernsock);
02240        }
02241 
02242        msg2(LOG_INFO,"Starting to serve");
02243        serveconnection(client);
02244        exit(EXIT_SUCCESS);
02245 
02246 handle_connection_out:
02247        g_free(client);
02248        close(net);
02249 }
02250 
02254 int serveloop(GArray* servers) {
02255        struct sockaddr_storage addrin;
02256        socklen_t addrinlen=sizeof(addrin);
02257        int i;
02258        int max;
02259        int sock;
02260        fd_set mset;
02261        fd_set rset;
02262 
02263        /* 
02264         * Set up the master fd_set. The set of descriptors we need
02265         * to select() for never changes anyway and it buys us a *lot*
02266         * of time to only build this once. However, if we ever choose
02267         * to not fork() for clients anymore, we may have to revisit
02268         * this.
02269         */
02270        max=0;
02271        FD_ZERO(&mset);
02272        for(i=0;i<servers->len;i++) {
02273               if((sock=(g_array_index(servers, SERVER, i)).socket)) {
02274                      FD_SET(sock, &mset);
02275                      max=sock>max?sock:max;
02276               }
02277        }
02278        if(modernsock >= 0) {
02279               FD_SET(modernsock, &mset);
02280               max=modernsock>max?modernsock:max;
02281        }
02282        for(;;) {
02283               memcpy(&rset, &mset, sizeof(fd_set));
02284               if(select(max+1, &rset, NULL, NULL, NULL)>0) {
02285                      int net;
02286 
02287                      DEBUG("accept, ");
02288                      if(modernsock >= 0 && FD_ISSET(modernsock, &rset)) {
02289                             CLIENT *client;
02290 
02291                             if((net=accept(modernsock, (struct sockaddr *) &addrin, &addrinlen)) < 0) {
02292                                    err_nonfatal("accept: %m");
02293                                    continue;
02294                             }
02295                             client = negotiate(net, NULL, servers, NEG_INIT | NEG_MODERN);
02296                             if(!client) {
02297                                    err_nonfatal("negotiation failed");
02298                                    close(net);
02299                                    continue;
02300                             }
02301                             handle_connection(servers, net, client->server, client);
02302                      }
02303                      for(i=0; i < servers->len; i++) {
02304                             SERVER *serve;
02305 
02306                             serve=&(g_array_index(servers, SERVER, i));
02307                             if(FD_ISSET(serve->socket, &rset)) {
02308                                    if ((net=accept(serve->socket, (struct sockaddr *) &addrin, &addrinlen)) < 0) {
02309                                           err_nonfatal("accept: %m");
02310                                           continue;
02311                                    }
02312                                    handle_connection(servers, net, serve, NULL);
02313                             }
02314                      }
02315               }
02316        }
02317 }
02318 
/**
 * Apply the standard options to a listening socket: SO_REUSEADDR,
 * SO_LINGER (10 seconds) and SO_KEEPALIVE. Any failure is fatal.
 *
 * The socket is left in blocking mode.
 *
 * @param socket the socket to configure
 */
void dosockopts(int socket) {
#ifndef sun
	int yes=1;
#else
	char yes='1';
#endif /* sun */
	struct linger l;

	/* lose the pesky "Address already in use" error message */
	/* The option length follows the real type of `yes`, which is a char
	 * on sun and an int everywhere else. */
	if (setsockopt(socket,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(yes)) == -1) {
		err("setsockopt SO_REUSEADDR");
	}
	l.l_onoff = 1;
	l.l_linger = 10;
	if (setsockopt(socket,SOL_SOCKET,SO_LINGER,&l,sizeof(l)) == -1) {
		perror("setsockopt SO_LINGER");
		exit(EXIT_FAILURE);
	}
	if (setsockopt(socket,SOL_SOCKET,SO_KEEPALIVE,&yes,sizeof(yes)) == -1) {
		err("setsockopt SO_KEEPALIVE");
	}
}
02351 
02357 int setup_serve(SERVER *serve) {
02358        struct addrinfo hints;
02359        struct addrinfo *ai = NULL;
02360        gchar *port = NULL;
02361        int e;
02362 
02363        if(!(glob_flags & F_OLDSTYLE)) {
02364               return serve->servename ? 1 : 0;
02365        }
02366        memset(&hints,'\0',sizeof(hints));
02367        hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG | AI_NUMERICSERV;
02368        hints.ai_socktype = SOCK_STREAM;
02369        hints.ai_family = serve->socket_family;
02370 
02371        port = g_strdup_printf ("%d", serve->port);
02372        if (port == NULL)
02373               return 0;
02374 
02375        e = getaddrinfo(serve->listenaddr,port,&hints,&ai);
02376 
02377        g_free(port);
02378 
02379        if(e != 0) {
02380               fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
02381               serve->socket = -1;
02382               freeaddrinfo(ai);
02383               exit(EXIT_FAILURE);
02384        }
02385 
02386        if(serve->socket_family == AF_UNSPEC)
02387               serve->socket_family = ai->ai_family;
02388 
02389 #ifdef WITH_SDP
02390        if ((serve->flags) && F_SDP) {
02391               if (ai->ai_family == AF_INET)
02392                      ai->ai_family = AF_INET_SDP;
02393               else (ai->ai_family == AF_INET6)
02394                      ai->ai_family = AF_INET6_SDP;
02395        }
02396 #endif
02397        if ((serve->socket = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol)) < 0)
02398               err("socket: %m");
02399 
02400        dosockopts(serve->socket);
02401 
02402        DEBUG("Waiting for connections... bind, ");
02403        e = bind(serve->socket, ai->ai_addr, ai->ai_addrlen);
02404        if (e != 0 && errno != EADDRINUSE)
02405               err("bind: %m");
02406        DEBUG("listen, ");
02407        if (listen(serve->socket, 1) < 0)
02408               err("listen: %m");
02409 
02410        freeaddrinfo (ai);
02411        if(serve->servename) {
02412               return 1;
02413        } else {
02414               return 0;
02415        }
02416 }
02417 
/**
 * Open the shared listening socket used for new-style negotiation.
 *
 * Resolves modern_listen/modernport, then creates, configures, binds and
 * listens on a TCP socket which is stored in `modernsock`. Any failure
 * terminates the process.
 */
void open_modern(void) {
	struct addrinfo hints;
	struct addrinfo* ai = NULL;
	int e;

	memset(&hints, '\0', sizeof(hints));
	hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_family = AF_UNSPEC;
	hints.ai_protocol = IPPROTO_TCP;
	e = getaddrinfo(modern_listen, modernport, &hints, &ai);
	if(e != 0) {
		fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e));
		exit(EXIT_FAILURE);
	}
	if((modernsock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol))<0) {
		err("socket: %m");
	}

	dosockopts(modernsock);

	if(bind(modernsock, ai->ai_addr, ai->ai_addrlen)) {
		err("bind: %m");
	}
	if(listen(modernsock, 10) <0) {
		err("listen: %m");
	}

	freeaddrinfo(ai);
}
02449 
02453 void setup_servers(GArray* servers) {
02454        int i;
02455        struct sigaction sa;
02456        int want_modern=0;
02457 
02458        for(i=0;i<servers->len;i++) {
02459               want_modern |= setup_serve(&(g_array_index(servers, SERVER, i)));
02460        }
02461        if(want_modern) {
02462               open_modern();
02463        }
02464        children=g_hash_table_new_full(g_int_hash, g_int_equal, NULL, destroy_pid_t);
02465 
02466        sa.sa_handler = sigchld_handler;
02467        sigemptyset(&sa.sa_mask);
02468        sigaddset(&sa.sa_mask, SIGTERM);
02469        sa.sa_flags = SA_RESTART;
02470        if(sigaction(SIGCHLD, &sa, NULL) == -1)
02471               err("sigaction: %m");
02472 
02473        sa.sa_handler = sigterm_handler;
02474        sigemptyset(&sa.sa_mask);
02475        sigaddset(&sa.sa_mask, SIGCHLD);
02476        sa.sa_flags = SA_RESTART;
02477        if(sigaction(SIGTERM, &sa, NULL) == -1)
02478               err("sigaction: %m");
02479 }
02480 
02488 #if !defined(NODAEMON)
02489 void daemonize(SERVER* serve) {
02490        FILE*pidf;
02491 
02492        if(serve && !(serve->port)) {
02493               return;
02494        }
02495        if(daemon(0,0)<0) {
02496               err("daemon");
02497        }
02498        if(!*pidftemplate) {
02499               if(serve) {
02500                      strncpy(pidftemplate, "/var/run/nbd-server.%d.pid", 255);
02501               } else {
02502                      strncpy(pidftemplate, "/var/run/nbd-server.pid", 255);
02503               }
02504        }
02505        snprintf(pidfname, 255, pidftemplate, serve ? serve->port : 0);
02506        pidf=fopen(pidfname, "w");
02507        if(pidf) {
02508               fprintf(pidf,"%d\n", (int)getpid());
02509               fclose(pidf);
02510        } else {
02511               perror("fopen");
02512               fprintf(stderr, "Not fatal; continuing");
02513        }
02514 }
02515 #else
02516 #define daemonize(serve)
02517 #endif /* !defined(NODAEMON) */
02518 
02519 /*
02520  * Everything beyond this point (in the file) is run in non-daemon mode.
02521  * The stuff above daemonize() isn't.
02522  */
02523 
02524 void serve_err(SERVER* serve, const char* msg) G_GNUC_NORETURN;
02525 
02526 void serve_err(SERVER* serve, const char* msg) {
02527        g_message("Export of %s on port %d failed:", serve->exportname,
02528                      serve->port);
02529        err(msg);
02530 }
02531 
02535 void dousers(void) {
02536        struct passwd *pw;
02537        struct group *gr;
02538        gchar* str;
02539        if(rungroup) {
02540               gr=getgrnam(rungroup);
02541               if(!gr) {
02542                      str = g_strdup_printf("Invalid group name: %s", rungroup);
02543                      err(str);
02544               }
02545               if(setgid(gr->gr_gid)<0) {
02546                      err("Could not set GID: %m"); 
02547               }
02548        }
02549        if(runuser) {
02550               pw=getpwnam(runuser);
02551               if(!pw) {
02552                      str = g_strdup_printf("Invalid user name: %s", runuser);
02553                      err(str);
02554               }
02555               if(setuid(pw->pw_uid)<0) {
02556                      err("Could not set UID: %m");
02557               }
02558        }
02559 }
02560 
02561 #ifndef ISSERVER
02562 void glib_message_syslog_redirect(const gchar *log_domain,
02563                                   GLogLevelFlags log_level,
02564                                   const gchar *message,
02565                                   gpointer user_data)
02566 {
02567     int level=LOG_DEBUG;
02568     
02569     switch( log_level )
02570     {
02571       case G_LOG_FLAG_FATAL:
02572       case G_LOG_LEVEL_CRITICAL:
02573       case G_LOG_LEVEL_ERROR:    
02574         level=LOG_ERR; 
02575         break;
02576       case G_LOG_LEVEL_WARNING:
02577         level=LOG_WARNING;
02578         break;
02579       case G_LOG_LEVEL_MESSAGE:
02580       case G_LOG_LEVEL_INFO:
02581         level=LOG_INFO;
02582         break;
02583       case G_LOG_LEVEL_DEBUG:
02584         level=LOG_DEBUG;
02585        break;
02586       default:
02587         level=LOG_ERR;
02588     }
02589     syslog(level, "%s", message);
02590 }
02591 #endif
02592 
02596 int main(int argc, char *argv[]) {
02597        SERVER *serve;
02598        GArray *servers;
02599        GError *err=NULL;
02600 
02601        if (sizeof( struct nbd_request )!=28) {
02602               fprintf(stderr,"Bad size of structure. Alignment problems?\n");
02603               exit(EXIT_FAILURE) ;
02604        }
02605 
02606        memset(pidftemplate, '\0', 256);
02607 
02608        logging();
02609        config_file_pos = g_strdup(CFILE);
02610        serve=cmdline(argc, argv);
02611        servers = parse_cfile(config_file_pos, TRUE, &err);
02612        
02613        if(serve) {
02614               serve->socket_family = AF_UNSPEC;
02615 
02616               append_serve(serve, servers);
02617      
02618               if (!(serve->port)) {
02619                      CLIENT *client;
02620 #ifndef ISSERVER
02621                      /* You really should define ISSERVER if you're going to use
02622                       * inetd mode, but if you don't, closing stdout and stderr
02623                       * (which inetd had connected to the client socket) will let it
02624                       * work. */
02625                      close(1);
02626                      close(2);
02627                      open("/dev/null", O_WRONLY);
02628                      open("/dev/null", O_WRONLY);
02629                      g_log_set_default_handler( glib_message_syslog_redirect, NULL );
02630 #endif
02631                      client=g_malloc(sizeof(CLIENT));
02632                      client->server=serve;
02633                      client->net=-1;
02634                      client->exportsize=OFFT_MAX;
02635                      if (set_peername(0, client))
02636                             exit(EXIT_FAILURE);
02637                      serveconnection(client);
02638                      return 0;
02639               }
02640        }
02641     
02642        if(!servers || !servers->len) {
02643               if(err && !(err->domain == g_quark_from_string("parse_cfile")
02644                             && err->code == CFILE_NOTFOUND)) {
02645                      g_warning("Could not parse config file: %s", 
02646                                    err ? err->message : "Unknown error");
02647               }
02648        }
02649        if(serve) {
02650               g_warning("Specifying an export on the command line is deprecated.");
02651               g_warning("Please use a configuration file instead.");
02652        }
02653 
02654        if((!serve) && (!servers||!servers->len)) {
02655               g_message("No configured exports; quitting.");
02656               exit(EXIT_FAILURE);
02657        }
02658        if (!dontfork)
02659               daemonize(serve);
02660        setup_servers(servers);
02661        dousers();
02662        serveloop(servers);
02663        return 0 ;
02664 }