Back to index

lightning-sunbird  0.9+nobinonly
httpget.c
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* ***** BEGIN LICENSE BLOCK *****
00003  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License. You may obtain a copy of the License at
00008  * http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * The Original Code is the Netscape Portable Runtime (NSPR).
00016  *
00017  * The Initial Developer of the Original Code is
00018  * Netscape Communications Corporation.
00019  * Portions created by the Initial Developer are Copyright (C) 1998-2000
00020  * the Initial Developer. All Rights Reserved.
00021  *
00022  * Contributor(s):
00023  *
00024  * Alternatively, the contents of this file may be used under the terms of
00025  * either the GNU General Public License Version 2 or later (the "GPL"), or
00026  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
00027  * in which case the provisions of the GPL or the LGPL are applicable instead
00028  * of those above. If you wish to allow use of your version of this file only
00029  * under the terms of either the GPL or the LGPL, and not to allow others to
00030  * use your version of this file under the terms of the MPL, indicate your
00031  * decision by deleting the provisions above and replace them with the notice
00032  * and other provisions required by the GPL or the LGPL. If you do not delete
00033  * the provisions above, a recipient may use your version of this file under
00034  * the terms of any one of the MPL, the GPL or the LGPL.
00035  *
00036  * ***** END LICENSE BLOCK ***** */
00037 
00038 
00039 /*
00040  * Author: Wan-Teh Chang
00041  *
00042  * Given an HTTP URL, httpget uses the GET method to fetch the file.
00043  * The fetched file is written to stdout by default, or can be
00044  * saved in an output file.
00045  *
00046  * This is a single-threaded program.
00047  */
00048 
00049 #include "prio.h"
00050 #include "prnetdb.h"
00051 #include "prlog.h"
00052 #include "prerror.h"
00053 #include "prprf.h"
00054 #include "prinit.h"
00055 
00056 #include <stdio.h>
00057 #include <string.h>
00058 #include <stdlib.h>  /* for atoi */
00059 
/*
 * Buffer sizes, in bytes.  The string buffers (LINE, HOST, PORT, PATH)
 * include room for the terminating null byte.
 */
#define FCOPY_BUFFER_SIZE   (16 * 1024)  /* chunk size for copying the body */
#define INPUT_BUFFER_SIZE   1024         /* look-ahead buffer of ReadLine */
#define LINE_SIZE           512          /* one line of the HTTP response */
#define HOST_SIZE           256          /* host name parsed from the URL */
#define PORT_SIZE           32           /* port number, as a string */
#define PATH_SIZE           512          /* path part of the URL */
00066 
00067 /*
00068  * A buffer for storing the excess input data for ReadLine.
00069  * The data in the buffer starts from (including) the element pointed to
00070  * by inputHead, and ends just before (not including) the element pointed
00071  * to by inputTail.  The buffer is empty if inputHead == inputTail.
00072  */
00073 
00074 static char inputBuf[INPUT_BUFFER_SIZE];
00075 /*
00076  * inputBufEnd points just past the end of inputBuf
00077  */
00078 static char *inputBufEnd = inputBuf + sizeof(inputBuf);
00079 static char *inputHead = inputBuf;
00080 static char *inputTail = inputBuf;
00081 
00082 static PRBool endOfStream = PR_FALSE;
00083 
00084 /*
00085  * ReadLine --
00086  *
00087  * Read in a line of text, terminated by CRLF or LF, from fd into buf.
00088  * The terminating CRLF or LF is included (always as '\n').  The text
00089  * in buf is terminated by a null byte.  The excess bytes are stored in
00090  * inputBuf for use in the next ReadLine call or FetchFile call.
00091  * Returns the number of bytes in buf.  0 means end of stream.  Returns
00092  * -1 if read fails.
00093  */
00094 
00095 PRInt32 ReadLine(PRFileDesc *fd, char *buf, PRUint32 bufSize)
00096 {
00097     char *dst = buf;
00098     char *bufEnd = buf + bufSize;  /* just past the end of buf */
00099     PRBool lineFound = PR_FALSE;
00100     char *crPtr = NULL;  /* points to the CR ('\r') character */
00101     PRInt32 nRead;
00102 
00103 loop:
00104     PR_ASSERT(inputBuf <= inputHead && inputHead <= inputTail
00105            && inputTail <= inputBufEnd);
00106     while (lineFound == PR_FALSE && inputHead != inputTail
00107            && dst < bufEnd - 1) {
00108        if (*inputHead == '\r') {
00109            crPtr = dst;
00110        } else if (*inputHead == '\n') {
00111            lineFound = PR_TRUE;
00112            if (crPtr == dst - 1) {
00113               dst--; 
00114            }
00115        }
00116        *(dst++) = *(inputHead++);
00117     }
00118     if (lineFound == PR_TRUE || dst == bufEnd - 1 || endOfStream == PR_TRUE) {
00119        *dst = '\0';
00120        return dst - buf;
00121     }
00122 
00123     /*
00124      * The input buffer should be empty now
00125      */
00126     PR_ASSERT(inputHead == inputTail);
00127 
00128     nRead = PR_Read(fd, inputBuf, sizeof(inputBuf));
00129     if (nRead == -1) {
00130        *dst = '\0';
00131        return -1;
00132     } else if (nRead == 0) {
00133        endOfStream = PR_TRUE;
00134        *dst = '\0';
00135        return dst - buf;
00136     }
00137     inputHead = inputBuf;
00138     inputTail = inputBuf + nRead;
00139     goto loop;
00140 }
00141 
00142 PRInt32 DrainInputBuffer(char *buf, PRUint32 bufSize)
00143 {
00144     PRInt32 nBytes = inputTail - inputHead;
00145 
00146     if (nBytes == 0) {
00147        if (endOfStream) {
00148            return -1;
00149        } else {
00150            return 0;
00151        }
00152     }
00153     if ((PRInt32) bufSize < nBytes) {
00154        nBytes = bufSize;
00155     }
00156     memcpy(buf, inputHead, nBytes);
00157     inputHead += nBytes;
00158     return nBytes;
00159 }
00160 
00161 PRStatus FetchFile(PRFileDesc *in, PRFileDesc *out)
00162 {
00163     char buf[FCOPY_BUFFER_SIZE];
00164     PRInt32 nBytes;
00165 
00166     while ((nBytes = DrainInputBuffer(buf, sizeof(buf))) > 0) {
00167        if (PR_Write(out, buf, nBytes) != nBytes) {
00168             fprintf(stderr, "httpget: cannot write to file\n");
00169            return PR_FAILURE;
00170        }
00171     }
00172     if (nBytes < 0) {
00173        /* Input buffer is empty and end of stream */
00174        return PR_SUCCESS;
00175     }
00176     while ((nBytes = PR_Read(in, buf, sizeof(buf))) > 0) {
00177        if (PR_Write(out, buf, nBytes) != nBytes) {
00178            fprintf(stderr, "httpget: cannot write to file\n");
00179            return PR_FAILURE;
00180         }
00181     }
00182     if (nBytes < 0) {
00183        fprintf(stderr, "httpget: cannot read from socket\n");
00184        return PR_FAILURE;
00185     }
00186     return PR_SUCCESS;
00187 }
00188 
00189 PRStatus FastFetchFile(PRFileDesc *in, PRFileDesc *out, PRUint32 size)
00190 {
00191     PRInt32 nBytes;
00192     PRFileMap *outfMap;
00193     void *addr;
00194     char *start;
00195     PRUint32 rem;
00196     PRUint32 bytesToRead;
00197     PRStatus rv;
00198     PRInt64 sz64;
00199 
00200     LL_UI2L(sz64, size);
00201     outfMap = PR_CreateFileMap(out, sz64, PR_PROT_READWRITE);
00202     PR_ASSERT(outfMap);
00203     addr = PR_MemMap(outfMap, LL_ZERO, size);
00204     if (addr == (void *) -1) {
00205        fprintf(stderr, "cannot memory-map file: (%d, %d)\n", PR_GetError(),
00206               PR_GetOSError());
00207 
00208        PR_CloseFileMap(outfMap);
00209        return PR_FAILURE;
00210     }
00211     PR_ASSERT(addr != (void *) -1);
00212     start = (char *) addr;
00213     rem = size;
00214     while ((nBytes = DrainInputBuffer(start, rem)) > 0) {
00215        start += nBytes;
00216        rem -= nBytes;
00217     }
00218     if (nBytes < 0) {
00219        /* Input buffer is empty and end of stream */
00220        return PR_SUCCESS;
00221     }
00222     bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
00223     while (rem > 0 && (nBytes = PR_Read(in, start, bytesToRead)) > 0) {
00224        start += nBytes;
00225        rem -= nBytes;
00226         bytesToRead = (rem < FCOPY_BUFFER_SIZE) ? rem : FCOPY_BUFFER_SIZE;
00227     }
00228     if (nBytes < 0) {
00229        fprintf(stderr, "httpget: cannot read from socket\n");
00230        return PR_FAILURE;
00231     }
00232     rv = PR_MemUnmap(addr, size);
00233     PR_ASSERT(rv == PR_SUCCESS);
00234     rv = PR_CloseFileMap(outfMap);
00235     PR_ASSERT(rv == PR_SUCCESS);
00236     return PR_SUCCESS;
00237 }
00238 
00239 PRStatus ParseURL(char *url, char *host, PRUint32 hostSize,
00240     char *port, PRUint32 portSize, char *path, PRUint32 pathSize)
00241 {
00242     char *start, *end;
00243     char *dst;
00244     char *hostEnd;
00245     char *portEnd;
00246     char *pathEnd;
00247 
00248     if (strncmp(url, "http", 4)) {
00249        fprintf(stderr, "httpget: the protocol must be http\n");
00250        return PR_FAILURE;
00251     }
00252     if (strncmp(url + 4, "://", 3) || url[7] == '\0') {
00253        fprintf(stderr, "httpget: malformed URL: %s\n", url);
00254        return PR_FAILURE;
00255     }
00256 
00257     start = end = url + 7;
00258     dst = host;
00259     hostEnd = host + hostSize;
00260     while (*end && *end != ':' && *end != '/') {
00261        if (dst == hostEnd - 1) {
00262            fprintf(stderr, "httpget: host name too long\n");
00263            return PR_FAILURE;
00264        }
00265        *(dst++) = *(end++);
00266     }
00267     *dst = '\0';
00268 
00269     if (*end == '\0') {
00270        PR_snprintf(port, portSize, "%d", 80);
00271        PR_snprintf(path, pathSize, "%s", "/");
00272        return PR_SUCCESS;
00273     }
00274 
00275     if (*end == ':') {
00276        end++;
00277        dst = port;
00278        portEnd = port + portSize;
00279        while (*end && *end != '/') {
00280            if (dst == portEnd - 1) {
00281               fprintf(stderr, "httpget: port number too long\n");
00282               return PR_FAILURE;
00283            }
00284            *(dst++) = *(end++);
00285         }
00286        *dst = '\0';
00287        if (*end == '\0') {
00288            PR_snprintf(path, pathSize, "%s", "/");
00289            return PR_SUCCESS;
00290         }
00291     } else {
00292        PR_snprintf(port, portSize, "%d", 80);
00293     }
00294 
00295     dst = path;
00296     pathEnd = path + pathSize;
00297     while (*end) {
00298        if (dst == pathEnd - 1) {
00299            fprintf(stderr, "httpget: file pathname too long\n");
00300            return PR_FAILURE;
00301        }
00302        *(dst++) = *(end++);
00303     }
00304     *dst = '\0';
00305     return PR_SUCCESS;
00306 }
00307 
/*
 * PrintUsage --
 *
 * Print the three accepted command-line forms to stderr.
 */
void PrintUsage(void)
{
    static const char usage[] =
        "usage: httpget url\n"
        "       httpget -o outputfile url\n"
        "       httpget url -o outputfile\n";

    fputs(usage, stderr);
}
00313 
00314 int main(int argc, char **argv)
00315 {
00316     PRHostEnt hostentry;
00317     char buf[PR_NETDB_BUF_SIZE];
00318     PRNetAddr addr;
00319     PRFileDesc *socket = NULL, *file = NULL;
00320     PRIntn cmdSize;
00321     char host[HOST_SIZE];
00322     char port[PORT_SIZE];
00323     char path[PATH_SIZE];
00324     char line[LINE_SIZE];
00325     int exitStatus = 0;
00326     PRBool endOfHeader = PR_FALSE;
00327     char *url;
00328     char *fileName = NULL;
00329     PRUint32 fileSize;
00330 
00331     if (argc != 2 && argc != 4) {
00332        PrintUsage();
00333        exit(1);
00334     }
00335 
00336     if (argc == 2) {
00337        /*
00338         * case 1: httpget url
00339         */
00340        url = argv[1];
00341     } else {
00342        if (strcmp(argv[1], "-o") == 0) {
00343            /*
00344             * case 2: httpget -o outputfile url
00345             */
00346            fileName = argv[2];
00347            url = argv[3];
00348         } else {
00349            /*
00350             * case 3: httpget url -o outputfile
00351             */
00352            url = argv[1];
00353            if (strcmp(argv[2], "-o") != 0) {
00354               PrintUsage();
00355               exit(1);
00356             }
00357            fileName = argv[3];
00358        }
00359     }
00360 
00361     if (ParseURL(url, host, sizeof(host), port, sizeof(port),
00362            path, sizeof(path)) == PR_FAILURE) {
00363        exit(1);
00364     }
00365 
00366     if (PR_GetHostByName(host, buf, sizeof(buf), &hostentry)
00367            == PR_FAILURE) {
00368         fprintf(stderr, "httpget: unknown host name: %s\n", host);
00369        exit(1);
00370     }
00371 
00372     addr.inet.family = PR_AF_INET;
00373     addr.inet.port = PR_htons((short) atoi(port));
00374     addr.inet.ip = *((PRUint32 *) hostentry.h_addr_list[0]);
00375 
00376     socket = PR_NewTCPSocket();
00377     if (socket == NULL) {
00378        fprintf(stderr, "httpget: cannot create new tcp socket\n");
00379        exit(1);
00380     }
00381 
00382     if (PR_Connect(socket, &addr, PR_INTERVAL_NO_TIMEOUT) == PR_FAILURE) {
00383        fprintf(stderr, "httpget: cannot connect to http server\n");
00384        exitStatus = 1;
00385        goto done;
00386     }
00387 
00388     if (fileName == NULL) {
00389        file = PR_STDOUT;
00390     } else {
00391         file = PR_Open(fileName, PR_RDWR | PR_CREATE_FILE | PR_TRUNCATE,
00392               00777);
00393         if (file == NULL) {
00394            fprintf(stderr, "httpget: cannot open file %s: (%d, %d)\n",
00395                   fileName, PR_GetError(), PR_GetOSError());
00396            exitStatus = 1;
00397            goto done;
00398        }
00399     }
00400 
00401     cmdSize = PR_snprintf(buf, sizeof(buf), "GET %s HTTP/1.0\r\n\r\n", path);
00402     PR_ASSERT(cmdSize == (PRIntn) strlen("GET  HTTP/1.0\r\n\r\n")
00403             + (PRIntn) strlen(path));
00404     if (PR_Write(socket, buf, cmdSize) != cmdSize) {
00405        fprintf(stderr, "httpget: cannot write to http server\n");
00406        exitStatus = 1;
00407        goto done;
00408     }
00409 
00410     if (ReadLine(socket, line, sizeof(line)) <= 0) {
00411        fprintf(stderr, "httpget: cannot read line from http server\n");
00412        exitStatus = 1;
00413        goto done;
00414     }
00415 
00416     /* HTTP response: 200 == OK */
00417     if (strstr(line, "200") == NULL) {
00418        fprintf(stderr, "httpget: %s\n", line);
00419        exitStatus = 1;
00420        goto done;
00421     }
00422 
00423     while (ReadLine(socket, line, sizeof(line)) > 0) {
00424        if (line[0] == '\n') {
00425            endOfHeader = PR_TRUE;
00426            break;
00427        }
00428        if (strncmp(line, "Content-Length", 14) == 0
00429               || strncmp(line, "Content-length", 14) == 0) {
00430            char *p = line + 14;
00431 
00432            while (*p == ' ' || *p == '\t') {
00433               p++;
00434            }
00435            if (*p != ':') {
00436               continue;
00437             }
00438            p++;
00439            while (*p == ' ' || *p == '\t') {
00440               p++;
00441            }
00442            fileSize = 0;
00443            while ('0' <= *p && *p <= '9') {
00444               fileSize = 10 * fileSize + (*p - '0');
00445               p++;
00446             }
00447        }
00448     }
00449     if (endOfHeader == PR_FALSE) {
00450        fprintf(stderr, "httpget: cannot read line from http server\n");
00451        exitStatus = 1;
00452        goto done;
00453     }
00454 
00455     if (fileName == NULL || fileSize == 0) {
00456         FetchFile(socket, file);
00457     } else {
00458        FastFetchFile(socket, file, fileSize);
00459     }
00460 
00461 done:
00462     if (socket) PR_Close(socket);
00463     if (file) PR_Close(file);
00464     PR_Cleanup();
00465     return exitStatus;
00466 }