Back to index

wims  3.65+svn20090927
webget.c
Go to the documentation of this file.
00001 /*    Copyright (C) 1998-2003 XIAO, Gang of Universite de Nice - Sophia Antipolis
00002  *
00003  *  This program is free software; you can redistribute it and/or modify
00004  *  it under the terms of the GNU General Public License as published by
00005  *  the Free Software Foundation; either version 2 of the License, or
00006  *  (at your option) any later version.
00007  *
00008  *  This program is distributed in the hope that it will be useful,
00009  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011  *  GNU General Public License for more details.
00012  *
00013  *  You should have received a copy of the GNU General Public License
00014  *  along with this program; if not, write to the Free Software
00015  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00016  */
00017 
00018  /* This is a small program used simply to fetch web pages.
00019   * No fancy functionality such as redirect following or site mirroring
00020   * is present. The fetched page is written to stdout, or to a file under
00021   * session_dir when w_webget_output names one. */
00022 
00023 #include "../wims.h"
00024 #include <netdb.h>
00025 #include <sys/socket.h>
00026 #include <netinet/in.h>
00027 
/* Fixed header lines inserted into every generated GET request. */
char *cheater1 = "User-Agent: WIMS-webget";
char *cheater2 =
    "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\n"
    "Accept-Encoding: gzip\r\n"
    "Accept-Language: en, fr, it, de, es\r\n"
    "Accept-Charset: iso-8859-1,*,utf-8";

/* pbuf: copy of the request parameter, later reused as one-byte read buffer. */
char pbuf[4096];
/* tbuf: text of the HTTP request built by main(). */
char tbuf[4096];
/* Path of the temporary file holding an https response. */
char tfname[1024];
/* Directory for temporary files; gethttps() may replace it from $tmp_dir. */
char *tmpdir = "/tmp";
/* Descriptor the response is read from. */
int soc;
/* Remote port number. */
int port;
/* Non-zero when the requested URL uses the https scheme. */
int https;
/* Number of response bytes copied to outf so far. */
int charcnt;
/* Stream that receives the fetched data. */
FILE *outf;
00039 
/* Print "<msg>: <description of the current errno>" on stderr and
 * terminate the program with exit status 1. Never returns. */
void errorquit(char *msg)
{
    const char *reason = strerror(errno);

    fprintf(stderr, "%s: %s\n", msg, reason);
    exit(1);
}
00044 
00045        /* Points to the end of the word */
00046 char *find_word_end(char *p)
00047 {
00048     int i;
00049     for(i=0;!isspace(*p) && *p!=0 && i<MAX_LINELEN; p++,i++);
00050     return p;
00051 }
00052 
00053        /* Strips leading spaces */
00054 char *find_word_start(char *p)
00055 {
00056     int i;
00057     for(i=0; isspace(*p) && i<MAX_LINELEN; p++,i++);
00058     return p;
00059 }
00060 
00061        /* Secured execution */
00062 void secure(char *host)
00063 {
00064     char *p1, *p2, *p3, buf[MAX_LINELEN+1];
00065     long int l;
00066     FILE *f;
00067 
00068     p1=getenv("w_module"); if(p1==NULL || *p1==0) return;
00069     p1=getenv("untrust"); if(p1==NULL || *p1==0) return;
00070     f=fopen("webget.sites","r"); if(f==NULL) return;
00071     l=fread(buf,1,MAX_LINELEN,f); fclose(f);
00072     if(l<=0 || l>MAX_LINELEN) return;
00073     buf[l]=0;
00074     for(p1=find_word_start(buf);*p1;p1=find_word_start(p2)) {
00075        p2=find_word_end(p1); if(*p2) *p2++=0;
00076        p3=strstr(host,p1); if(p3==NULL) continue;
00077        if((p3==host || *(p3-1)=='.') && *(p3+strlen(p1))==0) return;
00078     }
00079     exit(1);  /* unauthorized sites refused. */
00080 }
00081 
00082        /* open a TCP/IP socket with host/port
00083         * returns the file descriptor for the socket */
00084 int net_connect(char *host)
00085 {
00086     struct hostent *hp;
00087     struct sockaddr_in sin;
00088     int soc;
00089 
00090     secure(host);
00091     if(!(hp = gethostbyname(host))) errorquit("unknown host.");
00092     if((soc = socket(hp->h_addrtype,SOCK_STREAM,0))<0)
00093       errorquit("socket() error");
00094     memmove(&sin.sin_addr,hp->h_addr,hp->h_length);
00095     sin.sin_port=htons(port);
00096     sin.sin_family = hp->h_addrtype;
00097     if(connect(soc,(struct sockaddr *)&sin,sizeof(sin))<0) {
00098        close(soc); errorquit("connect() error");
00099     }
00100     return soc;
00101 }
00102 
00103 int gethttps(char *host)
00104 {
00105     char buf[65536];
00106     char *tp;
00107     
00108     tp=getenv("tmp_dir"); if(tp!=NULL && *tp!=0) tmpdir=tp;    
00109     snprintf(tfname,sizeof(tfname),"%s/https.tmp",tmpdir);
00110     snprintf(buf,sizeof(buf),"\
00111 mkdir -p %s\n\
00112 openssl s_client -connect %s:%d -quiet 2>/dev/null >%s <<@\n\
00113 %s\n\
00114 @\n", tmpdir,host,port,tfname,tbuf);
00115     system(buf);
00116     return open(tfname,O_RDONLY);
00117 }
00118 
00119 int main(int argc, char *argv[])
00120 {
00121     char *parm, *pt, *p1, *p2, *p3, *p4, *dp, *pre;
00122     char nbuf[4096], *pp1, *pp2;
00123     char c;
00124     
00125     parm=getenv("wims_exec_parm");
00126     if(parm==NULL || *parm==0) errorquit("no_parameter");
00127     snprintf(pbuf,sizeof(pbuf),"%s",parm);
00128     p1=find_word_start(pbuf); p2=find_word_end(p1);
00129     if(*p2!=0) *p2++=0; https=0;
00130     outf=stdout; pp1=getenv("w_webget_output");
00131     pp2=getenv("session_dir");
00132     if(pp1!=NULL && strstr(pp1,"..")==NULL && isalnum(*pp1) && pp2!=NULL) {
00133        snprintf(nbuf,sizeof(nbuf),"%s/%s",pp2,pp1);
00134        outf=fopen(nbuf,"w"); if(outf==NULL) outf=stdout;
00135     }
00136     dp=getenv("w_webget_option");
00137     if(dp!=NULL && strstr(dp,"direct")!=NULL) {  /* direct get */
00138        p1=getenv("w_webget_host");
00139        p2=getenv("w_webget_port");
00140        if(p1==NULL || p2==NULL) errorquit("incomplete_request");
00141        port=atoi(p2);
00142        soc=net_connect(p1); if(soc==-1) return 1;
00143        c=' '; for(p3=parm; *p3; p3++) {
00144            if(*p3=='\n' && c!='\r') write(soc,"\r",1);
00145            write(soc,p3,1); c=*p3;
00146        }
00147        write(soc,"\r\n\r\n",4);
00148        pt=getenv("w_module");
00149        if(pt==NULL || *pt==0 || strncmp(pt,"adm/",4)==0 ) {  /* File to post? */
00150            pt=getenv("w_webget_post"); if(pt!=NULL && *pt!=0) {
00151               FILE *f;
00152               char buf[4096];
00153               size_t l;
00154               f=fopen(pt,"r"); if(f!=NULL) {
00155                   do {
00156                      l=fread(buf,1,sizeof(buf),f);
00157                      if(l>0 && l<=sizeof(buf)) write(soc,buf,l);
00158                   } while(l==sizeof(buf));
00159                   fclose(f);
00160               }
00161            }
00162        }
00163        if(strstr(dp,"normalread")!=NULL) goto read;
00164        charcnt=0;
00165        while(read(soc,pbuf,1)>0 && charcnt<10240) {
00166            fputc(pbuf[0],outf); charcnt++;
00167        }
00168        close(soc);
00169        return 0;
00170     }
00171     if(strncasecmp(p1,"http://",strlen("http://"))==0) p1+=strlen("http://");
00172     else if(strncasecmp(p1,"https://",strlen("https://"))==0) {
00173        https=1; p1+=strlen("https://");
00174     }
00175     p3=strchr(p1,'/'); if(p3==NULL) p3="";
00176     else {*p3++=0; while(*p3=='/') p3++;}
00177     if(strncasecmp(p3,"http://",strlen("http://"))==0 ||
00178        strncasecmp(p3,"https://",strlen("https://"))==0) pre="";
00179     else pre="/";
00180     snprintf(tbuf,sizeof(tbuf),"GET %s%s HTTP/1.0\r\n%s\r\n\
00181 Host: %s\r\n\
00182 %s\r\n\r\n",
00183             pre,p3,cheater1,p1,cheater2);
00184     p4=strchr(p1,':'); if(p4==NULL) {
00185        if(https) port=443; else port=80;
00186     }
00187     else {*p4++=0; port=atoi(p4);}
00188     if(https) {
00189        soc=gethttps(p1); goto read;
00190     }
00191     soc=net_connect(p1);
00192     write(soc,tbuf,strlen(tbuf));
00193        /* header */
00194     read: if(soc==-1) return 1;
00195     c=-1;
00196     while(read(soc,pbuf,1)>0) {
00197        if(pbuf[0]=='\r') continue;
00198        fputc(pbuf[0],stderr);
00199        if((c=='\n') && (pbuf[0]=='\n')) break; else c=pbuf[0];
00200     }
00201        /* body */
00202     charcnt=0;
00203     while(read(soc,pbuf,1)>0 && charcnt<MAX_WEBGETFLEN) {
00204        fputc(pbuf[0],outf); charcnt++;
00205     }
00206     close(soc);
00207     if(outf!=stdout) fclose(outf);
00208     if(https) unlink(tfname);
00209     return 0;
00210 }
00211