Back to index

php5  5.3.10
compress.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (c) Ian F. Darwin 1986-1995.
00003  * Software written by Ian F. Darwin and others;
00004  * maintained 1995-present by Christos Zoulas and others.
00005  * 
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  * 1. Redistributions of source code must retain the above copyright
00010  *    notice immediately at the beginning of the file, without modification,
00011  *    this list of conditions, and the following disclaimer.
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  *    notice, this list of conditions and the following disclaimer in the
00014  *    documentation and/or other materials provided with the distribution.
00015  *  
00016  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00017  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00018  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00019  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
00020  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00021  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00022  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00023  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00024  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00025  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00026  * SUCH DAMAGE.
00027  */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if not recognized, uncompresses and prints
 *			information if recognized
 *	uncompressbuf(ms, fd, method, old, newch, n) - uncompress old into
 *			*newch using method, return the size of the new buffer
 */
00035 #include "config.h"
00036 #include "file.h"
00037 
00038 #ifndef lint
00039 FILE_RCSID("@(#)$File: compress.c,v 1.63 2009/03/23 14:21:51 christos Exp $")
00040 #endif
00041 
00042 #include "magic.h"
00043 #include <stdlib.h>
00044 #ifdef HAVE_UNISTD_H
00045 #include <unistd.h>
00046 #endif
00047 #include <string.h>
00048 #include <errno.h>
00049 #include <sys/types.h>
00050 #ifndef PHP_WIN32
00051 #include <sys/ioctl.h>
00052 #endif
00053 #ifdef HAVE_SYS_WAIT_H
00054 #include <sys/wait.h>
00055 #endif
00056 #if defined(HAVE_SYS_TIME_H)
00057 #include <sys/time.h>
00058 #endif
00059 #if defined(HAVE_ZLIB_H) && defined(HAVE_LIBZ)
00060 #define BUILTIN_DECOMPRESS
00061 #include <zlib.h>
00062 #endif
00063 
00064 #undef FIONREAD
00065 
00066 
/*
 * Table of recognised compressed formats.  Each entry gives the leading
 * magic bytes, how many of them to compare, the argv of the external
 * program that decompresses the format from stdin to stdout, and whether
 * that program's stderr should be closed before it is exec'ed.
 *
 * Entries are tried in order.  gzip is listed ahead of uncompress for the
 * "\037\235" magic because uncompress can get stuck
 * (idea from Damien Clark, thanks!).
 */
private const struct {
	const char magic[8];	/* leading bytes identifying the format */
	size_t maglen;		/* number of magic bytes to compare */
	const char *argv[3];	/* NULL-terminated decompressor command */
	int silent;		/* non-zero: close stderr in the child */
} compr[] = {
	/* compressed */
	{ "\037\235", 2, { "gzip", "-cdq", NULL }, 1 },
	/* compressed (fallback when gzip is not available) */
	{ "\037\235", 2, { "uncompress", "-c", NULL }, 1 },
	/* gzipped */
	{ "\037\213", 2, { "gzip", "-cdq", NULL }, 1 },
	/* frozen */
	{ "\037\236", 2, { "gzip", "-cdq", NULL }, 1 },
	/* SCO LZH */
	{ "\037\240", 2, { "gzip", "-cdq", NULL }, 1 },
	/* packed; the standard pack utilities do not accept standard input */
	{ "\037\036", 2, { "gzip", "-cdq", NULL }, 0 },
	/* pkzipped, ...only first file examined */
	{ "PK\3\4", 4, { "gzip", "-cdq", NULL }, 1 },
	/* bzip2-ed */
	{ "BZh", 3, { "bzip2", "-cd", NULL }, 1 },
	/* lzip */
	{ "LZIP", 4, { "lzip", "-cdq", NULL }, 1 },
	/* XZ Utils */
	{ "\3757zXZ\0", 6, { "xz", "-cd", NULL }, 1 },
};
00088 
/* Sentinel size (all bits set) that uncompressbuf() returns on failure. */
#define NODATA (~(size_t)0)

/* Write all n bytes of buf to fd; defined further down in this file. */
private ssize_t swrite(int fd, const void *buf, size_t n);
00092 #ifdef PHP_FILEINFO_UNCOMPRESS
00093 private size_t uncompressbuf(struct magic_set *, int, size_t,
00094     const unsigned char *, unsigned char **, size_t);
00095 #ifdef BUILTIN_DECOMPRESS
00096 private size_t uncompressgzipped(struct magic_set *, const unsigned char *,
00097     unsigned char **, size_t);
00098 #endif
00099 
00100 protected int
00101 file_zmagic(struct magic_set *ms, int fd, const char *name,
00102     const unsigned char *buf, size_t nbytes)
00103 {
00104        unsigned char *newbuf = NULL;
00105        size_t i, nsz;
00106        int rv = 0;
00107        int mime = ms->flags & MAGIC_MIME;
00108        size_t ncompr;
00109 
00110        if ((ms->flags & MAGIC_COMPRESS) == 0)
00111               return 0;
00112 
00113        ncompr = sizeof(compr) / sizeof(compr[0]);
00114 
00115        for (i = 0; i < ncompr; i++) {
00116               if (nbytes < compr[i].maglen)
00117                      continue;
00118               if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 &&
00119                   (nsz = uncompressbuf(ms, fd, i, buf, &newbuf,
00120                   nbytes)) != NODATA) {
00121                      ms->flags &= ~MAGIC_COMPRESS;
00122                      rv = -1;
00123                      if (file_buffer(ms, -1, name, newbuf, nsz) == -1)
00124                             goto error;
00125 
00126                      if (mime == MAGIC_MIME || mime == 0) {
00127                             if (file_printf(ms, mime ?
00128                                 " compressed-encoding=" : " (") == -1)
00129                                    goto error;
00130                      }
00131 
00132                      if ((mime == 0 || mime & MAGIC_MIME_ENCODING) &&
00133                          file_buffer(ms, -1, NULL, buf, nbytes) == -1)
00134                             goto error;
00135 
00136                      if (!mime && file_printf(ms, ")") == -1)
00137                             goto error;
00138                      rv = 1;
00139                      break;
00140               }
00141        }
00142 error:
00143        if (newbuf)
00144               efree(newbuf);
00145        ms->flags |= MAGIC_COMPRESS;
00146        return rv;
00147 }
00148 #endif
00149 
/*
 * `safe' write for sockets and pipes.
 *
 * Writes all n bytes of buf to fd, retrying write(2) when it is
 * interrupted by a signal (EINTR) and continuing after short writes.
 * write(2) is always called at least once, even when n is 0.
 *
 * Returns n on success, or -1 if write(2) fails with any error other
 * than EINTR (errno is left as set by write(2)).
 */
private ssize_t
swrite(int fd, const void *buf, size_t n)
{
	/* write(2) returns ssize_t; storing it in an int could truncate */
	ssize_t rv;
	size_t rn = n;

	do {
		rv = write(fd, buf, n);
		if (rv == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		n -= (size_t)rv;
		buf = ((const char *)buf) + rv;
	} while (n > 0);

	return (ssize_t)rn;
}
00173 
00174 
/*
 * `safe' read for sockets and pipes.
 *
 * Reads from fd into buf until n bytes have been read or end-of-file is
 * reached, retrying read(2) when it is interrupted by a signal (EINTR).
 *
 * When FIONREAD is available and fd is not standard input, the
 * descriptor is first polled with select(2) (100ms timeout) so that an
 * idle pipe or socket does not block the caller, and the request is
 * capped at the number of bytes the kernel reports as available.
 * canbepipe is only consulted in that FIONREAD path.
 *
 * Returns the number of bytes read (less than n only at end-of-file,
 * or when capped by FIONREAD), 0 if nothing could be read, or -1 on a
 * read error.
 */
protected ssize_t
sread(int fd, void *buf, size_t n, int canbepipe)
{
	/* read(2) returns ssize_t; storing it in an int could truncate */
	ssize_t rv;
#ifdef FIONREAD
	int t = 0;
#endif
	size_t rn = n;

	if (fd == STDIN_FILENO)
		goto nocheck;

#ifdef FIONREAD
	if ((canbepipe && (ioctl(fd, FIONREAD, &t) == -1)) || (t == 0)) {
#ifdef FD_ZERO
		int cnt;
		for (cnt = 0;; cnt++) {
			fd_set check;
			struct timeval tout = {0, 100 * 1000};
			int selrv;

			FD_ZERO(&check);
			FD_SET(fd, &check);

			/*
			 * Avoid soft deadlock: do not read if there
			 * is nothing to read from sockets and pipes.
			 */
			selrv = select(fd + 1, &check, NULL, NULL, &tout);
			if (selrv == -1) {
				if (errno == EINTR || errno == EAGAIN)
					continue;
				/*
				 * Any other failure will not go away by
				 * retrying; stop polling instead of
				 * spinning forever and let read(2) below
				 * report the problem.
				 */
				break;
			}
			if (selrv == 0 && cnt >= 5)
				return 0;
			break;
		}
#endif
		(void)ioctl(fd, FIONREAD, &t);
	}

	/* never ask for more than is known to be available */
	if (t > 0 && (size_t)t < n) {
		n = t;
		rn = n;
	}
#endif

nocheck:
	do {
		rv = read(fd, buf, n);
		if (rv == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (rv == 0)	/* end-of-file: report what we got so far */
			return (ssize_t)(rn - n);
		n -= (size_t)rv;
		buf = ((char *)buf) + rv;
	} while (n > 0);

	return (ssize_t)rn;
}
00242 
00243 protected int
00244 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
00245     size_t nbytes)
00246 {
00247        char buf[4096];
00248        int r, tfd;
00249 
00250        (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
00251 #ifndef HAVE_MKSTEMP
00252        {
00253               char *ptr = mktemp(buf);
00254               tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
00255               r = errno;
00256               (void)unlink(ptr);
00257               errno = r;
00258        }
00259 #else
00260        tfd = mkstemp(buf);
00261        r = errno;
00262        (void)unlink(buf);
00263        errno = r;
00264 #endif
00265        if (tfd == -1) {
00266               file_error(ms, errno,
00267                   "cannot create temporary file for pipe copy");
00268               return -1;
00269        }
00270 
00271        if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes)
00272               r = 1;
00273        else {
00274               while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
00275                      if (swrite(tfd, buf, (size_t)r) != r)
00276                             break;
00277        }
00278 
00279        switch (r) {
00280        case -1:
00281               file_error(ms, errno, "error copying from pipe to temp file");
00282               return -1;
00283        case 0:
00284               break;
00285        default:
00286               file_error(ms, errno, "error while writing to temp file");
00287               return -1;
00288        }
00289 
00290        /*
00291         * We duplicate the file descriptor, because fclose on a
00292         * tmpfile will delete the file, but any open descriptors
00293         * can still access the phantom inode.
00294         */
00295        if ((fd = dup2(tfd, fd)) == -1) {
00296               file_error(ms, errno, "could not dup descriptor for temp file");
00297               return -1;
00298        }
00299        (void)close(tfd);
00300        if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
00301               file_badseek(ms);
00302               return -1;
00303        }
00304        return fd;
00305 }
00306 
00307 #ifdef PHP_FILEINFO_UNCOMPRESS
00308 #ifdef BUILTIN_DECOMPRESS
00309 
00310 #define FHCRC        (1 << 1)
00311 #define FEXTRA              (1 << 2)
00312 #define FNAME        (1 << 3)
00313 #define FCOMMENT     (1 << 4)
00314 
00315 
00316 private size_t
00317 uncompressgzipped(struct magic_set *ms, const unsigned char *old,
00318     unsigned char **newch, size_t n)
00319 {
00320        unsigned char flg = old[3];
00321        size_t data_start = 10;
00322        z_stream z;
00323        int rc;
00324 
00325        if (flg & FEXTRA) {
00326               if (data_start+1 >= n)
00327                      return 0;
00328               data_start += 2 + old[data_start] + old[data_start + 1] * 256;
00329        }
00330        if (flg & FNAME) {
00331               while(data_start < n && old[data_start])
00332                      data_start++;
00333               data_start++;
00334        }
00335        if(flg & FCOMMENT) {
00336               while(data_start < n && old[data_start])
00337                      data_start++;
00338               data_start++;
00339        }
00340        if(flg & FHCRC)
00341               data_start += 2;
00342 
00343        if (data_start >= n)
00344               return 0;
00345        *newch = (unsigned char *)emalloc(HOWMANY + 1));
00346        
00347        /* XXX: const castaway, via strchr */
00348        z.next_in = (Bytef *)strchr((const char *)old + data_start,
00349            old[data_start]);
00350        z.avail_in = n - data_start;
00351        z.next_out = *newch;
00352        z.avail_out = HOWMANY;
00353        z.zalloc = Z_NULL;
00354        z.zfree = Z_NULL;
00355        z.opaque = Z_NULL;
00356 
00357        rc = inflateInit2(&z, -15);
00358        if (rc != Z_OK) {
00359               file_error(ms, 0, "zlib: %s", z.msg);
00360               return 0;
00361        }
00362 
00363        rc = inflate(&z, Z_SYNC_FLUSH);
00364        if (rc != Z_OK && rc != Z_STREAM_END) {
00365               file_error(ms, 0, "zlib: %s", z.msg);
00366               return 0;
00367        }
00368 
00369        n = (size_t)z.total_out;
00370        (void)inflateEnd(&z);
00371        
00372        /* let's keep the nul-terminate tradition */
00373        (*newch)[n] = '\0';
00374 
00375        return n;
00376 }
00377 #endif
00378 
00379 private size_t
00380 uncompressbuf(struct magic_set *ms, int fd, size_t method,
00381     const unsigned char *old, unsigned char **newch, size_t n)
00382 {
00383        int fdin[2], fdout[2];
00384        int r;
00385 
00386 #ifdef BUILTIN_DECOMPRESS
00387         /* FIXME: This doesn't cope with bzip2 */
00388        if (method == 2)
00389               return uncompressgzipped(ms, old, newch, n);
00390 #endif
00391        (void)fflush(stdout);
00392        (void)fflush(stderr);
00393 
00394        if ((fd != -1 && pipe(fdin) == -1) || pipe(fdout) == -1) {
00395               file_error(ms, errno, "cannot create pipe");     
00396               return NODATA;
00397        }
00398        switch (fork()) {
00399        case 0:       /* child */
00400               (void) close(0);
00401               if (fd != -1) {
00402                   (void) dup(fd);
00403                   (void) lseek(0, (off_t)0, SEEK_SET);
00404               } else {
00405                   (void) dup(fdin[0]);
00406                   (void) close(fdin[0]);
00407                   (void) close(fdin[1]);
00408               }
00409 
00410               (void) close(1);
00411               (void) dup(fdout[1]);
00412               (void) close(fdout[0]);
00413               (void) close(fdout[1]);
00414 #ifndef DEBUG
00415               if (compr[method].silent)
00416                      (void)close(2);
00417 #endif
00418 
00419               (void)execvp(compr[method].argv[0],
00420                   (char *const *)(intptr_t)compr[method].argv);
00421 #ifdef DEBUG
00422               (void)fprintf(stderr, "exec `%s' failed (%s)\n",
00423                   compr[method].argv[0], strerror(errno));
00424 #endif
00425               exit(1);
00426               /*NOTREACHED*/
00427        case -1:
00428               file_error(ms, errno, "could not fork");
00429               return NODATA;
00430 
00431        default: /* parent */
00432               (void) close(fdout[1]);
00433               if (fd == -1) {
00434                      (void) close(fdin[0]);
00435                      /* 
00436                       * fork again, to avoid blocking because both
00437                       * pipes filled
00438                       */
00439                      switch (fork()) {
00440                      case 0: /* child */
00441                             (void)close(fdout[0]);
00442                             if (swrite(fdin[1], old, n) != (ssize_t)n) {
00443 #ifdef DEBUG
00444                                    (void)fprintf(stderr,
00445                                        "Write failed (%s)\n",
00446                                        strerror(errno));
00447 #endif
00448                                    exit(1);
00449                             }
00450                             exit(0);
00451                             /*NOTREACHED*/
00452 
00453                      case -1:
00454 #ifdef DEBUG
00455                             (void)fprintf(stderr, "Fork failed (%s)\n",
00456                                 strerror(errno));
00457 #endif
00458                             exit(1);
00459                             /*NOTREACHED*/
00460 
00461                      default:  /* parent */
00462                             break;
00463                      }
00464                      (void) close(fdin[1]);
00465                      fdin[1] = -1;
00466               }
00467 
00468               *newch = (unsigned char *) emalloc(HOWMANY + 1);
00469 
00470               if ((r = sread(fdout[0], *newch, HOWMANY, 0)) <= 0) {
00471 #ifdef DEBUG
00472                      (void)fprintf(stderr, "Read failed (%s)\n",
00473                          strerror(errno));
00474 #endif
00475                      efree(*newch);
00476                      n = 0;
00477                      newch[0] = '\0';
00478                      goto err;
00479               } else {
00480                      n = r;
00481               }
00482               /* NUL terminate, as every buffer is handled here. */
00483               (*newch)[n] = '\0';
00484 err:
00485               if (fdin[1] != -1)
00486                      (void) close(fdin[1]);
00487               (void) close(fdout[0]);
00488 #ifdef WNOHANG
00489               while (waitpid(-1, NULL, WNOHANG) != -1)
00490                      continue;
00491 #else
00492               (void)wait(NULL);
00493 #endif
00494               (void) close(fdin[0]);
00495            
00496               return n;
00497        }
00498 }
00499 #endif /* if PHP_FILEINFO_UNCOMPRESS */