Back to index

php5  5.3.10
funcs.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (c) Christos Zoulas 2003.
00003  * All Rights Reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  * 1. Redistributions of source code must retain the above copyright
00009  *    notice immediately at the beginning of the file, without modification,
00010  *    this list of conditions, and the following disclaimer.
00011  * 2. Redistributions in binary form must reproduce the above copyright
00012  *    notice, this list of conditions and the following disclaimer in the
00013  *    documentation and/or other materials provided with the distribution.
00014  *
00015  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00016  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00017  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00018  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
00019  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00020  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00021  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00022  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00023  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00024  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00025  * SUCH DAMAGE.
00026  */
00027 #include "file.h"
00028 
00029 #ifndef       lint
00030 FILE_RCSID("@(#)$File: funcs.c,v 1.53 2009/04/07 11:07:00 christos Exp $")
00031 #endif /* lint */
00032 
00033 #include "magic.h"
00034 #include <stdarg.h>
00035 #include <stdlib.h>
00036 #include <string.h>
00037 #include <ctype.h>
00038 #if defined(HAVE_WCHAR_H)
00039 #include <wchar.h>
00040 #endif
00041 #if defined(HAVE_WCTYPE_H)
00042 #include <wctype.h>
00043 #endif
00044 
00045 #ifndef SIZE_MAX 
00046 # define SIZE_MAX ((size_t) -1) 
00047 #endif
00048 
00049 /*
00050  * Like printf, only we append to a buffer.
00051  */
00052 protected int
00053 file_printf(struct magic_set *ms, const char *fmt, ...)
00054 {
00055        va_list ap;
00056        int len;
00057        char *buf = NULL, *newstr;
00058 
00059        va_start(ap, fmt);
00060        len = vspprintf(&buf, 0, fmt, ap);
00061        va_end(ap);
00062 
00063        if (ms->o.buf != NULL) {
00064               len = spprintf(&newstr, 0, "%s%s", ms->o.buf, (buf ? buf : ""));
00065               if (buf) {
00066                      efree(buf);
00067               }
00068               efree(ms->o.buf);
00069               ms->o.buf = newstr;
00070        } else {
00071               ms->o.buf = buf;
00072        }
00073        return 0;
00074 }
00075 
00076 /*
00077  * error - print best error message possible
00078  */
00079 /*VARARGS*/
00080 private void
00081 file_error_core(struct magic_set *ms, int error, const char *f, va_list va,
00082     uint32_t lineno)
00083 {
00084        char *buf = NULL;
00085        
00086        /* Only the first error is ok */
00087        if (ms->event_flags & EVENT_HAD_ERR) {
00088               return;
00089        }
00090        
00091        if (lineno != 0) {
00092               efree(ms->o.buf);
00093               ms->o.buf = NULL;
00094               file_printf(ms, "line %u: ", lineno);
00095        }
00096 
00097        vspprintf(&buf, 0, f, va);
00098        va_end(va);
00099        
00100        if (error > 0) {
00101               file_printf(ms, "%s (%s)", (*buf ? buf : ""), strerror(error));
00102        } else if (*buf) {
00103               file_printf(ms, "%s", buf);
00104        }
00105        
00106        if (buf) {
00107               efree(buf);
00108        }
00109 
00110        ms->event_flags |= EVENT_HAD_ERR;
00111        ms->error = error;
00112 }
00113 
00114 /*VARARGS*/
00115 protected void
00116 file_error(struct magic_set *ms, int error, const char *f, ...)
00117 {
00118        va_list va;
00119        va_start(va, f);
00120        file_error_core(ms, error, f, va, 0);
00121        va_end(va);
00122 }
00123 
00124 /*
00125  * Print an error with magic line number.
00126  */
00127 /*VARARGS*/
00128 protected void
00129 file_magerror(struct magic_set *ms, const char *f, ...)
00130 {
00131        va_list va;
00132        va_start(va, f);
00133        file_error_core(ms, 0, f, va, ms->line);
00134        va_end(va);
00135 }
00136 
00137 protected void
00138 file_oomem(struct magic_set *ms, size_t len)
00139 {
00140        file_error(ms, errno, "cannot allocate %zu bytes", len);
00141 }
00142 
00143 protected void
00144 file_badseek(struct magic_set *ms)
00145 {
00146        file_error(ms, errno, "error seeking");
00147 }
00148 
00149 protected void
00150 file_badread(struct magic_set *ms)
00151 {
00152        file_error(ms, errno, "error reading");
00153 }
00154 
00155 protected int
00156 file_buffer(struct magic_set *ms, php_stream *stream, const char *inname, const void *buf,
00157     size_t nb)
00158 {
00159        int m = 0, rv = 0, looks_text = 0;
00160        int mime = ms->flags & MAGIC_MIME;
00161        const unsigned char *ubuf = buf;
00162        unichar *u8buf = NULL;
00163        size_t ulen;
00164        const char *code = NULL;
00165        const char *code_mime = "binary";
00166        const char *type = NULL;
00167 
00168 
00169 
00170        if (nb == 0) {
00171               if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
00172                   file_printf(ms, mime ? "application/x-empty" :
00173                   "empty") == -1)
00174                      return -1;
00175               return 1;
00176        } else if (nb == 1) {
00177               if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
00178                   file_printf(ms, mime ? "application/octet-stream" :
00179                   "very short file (no magic)") == -1)
00180                      return -1;
00181               return 1;
00182        }
00183 
00184        if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
00185               looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen,
00186                   &code, &code_mime, &type);
00187        }
00188 
00189 #if defined(__EMX__)
00190        if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) {
00191               switch (file_os2_apptype(ms, inname, buf, nb)) {
00192               case -1:
00193                      return -1;
00194               case 0:
00195                      break;
00196               default:
00197                      return 1;
00198               }
00199        }
00200 #endif
00201 
00202 #if PHP_FILEINFO_UNCOMPRESS
00203        if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0)
00204               if ((m = file_zmagic(ms, stream, inname, ubuf, nb)) != 0) {
00205                      if ((ms->flags & MAGIC_DEBUG) != 0)
00206                             (void)fprintf(stderr, "zmagic %d\n", m);
00207                      goto done;
00208        }
00209 #endif
00210 
00211        /* Check if we have a tar file */
00212        if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0)
00213               if ((m = file_is_tar(ms, ubuf, nb)) != 0) {
00214                      if ((ms->flags & MAGIC_DEBUG) != 0)
00215                             (void)fprintf(stderr, "tar %d\n", m);
00216                      goto done;
00217               }
00218 
00219        /* Check if we have a CDF file */
00220        if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) {
00221               int fd;
00222               TSRMLS_FETCH();
00223               if (stream && SUCCESS == php_stream_cast(stream, PHP_STREAM_AS_FD, (void **)&fd, 0)) {
00224                      if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) {
00225                             if ((ms->flags & MAGIC_DEBUG) != 0)
00226                                    (void)fprintf(stderr, "cdf %d\n", m);
00227                             goto done;
00228                      }
00229               }
00230        }
00231 
00232        /* try soft magic tests */
00233        if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0)
00234               if ((m = file_softmagic(ms, ubuf, nb, BINTEST)) != 0) {
00235                      if ((ms->flags & MAGIC_DEBUG) != 0)
00236                             (void)fprintf(stderr, "softmagic %d\n", m);
00237 #ifdef BUILTIN_ELF
00238                      if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 &&
00239                          nb > 5 && fd != -1) {
00240                             /*
00241                              * We matched something in the file, so this
00242                              * *might* be an ELF file, and the file is at
00243                              * least 5 bytes long, so if it's an ELF file
00244                              * it has at least one byte past the ELF magic
00245                              * number - try extracting information from the
00246                              * ELF headers that cannot easily * be
00247                              * extracted with rules in the magic file.
00248                              */
00249                             if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0)
00250                                    if ((ms->flags & MAGIC_DEBUG) != 0)
00251                                           (void)fprintf(stderr,
00252                                               "elf %d\n", m);
00253                      }
00254 #endif
00255                      goto done;
00256               }
00257 
00258        /* try text properties (and possibly text tokens) */
00259        if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) {
00260 
00261               if ((m = file_ascmagic(ms, ubuf, nb)) != 0) {
00262                      if ((ms->flags & MAGIC_DEBUG) != 0)
00263                             (void)fprintf(stderr, "ascmagic %d\n", m);
00264                      goto done;
00265               }
00266 
00267               /* try to discover text encoding */
00268               if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
00269                      if (looks_text == 0)
00270                             if ((m = file_ascmagic_with_encoding( ms, ubuf,
00271                                 nb, u8buf, ulen, code, type)) != 0) {
00272                                    if ((ms->flags & MAGIC_DEBUG) != 0)
00273                                           (void)fprintf(stderr,
00274                                               "ascmagic/enc %d\n", m);
00275                                    goto done;
00276                             }
00277               }
00278        }
00279 
00280        /* give up */
00281        m = 1;
00282        if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
00283            file_printf(ms, mime ? "application/octet-stream" : "data") == -1) {
00284            rv = -1;
00285        }
00286  done:
00287        if ((ms->flags & MAGIC_MIME_ENCODING) != 0) {
00288               if (ms->flags & MAGIC_MIME_TYPE)
00289                      if (file_printf(ms, "; charset=") == -1)
00290                             rv = -1;
00291               if (file_printf(ms, "%s", code_mime) == -1)
00292                      rv = -1;
00293        }
00294        if (u8buf)
00295               free(u8buf);
00296        if (rv)
00297               return rv;
00298 
00299        return m;
00300 }
00301 
00302 protected int
00303 file_reset(struct magic_set *ms)
00304 {
00305        if (ms->mlist == NULL) {
00306               file_error(ms, 0, "no magic files loaded");
00307               return -1;
00308        }
00309        if (ms->o.buf) {
00310               efree(ms->o.buf);
00311               ms->o.buf = NULL;
00312        }
00313        if (ms->o.pbuf) {
00314               efree(ms->o.pbuf);
00315               ms->o.pbuf = NULL;
00316        }
00317        ms->event_flags &= ~EVENT_HAD_ERR;
00318        ms->error = -1;
00319        return 0;
00320 }
00321 
/*
 * Write the four-character escape "\ooo" (backslash followed by three
 * octal digits) for the byte at *o through the output cursor n, then
 * advance o by one byte.  n ends up four bytes further on.  Both arguments
 * are evaluated more than once and are modified, so pass plain pointer
 * variables.
 */
#define OCTALIFY(n, o)	\
	/*LINTED*/ \
	(void)(*(n)++ = '\\', \
	*(n)++ = '0' + (((uint32_t)*(o) & 0300) >> 6), \
	*(n)++ = '0' + (((uint32_t)*(o) & 0070) >> 3), \
	*(n)++ = '0' + ((uint32_t)*(o) & 0007), \
	(o)++)
00329 
00330 protected const char *
00331 file_getbuffer(struct magic_set *ms)
00332 {
00333        char *pbuf, *op, *np;
00334        size_t psize, len;
00335 
00336        if (ms->event_flags & EVENT_HAD_ERR)
00337               return NULL;
00338 
00339        if (ms->flags & MAGIC_RAW)
00340               return ms->o.buf;
00341 
00342        if (ms->o.buf == NULL)
00343               return NULL;
00344 
00345        /* * 4 is for octal representation, + 1 is for NUL */
00346        len = strlen(ms->o.buf);
00347        if (len > (SIZE_MAX - 1) / 4) {
00348               return NULL;
00349        }
00350        psize = len * 4 + 1;
00351        pbuf = erealloc(ms->o.pbuf, psize);
00352        ms->o.pbuf = pbuf;
00353 
00354 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
00355        {
00356               mbstate_t state;
00357               wchar_t nextchar;
00358               int mb_conv = 1;
00359               size_t bytesconsumed;
00360               char *eop;
00361               (void)memset(&state, 0, sizeof(mbstate_t));
00362 
00363               np = ms->o.pbuf;
00364               op = ms->o.buf;
00365               eop = op + len;
00366 
00367               while (op < eop) {
00368                      bytesconsumed = mbrtowc(&nextchar, op,
00369                          (size_t)(eop - op), &state);
00370                      if (bytesconsumed == (size_t)(-1) ||
00371                          bytesconsumed == (size_t)(-2)) {
00372                             mb_conv = 0;
00373                             break;
00374                      }
00375 
00376                      if (iswprint(nextchar)) {
00377                             (void)memcpy(np, op, bytesconsumed);
00378                             op += bytesconsumed;
00379                             np += bytesconsumed;
00380                      } else {
00381                             while (bytesconsumed-- > 0)
00382                                    OCTALIFY(np, op);
00383                      }
00384               }
00385               *np = '\0';
00386 
00387               /* Parsing succeeded as a multi-byte sequence */
00388               if (mb_conv != 0)
00389                      return ms->o.pbuf;
00390        }
00391 #endif
00392 
00393        for (np = ms->o.pbuf, op = ms->o.buf; *op; op++) {
00394               if (isprint((unsigned char)*op)) {
00395                      *np++ = *op;
00396               } else {
00397                      OCTALIFY(np, op);
00398               }
00399        }
00400        *np = '\0';
00401        return ms->o.pbuf;
00402 }
00403 
00404 protected int
00405 file_check_mem(struct magic_set *ms, unsigned int level)
00406 {
00407        size_t len;
00408 
00409        if (level >= ms->c.len) {
00410               len = (ms->c.len += 20) * sizeof(*ms->c.li);
00411               ms->c.li = (ms->c.li == NULL) ? emalloc(len) : erealloc(ms->c.li, len);
00412        }
00413        ms->c.li[level].got_match = 0;
00414 #ifdef ENABLE_CONDITIONALS
00415        ms->c.li[level].last_match = 0;
00416        ms->c.li[level].last_cond = COND_NONE;
00417 #endif /* ENABLE_CONDITIONALS */
00418        return 0;
00419 }