Back to index

php5  5.3.10
Defines | Functions
ascmagic.c File Reference
#include "file.h"
#include "magic.h"
#include <string.h>
#include <memory.h>
#include <ctype.h>
#include <stdlib.h>
#include "names.h"

Go to the source code of this file.

Defines

#define MAXLINELEN   300 /* longest sane line length */
#define ISSPC(x)

Functions

private int ascmatch (const unsigned char *, const unichar *, size_t)
private unsigned char * encode_utf8 (unsigned char *, size_t, unichar *, size_t)
private size_t trim_nuls (const unsigned char *, size_t)
protected int file_ascmagic (struct magic_set *ms, const unsigned char *buf, size_t nbytes)
protected int file_ascmagic_with_encoding (struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t ulen, const char *code, const char *type)

Define Documentation

#define ISSPC(x)
Value:
((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
                || (x) == 0x85 || (x) == '\f')

Definition at line 53 of file ascmagic.c.

#define MAXLINELEN   300 /* longest sane line length */

Definition at line 52 of file ascmagic.c.


Function Documentation

/*
 * Compare a NUL-terminated byte string against a run of unichars.
 *
 *   s    - NUL-terminated token to match.
 *   us   - characters to compare against; need not be terminated.
 *   ulen - number of elements of us to compare.
 *
 * Returns 1 when s is exactly ulen characters long and every byte of s
 * equals the corresponding element of us; returns 0 otherwise.
 *
 * Definition at line 317 of file ascmagic.c.
 */
private int
ascmatch(const unsigned char *s, const unichar *us, size_t ulen)
{
       size_t i;

       for (i = 0; i < ulen; i++) {
              /*
               * Stop at the end of s.  Without this check a zero element
               * in us would compare equal to the terminator of s and the
               * loop would carry on reading past the end of the string.
               */
              if (s[i] == '\0' || s[i] != us[i])
                     return 0;
       }

       /* s must end exactly here; otherwise us only matched a prefix. */
       return s[i] == '\0';
}

Here is the caller graph for this function:

/*
 * Encode ulen unichars from ubuf into buf using the 1- to 6-byte UTF-8
 * forms.
 *
 *   buf  - destination buffer.
 *   len  - capacity of buf in bytes.
 *   ubuf - characters to encode.
 *   ulen - number of elements in ubuf.
 *
 * Returns a pointer one past the last byte written, or NULL if buf is too
 * small for the next character or a character is above 0x7fffffff.  The
 * output is not NUL-terminated.  On a NULL return, the characters that
 * were encoded before the failure have already been written to buf.
 *
 * Definition at line 337 of file ascmagic.c.
 */
private unsigned char *
encode_utf8(unsigned char *buf, size_t len, unichar *ubuf, size_t ulen)
{
       size_t i;
       unsigned char *end = buf + len;

       for (i = 0; i < ulen; i++) {
              const unichar c = ubuf[i];

              if (c <= 0x7f) {
                     /* 1 byte: 0xxxxxxx */
                     if (end - buf < 1)
                            return NULL;
                     *buf++ = (unsigned char)c;
              } else if (c <= 0x7ff) {
                     /* 2 bytes: 110xxxxx 10xxxxxx */
                     if (end - buf < 2)
                            return NULL;
                     *buf++ = (unsigned char)((c >> 6) + 0xc0);
                     *buf++ = (unsigned char)((c & 0x3f) + 0x80);
              } else if (c <= 0xffff) {
                     /* 3 bytes: 1110xxxx + 2 continuation bytes */
                     if (end - buf < 3)
                            return NULL;
                     *buf++ = (unsigned char)((c >> 12) + 0xe0);
                     *buf++ = (unsigned char)(((c >> 6) & 0x3f) + 0x80);
                     *buf++ = (unsigned char)((c & 0x3f) + 0x80);
              } else if (c <= 0x1fffff) {
                     /* 4 bytes: 11110xxx + 3 continuation bytes */
                     if (end - buf < 4)
                            return NULL;
                     *buf++ = (unsigned char)((c >> 18) + 0xf0);
                     *buf++ = (unsigned char)(((c >> 12) & 0x3f) + 0x80);
                     *buf++ = (unsigned char)(((c >>  6) & 0x3f) + 0x80);
                     *buf++ = (unsigned char)((c & 0x3f) + 0x80);
              } else if (c <= 0x3ffffff) {
                     /* 5 bytes: 111110xx + 4 continuation bytes */
                     if (end - buf < 5)
                            return NULL;
                     *buf++ = (unsigned char)((c >> 24) + 0xf8);
                     *buf++ = (unsigned char)(((c >> 18) & 0x3f) + 0x80);
                     *buf++ = (unsigned char)(((c >> 12) & 0x3f) + 0x80);
                     *buf++ = (unsigned char)(((c >>  6) & 0x3f) + 0x80);
                     *buf++ = (unsigned char)((c & 0x3f) + 0x80);
              } else if (c <= 0x7fffffff) {
                     /* 6 bytes: 1111110x + 5 continuation bytes */
                     if (end - buf < 6)
                            return NULL;
                     *buf++ = (unsigned char)((c >> 30) + 0xfc);
                     *buf++ = (unsigned char)(((c >> 24) & 0x3f) + 0x80);
                     *buf++ = (unsigned char)(((c >> 18) & 0x3f) + 0x80);
                     *buf++ = (unsigned char)(((c >> 12) & 0x3f) + 0x80);
                     *buf++ = (unsigned char)(((c >>  6) & 0x3f) + 0x80);
                     *buf++ = (unsigned char)((c & 0x3f) + 0x80);
              } else /* Invalid character */
                     return NULL;
       }

       return buf;
}

Here is the caller graph for this function:

protected int file_ascmagic ( struct magic_set ms,
const unsigned char *  buf,
size_t  nbytes 
)

Definition at line 74 of file ascmagic.c.

{
       unichar *ubuf = NULL;
       size_t ulen;
       int rv = 1;

       const char *code = NULL;
       const char *code_mime = NULL;
       const char *type = NULL;

       if (ms->flags & MAGIC_APPLE)
              return 0;

       nbytes = trim_nuls(buf, nbytes);

       /* If file doesn't look like any sort of text, give up. */
       if (file_encoding(ms, buf, nbytes, &ubuf, &ulen, &code, &code_mime,
           &type) == 0) {
              rv = 0;
              goto done;
       }

       rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code, 
           type);

 done:
       if (ubuf)
              free(ubuf);

       return rv;
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * Describe text whose encoding has already been determined.
 *
 *   ms     - magic set; ms->flags selects MIME vs. human-readable output.
 *   buf    - raw bytes (only used for trimming trailing NULs here).
 *   nbytes - number of bytes in buf.
 *   ubuf   - the text as an array of unichars.
 *   ulen   - number of elements in ubuf.
 *   code   - encoding name printed first in the human-readable form.
 *   type   - type name printed after the subtype; the value "binary"
 *            makes the function return 0 without printing.
 *
 * Returns 0 when MAGIC_APPLE is set, when fewer than 2 bytes remain after
 * trimming, or when type is "binary"; the non-zero result of
 * file_softmagic() if that call returns one; -1 if UTF-8 conversion or a
 * file_printf() call fails; and 1 once a description has been printed.
 *
 * Definition at line 107 of file ascmagic.c.
 */
protected int
file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
    size_t nbytes, unichar *ubuf, size_t ulen, const char *code,
    const char *type)
{
       unsigned char *utf8_buf = NULL, *utf8_end;
       size_t mlen, i;
       const struct names *p;
       int rv = -1;
       int mime = ms->flags & MAGIC_MIME;

       const char *subtype = NULL;
       const char *subtype_mime = NULL;

       int has_escapes = 0;
       int has_backspace = 0;
       int seen_cr = 0;

       int n_crlf = 0;
       int n_lf = 0;
       int n_cr = 0;
       int n_nel = 0;

       /*
        * (size_t)-1 so that last_line_end + MAXLINELEN wraps around to
        * MAXLINELEN - 1 while no line terminator has been seen yet.
        */
       size_t last_line_end = (size_t)-1;
       int has_long_lines = 0;

       if (ms->flags & MAGIC_APPLE)
              return 0;

       nbytes = trim_nuls(buf, nbytes);

       /* If we have fewer than 2 bytes, give up. */
       if (nbytes <= 1) {
              rv = 0;
              goto done;
       }

       /* Convert ubuf to UTF-8 and try text soft magic */
       /* malloc size is a conservative overestimate; could be
          improved, or at least realloced after conversion. */
       mlen = ulen * 6;
       utf8_buf = emalloc(mlen);

       if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) == NULL)
              goto done;
       if ((rv = file_softmagic(ms, utf8_buf, (size_t)(utf8_end - utf8_buf),
           TEXTTEST)) != 0)
              goto done;
       else
              rv = -1;

       /* look for tokens from names.h - this is expensive! */
       if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0)
              goto subtype_identified;

       i = 0;
       while (i < ulen) {
              size_t end;

              /* skip past any leading space */
              while (i < ulen && ISSPC(ubuf[i]))
                     i++;
              if (i >= ulen)
                     break;

              /*
               * find the next whitespace
               *
               * The scan is bounded by ulen, the element count of ubuf.
               * Bounding it by nbytes (the byte count of buf) would read
               * past the end of ubuf whenever ulen is smaller than nbytes.
               */
              for (end = i + 1; end < ulen; end++)
                     if (ISSPC(ubuf[end]))
                            break;

              /* compare the word thus isolated against the token list */
              for (p = names; p < names + NNAMES; p++) {
                     if (ascmatch((const unsigned char *)p->name, ubuf + i,
                         end - i)) {
                            subtype = types[p->type].human;
                            subtype_mime = types[p->type].mime;
                            goto subtype_identified;
                     }
              }

              i = end;
       }

subtype_identified:

       /* Now try to discover other details about the file. */
       for (i = 0; i < ulen; i++) {
              if (ubuf[i] == '\n') {
                     if (seen_cr)
                            n_crlf++;
                     else
                            n_lf++;
                     last_line_end = i;
              } else if (seen_cr)
                     n_cr++; /* previous CR was not followed by LF */

              seen_cr = (ubuf[i] == '\r');
              if (seen_cr)
                     last_line_end = i;

              if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */
                     n_nel++;
                     last_line_end = i;
              }

              /* If this line is _longer_ than MAXLINELEN, remember it. */
              if (i > last_line_end + MAXLINELEN)
                     has_long_lines = 1;

              if (ubuf[i] == '\033')
                     has_escapes = 1;
              if (ubuf[i] == '\b')
                     has_backspace = 1;
       }

       /* Beware, if the data has been truncated, the final CR could have
          been followed by a LF.  If we have HOWMANY bytes, it indicates
          that the data might have been truncated, probably even before
          this function was called. */
       if (seen_cr && nbytes < HOWMANY)
              n_cr++;

       if (strcmp(type, "binary") == 0) {
              rv = 0;
              goto done;
       }
       if (mime) {
              /* MIME output: only the type is printed, and only on request. */
              if ((mime & MAGIC_MIME_TYPE) != 0) {
                     if (subtype_mime) {
                            if (file_printf(ms, "%s", subtype_mime) == -1)
                                   goto done;
                     } else {
                            if (file_printf(ms, "text/plain") == -1)
                                   goto done;
                     }
              }
       } else {
              /* Human-readable output: "<code> [<subtype>] <type>[, ...]" */
              if (file_printf(ms, "%s", code) == -1)
                     goto done;

              if (subtype) {
                     if (file_printf(ms, " %s", subtype) == -1)
                            goto done;
              }

              if (file_printf(ms, " %s", type) == -1)
                     goto done;

              if (has_long_lines)
                     if (file_printf(ms, ", with very long lines") == -1)
                            goto done;

              /*
               * Only report line terminators if we find one other than LF,
               * or if we find none at all.
               */
              if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) ||
                  (n_crlf != 0 || n_cr != 0 || n_nel != 0)) {
                     if (file_printf(ms, ", with") == -1)
                            goto done;

                     if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
                            if (file_printf(ms, " no") == -1)
                                   goto done;
                     } else {
                            /*
                             * List each terminator kind seen, with a comma
                             * after it when another kind follows.
                             */
                            if (n_crlf) {
                                   if (file_printf(ms, " CRLF") == -1)
                                          goto done;
                                   if (n_cr || n_lf || n_nel)
                                          if (file_printf(ms, ",") == -1)
                                                 goto done;
                            }
                            if (n_cr) {
                                   if (file_printf(ms, " CR") == -1)
                                          goto done;
                                   if (n_lf || n_nel)
                                          if (file_printf(ms, ",") == -1)
                                                 goto done;
                            }
                            if (n_lf) {
                                   if (file_printf(ms, " LF") == -1)
                                          goto done;
                                   if (n_nel)
                                          if (file_printf(ms, ",") == -1)
                                                 goto done;
                            }
                            if (n_nel)
                                   if (file_printf(ms, " NEL") == -1)
                                          goto done;
                     }

                     if (file_printf(ms, " line terminators") == -1)
                            goto done;
              }

              if (has_escapes)
                     if (file_printf(ms, ", with escape sequences") == -1)
                            goto done;
              if (has_backspace)
                     if (file_printf(ms, ", with overstriking") == -1)
                            goto done;
       }
       rv = 1;
done:
       if (utf8_buf)
              efree(utf8_buf);

       return rv;
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * Return nbytes with trailing NUL bytes of buf discounted.
 *
 * The count is never reduced below 1, so a buffer made up entirely of
 * NULs yields 1; a count of 0 is returned unchanged and buf is not read.
 *
 * Definition at line 65 of file ascmagic.c.
 */
private size_t
trim_nuls(const unsigned char *buf, size_t nbytes)
{
       size_t remaining;

       /* Walk back from the end until a non-NUL byte or a single byte is left. */
       for (remaining = nbytes; remaining > 1; remaining--) {
              if (buf[remaining - 1] != '\0')
                     break;
       }

       return remaining;
}

Here is the caller graph for this function: