Back to index

texmacs  1.0.7.15
Functions
pdfparse.h File Reference
#include "numbers.h"
#include "pdfobj.h"
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

void dump (const char *start, const char *end)
void skip_line (char **start, char *end)
void skip_white (char **start, char *end)
char * parse_number (char **start, char *end)
char * parse_unsigned (char **start, char *end)
char * parse_ident (char **start, char *end)
char * parse_val_ident (char **start, char *end)
char * parse_opt_ident (char **start, char *end)
pdf_obj * parse_pdf_name (char **pp, char *endptr)
pdf_obj * parse_pdf_boolean (char **pp, char *endptr)
pdf_obj * parse_pdf_number (char **pp, char *endptr)
pdf_obj * parse_pdf_null (char **pp, char *endptr)
pdf_obj * parse_pdf_string (char **pp, char *endptr)
pdf_obj * parse_pdf_dict (char **pp, char *endptr, pdf_file *pf)
pdf_obj * parse_pdf_array (char **pp, char *endptr, pdf_file *pf)
pdf_obj * parse_pdf_object (char **pp, char *endptr, pdf_file *pf)
pdf_obj * parse_pdf_tainted_dict (char **pp, char *endptr)

Function Documentation

void dump ( const char *  start,
const char *  end 
)

Definition at line 77 of file pdfparse.c.

{
  /* Upper bound on the number of input bytes echoed to the log. */
#define DUMP_LIMIT 50
  const char *p    = start;
  const char *stop = end;

  /*
   * Clamp by length rather than forming start + DUMP_LIMIT unconditionally:
   * for a buffer shorter than DUMP_LIMIT that pointer would lie beyond the
   * end of the buffer.
   */
  if (end - start > DUMP_LIMIT)
    stop = start + DUMP_LIMIT;

  MESG("\nCurrent input buffer is -->");
  while (p < stop)
    MESG("%c", *(p++));
  /*
   * Show the ellipsis only when input was actually cut off; a buffer of
   * exactly DUMP_LIMIT bytes has been printed in full.
   */
  if (p < end)
    MESG("...");
  MESG("<--\n");
}

Here is the call graph for this function:

char* parse_ident ( char **  start,
char *  end 
)

Definition at line 203 of file pdfparse.c.

{
  /*
   * Character set handed to parse_gen_ident() for identifiers.
   * Adjacent literals are concatenated by the compiler into one string.
   */
  static const char *ident_chars =
    "!\"#$&'*+,-.0123456789:;=?@"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "\\^_`"
    "abcdefghijklmnopqrstuvwxyz"
    "|~";
  char *ident;

  ident = parse_gen_ident(start, end, ident_chars);
  return ident;
}

Here is the call graph for this function:

Here is the caller graph for this function:

char* parse_number ( char **  start,
char *  end 
)

Definition at line 149 of file pdfparse.c.

{
  /*
   * Scan an optionally signed decimal number ([+-] digits [. digits]) after
   * skipping leading white space, and return the scanned text as produced by
   * parsed_string().  *start is advanced past the scanned characters.
   */
  char *number, *p;

  skip_white(start, end);
  p = *start;
  /* Optional sign. */
  if (p < end && (*p == '+' || *p == '-'))
    p++;
  /*
   * Integer digits.  The cast to unsigned char keeps isdigit() well defined
   * for input bytes >= 0x80 on platforms where plain char is signed.
   */
  while (p < end && isdigit((unsigned char) *p))
    p++;
  /* Optional fractional part. */
  if (p < end && *p == '.') {
    p++;
    while (p < end && isdigit((unsigned char) *p))
      p++;
  }
  number = parsed_string(*start, p);

  *start = p;
  return number;
}

Here is the call graph for this function:

Here is the caller graph for this function:

char* parse_opt_ident ( char **  start,
char *  end 
)

Definition at line 221 of file pdfparse.c.

{
  /* Without a leading '@' there is nothing to parse; input is untouched. */
  if (*start >= end || **start != '@')
    return NULL;

  (*start)++; /* consume '@' */
  return parse_ident(start, end);
}

Here is the call graph for this function:

pdf_obj* parse_pdf_array ( char **  pp,
char *  endptr,
pdf_file *  pf 
)

Definition at line 743 of file pdfparse.c.

{
  /*
   * Parse "[ obj obj ... ]" starting at *pp.  On success the new array is
   * returned and *pp is moved past the closing ']'.  On failure NULL is
   * returned and *pp is left unchanged, because all scanning is done on a
   * local cursor.
   */
  pdf_obj *array;
  char    *cur = *pp;

  skip_white(&cur, endptr);
  if (cur + 2 > endptr || cur[0] != '[') {
    WARN("Could not find an array object.");
    return NULL;
  }

  array = pdf_new_array();

  cur++; /* skip [ */
  skip_white(&cur, endptr);

  /* Collect elements until ']' or the end of input. */
  for (; cur < endptr && cur[0] != ']'; skip_white(&cur, endptr)) {
    pdf_obj *item = parse_pdf_object(&cur, endptr, pf);

    if (!item) {
      pdf_release_obj(array);
      WARN("Could not find a valid object in array object.");
      return NULL;
    }
    pdf_add_array(array, item);
  }

  if (cur < endptr && cur[0] == ']') {
    *pp = cur + 1; /* skip ] */
    return array;
  }

  WARN("Array object ended prematurely.");
  pdf_release_obj(array);
  return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

pdf_obj* parse_pdf_boolean ( char **  pp,
char *  endptr 
)

Definition at line 391 of file pdfparse.c.

{
  /*
   * Recognize the keywords "true" and "false".  A keyword only counts when
   * it is followed by the end of input or a token separator.  On success
   * *pp is moved past the keyword; otherwise a warning is issued and NULL
   * is returned.
   */
  char *p;

  skip_white(pp, endptr);
  p = *pp;

  if (p + 4 <= endptr && strncmp(p, "true", 4) == 0) {
    if (p + 4 == endptr || istokensep(p[4])) {
      *pp = p + 4;
      return pdf_new_boolean(1);
    }
  } else if (p + 5 <= endptr && strncmp(p, "false", 5) == 0) {
    if (p + 5 == endptr || istokensep(p[5])) {
      *pp = p + 5;
      return pdf_new_boolean(0);
    }
  }

  WARN("Not a boolean object.");

  return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

pdf_obj* parse_pdf_dict ( char **  pp,
char *  endptr,
pdf_file *  pf 
)

Definition at line 686 of file pdfparse.c.

{
  /*
   * Parse a dictionary "<< /Key value ... >>" starting at *pp.
   *
   * On success the new dictionary is returned and *pp is moved past the
   * closing ">>".  On failure NULL is returned and *pp is left unchanged,
   * because all scanning is done on the local cursor p.  pf is only passed
   * on to parse_pdf_object() for the values.
   */
  pdf_obj *result = NULL;
  char    *p;

  p = *pp;

  skip_white(&p, endptr);

  /* At least four letter <<>>. */
  if (p + 4 > endptr ||
      p[0] != '<'    || p[1] != '<') {
    return NULL;
  }
  p += 2;

  result = pdf_new_dict();

  skip_white(&p, endptr);
  while (p < endptr && p[0] != '>') {
    pdf_obj *key, *value;

    skip_white(&p, endptr);
    key = parse_pdf_name(&p, endptr);
    if (!key) {
      WARN("Could not find a key in dictionary object.");
      pdf_release_obj(result);
      return NULL;
    }

    skip_white(&p, endptr);

    value = parse_pdf_object(&p, endptr, pf);
    if (!value) {
      /*
       * value is NULL in this branch, so there is nothing to release for
       * it; only the key and the partially built dictionary are dropped.
       */
      pdf_release_obj(key);
      pdf_release_obj(result);
      WARN("Could not find a value in dictionary object.");
      return NULL;
    }
    pdf_add_dict(result, key, value);

    skip_white(&p, endptr);
  }

  /* The loop stops at a single '>' or at end of input; require ">>". */
  if (p + 2 > endptr ||
      p[0] != '>'    || p[1] != '>') {
    WARN("Syntax error: Dictionary object ended prematurely.");
    pdf_release_obj(result);
    return NULL;
  }

  *pp = p + 2; /* skip >> */
  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

pdf_obj* parse_pdf_name ( char **  pp,
char *  endptr 
)

Definition at line 353 of file pdfparse.c.

{
  /*
   * Parse a name object "/Name" starting at *pp and return it via
   * pdf_new_name().  Returns NULL (with a warning) when no '/' is found or
   * when no valid character follows it.
   *
   * The decoded name is collected in a local buffer of PDF_NAME_LEN_MAX
   * characters plus the terminating NUL.  Characters beyond that capacity
   * are still read from the input but are not stored, so an over-long name
   * is truncated instead of being written past the end of the buffer.
   */
  char  name[PDF_NAME_LEN_MAX+1];
  int   ch, len = 0;
  int   truncated = 0; /* set once the truncation warning was issued */

  skip_white(pp, endptr);
  if (*pp >= endptr || **pp != '/') {
    WARN("Could not find a name object.");
    return NULL;
  }

  (*pp)++; /* skip / */
  while (*pp < endptr && !istokensep(**pp)) {
    /* NOTE(review): pn_getc() is expected to advance *pp -- confirm. */
    ch = pn_getc(pp, endptr);
    if (ch < 0 || ch > 0xff) {
      WARN("Invalid char in PDF name object. (ignored)");
    } else if (ch == 0) {
      WARN("Null char not allowed in PDF name object. (ignored)");
    } else if (len < PDF_NAME_LEN_MAX) {
      /* Bounded by the capacity of name[], leaving room for the NUL. */
      name[len++] = (char) ch;
    } else if (!truncated) {
      WARN("PDF name length too long. (>= %d bytes, truncated)",
           PDF_NAME_LEN_MAX);
      truncated = 1;
    }
  }
  if (len < 1) {
    WARN("No valid name object found.");
    return NULL;
  }
  name[len] = '\0';

  return pdf_new_name(name);
}

Here is the call graph for this function:

Here is the caller graph for this function:

pdf_obj* parse_pdf_null ( char **  pp,
char *  endptr 
)

Definition at line 416 of file pdfparse.c.

{
  /*
   * Recognize the keyword "null".  It must fit in the remaining input and
   * be followed by the end of input or a token separator.  On success *pp
   * is moved past the keyword; otherwise a warning is issued and NULL is
   * returned.
   */
  char *p;

  skip_white(pp, endptr);
  p = *pp;

  if (p + 4 <= endptr &&
      (p + 4 == endptr || istokensep(p[4])) &&
      strncmp(p, "null", 4) == 0) {
    *pp = p + 4;
    return pdf_new_null();
  }

  WARN("Not a null object.");

  return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

pdf_obj* parse_pdf_number ( char **  pp,
char *  endptr 
)

Definition at line 233 of file pdfparse.c.

{
  /*
   * Parse a numeric object (optional sign, digits, at most one '.') that
   * runs up to the next token separator or the end of input.
   *
   * On success the value is returned via pdf_new_number() and *pp is moved
   * past the number.  On failure a warning is issued, NULL is returned and
   * *pp is left unchanged, because scanning is done on the local cursor p.
   * At most DDIGITS_MAX fractional digits contribute to the value; further
   * fractional digits are skipped.
   */
  char    *p;
  unsigned long ipart = 0, dpart = 0;
  int      nddigits = 0, sign = 1;
  int      has_dot = 0;
  /* ipot[k] is 10^-k; it scales the fractional digits collected in dpart. */
  static double ipot[DDIGITS_MAX+1] = {
    1.0,
    0.1,
    0.01,
    0.001,
    0.0001,
    0.00001,
    0.000001,
    0.0000001,
    0.00000001,
    0.000000001,
    0.0000000001
  };

  p = *pp;
  skip_white(&p, endptr);
  /*
   * The casts to unsigned char keep isdigit() well defined for input bytes
   * >= 0x80 on platforms where plain char is signed.
   */
  if (p >= endptr ||
      (!isdigit((unsigned char) p[0]) && p[0] != '.' &&
       p[0] != '+' && p[0] != '-')) {
    WARN("Could not find a numeric object.");
    return NULL;
  }

  /* A sign must be followed by at least one more byte of input. */
  if (p[0] == '-') {
    if (p + 1 >= endptr) {
      WARN("Could not find a numeric object.");
      return NULL;
    }
    sign = -1;
    p++;
  } else if (p[0] == '+') {
    if (p + 1 >= endptr) {
      WARN("Could not find a numeric object.");
      return NULL;
    }
    sign =  1;
    p++;
  }

  while (p < endptr && !istokensep(p[0])) {
    if (p[0] == '.') {
      if (has_dot) { /* Two dots */
        WARN("Could not find a numeric object.");
        return NULL;
      } else {
        has_dot = 1;
      }
    } else if (isdigit((unsigned char) p[0])) {
      if (has_dot) {
        if (nddigits == DDIGITS_MAX && pdf_obj_get_verbose() > 1) {
          WARN("Number with more than %d fractional digits.", DDIGITS_MAX);
        } else if (nddigits < DDIGITS_MAX) {
          dpart = dpart * 10 + p[0] - '0';
          nddigits++;
        } /* Ignore decimal digits more than DDIGITS_MAX */
      } else {
        ipart = ipart * 10 + p[0] - '0';
      }
    } else {
      /* Anything other than a digit or '.' inside the token is an error. */
      WARN("Could not find a numeric object.");
      return NULL;
    }
    p++;
  }

  *pp = p;
  return pdf_new_number((double) sign * (((double ) ipart) + dpart * ipot[nddigits]));
}

Here is the call graph for this function:

Here is the caller graph for this function:

pdf_obj* parse_pdf_object ( char **  pp,
char *  endptr,
pdf_file *  pf 
)

Definition at line 964 of file pdfparse.c.

{
  /*
   * Parse one PDF object of any kind starting at *pp.  The first
   * non-white-space byte selects the specific parser.  Returns the parsed
   * object, or NULL when the input is exhausted, the leading byte matches
   * no known object type, or the selected parser fails.
   */
  pdf_obj *result = NULL;
  char    *nextptr;

  skip_white(pp, endptr);
  if (*pp >= endptr) {
    WARN("Could not find any valid object.");
    return NULL;
  }

  switch (**pp) {

  case '<':

    /*
     * "<<" opens a dictionary, a lone '<' opens a hex string.  The second
     * byte is inspected only when it lies inside the buffer; a '<' that is
     * the last input byte is handed to the hex string parser.
     */
    if (*pp + 1 >= endptr || *(*pp + 1) != '<') {
      result = parse_pdf_hex_string(pp, endptr);
    } else {
      pdf_obj *dict;

      result = parse_pdf_dict(pp, endptr, pf);
      skip_white(pp, endptr);
      /* A dictionary followed by the keyword "stream" is a stream object. */
      if ( result &&
          *pp <= endptr - 15 &&
          !memcmp(*pp, "stream", 6)) {
        dict   = result;
        result = parse_pdf_stream(pp, endptr, dict, pf);
        pdf_release_obj(dict);
      }
    }

    break;
  case '(':
    result = parse_pdf_string(pp, endptr);
    break;
  case '[':
    result = parse_pdf_array(pp, endptr, pf);
    break;
  case '/':
    result = parse_pdf_name(pp, endptr);
    break;
  case 'n':
    result = parse_pdf_null(pp, endptr);
    break;
  case 't': case 'f':
    result = parse_pdf_boolean(pp, endptr);
    break;
  case '+': case '-': case '.':
    result = parse_pdf_number(pp, endptr);
    break;
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':

    /*
     * If pf != NULL, then we are parsing a PDF file,
     * and indirect references are allowed.
     */
    if (pf && (result = try_pdf_reference(*pp, endptr, &nextptr, pf))) {
      *pp = nextptr;
    } else {
      result = parse_pdf_number(pp, endptr);
    }
    break;

  case '@':

    /* With PDF_PARSE_STRICT defined, '@' yields NULL without a warning. */
#ifndef PDF_PARSE_STRICT
    result = parse_pdf_reference(pp, endptr);
#endif /* !PDF_PARSE_STRICT */
    break;

  default:
    WARN("Unknown PDF object type.");
    result = NULL;
  }

  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

pdf_obj* parse_pdf_string ( char **  pp,
char *  endptr 
)

Definition at line 648 of file pdfparse.c.

{
  /*
   * Parse a string object: a literal string starting with '(' or a hex
   * string starting with '<' followed by '>' or a hexadecimal digit.
   * Returns the result of the selected parser, or NULL with a warning when
   * neither form is found or fewer than two bytes of input remain.
   */
  skip_white(pp, endptr);
  if (*pp + 2 <= endptr) {
    if (**pp == '(')
      return parse_pdf_literal_string(pp, endptr);
    /*
     * The cast to unsigned char keeps isxdigit() well defined for input
     * bytes >= 0x80 on platforms where plain char is signed.
     */
    else if (**pp == '<' &&
             (*(*pp + 1) == '>' ||
              isxdigit((unsigned char) *(*pp + 1)))) {
      return parse_pdf_hex_string(pp, endptr);
    }
  }

  WARN("Could not find a string object.");

  return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

pdf_obj* parse_pdf_tainted_dict ( char **  pp,
char *  endptr 
)

Definition at line 679 of file pdfparse.c.

{
  /* Parse a dictionary with no pdf_file attached (pf is NULL). */
  pdf_obj *dict = parse_pdf_dict(pp, endptr, NULL);

  return dict;
}

Here is the call graph for this function:

char* parse_unsigned ( char **  start,
char *  end 
)

Definition at line 171 of file pdfparse.c.

{
  /*
   * Scan a run of decimal digits after skipping leading white space and
   * return the scanned text as produced by parsed_string().  *start is
   * advanced past the digits.
   */
  char *number, *p;

  skip_white(start, end);
  for (p = *start; p < end; p++) {
    /*
     * The cast to unsigned char keeps isdigit() well defined for input
     * bytes >= 0x80 on platforms where plain char is signed.
     */
    if (!isdigit((unsigned char) *p))
      break;
  }
  number = parsed_string(*start, p);

  *start = p;
  return number;
}

Here is the call graph for this function:

Here is the caller graph for this function:

char* parse_val_ident ( char **  start,
char *  end 
)

Definition at line 212 of file pdfparse.c.

{
  /*
   * Character set handed to parse_gen_ident() for value identifiers.
   * Adjacent literals are concatenated by the compiler into one string.
   */
  static const char *ident_chars =
    "!\"#$&'*+,-./0123456789:;?@"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "\\^_`"
    "abcdefghijklmnopqrstuvwxyz"
    "|~";
  char *ident;

  ident = parse_gen_ident(start, end, ident_chars);
  return ident;
}

Here is the call graph for this function:

void skip_line ( char **  start,
char *  end 
)

Definition at line 99 of file pdfparse.c.

{
  /* Work on a local cursor and store it back once at the end. */
  char *p = *start;

  /* Advance to the first end-of-line character, if any. */
  while (p < end && *p != '\n' && *p != '\r')
    p++;

  /* The carriage return (CR; \r; 0x0D) and line feed (LF; \n; 0x0A)
   * characters, also called newline characters, are treated as
   * end-of-line (EOL) markers. The combination of a carriage return
   * followed immediately by a line feed is treated as one EOL marker.
   */
  if (p < end && *p == '\r')
    p++;
  if (p < end && *p == '\n')
    p++;

  *start = p;
}
void skip_white ( char **  start,
char *  end 
)

Definition at line 115 of file pdfparse.c.

{
  /*
   * The null (NUL; 0x00) character is a white-space character in PDF spec
   * but isspace(0x00) returns FALSE; on the other hand, the vertical tab
   * (VT; 0x0B) character is not a white-space character in PDF spec but
   * isspace(0x0B) returns TRUE.
   */
  for (;;) {
    if (*start >= end)
      break;

    if (**start == '%') {
      /* '%' starts a comment that runs to the end of the line. */
      skip_line(start, end);
    } else if (is_space(**start)) {
      (*start)++;
    } else {
      break;
    }
  }
}

Here is the call graph for this function:

Here is the caller graph for this function: