Back to index

tetex-bin  3.0
Functions
pdfparse.h File Reference
#include "numbers.h"
#include "pdfobj.h"
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

int is_an_int (const char *s)
int is_a_number (const char *s)
char * parse_ident (char **start, char *end)
char * parse_val_ident (char **start, char *end)
char * parse_c_ident (char **start, char *end)
char * parse_number (char **start, char *end)
char * parse_unsigned (char **start, char *end)
void parse_crap (char **start, char *end)
void skip_white (char **start, char *end)
void skip_line (char **start, char *end)
char * parse_c_string (char **start, char *end)
pdf_obj * parse_pdf_string (char **start, char *end)
pdf_obj * parse_pdf_name (char **start, char *end)
pdf_obj * parse_pdf_array (char **start, char *end)
pdf_obj * parse_pdf_object (char **start, char *end)
pdf_obj * parse_pdf_dict (char **start, char *end)
pdf_obj * parse_pdf_boolean (char **start, char *end)
pdf_obj * parse_pdf_null (char **start, char *end)
char * parse_pdf_reference (char **start, char *end)
char * parse_opt_ident (char **start, char *end)
void parse_key_val (char **start, char *end, char **key, char **val)
void dump (char *start, char *end)

Function Documentation

void dump ( char *  start,
char *  end 
)

Definition at line 44 of file pdfparse.c.

{
  char *p = start;
  fprintf (stderr, "\nCurrent input buffer is ");
  fprintf (stderr, "-->");
  while (p < end && p < start+DUMP_LIMIT)
    fprintf (stderr, "%c", *(p++));
  if (p == start+DUMP_LIMIT)
    fprintf (stderr, "...\n");
  fprintf (stderr, "<--\n");
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * is_a_number - test whether a string is a plain decimal number.
 *
 * Definition at line 101 of file pdfparse.c.
 *
 *   s  NUL-terminated string to examine (NULL is treated as "not a number")
 *
 * Accepted form: an optional leading '-', then digits with at most one
 * '.' anywhere among them.  At least one digit is required, so "", "-",
 * "." and "-." are rejected.
 *
 * Returns 1 when s matches, 0 otherwise.
 */
int is_a_number (const char *s)
{
  const char *p;
  int period = 0;
  int digits = 0;

  if (s == NULL)
    return 0;
  p = s;
  if (*p == '-')
    p++;
  /* Walk to the terminator directly; no repeated strlen() calls. */
  for (; *p != '\0'; p++) {
    if (*p == '.' && !period) {
      period = 1;
      continue;
    }
    /* The cast keeps negative char values out of isdigit(), whose
       argument must be representable as unsigned char (or EOF). */
    if (!isdigit ((unsigned char) *p))
      return 0;
    digits = 1;
  }
  return digits;
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * is_an_int - test whether a string is a plain decimal integer.
 *
 * Definition at line 89 of file pdfparse.c.
 *
 *   s  NUL-terminated string to examine (NULL is treated as "not an int")
 *
 * Accepted form: an optional leading '-' followed by one or more
 * decimal digits.  The empty string and a lone "-" are rejected.
 *
 * Returns 1 when s matches, 0 otherwise.
 */
int is_an_int (const char *s)
{
  const char *p;

  if (s == NULL)
    return 0;
  p = s;
  if (*p == '-')
    p++;
  if (*p == '\0')
    return 0;   /* no digits at all */
  for (; *p != '\0'; p++) {
    /* The cast keeps negative char values out of isdigit(). */
    if (!isdigit ((unsigned char) *p))
      return 0;
  }
  return 1;
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * parse_c_ident - scan an identifier built from C-identifier characters.
 *
 * Definition at line 289 of file pdfparse.c.
 *
 * Hands the work to parse_gen_ident() with a character set consisting
 * of the decimal digits, '@', the ASCII letters and '_', and returns
 * whatever that call returns.
 */
char *parse_c_ident (char **start, char *end)
{
  /* Adjacent literals are concatenated by the compiler into one set. */
  static char *c_ident_chars =
    "0123456789"
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "_abcdefghijklmnopqrstuvwxyz";

  return parse_gen_ident (start, end, c_ident_chars);
}

Here is the call graph for this function:

Here is the caller graph for this function:

char* parse_c_string ( char **  start,
char *  end 
)

Definition at line 491 of file pdfparse.c.

{
  char *string, *save;
  int strlength;
  skip_white(start, end);
  save = *start;
  if (*start == end || **start != '"') {
    return NULL;
  }
  ++(*start);
  string = NEW (end - *start, char);
  strlength = 0;
  while (*start < end && (**start != '"')) {
    if (**start == '\\')
      switch (*(++(*start))) {
      case '"':
       string[strlength++] = '"';
       (*start)++;
       break;
      case 'n':
       string[strlength++] = '\n';
       (*start)++;
       break;
      case 'r':
       string[strlength++] = '\r';
       (*start)++;
       break;
      case 't':
       string[strlength++] = '\t';
       (*start)++;
       break;
      case 'b':
       string[strlength++] = '\b';
       (*start)++;
       break;
      default:
       if (isdigit(**start)) {
         int i;
         string[strlength] = 0;
         for (i=0; i<3; i++) 
           string[strlength] = string[strlength]*8 + (*((*start)++)-'0');
         strlength+= 1;
       } else {
         string[strlength++] = *((*start)++);
       }
      }
    else {
      string[strlength++] = *((*start)++);
    }
    string[strlength]=0;
  }
  if (*start >= end) {
    fprintf (stderr, "\nString ended prematurely\n");
    dump (save, *start);
    return NULL;
  }
  string[strlength] = 0;
  (*start)++;
  return string;
}

Here is the caller graph for this function:

/*
 * parse_crap - complain about unexpected trailing input.
 *
 * Definition at line 80 of file pdfparse.c.
 *
 * Skips white space; if the cursor has not then reached end, prints a
 * warning to stderr and dumps the remaining input.
 */
void parse_crap (char **start, char *end)
{
  skip_white(start, end);
  if (*start == end)
    return;             /* nothing left over */
  fprintf (stderr, "\nCrap left over after object!!\n");
  dump(*start, end);
}

Here is the call graph for this function:

/*
 * parse_ident - scan an identifier using the general identifier set.
 *
 * Definition at line 275 of file pdfparse.c.
 *
 * Hands the work to parse_gen_ident() with the character set below and
 * returns whatever that call returns.  The set contains '=' but not '/'.
 */
char *parse_ident (char **start, char *end)
{
  /* Adjacent literals are concatenated by the compiler into one set. */
  static char *ident_chars =
    "!\"#$&'*+,-."
    "0123456789"
    ":;=?@"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "\\^_`"
    "abcdefghijklmnopqrstuvwxyz"
    "|~";

  return parse_gen_ident (start, end, ident_chars);
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * parse_key_val - parse a "key = value" pair.
 *
 * Definition at line 672 of file pdfparse.c.
 *
 *   start  in/out cursor into the input buffer
 *   end    one past the last byte of the input buffer
 *   key    out: result of parse_c_ident(), or NULL
 *   val    out: result of parse_c_string() when the value starts with
 *          '"', otherwise of parse_val_ident(); NULL when no '=' or no
 *          value follows the key
 *
 * Both outputs are reset to NULL before parsing begins.
 */
void parse_key_val (char **start, char *end, char **key, char **val)
{
  *key = NULL;
  *val = NULL;

  skip_white (start, end);
  *key = parse_c_ident (start, end);
  if (*key == NULL)
    return;

  skip_white (start, end);
  if (*start >= end || **start != '=')
    return;             /* key without a value */
  (*start)++;

  skip_white (start, end);
  if (*start >= end)
    return;             /* '=' was the last thing in the buffer */

  if (**start == '"')
    *val = parse_c_string (start, end);
  else
    *val = parse_val_ident (start, end);
}

Here is the call graph for this function:

Here is the caller graph for this function:

char* parse_number ( char **  start,
char *  end 
)

Definition at line 203 of file pdfparse.c.

{
  char *number, *save;
#ifdef MEM_DEBUG
MEM_START
#endif
  skip_white(start, end);
  save = *start;
  if (*start < end && (**start == '+' || **start == '-')) {
    *start += 1;
  }
  while (*start < end &&
        isdigit(**start))
    (*start)++;
  if (*start < end && **start == '.') {
    (*start)++;
    while (*start < end &&
          isdigit(**start))
      (*start)++;
  }
  if (*start > save) {
    number = NEW ((*start-save)+1, char);
    memcpy (number, save, (*start-save));
    number[*start-save] = 0;
    return number;
  }
  *start = save;
#ifdef MEM_DEBUG
MEM_END
#endif
  return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * parse_opt_ident - parse an optional "@ident".
 *
 * Definition at line 296 of file pdfparse.c.
 *
 * When the cursor is inside the buffer and sits on '@', the '@' is
 * consumed and the result of parse_ident() is returned.  Otherwise the
 * cursor is left alone and NULL is returned.  White space is not
 * skipped here.
 */
char *parse_opt_ident (char **start, char *end)
{
  char *ident = NULL;

  if (*start < end && **start == '@') {
    ++(*start);
    ident = parse_ident(start, end);
  }
  return ident;
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * parse_pdf_array - parse a PDF array "[ obj obj ... ]".
 *
 * Definition at line 171 of file pdfparse.c.
 *
 *   start  in/out cursor into the input buffer
 *   end    one past the last byte of the input buffer
 *
 * Returns a new array object with *start just past the closing ']'.
 * Returns NULL when no '[' is found (the cursor then stays on the
 * first non-white byte), when an element fails to parse, or when the
 * buffer ends before ']'; a partially built array is released.
 */
pdf_obj *parse_pdf_array (char **start, char *end)
{
  pdf_obj *result, *element;
#ifdef MEM_DEBUG
MEM_START
#endif
  skip_white(start, end);
  /* Look before consuming: never read at 'end', and do not swallow a
     byte that turns out not to be '['. */
  if (*start >= end || **start != '[')
    return NULL;
  (*start)++;
  result = pdf_new_array ();
  skip_white(start, end);
  while (*start < end && **start != ']') {
    if ((element = parse_pdf_object (start, end)) == NULL) {
      pdf_release_obj (result);
      return NULL;
    }
    pdf_add_array (result, element);
    skip_white(start, end);
  }
  if (*start >= end) {
    pdf_release_obj (result);
    fprintf (stderr, "\nArray ended prematurely\n");
    return NULL;
  }
  (*start)++;           /* step over ']' */
#ifdef MEM_DEBUG
MEM_END
#endif
  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * parse_pdf_boolean - parse the keyword "true" or "false".
 *
 * Definition at line 336 of file pdfparse.c.
 *
 *   start  in/out cursor into the input buffer
 *   end    one past the last byte of the input buffer
 *
 * After skipping white space, returns pdf_new_boolean(1) for "true" or
 * pdf_new_boolean(0) for "false" and advances *start past the keyword.
 * Returns NULL when neither keyword is present.  The byte following
 * the keyword is not examined.
 */
pdf_obj *parse_pdf_boolean (char **start, char *end)
{
  const size_t true_len = sizeof ("true") - 1;
  const size_t false_len = sizeof ("false") - 1;
  size_t avail;

  skip_white (start, end);
  avail = (*start < end) ? (size_t) (end - *start) : 0;
  /* ">=": a keyword that ends exactly at 'end' is still a keyword. */
  if (avail >= true_len && !strncmp (*start, "true", true_len)) {
    *start += true_len;
    return pdf_new_boolean (1);
  }
  if (avail >= false_len && !strncmp (*start, "false", false_len)) {
    *start += false_len;
    return pdf_new_boolean (0);
  }
  return NULL;
}

Here is the call graph for this function:

/*
 * parse_pdf_dict - parse a PDF dictionary "<< /Key value ... >>".
 *
 * Definition at line 118 of file pdfparse.c.
 *
 *   start  in/out cursor into the input buffer
 *   end    one past the last byte of the input buffer
 *
 * Returns a new dictionary object with *start just past the closing
 * ">>".  On any failure the partially built dictionary is released,
 * *start is restored to its value on entry, the input is dumped to
 * stderr and NULL is returned.
 */
pdf_obj *parse_pdf_dict (char **start, char *end)
{
  pdf_obj *result, *key, *value;
  char *save = *start;

  skip_white(start, end);
  /* Need two bytes for "<<"; check before reading either. */
  if (end - *start < 2 || (*start)[0] != '<' || (*start)[1] != '<') {
    *start = save;
    dump (*start, end);
    return NULL;
  }
  *start += 2;
  result = pdf_new_dict ();
  skip_white(start, end);
  while (*start < end && **start != '>') {
    if ((key = parse_pdf_name (start, end)) == NULL) {
      pdf_release_obj (result);
      *start = save;
      dump (*start, end);
      return NULL;
    }
    if ((value = parse_pdf_object (start, end)) == NULL) {
      pdf_release_obj (result);
      pdf_release_obj (key);
      *start = save;
      dump (*start, end);
      return NULL;
    }
    pdf_add_dict (result, key, value);
    skip_white(start, end);
  }
  if (*start >= end) {
    pdf_release_obj (result);
    *start = save;
    dump (*start, end);
    return NULL;
  }
  /* The loop stopped on '>'; the second '>' must also lie inside the
     buffer before it may be read. */
  if (end - *start >= 2 && (*start)[1] == '>') {
    *start += 2;
    return result;
  }
  pdf_release_obj (result);
  fprintf (stderr, "\nDictionary object ended prematurely\n");
  *start = save;
  dump (*start, end);
  return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * parse_pdf_name - parse a PDF name object "/Name".
 *
 * Definition at line 305 of file pdfparse.c.
 *
 *   start  in/out cursor into the input buffer
 *   end    one past the last byte of the input buffer
 *
 * After skipping white space, expects '/', then an identifier read by
 * parse_ident().  Returns the object made by pdf_new_name(), or NULL
 * when '/' is missing (a diagnostic and dump are printed) or when no
 * identifier follows it.
 */
pdf_obj *parse_pdf_name (char **start, char *end)
{
  pdf_obj *result;
  char *name;

  skip_white(start, end);
  /* Bounds check first: at 'end' there is no byte to look at. */
  if (*start >= end || **start != '/') {
    fprintf (stderr, "\nPDF Name expected and not found.\n");
    dump(*start, end);
    return NULL;
  }
  (*start)++;
  if ((name = parse_ident(start, end)) != NULL) {
    result = pdf_new_name (name);
    RELEASE (name);
    return result;
  }
  return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * parse_pdf_null - parse the keyword "null".
 *
 * Definition at line 352 of file pdfparse.c.
 *
 *   start  in/out cursor into the input buffer
 *   end    one past the last byte of the input buffer
 *
 * Reads an identifier with parse_ident() and returns pdf_new_null()
 * when it is exactly "null".  Otherwise *start is restored to its
 * value on entry, a diagnostic and dump are printed, and NULL is
 * returned.
 */
pdf_obj *parse_pdf_null (char **start, char *end)
{
  char *save = *start;
  char *ident;

  skip_white (start, end);
  ident = parse_ident(start, end);
  /* parse_ident() can come back empty-handed; strcmp() on NULL would
     crash.  The identifier is released on every path. */
  if (ident != NULL) {
    int is_null = !strcmp (ident, "null");
    RELEASE(ident);
    if (is_null)
      return pdf_new_null();
  }
  *start = save;
  fprintf (stderr, "\nNot a valid object\n");
  dump(*start, end);
  return NULL;
}

Here is the call graph for this function:

/*
 * parse_pdf_object - parse one PDF object of any kind.
 *
 * Skips white space and dispatches on the first remaining byte:
 *   '<'        hex string when the next byte is not '<', otherwise a
 *              dictionary, optionally followed by "stream" data
 *   '('        string
 *   '['        array
 *   '/'        name
 *   '@'        whatever get_reference() yields
 *   't', 'f'   boolean
 *   digit, '+', '-', '.'   number, or an indirect reference "n g R"
 *   'n'        null
 *
 * Returns the parsed object.  When nothing could be parsed, prints a
 * diagnostic, restores *start to its value on entry, dumps the input
 * and returns NULL.  Returns NULL silently when only white space
 * remains.
 */
pdf_obj* parse_pdf_object ( char **  start,
char *  end 
)

Definition at line 585 of file pdfparse.c.

{
  pdf_obj *result, *tmp1=NULL, *tmp2=NULL;
  char *save = *start;   /* cursor on entry, restored on failure */
  char *position2;       /* where the possible second number begins */
  skip_white(start, end);
  if (*start >= end)
    return NULL;
  switch (**start) {
  case '<': 
    /* Check for those troublesome strings starting with '<' */
    if (*start+1 < end && *(*start+1) != '<') {
      result = parse_pdf_hex_string (start, end);
      break;
    }
    result = parse_pdf_dict (start, end);
    skip_white(start, end);
    /* Strictly more bytes than the keyword must remain, i.e. at least
       one byte has to follow "stream". */
    /* NOTE(review): result may be NULL here if the dictionary failed
       to parse; it is still handed to parse_pdf_stream() when the
       keyword matches -- confirm that callee copes with NULL. */
    if (end - *start > strlen("stream") &&
       !strncmp(*start, "stream", strlen("stream"))) {
      result = parse_pdf_stream (start, end, result);
    }
    /* Check for stream */
    break;
  case '(':
    result = parse_pdf_string(start, end);
    break;
  case '[':
    result = parse_pdf_array(start, end);
    break;
  case '/':
    result = parse_pdf_name(start, end);
    break;
  case '@':
    result = get_reference(start, end);
    break;
  case 't':
  case 'f':
    result = parse_pdf_boolean(start, end);
    break;
  default:
    /* This is a bit of a hack, but PDF doesn't easily allow you to
       tell a number from an indirect object reference with some
       serious looking ahead */
    
    if (*start < end && 
       (isdigit(**start) || **start == '+' || **start == '-' || **start == '.')) {
      tmp1 = parse_pdf_number(start, end);
      tmp2 = NULL;
      /* This could be a # # R type reference.  We can't be sure unless
        we look ahead for the second number and the 'R' */
      skip_white(start, end);
      position2 = *start;
      if (*start < end && isdigit(**start)) {
       tmp2 = parse_pdf_number(start, end);
      } else
       tmp2 = NULL;
      skip_white(start, end);
      /* The 'R' test consumes one byte whether or not it matches; on a
         mismatch the cursor is rewound to position2 just below. */
      if (tmp1 != NULL && tmp2 != NULL && *start < end && *((*start)++) == 'R') {
       result = pdf_new_ref ((unsigned long) pdf_number_value (tmp1), 
                           (int) pdf_number_value (tmp2));
       pdf_release_obj (tmp1);
       pdf_release_obj (tmp2);
       break;
      }
      /* Following checks if we got two numbers, but not 'r' */
      /* NOTE(review): when tmp1 is NULL but tmp2 is not, tmp2 is not
         released on this path. */
      if (tmp1 != NULL && tmp2 != NULL) {
       pdf_release_obj (tmp2);
       *start = position2;
      }
      /* Not a reference: the first number alone is the object. */
      result = tmp1;
      break;
    }
    if (*start < end && **start == 'n') {
      result = parse_pdf_null(start, end);
      break;
    }
    result = NULL;
    break;
  }
  if (result == NULL) {
    fprintf (stderr, "\nExpecting an object, but didn't find one");
    *start = save;
    dump(*start, end);
  }
  return result;
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * parse_pdf_reference - parse a named reference "@ident".
 *
 * Definition at line 324 of file pdfparse.c.
 *
 *   start  in/out cursor into the input buffer
 *   end    one past the last byte of the input buffer
 *
 * After skipping white space, expects '@' and returns the result of
 * parse_ident() on what follows.  When '@' is missing, prints a
 * diagnostic and dump and returns NULL.
 */
char *parse_pdf_reference (char **start, char *end)
{
  skip_white (start, end);
  /* Bounds check first: at 'end' there is no byte to look at. */
  if (*start >= end || **start != '@') {
    /* The message used to say "PDF Name", copied from the name parser. */
    fprintf (stderr, "\nPDF reference expected and not found.\n");
    dump(*start, end);
    return NULL;
  }
  (*start)++;
  return parse_ident(start, end);
}

Here is the call graph for this function:

Here is the caller graph for this function:

pdf_obj* parse_pdf_string ( char **  start,
char *  end 
)

Definition at line 425 of file pdfparse.c.

{
  pdf_obj *result;
  int balance = 0;
  char *save;
  unsigned char *string;
  int strlength;
  skip_white(start, end);
  save = *start;
  if (*start == end || **start != '(') {
    return NULL;
  }
  ++(*start);
  string = NEW (end - *start, unsigned char);
  strlength = 0;
  balance = 0;
  while (*start < end &&
        (**start != ')' || balance > 0)) {
    if (**start == '\\')
      switch (*(++(*start))) {
      case 'n':
       string[strlength++] = '\n';
       (*start)++;
       break;
      case 'r':
       string[strlength++] = '\r';
       (*start)++;
       break;
      case 't':
       string[strlength++] = '\t';
       (*start)++;
       break;
      case 'b':
       string[strlength++] = '\b';
       (*start)++;
       break;
      default:
       if (isdigit(**start)) {
         int i;
         string[strlength] = 0;
         for (i=0; i<3; i++) 
           string[strlength] = string[strlength]*8 + (*((*start)++)-'0');
         strlength+= 1;
       } else {
         string[strlength++] = *((*start)++);
       }
      }
    else {
      if (**start == '(')
       balance += 1;
      if (**start == ')')
       balance -= 1;
      string[strlength++] = *((*start)++);
    }
  }
  if (*start >= end) {
    fprintf (stderr, "\nString object ended prematurely\n");
    dump (save, *start);
    return NULL;
  }
  (*start)++;
  result = pdf_new_string (string, strlength);
  RELEASE (string);
  return result;
}

Here is the caller graph for this function:

char* parse_unsigned ( char **  start,
char *  end 
)

Definition at line 236 of file pdfparse.c.

{
  char *number, *save;
#ifdef MEM_DEBUG
MEM_START
#endif
  skip_white(start, end);
  save = *start;
  while (*start < end &&
        isdigit(**start))
    (*start)++;
  if (*start > save) {
    number = NEW ((*start-save)+1, char);
    memcpy (number, save, (*start-save));
    number[*start-save] = 0;
    return number;
  }
  *start = save;
#ifdef MEM_DEBUG
MEM_END
#endif
  return NULL;
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * parse_val_ident - scan an identifier using the "value" character set.
 *
 * Definition at line 282 of file pdfparse.c.
 *
 * Hands the work to parse_gen_ident() with the character set below and
 * returns whatever that call returns.  The set contains '/' but not '='.
 */
char *parse_val_ident (char **start, char *end)
{
  /* Adjacent literals are concatenated by the compiler into one set. */
  static char *val_ident_chars =
    "!\"#$&'*+,-./"
    "0123456789"
    ":;?@"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "\\^_`"
    "abcdefghijklmnopqrstuvwxyz"
    "|~";

  return parse_gen_ident (start, end, val_ident_chars);
}

Here is the call graph for this function:

Here is the caller graph for this function:

/*
 * skip_line - advance the cursor past the current line.
 *
 * Definition at line 67 of file pdfparse.c.
 *
 *   start  in/out cursor into the input buffer
 *   end    one past the last byte of the input buffer
 *
 * Moves forward to the first '\r' or '\n' (or to end), then steps over
 * one '\r' if present and one '\n' if present, so "\r", "\n" and
 * "\r\n" each count as a single line ending.
 *
 * Note: the PDF spec says that all platforms must end the line with
 * '\n' after a "stream" keyword.
 */
void skip_line (char **start, char *end)
{
  char *p = *start;

  while (p < end && *p != '\n' && *p != '\r')
    p++;
  if (p < end && *p == '\r')
    p++;
  if (p < end && *p == '\n')
    p++;
  *start = p;
}

Here is the caller graph for this function:

/*
 * skip_white - advance the cursor over white space and comments.
 *
 * Definition at line 56 of file pdfparse.c.
 *
 *   start  in/out cursor into the input buffer
 *   end    one past the last byte of the input buffer
 *
 * Consumes white-space bytes one at a time and, on '%', the rest of
 * that line via skip_line().  Stops at the first byte that is neither,
 * or at end.
 */
void skip_white (char **start, char *end)
{
  while (*start < end) {
    if (**start == '%') {
      skip_line (start, end);
    } else if (isspace ((unsigned char) **start)) {
      /* The cast keeps negative char values out of isspace(), whose
         argument must be representable as unsigned char (or EOF). */
      (*start)++;
    } else {
      break;
    }
  }
}

Here is the call graph for this function: