Back to index

tetex-bin  3.0
htex.c
Go to the documentation of this file.
00001 /*  $Header$
00002 
00003     This is dvipdfm, a DVI to PDF translator.
00004     Copyright (C) 1998, 1999 by Mark A. Wicks
00005 
00006     This program is free software; you can redistribute it and/or modify
00007     it under the terms of the GNU General Public License as published by
00008     the Free Software Foundation; either version 2 of the License, or
00009     (at your option) any later version.
00010 
00011     This program is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014     GNU General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License
00017     along with this program; if not, write to the Free Software
00018     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00019     
00020     The author may be contacted via the e-mail address
00021 
00022         mwicks@kettering.edu
00023 */
00024 
00025 #include <string.h>
00026 #include <stdlib.h>
00027 #include <ctype.h>
00028 #include "system.h"
00029 #include "mem.h"
00030 #include "mfileio.h"
00031 #include "dvi.h"
00032 #include "htex.h"
00033 #include "pdfparse.h"
00034 #include "pdfdoc.h"
00035 
/* Check whether the text in [*start, end) carries the "html:" prefix
   that introduces an html special.

   skip_white() is applied to *start first (presumably stepping over
   leading white space).  If the prefix follows, *start is advanced
   just past it and 1 is returned; otherwise 0 is returned and *start
   is left where skip_white() put it.  */
static int is_htex_special (char **start, char *end)
{
  static const char prefix[] = "html:";
  const size_t prefix_len = sizeof(prefix) - 1;

  skip_white(start, end);
  /* Reject an empty or inverted range before the pointer difference is
     converted to an unsigned type; otherwise a negative distance would
     look like a huge length and strncmp() could read beyond end.  */
  if (*start < end && (size_t) (end - *start) >= prefix_len &&
      !strncmp (*start, prefix, prefix_len)) {
    *start += prefix_len;
    return 1;
  }
  return 0;
}
00046 
/* Lower-case the NUL-terminated string S in place.  Only characters in
   the range 'A'..'Z' are rewritten; every other byte is left alone, so
   the result does not depend on the current locale.  */
static void downcase (char *s)
{
  char *p;

  for (p = s; *p != '\0'; p++) {
    if (*p < 'A' || *p > 'Z')
      continue;
    *p = (*p - 'A') + 'a';
  }
}
00055 
00056 
/* Codes returned by parse_html_tag().  ANCHOR, IMAGE and BASE are the
   indices of the matching names in tags[].  A closing tag ("/name")
   yields that index plus the number of entries in tags[], so
   END_ANCHOR must stay equal to ANCHOR + (number of entries).  */
#define ANCHOR 0
#define IMAGE 1
#define BASE 2
#define END_ANCHOR 3

/* Recognised tag names, in lower case, indexed by the codes above.  */
static const char *const tags[] = {"a", "img", "base" };

/* Read a tag name, optionally preceded by '/', from [*start, end).

   The name is compared case-insensitively against tags[].  Returns the
   index of the matching entry for an opening tag, the index plus the
   number of entries for a closing tag, and -1 when the range is empty,
   no identifier can be parsed, or the name is not in tags[].  *start is
   advanced by skip_white() and parse_ident() as they consume input.  */
static int parse_html_tag (char **start, char *end)
{
  const size_t ntags = sizeof(tags)/sizeof(tags[0]);
  int result = -1;
  int closing = 0;
  char *token;
  size_t i;

  skip_white(start, end);
  if (*start >= end)
    return -1;
  if (**start == '/') {
    (*start)++;
    closing = 1;
  }
  token = (*start < end) ? parse_ident (start, end) : NULL;
  if (!token)
    return -1;
  downcase (token);
  /* Scan the whole table.  The loop must only stop early on a match;
     leaving it unconditionally after the first comparison would make
     every tag except tags[0] unrecognisable.  */
  for (i = 0; i < ntags; i++) {
    if (!strcmp (token, tags[i])) {
      result = (int) (closing ? i + ntags : i);
      break;
    }
  }
  RELEASE (token);
  return result;
}
00095 
00096 #define HREF 1
00097 #define NAME 2
00098 static pdf_obj *link_dict = NULL;
00099 static unsigned pending_type = 0;
00100 
00101 char *base_value = NULL;
00102 static void html_make_link_dict (char *name)
00103 {
00104   pdf_obj *color;
00105   if (!link_dict) {
00106     link_dict = pdf_new_dict();
00107     pdf_add_dict(link_dict, pdf_new_name("Type"), pdf_new_name ("Annot"));
00108     pdf_add_dict(link_dict, pdf_new_name("Subtype"), pdf_new_name ("Link"));
00109     color = pdf_new_array ();
00110     pdf_add_array (color, pdf_new_number (0));
00111     pdf_add_array (color, pdf_new_number (1));
00112     pdf_add_array (color, pdf_new_number (1));
00113     pdf_add_dict(link_dict, pdf_new_name("C"), color);
00114     if (name && *name == '#' && !(base_value)) {
00115       pdf_add_dict (link_dict, pdf_new_name("Dest"), pdf_new_string(name+1,strlen(name+1)));
00116     } else if (name) {    /* Assume its a URL */
00117       char *url;
00118       int len;
00119       pdf_obj *action;
00120       len = strlen(name)+1;
00121       if (base_value)
00122        len+=strlen(base_value);
00123       url = NEW (len, char);
00124       if (base_value)
00125        strcpy (url, base_value);
00126       else
00127        url[0] = 0;
00128       strcat (url, name);
00129       action = pdf_new_dict();
00130       pdf_add_dict (action, pdf_new_name ("Type"), pdf_new_name ("Action"));
00131       pdf_add_dict (action, pdf_new_name ("S"), pdf_new_name ("URI"));
00132       pdf_add_dict (action, pdf_new_name ("URI"),
00133                   pdf_new_string (url, len));
00134       pdf_add_dict (link_dict, pdf_new_name ("A"), pdf_ref_obj (action));
00135       pdf_release_obj (action);
00136       RELEASE (url);
00137     }
00138     pdf_doc_begin_annot (link_dict);
00139   } else {
00140     fprintf (stderr, "\nAttempt to nest links\n");
00141   }
00142 }
00143 
00144 void html_make_dest (char *name) 
00145 {
00146   pdf_obj *array;
00147   array = pdf_new_array ();
00148   pdf_add_array (array, pdf_doc_this_page_ref());
00149   pdf_add_array (array, pdf_new_name("XYZ"));
00150   pdf_add_array (array, pdf_new_null());
00151   pdf_add_array (array, pdf_new_number(dev_phys_y()+24.0));
00152   pdf_add_array (array, pdf_new_null());
00153   pdf_doc_add_dest (name, strlen(name), pdf_ref_obj(array));
00154   pdf_release_obj (array);
00155 }
00156 
00157 void html_start_anchor (char *key, char *value) 
00158 {
00159   if (pending_type <= 0 && !link_dict) {
00160     downcase (key);
00161     if (!strcmp (key, "href")) {
00162       html_make_link_dict (value);
00163       pending_type = HREF;
00164     } else if (!strcmp (key, "name")) {
00165       html_make_dest (value);
00166       pending_type = NAME;
00167     } else {
00168       fprintf (stderr, "\nUnrecognized keyword in anchor tag: %s\n",
00169               key);
00170     }
00171   } else {
00172     fprintf (stderr, "\nWarning: Nested html anchors\n");
00173   }
00174   if (key)
00175     RELEASE (key);
00176   if (value) {
00177     RELEASE (value);
00178   }
00179 }
00180 
00181 void html_end_anchor (void)
00182 {
00183   switch (pending_type) {
00184   case HREF:
00185     if (link_dict) {
00186       pdf_doc_end_annot ();
00187       pdf_release_obj (link_dict);
00188       link_dict = NULL;
00189       pending_type = 0;
00190     } else {
00191       fprintf (stderr, "\nhtml_end_anchor:  Ending anchor tag without starting tag!\n");
00192     }
00193     break;
00194   case NAME:
00195     pending_type = 0;
00196     break;
00197   }
00198 }
00199 
00200 void html_set_base (char *value)
00201 {
00202   if (base_value)
00203     RELEASE (base_value);
00204   base_value = value;
00205 }
00206 
00207 
00208 
00209 int htex_parse_special(char *buffer, UNSIGNED_QUAD size)
00210 {
00211   int result = 0;
00212   char *key, *value;
00213   char *save = buffer;
00214   char *end = buffer + size;
00215   int htmltag;
00216   skip_white (&buffer, end);
00217   if (is_htex_special(&buffer, end)) {
00218     result = 1; /* Must be html special (doesn't mean it will succeed) */
00219     skip_white (&buffer, end);
00220     if (buffer < end && *(buffer++) == '<' ) {
00221       htmltag = parse_html_tag(&buffer, end);
00222       switch (htmltag) {
00223       case ANCHOR:
00224        parse_key_val (&buffer, end, &key, &value);
00225        if (key && value)
00226          html_start_anchor (key, value);
00227        break;
00228       case IMAGE:
00229        fprintf (stderr, "\nImage html tag not yet implemented\n");
00230        parse_key_val (&buffer, end, &key, &value);
00231        if (key) RELEASE (key);
00232        if (value) RELEASE (value);
00233        break;
00234       case BASE:
00235        parse_key_val (&buffer, end, &key, &value);
00236        if (key && value)
00237          html_set_base (value);
00238        if (key)
00239          RELEASE (key);
00240        break;
00241       case END_ANCHOR:
00242        html_end_anchor ();
00243        break;
00244       default:
00245        fprintf (stderr, "Invalid tag\n");
00246        dump (save, end);
00247       }
00248     }
00249     skip_white(&buffer, end);
00250     if (buffer >= end || *buffer != '>') {
00251       fprintf (stderr, "\nBadly terminated tag..\n");
00252     }
00253   }
00254   return result;
00255 }
00256 
00257 
00258