Back to index

courier  0.68.2
Classes | Defines | Functions | Variables
html.c File Reference
#include "html.h"
#include "unicode/unicode.h"
#include "rfc2045/rfc2045.h"
#include <stdlib.h>
#include <string.h>

Go to the source code of this file.

Classes

struct  taginfo
struct  attr
struct  htmlfilter_info

Defines

#define SPACE(c)   ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')
#define FLAG_NOENDTAG   0x01
#define FLAG_DISCARD   0x02
#define FLAG_NOPRINT   0x04
#define FLAG_BLOCKQUOTE_CITE   0x1000

Functions

static void free_last_attr (struct htmlfilter_info *p)
static void free_attrs (struct htmlfilter_info *p)
static size_t handle_chars (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t handle_entity (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_lt (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_ltexcl (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_sgentity (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_ltspace (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_pi (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_piq (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_comment (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_commentdash (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_commentdashdash (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_closing_elem (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_opening_elem (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_attr (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_attrname (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_attrvalue_1stchar (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_attrvalue (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static size_t seen_attrvalue_entity (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
struct htmlfilter_info * htmlfilter_alloc (void(*output_func)(const unicode_char *, size_t, void *), void *output_func_arg)
static void close_elements_until (struct htmlfilter_info *p, size_t i)
void htmlfilter_free (struct htmlfilter_info *p)
void htmlfilter_set_contentbase (struct htmlfilter_info *p, const char *contentbase)
void htmlfilter_set_http_prefix (struct htmlfilter_info *p, const char *http_prefix)
void htmlfilter_set_mailto_prefix (struct htmlfilter_info *p, const char *mailto_prefix)
void htmlfilter_set_convertcid (struct htmlfilter_info *p, char *(*convert_cid_func)(const char *, void *), void *convert_cid_func_arg)
void htmlfilter (struct htmlfilter_info *p, const unicode_char *str, size_t cnt)
static void output (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static void output_chars (struct htmlfilter_info *p, const char *str, size_t cnt)
static unicode_char isualnum (unicode_char c)
static int search_tags (const void *key, const void *elem)
static struct taginfo * change_element (const struct taginfo *tag)
static void output_escaped (struct htmlfilter_info *p, const unicode_char *uc, size_t cnt)
static void open_element (struct htmlfilter_info *p)
static void close_element (struct htmlfilter_info *p, const struct taginfo *tag)
static void save_attr (struct htmlfilter_info *p)
static void append_orig_href (struct htmlfilter_info *p, struct unicode_buf *dst, const char *url)
static int change_href (struct htmlfilter_info *p, char *url, struct unicode_buf *dst, int must_be_cid, int *was_http_url)
static void save_attr_int (struct htmlfilter_info *p, struct unicode_buf *name, struct unicode_buf *value)
static int is_attr (struct htmlfilter_info *p, const char *c)
static char * resolve_url (struct htmlfilter_info *p)
static char * handle_url (struct htmlfilter_info *p, int must_be_cid)
static int attr_already_exists (struct htmlfilter_info *p, struct unicode_buf *name)
static void append_entity (struct htmlfilter_info *p)

Variables

static const char hex [] = "0123456789ABCDEF"
static struct taginfo []

Class Documentation

struct taginfo

Definition at line 18 of file html.c.

Class Members
int flags
char tagname
struct attr

Definition at line 111 of file html.c.


Define Documentation

#define FLAG_BLOCKQUOTE_CITE   0x1000

Definition at line 31 of file html.c.

#define FLAG_DISCARD   0x02

Definition at line 28 of file html.c.

#define FLAG_NOENDTAG   0x01

Definition at line 27 of file html.c.

#define FLAG_NOPRINT   0x04

Definition at line 29 of file html.c.

#define SPACE (   c)    ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')

Definition at line 16 of file html.c.


Function Documentation

static void append_entity ( struct htmlfilter_info * p) [static]

Definition at line 1637 of file html.c.

{
       /*
       ** Decode the character reference whose text is held in p->atom2
       ** and append the resulting single character to p->value.
       **
       ** A leading '#' selects a numeric reference (hexadecimal when the
       ** next character is 'x' or 'X', decimal otherwise). Anything else
       ** is looked up by name with unicode_html40ent_lookup().
       */
       unicode_char v=0;

       if (unicode_buf_len(&p->atom2) &&
           unicode_buf_ptr(&p->atom2)[0] == '#')
       {
              const unicode_char *u=unicode_buf_ptr(&p->atom2);
              size_t n=unicode_buf_len(&p->atom2);

              /* Step over the '#' */
              ++u;
              --n;

              if (n && (*u == 'x' || *u == 'X'))
              {
                     /*
                     ** n still counts the 'x' and u still points at it, so
                     ** both are advanced before each digit is read.
                     */
                     while (--n)
                     {
                            unicode_char c=*++u;
                            const char *cp;

                            /* Fold a-f to A-F to match the hex[] table */
                            if (c >= 'a' && c <= 'f')
                                   c += 'A'-'a';

                            /*
                            ** Only ASCII is handed to strchr(); this also
                            ** rejects 0, which strchr() would otherwise
                            ** match at the table's terminator.
                            */
                            if (c < ' ' || c > 127)
                                   break;

                            cp=strchr(hex, c);

                            if (!cp)
                                   break;

                            v = v * 16 + (cp-hex);
                     }
              }
              else
              {
                     while (n)
                     {
                            unicode_char c= *u++;

                            --n;

                            if (c < '0' || c > '9')
                                   break;

                            v = v * 10 + (c-'0');
                     }
              }
              /*
              ** Either loop stops at the first invalid digit; whatever has
              ** accumulated in v (possibly still 0) is appended below.
              */
       }
       else
       {
              char entitybuf[32];
              size_t i;

              /* The name plus its terminator must fit in entitybuf */
              if (unicode_buf_len(&p->atom2) >= sizeof(entitybuf))
                     return;

              for (i=0; i<unicode_buf_len(&p->atom2); ++i)
              {
                     unicode_char c=unicode_buf_ptr(&p->atom2)[i];

                     /* Give up on characters that do not fit in one byte */
                     if ((unsigned char)c != c)
                            return;
                     entitybuf[i]=c;
              }
              entitybuf[i]=0;

              /* A lookup result of 0 means nothing is appended */
              if ((v=unicode_html40ent_lookup(entitybuf)) == 0)
                     return;
       }

       unicode_buf_append(&p->value, &v, 1);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void append_orig_href ( struct htmlfilter_info p,
struct unicode_buf dst,
const char *  url 
) [static]

Definition at line 1144 of file html.c.

{
       /*
       ** Append url to dst, copying runs of ASCII letters and digits
       ** as they are and writing every other byte as %XX (uppercase
       ** hexadecimal).
       */
       size_t remaining=strlen(url);

       while (remaining > 0)
       {
              size_t run=0;

              /* Measure the leading run that needs no escaping */
              while (run < remaining && isualnum(url[run]))
                     ++run;

              if (run > 0)
              {
                     unicode_buf_append_char(dst, url, run);
                     url += run;
                     remaining -= run;
              }
              else
              {
                     /* First byte is not alphanumeric: escape it alone */
                     unicode_char escaped[3];

                     escaped[0]='%';
                     escaped[1]=hex[ (url[0] >> 4) & 15];
                     escaped[2]=hex[ url[0] & 15];

                     unicode_buf_append(dst, escaped, 3);
                     ++url;
                     --remaining;
              }
       }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int attr_already_exists ( struct htmlfilter_info p,
struct unicode_buf name 
) [static]

Definition at line 1371 of file html.c.

{
       /*
       ** Return 1 if one of the attributes saved so far for the current
       ** element has the given name, 0 otherwise.
       */
       size_t idx=0;

       while (idx < p->attrs_index)
       {
              if (!unicode_buf_cmp(&p->attrs[idx].name, name))
                     return 1;
              ++idx;
       }

       return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static struct taginfo* change_element ( const struct taginfo * tag) [static, read]

Definition at line 745 of file html.c.

{
       /*
       ** Substitute the tag that is actually emitted for certain
       ** elements: "base" becomes div_tag, "script" and "style" become
       ** span_discard_tag. Every other tag is returned unchanged.
       */
       const char *name=tag->tagname;

       if (!strcmp(name, "base"))
              return &div_tag;

       if (!strcmp(name, "script") || !strcmp(name, "style"))
              return &span_discard_tag;

       return tag;
}

Here is the caller graph for this function:

static int change_href ( struct htmlfilter_info p,
char *  url,
struct unicode_buf dst,
int  must_be_cid,
int *  was_http_url 
) [static]

Definition at line 1187 of file html.c.

{
       /*
       ** Rewrite url into dst according to its scheme.
       **
       ** Returns 1 when a replacement was written to dst, 0 when the URL
       ** is not one that gets rewritten. *was_http_url is set to 1 only
       ** for http:/https: URLs that were rewritten. url is modified in
       ** place: its scheme is lowercased and, for mailto:, the first '?'
       ** becomes '&'.
       */
       char *s;

       *was_http_url=0;

       /* Lowercase everything up to the first ':' (the scheme) */

       for (s=url; *s != '\0' && *s != ':'; ++s)
       {
              if (*s >= 'A' && *s <= 'Z')
                     *s += 'a'-'A';
       }

       if (p->convert_cid_func && strncmp(url, "cid:", 4) == 0)
       {
              char *converted=(*p->convert_cid_func)
                     (url+4, p->convert_cid_func_arg);

              if (converted != NULL)
              {
                     unicode_buf_append_char(dst, converted,
                                          strlen(converted));
                     free(converted);
                     return 1;
              }
       }

       /* Caller accepts nothing but a converted cid: URL */
       if (must_be_cid)
              return 0;

       if (p->http_prefix && *p->http_prefix &&
           (!strncmp(url, "http:", 5) || !strncmp(url, "https:", 6)))
       {
              *was_http_url=1;
              unicode_buf_append_char(dst, p->http_prefix,
                                   strlen(p->http_prefix));
              append_orig_href(p, dst, url);
              return 1;
       }

       if (p->mailto_prefix && *p->mailto_prefix &&
           !strncmp(url, "mailto:", 7))
       {
              char *query=strchr(url, '?');

              if (query != NULL)
                     *query='&';

              unicode_buf_append_char(dst, p->mailto_prefix,
                                   strlen(p->mailto_prefix));
              append_orig_href(p, dst, url+7);
              return 1;
       }

       return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void close_element ( struct htmlfilter_info p,
const struct taginfo tag 
) [static]

Definition at line 951 of file html.c.

{
       /*
       ** Handle a closing tag: locate the most recently opened element
       ** with the same (substituted) tag name and close it together with
       ** everything opened after it. A closing tag with no matching open
       ** element is ignored.
       */
       size_t pos;

       tag=change_element(tag);

       /* Walk the open-element stack from the top down */

       for (pos=p->n_open_elements; pos > 0; --pos)
       {
              if (strcmp(p->open_elements[pos-1]->tagname,
                         tag->tagname) == 0)
              {
                     close_elements_until(p, pos-1);
                     return;
              }
       }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void close_elements_until ( struct htmlfilter_info p,
size_t  i 
) [static]

Definition at line 975 of file html.c.

{
       /*
       ** Pop open elements until only i of them remain, emitting a
       ** closing tag for each one that is neither being discarded nor
       ** flagged FLAG_NOPRINT.
       */
       while (p->n_open_elements > i)
       {
              --p->n_open_elements;

              if (!p->n_discarded &&
                  (p->open_elements[p->n_open_elements]->flags & FLAG_NOPRINT)
                  == 0)
              {
                     const char *cp=
                            p->open_elements[p->n_open_elements]->tagname;

                     /*
                     ** Emit "</div>" ahead of the closing tag when the
                     ** flag is set on both the popped element and p->tag.
                     **
                     ** NOTE(review): the test reads p->tag as well as the
                     ** element being popped -- confirm that p->tag always
                     ** refers to the intended tag on every path that
                     ** reaches this function.
                     */
                     if (p->open_elements[p->n_open_elements]->flags &
                         p->tag->flags & FLAG_BLOCKQUOTE_CITE)
                            output_chars(p, "</div>", 6);

                     output_chars(p, "</", 2);
                     output_chars(p, cp, strlen(cp));
                     output_chars(p, ">", 1);
              }

              /* One discard level is undone per popped element, if any */
              if (p->n_discarded)
                     --p->n_discarded;
       }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void free_attrs ( struct htmlfilter_info * p) [static]

Definition at line 199 of file html.c.

{
       /* Release every saved attribute, one free_last_attr() at a time. */
       while (p->attrs_index > 0)
       {
              free_last_attr(p);
       }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void free_last_attr ( struct htmlfilter_info * p) [static]

Definition at line 191 of file html.c.

{
       /*
       ** Drop the most recently saved attribute and release its name and
       ** value buffers. Expects p->attrs_index to be nonzero on entry.
       */
       struct attr *last;

       p->attrs_index -= 1;
       last=&p->attrs[p->attrs_index];

       unicode_buf_deinit(&last->name);
       unicode_buf_deinit(&last->value);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t handle_chars ( struct htmlfilter_info p,
const unicode_char uc,
size_t  cnt 
) [static]

Definition at line 411 of file html.c.

{
       /*
       ** Handler for ordinary text between tags. Scans for '&', '<' and
       ** '>' and returns the number of characters consumed from uc.
       **
       ** Text in front of a special character is flushed through
       ** output() first. For '&' and '<' the special character itself is
       ** then left for the next call, where it arrives at index 0.
       */
       size_t i;

       for (i=0; i<cnt; ++i)
              switch (uc[i]) {
              case '&':
                     if (i)
                     {
                            output(p, uc, i);
                            return i;
                     }

                     /* Start collecting an entity; p->atom begins with '&' */
                     unicode_buf_clear(&p->atom);

                     unicode_buf_append(&p->atom, uc+i, 1);
                     p->handler_func=handle_entity;
                     return 1;
              case '<':
                     if (i)
                     {
                            output(p, uc, i);
                            return i;
                     }
                     p->handler_func=seen_lt;

                     /* Discard attributes saved for the previous tag */
                     free_attrs(p);
                     return 1;

              case '>':
                     /* A '>' in text is written out as "&gt;" */
                     if (i)
                            output(p, uc, i);
                     {
                            static const unicode_char gt[]=
                                   {'&','g','t',';'};

                            output(p, gt, 4);
                     }
                     return i+1;
              }

       /* No special characters: pass the whole chunk through */
       output(p, uc, cnt);
       return cnt;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t handle_entity ( struct htmlfilter_info p,
const unicode_char uc,
size_t  cnt 
) [static]

Definition at line 482 of file html.c.

{
       /*
       ** Handler used after handle_chars() has seen '&' and stored it in
       ** p->atom. Collects the entity text and returns the number of
       ** characters consumed from uc.
       **
       ** The entity is written out only when it is terminated by ';'.
       ** On any other terminator the collected text is not output, the
       ** terminator is not consumed, and handle_chars() takes over.
       */
       size_t i;

       /* A '#' directly after the '&' introduces a numeric reference */
       if (unicode_buf_len(&p->atom) == 1 && *uc == '#')
       {
              unicode_buf_append(&p->atom, uc, 1);
              return 1;
       }

       for (i=0; i<cnt; ++i)
       {
              /* isualnum() returns 0 for non-alphanumerics and folds A-Z to lowercase */
              unicode_char c=isualnum(uc[i]);

              if (c != 0)
              {
                     unicode_buf_append(&p->atom, &c, 1);
                     continue;
              }

              p->handler_func=handle_chars;
              if (uc[i] == ';')
              {
                     /*
                     ** It's well-formed
                     */
                     output(p, unicode_buf_ptr(&p->atom),
                            unicode_buf_len(&p->atom));
                     output_chars(p, ";", 1);
                     return ++i;
              }

              break;
       }
       /*
       ** Reached either with the chunk exhausted (this handler stays
       ** active) or at a non-';' terminator, which is left unconsumed.
       */
       return i;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static char* handle_url ( struct htmlfilter_info p,
int  must_be_cid 
) [static]

Definition at line 1331 of file html.c.

{
       /*
       ** Resolve the URL in the current attribute value, rewrite it with
       ** change_href() and, if it was rewritten, save the attribute named
       ** by p->atom with the rewritten value.
       **
       ** Returns the resolved URL (to be freed by the caller) only when
       ** change_href() reported a rewritten http/https URL. Returns NULL
       ** in every other case.
       */
       struct unicode_buf rewritten;
       char *resolved=resolve_url(p);
       char *kept=NULL;
       int is_http;

       if (resolved == NULL)
              return NULL;

       unicode_buf_init(&rewritten, (size_t)-1);

       if (change_href(p, resolved, &rewritten, must_be_cid, &is_http))
       {
              save_attr_int(p, &p->atom, &rewritten);

              if (is_http)
              {
                     /* Ownership passes to the caller */
                     kept=resolved;
                     resolved=NULL;
              }
       }

       free(resolved);

       unicode_buf_deinit(&rewritten);
       return kept;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void htmlfilter ( struct htmlfilter_info p,
const unicode_char str,
size_t  cnt 
)

Definition at line 358 of file html.c.

{
       /*
       ** Feed cnt characters through the filter. The current handler is
       ** called repeatedly on the unconsumed remainder; each call returns
       ** how many characters it consumed.
       */
       const unicode_char *cursor=str;
       size_t remaining=cnt;

       while (remaining != 0)
       {
              size_t consumed=(*p->handler_func)(p, cursor, remaining);

              cursor += consumed;
              remaining -= consumed;
       }
}

Here is the caller graph for this function:

struct htmlfilter_info* htmlfilter_alloc ( void(*)(const unicode_char *, size_t, void *)  output_func,
void *  output_func_arg 
) [read]

Definition at line 277 of file html.c.

{
       /*
       ** Allocate a zero-filled filter that delivers its output to
       ** output_func(chars, count, output_func_arg). Returns NULL if the
       ** allocation fails.
       */
       struct htmlfilter_info *info=calloc(1, sizeof(*info));

       if (info == NULL)
              return NULL;

       /* Parsing begins in plain-text state */
       info->handler_func=handle_chars;

       info->output_func=output_func;
       info->output_func_arg=output_func_arg;

       unicode_buf_init(&info->atom, 2048);
       unicode_buf_init(&info->atom2, 2048);
       unicode_buf_init(&info->value, 8192);

       return info;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void htmlfilter_free ( struct htmlfilter_info * p)

Definition at line 300 of file html.c.

{
       /*
       ** Close every element that is still open (this may produce
       ** output), then release the filter and everything it owns.
       */
       close_elements_until(p, 0);
       free_attrs(p);

       unicode_buf_deinit(&p->atom);
       unicode_buf_deinit(&p->atom2);
       unicode_buf_deinit(&p->value);

       /* free() accepts a null pointer, so unset strings need no test */
       free(p->contentbase);
       free(p->http_prefix);
       free(p->mailto_prefix);

       free(p);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void htmlfilter_set_contentbase ( struct htmlfilter_info p,
const char *  contentbase 
)

Definition at line 322 of file html.c.

Here is the call graph for this function:

Here is the caller graph for this function:

void htmlfilter_set_convertcid ( struct htmlfilter_info p,
char *(*)(const char *, void *)  convert_cid_func,
void *  convert_cid_func_arg 
)

Definition at line 350 of file html.c.

Here is the caller graph for this function:

void htmlfilter_set_http_prefix ( struct htmlfilter_info p,
const char *  http_prefix 
)

Definition at line 332 of file html.c.

Here is the call graph for this function:

Here is the caller graph for this function:

void htmlfilter_set_mailto_prefix ( struct htmlfilter_info p,
const char *  mailto_prefix 
)

Definition at line 341 of file html.c.

Here is the call graph for this function:

Here is the caller graph for this function:

static int is_attr ( struct htmlfilter_info p,
const char *  c 
) [static]

Definition at line 1278 of file html.c.

{
       /* Nonzero when the attribute name held in p->atom equals c. */
       size_t len=strlen(c);

       return !unicode_buf_cmp_str(&p->atom, c, len);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static unicode_char isualnum ( unicode_char  c) [static]

Definition at line 464 of file html.c.

{
       /*
       ** Classify c as an ASCII letter or digit. Returns 0 if it is
       ** neither; otherwise returns the character, with uppercase letters
       ** folded to lowercase.
       */
       if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z'))
              return c;

       if (c >= 'A' && c <= 'Z')
              return c - 'A' + 'a';

       return 0;
}

Here is the caller graph for this function:

static void open_element ( struct htmlfilter_info * p) [static]

Definition at line 810 of file html.c.

{
       /*
       ** Emit the opening tag for p->tag with the attributes saved in
       ** p->attrs, and push the tag on the open-element stack unless the
       ** tag is empty. Nothing is done when the stack is already full.
       */
       size_t i=0;
       int discard_was_increased=0;

       p->tag=change_element(p->tag);

       if (p->n_open_elements >=
           sizeof(p->open_elements)/sizeof(p->open_elements[0]))
              return; /* Too many open elements */

       /*
       ** Entering a discarded element, or any element nested inside one,
       ** adds a discard level. Remember that so it can be undone below if
       ** the tag turns out to be empty.
       */
       if ((p->tag->flags & FLAG_DISCARD) || p->n_discarded)
       {
              ++p->n_discarded;
              discard_was_increased=1;
       }

       if (p->tag->flags & FLAG_NOENDTAG)
              p->tag_empty=1; /* Make it so, Number One. */

       /*
       ** output() writes nothing while n_discarded is nonzero, so this
       ** suppresses the tag text produced below; it is undone at the end.
       */
       if (p->tag->flags & FLAG_NOPRINT)
              ++p->n_discarded; /* Temporary */

       p->open_elements[p->n_open_elements++]=p->tag;

       /*
       ** For A elements, the title attribute will have the full target
       ** URL. Attempt to extract the hostname and show it before the
       ** A element.
       */

       if (strcmp(p->tag->tagname, "a") == 0)
       {
              size_t i;

              for (i=0; i<p->attrs_index; ++i)
              {
                     if (unicode_buf_cmp_str(&p->attrs[i].name, "title", 5)
                         == 0)
                     {
                            size_t j, k;

                            /* Skip through the first ':' (end of scheme) */
                            for (j=0; j<unicode_buf_len(&p->attrs[i].value);
                                 ++j)
                            {
                                   if (unicode_buf_ptr(&p->attrs[i].value)
                                       [j] == ':')
                                   {
                                          ++j;
                                          break;
                                   }
                            }

                            /* Skip the slashes that follow it */
                            while (j<unicode_buf_len(&p->attrs[i].value) &&
                                   unicode_buf_ptr(&p->attrs[i].value)[j]
                                   == '/')
                                   ++j;
                            k=j;

                            /* The host runs up to the next '/', '?' or '#' */
                            while (k<unicode_buf_len(&p->attrs[i].value))
                            {
                                   switch (unicode_buf_ptr(&p->attrs[i]
                                                        .value)[k]) {
                                   case '/':
                                   case '?':
                                   case '#':
                                          break;
                                   default:
                                          ++k;
                                          continue;
                                   }
                                   break;
                            }

                            if (k > j)
                            {
                                   static const char span[]=
                                          "<span class=\"urlhost\">[";

                                   output_chars(p, span,
                                               sizeof(span)-1);
                                   output_escaped(p,
                                                 unicode_buf_ptr(&p->
                                                               attrs[i]
                                                               .value)
                                                 +j, k-j);

                                   output_chars(p, "]</span>", 8);
                            }
                            /* Only the first title attribute is examined */
                            break;
                     }
              }
       }

       output_chars(p, "<", 1);
       output_chars(p, p->tag->tagname, strlen(p->tag->tagname));

       for (i=0; i<p->attrs_index; ++i)
       {
              output_chars(p, " ", 1);
              output(p, unicode_buf_ptr(&p->attrs[i].name),
                     unicode_buf_len(&p->attrs[i].name));

              /* An attribute with an empty value is written as a bare name */
              if (unicode_buf_len(&p->attrs[i].value) > 0)
              {
                     output_chars(p, "=\"", 2);

                     output_escaped(p, unicode_buf_ptr(&p->attrs[i].value),
                                   unicode_buf_len(&p->attrs[i].value));
                     output_chars(p, "\"", 1);
              }
       }

       if (p->tag_empty)
              output_chars(p, " /", 2);

       output_chars(p, ">", 1);

       if (p->tag_empty)
       {
              /* This tag did not really open */

              --p->n_open_elements;

              if (discard_was_increased)
                     --p->n_discarded;
       }

       /* Quoted text gets a wrapping div right after the opening tag */
       if (!p->tag_empty && p->tag->flags & FLAG_BLOCKQUOTE_CITE)
       {
              static const char str[]="<div class=\"quotedtext\">";

              output_chars(p, str, sizeof(str)-1);
       }

       if (p->tag->flags & FLAG_NOPRINT)
              --p->n_discarded; /* Was temporary */
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void output ( struct htmlfilter_info p,
const unicode_char uc,
size_t  cnt 
) [static]

Definition at line 374 of file html.c.

{
       /*
       ** Pass cnt characters to the caller-supplied output function.
       ** Nothing is written for an empty run or while inside a discarded
       ** element.
       */
       if (cnt == 0 || p->n_discarded)
              return;

       (*p->output_func)(uc, cnt, p->output_func_arg);
}

Here is the caller graph for this function:

static void output_chars ( struct htmlfilter_info p,
const char *  str,
size_t  cnt 
) [static]

Definition at line 385 of file html.c.

{
       /*
       ** Write cnt bytes of a narrow string through output(), widening
       ** each byte (taken as unsigned char) to a unicode_char. The work
       ** is done in chunks of at most 256 characters.
       */
       unicode_char widened[256];
       const size_t capacity=sizeof(widened)/sizeof(widened[0]);

       while (cnt > 0)
       {
              size_t chunk=cnt < capacity ? cnt : capacity;
              size_t k;

              for (k=0; k<chunk; ++k)
                     widened[k]=(unsigned char)str[k];

              output(p, widened, chunk);

              str += chunk;
              cnt -= chunk;
       }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void output_escaped ( struct htmlfilter_info p,
const unicode_char uc,
size_t  cnt 
) [static]

Definition at line 760 of file html.c.

{
       /*
       ** Write cnt characters from uc through output(), escaping as
       ** needed. Characters in the range ' '..127 other than < > & "
       ** are passed through unchanged; every other character is written
       ** as a hexadecimal character reference ("&#xHEX;").
       */
       while (cnt)
       {
              size_t i;

              /* Measure the leading run that needs no escaping */
              for (i=0; i<cnt; i++)
                     if (uc[i] < ' ' || uc[i] > 127 ||
                         uc[i] == '<' || uc[i] == '>' || uc[i] == '&' ||
                         uc[i] == '"')
                            break;

              if (i)
                     output(p, uc, i);

              uc += i;
              cnt -= i;

              if (cnt)
              {
                     unicode_char c;

                     /*
                     ** Worst case is "&#x" (3), two hex digits for each
                     ** byte of unicode_char, ";" (1) and the terminating
                     ** NUL (1), i.e. 2*sizeof(unicode_char)+5 bytes.
                     ** Anything smaller lets a value that uses every hex
                     ** digit store the leading '&' in front of buf.
                     */
                     char buf[sizeof(unicode_char)*2+5];
                     char *cp;

                     c= *uc++;
                     --cnt;

                     /* Build the reference backwards from the end of buf */
                     cp=buf+sizeof(buf)-1;
                     *cp=0;
                     *--cp=';';
                     do
                     {
                            *--cp=hex[c & 15];
                            c /= 16;
                     } while (c);
                     *--cp='x';
                     *--cp='#';
                     *--cp='&';

                     output_chars(p, cp, strlen(cp));
              }
       }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static char* resolve_url ( struct htmlfilter_info p) [static]

Definition at line 1287 of file html.c.

{
       /*
       ** Convert the current attribute value (p->value) to a UTF-8
       ** string and, when a non-empty content base is set, combine the
       ** two with rfc2045_append_url().
       **
       ** Returns a string for the caller to free, or NULL on failure.
       */
       char *utf8;
       size_t utf8_len;
       char *joined;

       libmail_u_convert_handle_t conv=
              libmail_u_convert_fromu_init("utf-8", &utf8, &utf8_len, 1);

       if (!conv)
              return NULL;

       libmail_u_convert_uc(conv, unicode_buf_ptr(&p->value),
                          unicode_buf_len(&p->value));

       if (libmail_u_convert_deinit(conv, NULL))
              return NULL;

       if (!utf8)
              return NULL;

       /* Without a content base the converted value is the answer */
       if (!p->contentbase || !*p->contentbase)
              return utf8;

       joined=rfc2045_append_url(p->contentbase, utf8);

       free(utf8);
       return joined;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void save_attr ( struct htmlfilter_info p) [static]

Definition at line 1384 of file html.c.

{
       /*
       ** Decide what to do with the attribute that was just parsed (name
       ** in p->atom, value in p->value) and switch the parser back to
       ** seen_attr. An attribute is kept only if one of the cases below
       ** saves it; anything else falls off the end and is dropped.
       */
       p->handler_func=seen_attr;

       /* The first occurrence of an attribute name wins */
       if (attr_already_exists(p, &p->atom))
              return;

       /*
       ** Transform <blockquote type="cite"> into
       **
       ** <blockquote class="citeN"> where N nests from 0 to 2.
       */

       if (is_attr(p, "type") && strcmp(p->tag->tagname, "blockquote") == 0 &&
           unicode_buf_len(&p->value) == 4)
       {
              size_t i;

              /* isualnum() folds to lowercase, so "CITE" matches too */
              for (i=0; i<4; ++i)
                     if (isualnum(unicode_buf_ptr(&p->value)[i])
                         != "cite"[i])
                            break;

              if (i == 4)
              {
                     size_t n=0, j;
                     char buf[10];

                     /* Count the cite blockquotes that are already open */
                     for (j=0; j<p->n_open_elements; ++j)
                            if (p->open_elements[j]->flags &
                                FLAG_BLOCKQUOTE_CITE)
                                   ++n;

                     p->tag=&blockquote_cite_tag;

                     sprintf(buf, "cite%d", (int)(n % 3));

                     unicode_buf_clear(&p->value);
                     unicode_buf_append_char(&p->value, buf, strlen(buf));

                     unicode_buf_clear(&p->atom);
                     unicode_buf_append_char(&p->atom, "class", 5);

                     /*
                     ** If a class attribute was already saved, execution
                     ** continues below with p->atom set to "class", which
                     ** none of the remaining cases accept.
                     */
                     if (!attr_already_exists(p, &p->atom))
                     {
                            save_attr_int(p, &p->atom, &p->value);
                            return;
                     }
              }
       }

       /*
       ** Do not allow title attributes on an A element, we'll supply our
       ** own.
       */

       if (is_attr(p, "title") &&
           strcmp(p->tag->tagname, "a") == 0)
              return;

       if (is_attr(p, "lang")
           || is_attr(p, "title")
           || is_attr(p, "dir")
           || is_attr(p, "size")
           || is_attr(p, "color")
           || is_attr(p, "face")

           || is_attr(p, "span")
           || is_attr(p, "width")
           || is_attr(p, "height")
           || is_attr(p, "align")
           || is_attr(p, "char")
           || is_attr(p, "charoff")
           || is_attr(p, "valign")
           || is_attr(p, "alt")
           )
       {
              /* Safe attributes */

              save_attr_int(p, &p->atom, &p->value);
              return;
       }

       /* IMG SRC: handle_url() is asked to accept cid: URLs only */
       if (is_attr(p, "src") && strcmp(p->tag->tagname, "img") == 0)
       {
              char *url=handle_url(p, 1);

              if (url)
                     free(url);
              return;
       }

       if (is_attr(p, "href"))
       {
              /*
              ** BASE HREF is not saved as an attribute; its value is
              ** installed as the content base instead.
              */
              if (strcmp(p->tag->tagname, "base") == 0)
              {
                     char *buf=malloc(unicode_buf_len(&p->value)+1);

                     if (buf)
                     {
                            size_t i;

                            /* Each character is narrowed to char by the assignment */
                            for (i=0; i<unicode_buf_len(&p->value); ++i)
                            {
                                   buf[i]=unicode_buf_ptr(&p->value)[i];
                            }
                            buf[i]=0;

                            htmlfilter_set_contentbase(p, buf);
                            free(buf);
                     }
                     return;
              }


              if (strcmp(p->tag->tagname, "a") == 0)
              {
                     char *url;

                     /*
                     ** handle_url() saves the rewritten href itself; a
                     ** non-NULL return value is the URL to show in the
                     ** title, and must be freed here.
                     */
                     if ((url=handle_url(p, 0)) != NULL)
                     {
                            /* Append target=_blank to HREF */

                            unicode_buf_clear(&p->atom);
                            unicode_buf_append_char(&p->atom, "target", 6);
                            unicode_buf_clear(&p->value);
                            unicode_buf_append_char(&p->value, "_blank", 6);
                            save_attr_int(p, &p->atom, &p->value);

                            /* Append the full URL in the title tag */

                            unicode_buf_clear(&p->atom);
                            unicode_buf_append_char(&p->atom, "title", 5);
                            unicode_buf_clear(&p->value);
                            unicode_buf_append_char(&p->value, url, strlen(url));
                            save_attr_int(p, &p->atom, &p->value);
                            free(url);

                     }
                     return;
              }
       }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void save_attr_int ( struct htmlfilter_info *  p,
struct unicode_buf *  name,
struct unicode_buf *  value 
) [static]

Definition at line 1261 of file html.c.

{
       /*
       ** Copy one attribute name/value pair into the next unused entry
       ** of p->attrs. When every entry is already taken the attribute is
       ** dropped without any further action.
       */
       const size_t capacity=sizeof(p->attrs)/sizeof(p->attrs[0]);
       struct attr *slot;

       if (p->attrs_index >= capacity)
              return;

       /* Claim the slot first, then fill it in */
       slot= &p->attrs[p->attrs_index++];

       unicode_buf_init_copy(&slot->name, name);
       unicode_buf_init_copy(&slot->value, value);
}

Here is the caller graph for this function:

static int search_tags ( const void *  key,
const void *  elem 
) [static]

Definition at line 719 of file html.c.

{
       /*
       ** Comparison callback handed to bsearch(): key points to a
       ** struct unicode_buf, elem points to a struct taginfo.
       **
       ** The buffer's characters are compared, position by position,
       ** against the bytes of the entry's tagname. Running past the end
       ** of the buffer, or reaching the tagname's terminating 0, counts
       ** as a 0 at that position, so the shorter of two otherwise equal
       ** strings compares as smaller.
       **
       ** Returns -1, 0 or 1.
       */

       /*
       ** const void * converts implicitly; no cast is needed, and the
       ** const qualifier is not discarded along the way.
       */
       const struct unicode_buf *ukey=key;
       const struct taginfo *entry=elem;

       const char *cp=entry->tagname;
       const unicode_char *k=unicode_buf_ptr(ukey);
       size_t kl=unicode_buf_len(ukey);
       size_t i;
       unicode_char c;

       for (i=0; (c=i >= kl ? 0:k[i]) != 0 || cp[i] != 0; ++i)
       {
              /* Tag names are bytes; widen them without sign extension */
              unicode_char c2=(unsigned char)cp[i];

              if (c < c2)
                     return -1;

              if (c > c2)
                     return 1;
       }
       return 0;
}

Here is the caller graph for this function:

static size_t seen_attr ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 1112 of file html.c.

{
       /*
       ** Handler for the position inside an opening tag where the next
       ** attribute may begin. Looks only at the first character of uc.
       **
       ** Returns the count of leading characters of uc that were dealt
       ** with.
       */
       const unicode_char ch= *uc;

       /* Whitespace here is skipped */
       if (SPACE(ch))
              return 1;

       if (ch == '/')
       {
              p->tag_empty=1;
              return 1;
       }

       if (!isualnum(ch))
       {
              /*
              ** Not the start of an attribute name: go back to plain
              ** character handling. Only a '>' opens the element.
              */
              p->handler_func=handle_chars;

              if (ch == '>')
                     open_element(p);

              return 1;
       }

       /* First character of an attribute name; collect it into p->atom */
       unicode_buf_clear(&p->atom);
       p->handler_func=seen_attrname;
       return seen_attrname(p, uc, cnt);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_attrname ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 1532 of file html.c.

{
       /*
       ** Collects an attribute name into p->atom. ':' and '-' are taken
       ** as they are, any other character is stored as whatever
       ** isualnum() returns for it. The first character that isualnum()
       ** rejects ends the name.
       **
       ** Returns the count of leading characters of uc that were dealt
       ** with.
       */
       size_t pos;

       for (pos=0; pos<cnt; ++pos)
       {
              unicode_char ch=uc[pos];

              if (ch != ':' && ch != '-' && (ch=isualnum(ch)) == 0)
                     break;

              unicode_buf_append(&p->atom, &ch, 1);
       }

       /* Ran out of input in the middle of the name */
       if (pos == cnt)
              return cnt;

       /* End of the name: start with an empty, unquoted value */
       unicode_buf_clear(&p->value);
       p->value_quote=0;

       if (uc[pos] == '=')
       {
              /* A value follows; the '=' itself is used up here */
              p->handler_func=seen_attrvalue_1stchar;
              return pos+1;
       }

       /* No value expected */
       p->handler_func=seen_attr;
       save_attr(p);
       return pos;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_attrvalue ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 1588 of file html.c.

{
       /*
       ** Collects attribute value characters into p->value.
       **
       ** A '&' hands over to seen_attrvalue_entity. If the value opened
       ** with ' or " (p->value_quote), the same character closes it and
       ** is used up. Otherwise whitespace, '/' or '>' closes it and is
       ** left for the next handler.
       **
       ** Returns the count of leading characters of uc that were dealt
       ** with.
       */
       size_t n;

       /* Find how far the plain run of value characters extends */
       for (n=0; n<cnt; ++n)
       {
              const unicode_char ch=uc[n];

              if (ch == '&')
                     break;

              if (p->value_quote)
              {
                     if (ch == p->value_quote)
                            break;
              }
              else if (SPACE(ch) || ch == '/' || ch == '>')
              {
                     break;
              }
       }

       /* Everything in front of the stopping point belongs to the value */
       unicode_buf_append(&p->value, uc, n);

       if (n == cnt)
              return cnt;

       if (uc[n] == '&')
       {
              unicode_buf_clear(&p->atom2);
              p->handler_func=seen_attrvalue_entity;
              return n+1;
       }

       if (p->value_quote)
       {
              /* Closing quote */
              save_attr(p);
              return n+1;
       }

       /* Unquoted value */
       save_attr(p);
       return n;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_attrvalue_1stchar ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 1568 of file html.c.

{
       /*
       ** Handles the first character after an attribute's '='. A leading
       ** ' or " is remembered in p->value_quote and used up; anything
       ** else is passed straight to seen_attrvalue. Either way,
       ** seen_attrvalue becomes the current handler.
       */
       p->handler_func=seen_attrvalue;

       if (*uc == '\'' || *uc == '\"')
       {
              p->value_quote= *uc;
              return 1;
       }

       return seen_attrvalue(p, uc, cnt);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_attrvalue_entity ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 1720 of file html.c.

{
       /*
       ** Collects the name of an entity found inside an attribute value
       ** into p->atom2, after a '&' was seen.
       **
       ** Returns the count of leading characters of uc that were dealt
       ** with.
       */
       size_t n=0;

       /* A '#' is accepted only as the very first character of the name */
       if (unicode_buf_len(&p->atom2) == 0 && *uc == '#')
       {
              unicode_buf_append(&p->atom2, uc, 1);
              return 1;
       }

       while (n < cnt)
       {
              unicode_char accepted=isualnum(uc[n]);

              if (!accepted)
                     break;

              unicode_buf_append(&p->atom2, uc+n, 1);
              ++n;
       }

       /* Input exhausted, the name may continue in the next chunk */
       if (n == cnt)
              return cnt;

       if (uc[n] == ';')
       {
              /* Properly terminated; the ';' is used up as well */
              append_entity(p);
              ++n;
       }
       else if (uc[n] == '&' || uc[n] == '=')
       {
              /*
              ** Broken URL, most likely. Put the '&' and the collected
              ** name into the value as they were.
              */
              unicode_char amp='&';

              unicode_buf_append(&p->value, &amp, 1);
              unicode_buf_append_buf(&p->value, &p->atom2);
       }

       /*
       ** Anything else: not ...&foo;..., not ...&foo&..., not
       ** ...&foo=..., forget the whole thing.
       */

       p->handler_func=seen_attrvalue;
       return n;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_closing_elem ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 1006 of file html.c.

{
       /*
       ** Collects the element name of a closing tag into p->atom and, at
       ** the '>', looks the name up in the tag table and closes the
       ** element.
       **
       ** Returns the count of leading characters of uc that were dealt
       ** with.
       */
       size_t pos;

       for (pos=0; pos<cnt; ++pos)
       {
              unicode_char ch=uc[pos];

              if (ch == '>')
                     break;

              /* Loose parsing - ignore spaces wherever they are */

              if (SPACE(ch))
                     continue;

              if (ch != ':' && (ch=isualnum(ch)) == 0)
              {
                     /*
                     ** Syntax error, punt.
                     */

                     p->handler_func=handle_chars;
                     return pos;
              }

              unicode_buf_append(&p->atom, &ch, 1);
       }

       if (pos < cnt)
       {
              /* Stopped at the '>' */
              const struct taginfo *found;

              p->handler_func=handle_chars;

              found=bsearch(&p->atom,
                            tags,
                            sizeof(tags)/sizeof(tags[0]),
                            sizeof(tags[0]),
                            search_tags);

              /*
              ** Names missing from the table are handled as unknown_tag
              */

              close_element(p, found ? found:&unknown_tag);
              return pos+1;
       }

       return pos;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_comment ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 663 of file html.c.

{
       /*
       ** Skips characters up to and including the next '-', then hands
       ** over to seen_commentdash. Without a '-' the whole chunk is
       ** skipped and this handler stays in place.
       */
       size_t n=0;

       while (n < cnt)
       {
              if (uc[n++] == '-')
              {
                     p->handler_func=seen_commentdash;
                     break;
              }
       }
       return n;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_commentdash ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 684 of file html.c.

{
       /*
       ** One '-' has been seen. A second one moves on to
       ** seen_commentdashdash; anything else goes back to seen_comment,
       ** which gets to look at the same characters.
       */
       if (*uc != '-')
       {
              p->handler_func=seen_comment;
              return seen_comment(p, uc, cnt);
       }

       p->handler_func=seen_commentdashdash;
       return 1;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_commentdashdash ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 701 of file html.c.

{
       /*
       ** Two '-' in a row have been seen. A '>' now switches back to
       ** handle_chars; any other character is given to seen_commentdash
       ** instead.
       */
       if (*uc != '>')
       {
              p->handler_func=seen_commentdash;
              return seen_commentdash(p, uc, cnt);
       }

       p->handler_func=handle_chars;
       return 1;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_lt ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 525 of file html.c.

{
       /*
       ** Handles the character that follows a '<': '?' selects seen_pi,
       ** '!' selects seen_ltexcl, and everything else is passed to
       ** seen_ltspace after p->atom has been emptied.
       */
       switch (*uc) {
       case '?':
              p->handler_func=seen_pi;
              return 1;
       case '!':
              p->handler_func=seen_ltexcl;
              return 1;
       default:
              break;
       }

       unicode_buf_clear(&p->atom);
       p->handler_func=seen_ltspace;
       return seen_ltspace(p, uc, cnt);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_ltexcl ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 550 of file html.c.

{
       /*
       ** Handles the character that follows "<!". The character is not
       ** used up here; it is passed on to the handler that gets
       ** selected.
       */
       if (*uc != '-')
       {
              p->handler_func=seen_sgentity;
              return seen_sgentity(p, uc, cnt);
       }

       /* Assume an SGML comment */

       p->handler_func=seen_comment;
       return seen_comment(p, uc, cnt);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_ltspace ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 592 of file html.c.

{
       /*
       ** Handles what comes after '<', skipping any whitespace: '/'
       ** selects seen_closing_elem, a character that isualnum() accepts
       ** starts an element name in seen_opening_elem, and anything else
       ** is handed back to handle_chars.
       */
       const unicode_char ch= *uc;

       if (SPACE(ch))
              return 1;

       if (ch == '/')
       {
              p->handler_func=seen_closing_elem;
              return 1;
       }

       if (!isualnum(ch))
       {
              /* Syntax error, punt */

              p->handler_func=handle_chars;
              return handle_chars(p, uc, cnt);
       }

       p->handler_func=seen_opening_elem;
       return seen_opening_elem(p, uc, cnt);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_opening_elem ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 1066 of file html.c.

{
       /*
       ** Collects the element name of an opening tag into p->atom. The
       ** first character that is neither ':' nor accepted by isualnum()
       ** ends the name; that character is left for seen_attr.
       **
       ** Returns the count of leading characters of uc that were dealt
       ** with.
       */
       size_t pos;

       for (pos=0; pos<cnt; ++pos)
       {
              unicode_char ch=uc[pos];

              if (ch != ':' && (ch=isualnum(ch)) == 0)
                     break;

              unicode_buf_append(&p->atom, &ch, 1);
       }

       /* Name may continue in the next chunk */
       if (pos == cnt)
              return pos;

       /*
       ** End of element name: look it up in the tag table.
       */

       p->tag=bsearch(&p->atom,
                      tags,
                      sizeof(tags)/sizeof(tags[0]),
                      sizeof(tags[0]),
                      search_tags);

       /*
       ** Names missing from the table are handled as unknown_tag
       */

       if (!p->tag)
              p->tag= &unknown_tag;

       p->tag_empty=0;
       p->handler_func=seen_attr;
       return pos;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_pi ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 621 of file html.c.

{
       /*
       ** Skips characters up to and including the next '?', then hands
       ** over to seen_piq. Without a '?' the whole chunk is skipped and
       ** this handler stays in place.
       */
       size_t n=0;

       while (n < cnt)
       {
              if (uc[n++] == '?')
              {
                     p->handler_func=seen_piq;
                     break;
              }
       }
       return n;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_piq ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 642 of file html.c.

{
       /*
       ** A '?' has been seen by seen_pi. A '>' right after it switches
       ** back to handle_chars; otherwise seen_pi resumes on the same
       ** characters.
       */
       if (*uc == '>')
       {
              p->handler_func=handle_chars;
              return 1;
       }

       /* Look for the next ? */

       p->handler_func=seen_pi;
       return seen_pi(p, uc, cnt);
}

Here is the call graph for this function:

Here is the caller graph for this function:

static size_t seen_sgentity ( struct htmlfilter_info *  p,
const unicode_char *  uc,
size_t  cnt 
) [static]

Definition at line 571 of file html.c.

{
       /*
       ** Skips characters up to and including the next '>', then
       ** switches back to handle_chars. Without a '>' the whole chunk is
       ** skipped and this handler stays in place.
       */
       size_t n=0;

       while (n < cnt)
       {
              if (uc[n++] == '>')
              {
                     p->handler_func=handle_chars;
                     break;
              }
       }
       return n;
}

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

const char hex[] = "0123456789ABCDEF" [static]

Definition at line 24 of file html.c.

static struct taginfo [static]

Definition at line 33 of file html.c.