Back to index

avfs  1.0.1
hip_xml.c
Go to the documentation of this file.
00001 /* 
00002    Higher Level Interface to XML Parsers.
00003    Copyright (C) 1999-2001, Joe Orton <joe@light.plus.com>
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License as published by the Free Software Foundation; either
00008    version 2 of the License, or (at your option) any later version.
00009    
00010    This library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Library General Public License for more details.
00014 
00015    You should have received a copy of the GNU Library General Public
00016    License along with this library; if not, write to the Free
00017    Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
00018    MA 02111-1307, USA
00019 
00020 */
00021 
00022 #include "config.h"
00023 
00024 #ifdef HAVE_STDLIB_H
00025 #include <stdlib.h>
00026 #endif
00027 
00028 #ifdef HAVE_STRING_H
00029 #include <string.h>
00030 #endif
00031 
00032 #include "neon_i18n.h"
00033 
00034 #include "ne_alloc.h"
00035 #include "http_utils.h"
00036 #include "string_utils.h"
00037 #include "hip_xml.h"
00038 
00039 #ifdef HAVE_EXPAT
00040 
00041 /******** Expat ***********/
00042 
00043 # include "xmlparse.h"
00044 typedef XML_Char hip_xml_char;
00045 
00046 #else /* not HAVE_EXPAT */
00047 # ifdef HAVE_LIBXML
00048 
00049 /******** libxml **********/
00050 #  include <libxml/parser.h>
00051 typedef xmlChar hip_xml_char;
00052 
00053 # else /* not HAVE_LIBXML */
00054 #  error need an XML parser
00055 # endif /* not HAVE_LIBXML */
00056 #endif /* not HAVE_EXPAT */
00057 
00058 /* Approx. one screen of text: */
00059 #define HIP_ERR_SIZE (2048)
00060 
00061 /* A registered handler: an element table plus the callbacks used for it. */
/* Handlers are linked through 'next' in the order they were pushed;
 * find_handler() walks this list starting from the parent state's handler. */
00062 struct hip_xml_handler {
00063     const struct hip_xml_elm *elements; /* element table supplied by the caller; this file never frees it */
00064     hip_xml_validate_cb validate_cb; /* validation function, called with (parent id, child id) */
00065     hip_xml_startelm_cb startelm_cb; /* start-of-element callback; may be NULL */
00066     hip_xml_endelm_cb endelm_cb; /* end-of-element callback; may be NULL */
00067     hip_xml_cdata_cb cdata_cb; /* cdata callback for mixed mode */
00068     void *userdata; /* passed as first argument to the start/end/cdata callbacks */
00069     struct hip_xml_handler *next; /* next handler in the list, or NULL */
00070 };
00071 
/* One entry in the current branch of the element tree.  A state is
 * allocated in start_element() and released by destroy_state(). */
00072 struct hip_xml_state {
00073     /* The element details: points either into a handler's element table
     * or at elm_real below (when the unknown-element handler is used). */
00074     const struct hip_xml_elm *elm;
00075 
00076     /* Storage for an unknown element; parse_element() also fills in its
     * name and nspace with the resolved local name and namespace. */
00077     struct hip_xml_elm elm_real;
00078     char *real_name; /* heap copy of the local name; freed by destroy_state() */
00079     
00080     /* Namespaces declared in this element */
00081     hip_xml_char *default_ns; /* A default namespace (from an "xmlns" attribute), or NULL */
00082     struct hip_xml_nspace *nspaces; /* List of other namespace scopes ("xmlns:prefix") */
00083 
00084     unsigned int mixed:1; /* are we in MIXED mode? */
00085 
00086     /* Extras */
00087     struct hip_xml_handler *handler; /* Where the element was declared */
00088     struct hip_xml_state *parent; /* The parent in the tree */
00089 };
00090 
00091   
00092 /* TODO: 
00093  * could move 'valid' into state, maybe allow optional
00094  * continuation past an invalid branch.
00095  */
00096 
00097 /* We pass around a hip_xml_parser as the userdata in the parsing
00098  * library.  This maintains the current state of the parse and various
00099  * other bits and bobs. Within the parse, we store the current branch
00100  * of the tree, i.e., the current element and all its parents, up to
00101  * the root, but nothing other than that.  */
00102 struct hip_xml_parser_s {
00103     struct hip_xml_state *root; /* the root of the document */
00104     struct hip_xml_state *current; /* current element in the branch */
00105     sbuffer buffer; /* the CDATA/collect buffer */
00106     unsigned int valid:1; /* currently valid? cleared on the first error */
00107     unsigned int want_cdata:1; /* currently collecting CDATA? */
00108     unsigned int collect; /* current collect depth; zero when not collecting */
00109     struct hip_xml_handler *top_handlers; /* always points at the 
00110                                       * handler on top of the stack. */
    /* The underlying parser handle; which library is used is chosen at
     * compile time. */
00111 #ifdef HAVE_EXPAT
00112     XML_Parser parser;
00113 #else
00114     xmlParserCtxtPtr parser;
00115 #endif
00116     char error[HIP_ERR_SIZE]; /* message describing the most recent error */
00117 };
00118 
00119 static void destroy_state(struct hip_xml_state *s);
00120 
00121 static const char *friendly_name(const struct hip_xml_elm *elm)
00122 {
00123     switch(elm->id) {
00124     case HIP_ELM_root:
00125        return _("document root");
00126     case HIP_ELM_unknown:
00127        return _("unknown element");
00128     default:
00129        if (elm->name) {
00130            return elm->name;
00131        } else {
00132            return _("unspecified");
00133        }
00134     }
00135 }
00136 
00137 const static struct hip_xml_elm root_element = 
00138 { "@<root>@", HIP_ELM_root, 0 };
00139 
00140 /* The callback handlers */
00141 static void start_element(void *userdata, const hip_xml_char *name, const hip_xml_char **atts);
00142 static void end_element(void *userdata, const hip_xml_char *name);
00143 static void char_data(void *userdata, const hip_xml_char *cdata, int len);
00144 
00145 #define HIP_XML_DECODE_UTF8
00146 
00147 #ifdef HIP_XML_DECODE_UTF8
00148 
00149 /* UTF-8 decoding */
00150 
00151 /* Single byte range 0x00 -> 0x7F */
00152 #define SINGLEBYTE_UTF8(ch) (((unsigned char) (ch)) < 0x80)
00153 
00154 /* Decode a double byte UTF8 string.
00155  * Returns 0 on success or non-zero on error. */
00156 static inline int decode_utf8_double(char *dest, const char *src);
00157 
00158 #endif
00159 
00160 /* Linked list of namespace scopes */
/* One node is created per "xmlns:prefix" attribute in parse_element();
 * the node and both strings are freed by destroy_state(). */
00161 struct hip_xml_nspace {
00162     hip_xml_char *name; /* the prefix, i.e. the text after "xmlns:" */
00163     hip_xml_char *uri; /* the attribute value: the namespace the prefix maps to */
00164     struct hip_xml_nspace *next; /* next declaration on the same element, or NULL */
00165 };
00166 
00167 /* And an auxiliary */
00168 static int parse_element(hip_xml_parser *p, struct hip_xml_state *state,
00169                       const hip_xml_char *name, const hip_xml_char **atts);
00170 
00171 #ifdef HAVE_LIBXML
00172 
00173 /* Could be const as far as we care, but libxml doesn't want that */
/* SAX callback table handed to xmlCreatePushParserCtxt().  Only element
 * start/end and character data are hooked; char_data serves both the
 * 'characters' and 'cdataBlock' slots.  The initializer is positional,
 * so the order of entries must match libxml's xmlSAXHandler layout. */
00174 static xmlSAXHandler sax_handler = {
00175     NULL, /* internalSubset */
00176     NULL, /* isStandalone */
00177     NULL, /* hasInternalSubset */
00178     NULL, /* hasExternalSubset */
00179     NULL, /* resolveEntity */
00180     NULL, /* getEntity */
00181     NULL, /* entityDecl */
00182     NULL, /* notationDecl */
00183     NULL, /* attributeDecl */
00184     NULL, /* elementDecl */
00185     NULL, /* unparsedEntityDecl */
00186     NULL, /* setDocumentLocator */
00187     NULL, /* startDocument */
00188     NULL, /* endDocument */
00189     start_element, /* startElement */
00190     end_element, /* endElement */
00191     NULL, /* reference */
00192     char_data, /* characters */
00193     NULL, /* ignorableWhitespace */
00194     NULL, /* processingInstruction */
00195     NULL, /* comment */
00196     NULL, /* warning (libxml's stock handler is xmlParserWarning) */
00197     NULL, /* error (libxml's stock handler is xmlParserError) */
00198     NULL, /* fatalError (libxml's stock handler is xmlParserError) */
00199     NULL, /* getParameterEntity */
00200     char_data /* cdataBlock */
00201 };
00202 
00203 #endif /* HAVE_LIBXML */
00204 
00205 #ifdef HIP_XML_DECODE_UTF8
00206 
/* Decode one two-byte UTF-8 sequence that encodes an 8-bit character.
 *
 * src must point at (at least) two readable bytes; the decoded
 * character is stored in dest[0].
 *
 * From the utf-8 man page, the two-byte encoding is:
 *    0x00000080 - 0x000007FF:
 *       110xxxxx 10xxxxxx
 * Only values that fit in eight bits are accepted, i.e. sequences of
 * the form
 *       110000xx 10yyyyyy  -> xxyyyyyy
 * Of the four lead bytes matching 110000xx, 0xC0 and 0xC1 can only
 * produce values below 0x80.  Those are "overlong" encodings of
 * single-byte characters, which a conforming decoder must reject, so
 * only 0xC2 and 0xC3 are accepted.  The second byte must carry the
 * 10xxxxxx continuation marker.
 *
 * Returns 0 on success or non-zero on error (dest is left untouched
 * on error).
 */
static inline int
decode_utf8_double(char *dest, const char *src)
{
    /* Work on unsigned bytes so the bit tests do not depend on whether
     * plain char is signed. */
    const unsigned char lead = (unsigned char) src[0];
    unsigned char trail;

    if (lead != 0xC2 && lead != 0xC3) {
        /* Not a two-byte sequence, overlong, or more than 8 bits. */
        return -1;
    }

    trail = (unsigned char) src[1];
    if ((trail & 0xC0) != 0x80) {
        /* Second byte is not a continuation byte. */
        return -1;
    }

    /* nb.
     *    00000011  = 0x03
     *    00111111  = 0x3F
     */
    dest[0] = (char) (((lead & 0x03) << 6) | (trail & 0x3F));
    return 0;
}
00238 
00239 #endif
00240 
00241 int hip_xml_currentline(hip_xml_parser *p) 
00242 {
00243 #ifdef HAVE_EXPAT
00244     return XML_GetCurrentLineNumber(p->parser);
00245 #else
00246     return p->parser->input->line;
00247 #endif
00248 }
00249 
00250 static int find_handler(hip_xml_parser *p, struct hip_xml_state *state) 
00251 {
00252     struct hip_xml_handler *cur, *unk_handler = NULL;
00253     const char *name = state->elm_real.name, *nspace = state->elm_real.nspace;
00254     int n, got_unknown = 0;
00255 
00256     for (cur = state->parent->handler; cur != NULL; cur = cur->next) {
00257        for (n = 0; (cur->elements[n].nspace != NULL || (
00258                    cur->elements[n].nspace == NULL && 
00259                    cur->elements[n].id == HIP_ELM_unknown)); n++) {
00260            if (cur->elements[n].nspace != NULL && 
00261               (strcasecmp(cur->elements[n].name, name) == 0 && 
00262                strcasecmp(cur->elements[n].nspace, nspace) == 0)) {
00263 
00264               switch ((*cur->validate_cb)(state->parent->elm->id, cur->elements[n].id)) {
00265               case HIP_XML_VALID:
00266                   DEBUG(DEBUG_XML, "Validated by handler.\n");
00267                   state->handler = cur;
00268                   state->elm = &cur->elements[n];
00269                   return 0;
00270               case HIP_XML_INVALID:
00271                   DEBUG(DEBUG_XML, "Invalid context.\n");
00272                   snprintf(p->error, HIP_ERR_SIZE, 
00273                           _("XML is not valid (%s found in parent %s)"),
00274                           friendly_name(&cur->elements[n]), 
00275                           friendly_name(state->parent->elm));
00276                   return -1;
00277               default:
00278                   /* ignore it */
00279                   DEBUG(DEBUG_XML, "Declined by handler.\n");
00280                   break;
00281               }
00282            }
00283            if (!got_unknown && cur->elements[n].id == HIP_ELM_unknown) {
00284               switch ((*cur->validate_cb)(state->parent->elm->id, HIP_ELM_unknown)) {
00285               case HIP_XML_VALID:
00286                   unk_handler = cur;
00287                   got_unknown = 1;
00288                   state->elm_real.id = HIP_ELM_unknown;
00289                   state->elm_real.flags = cur->elements[n].flags;
00290                   break;
00291               case HIP_XML_INVALID:
00292                   DEBUG(DEBUG_XML, "Invalid context.\n");
00293                   snprintf(p->error, HIP_ERR_SIZE, 
00294                           _("XML is not valid (%s found in parent %s)"),
00295                           friendly_name(&cur->elements[n]), 
00296                           friendly_name(state->parent->elm));
00297                   return -1;
00298               default:
00299                   DEBUG(DEBUG_XML, "Declined by handler.\n");
00300                   break;
00301               }
00302            }
00303        }
00304     }
00305     if (!cur && got_unknown) {
00306        /* Give them the unknown handler */
00307        DEBUG(DEBUG_XMLPARSE, "Using unknown element handler\n");
00308        state->handler = unk_handler;
00309        state->elm = &state->elm_real;
00310        return 0;
00311     } else {
00312        DEBUG(DEBUG_XMLPARSE, "Unexpected element\n");
00313        snprintf(p->error, HIP_ERR_SIZE, 
00314                _("Unknown XML element `%s (in %s)'"), name, nspace);
00315        return -1;
00316     }
00317 }
00318 
00319 /* Called with the start of a new element. */
00320 static void 
00321 start_element(void *userdata, const hip_xml_char *name, const hip_xml_char **atts) 
00322 {
00323     hip_xml_parser *p = userdata;
00324     struct hip_xml_state *s;
00325 
00326     if (!p->valid) {
00327        /* We've stopped parsing */
00328        DEBUG(DEBUG_XML, "Parse died. Ignoring start of element: %s\n", name);
00329        return;
00330     }
00331 
00332     /* If we are in collect mode, print the element to the buffer */
00333     if (p->collect) {
00334        /* In Collect Mode. */
00335        const hip_xml_char *pnt = strchr(name, ':');
00336        if (pnt == NULL) {
00337            pnt = name;
00338        } else {
00339            pnt++;
00340        }
00341        sbuffer_concat(p->buffer, "<", pnt, NULL);
00342        if (atts != NULL) {
00343            int n;
00344            for (n = 0; atts[n] != NULL; n+=2) {
00345               sbuffer_concat(p->buffer, " ", atts[n], "=", atts[n+1],
00346                             NULL);
00347            }
00348        }
00349        sbuffer_zappend(p->buffer, ">");
00350        /* One deeper */
00351        p->collect++;
00352        return;
00353     }
00354 
00355     /* Set the new state */
00356     s = ne_calloc(sizeof(struct hip_xml_state));
00357     s->parent = p->current;
00358     p->current = s;
00359 
00360     /* We need to handle namespaces ourselves */
00361     if (parse_element(p, s, name, atts)) {
00362        /* it bombed. */
00363        p->valid = 0;
00364        return;
00365     }
00366 
00367     /* Map the element name to an id */
00368     DEBUG(DEBUG_XML, "Mapping element name %s@@%s... ", 
00369          s->elm_real.nspace, s->elm_real.name);
00370 
00371     if (find_handler(p, s)) {
00372        p->valid = 0;
00373        return;
00374     }
00375 
00376     DEBUG(DEBUG_XMLPARSE, "mapped to id %d\n", s->elm->id);
00377 
00378     /* Do we want cdata? */
00379     p->want_cdata = ((s->elm->flags & HIP_XML_CDATA) == HIP_XML_CDATA);
00380     p->collect = ((s->elm->flags & HIP_XML_COLLECT) == HIP_XML_COLLECT);
00381     
00382     /* Is this element using mixed-mode? */
00383     s->mixed = ((s->elm->flags & HIP_XML_MIXED) == HIP_XML_MIXED);
00384 
00385     if (s->handler->startelm_cb) {
00386        if ((*s->handler->startelm_cb)(s->handler->userdata, s->elm, 
00387                                    (const char **) atts)) {
00388            DEBUG(DEBUG_XML, "Startelm callback failed.\n");
00389            p->valid = 0;
00390        }
00391     } else {
00392        DEBUG(DEBUG_XML, "No startelm handler.\n");
00393     }
00394 
00395 }
00396 
00397 /* Destroys given state */
00398 static void destroy_state(struct hip_xml_state *s) 
00399 {
00400     struct hip_xml_nspace *this_ns, *next_ns;
00401     DEBUG(DEBUG_XMLPARSE, "Freeing namespaces...\n");
00402     HTTP_FREE(s->default_ns);
00403     HTTP_FREE(s->real_name);
00404     /* Free the namespaces */
00405     this_ns = s->nspaces;
00406     while (this_ns != NULL) {
00407        next_ns = this_ns->next;
00408        free(this_ns->name);
00409        free(this_ns->uri);
00410        free(this_ns);
00411        this_ns = next_ns;
00412     };
00413     DEBUG(DEBUG_XMLPARSE, "Finished freeing namespaces.\n");
00414     free(s);
00415 }
00416 
00417 static void char_data(void *userdata, const hip_xml_char *data, int len) 
00418 {
00419     hip_xml_parser *p = userdata;
00420     
00421     if (p->current->mixed) {
00422        (*p->current->handler->cdata_cb)( 
00423            p->current->handler->userdata, p->current->elm, data, len);
00424        return;
00425     }
00426 
00427     if (!p->want_cdata || !p->valid) return;
00428     /* First, if this is the beginning of the CDATA, skip all
00429      * leading whitespace, we don't want it. */
00430     DEBUG(DEBUG_XMLPARSE, "Given %d bytes of cdata.\n", len);
00431     if (sbuffer_size(p->buffer) == 0) {
00432        int wslen = 0;
00433        /* Ignore any leading whitespace */
00434        while (wslen < len && 
00435               (data[wslen] == ' ' || data[wslen] == '\r' ||
00436               data[wslen] == '\n' || data[wslen] == '\t')) {
00437            wslen++;
00438        }
00439        data += wslen;
00440        len -= wslen;
00441        DEBUG(DEBUG_XMLPARSE, "Skipped %d bytes of leading whitespace.\n", 
00442               wslen);
00443        if (len == 0) {
00444            DEBUG(DEBUG_XMLPARSE, "Zero bytes of content.\n");
00445            return;
00446        }
00447     }
00448 
00449 #ifdef HIP_XML_DECODE_UTF8
00450 
00451     if ((p->current->elm->flags & HIP_XML_UTF8DECODE) == HIP_XML_UTF8DECODE) {
00452        int n, m, clen;
00453        char *dest;
00454 
00455        clen = sbuffer_size(p->buffer);
00456        sbuffer_grow(p->buffer, clen + len + 1);
00457        dest = sbuffer_data(p->buffer) + clen;
00458 
00459 /* #define TOO_MUCH_DEBUG 1 */
00460        for (n = 0, m = 0; n < len; n++, m++) {
00461 #ifdef TOO_MUCH_DEBUG
00462            DEBUG(DEBUG_XML, "decoding 0x%02x", 0xFF & data[n]);
00463 #endif
00464            if (SINGLEBYTE_UTF8(data[n])) {
00465               dest[m] = data[n];
00466            } else {
00467               /* An optimisation here: we only deal with 8-bit 
00468                * data, which will be encoded as two bytes of UTF-8 */
00469               if ((len - n < 2) ||
00470                   decode_utf8_double(&dest[m], &data[n])) {
00471                   /* Failed to decode! */
00472                   DEBUG(DEBUG_XML, "Could not decode UTF-8 data.\n");
00473                   strcpy(p->error, "XML parser received non-8-bit data");
00474                   p->valid = 0;
00475                   return;
00476               } else {
00477 #ifdef TOO_MUCH_DEBUG
00478                   DEBUG(DEBUG_XML, "UTF-8 two-bytes decode: "
00479                         "0x%02hx 0x%02hx -> 0x%02hx!\n",
00480                         data[n] & 0xFF, data[n+1] & 0xFF, dest[m] & 0xFF);
00481 #endif
00482                   /* Skip the second byte */
00483                   n++;
00484               }
00485            }
00486        }
00487        sbuffer_altered(p->buffer);
00488     } else {
00489        sbuffer_append(p->buffer, data, len);
00490     }
00491 
00492 #else /* !HIP_XML_DECODE_UTF8 */
00493 
00494     sbuffer_append(p->buffer, data, len);
00495 
00496 #endif
00497 
00498 }
00499 
00500 /* Called with the end of an element */
00501 static void end_element(void *userdata, const hip_xml_char *name) 
00502 {
00503     hip_xml_parser *p = userdata;
00504     struct hip_xml_state *s = p->current;
00505     if (!p->valid) {
00506        /* We've stopped parsing */
00507        DEBUG(DEBUG_XML, "Parse died. Ignoring end of element: %s\n", name);
00508        return;
00509     }
00510     if (p->collect > 0) {
00511        if (--p->collect) {
00512            const hip_xml_char *pnt = strchr(name, ':');
00513            if (pnt == NULL) {
00514               pnt = name;
00515            } else {
00516               pnt++;
00517            }
00518            sbuffer_concat(p->buffer, "</", pnt, ">", NULL);
00519            return;
00520        }
00521     }
00522        
00523     /* process it */
00524     if (s->handler->endelm_cb) {
00525        DEBUG(DEBUG_XMLPARSE, "Calling endelm callback for %s.\n", s->elm->name);
00526        if ((*s->handler->endelm_cb)(s->handler->userdata, s->elm,
00527                                   p->want_cdata?sbuffer_data(p->buffer):
00528                                   NULL)) {
00529            DEBUG(DEBUG_XML, "Endelm callback failed.\n");
00530            p->valid = 0;
00531        }
00532     }
00533     p->current = s->parent;
00534     /* Move the current pointer up the branch */
00535     DEBUG(DEBUG_XML, "Back in element: %s\n", friendly_name(p->current->elm));
00536     if (p->want_cdata) {
00537        sbuffer_clear(p->buffer);
00538     } 
00539     destroy_state(s);
00540 }
00541 
00542 /* Parses the attributes, and handles XML namespaces. 
00543  * With a little bit of luck.
00544  * Returns:
00545  *   the element name on success
00546  *   or NULL on error.
00547  */
00548 static int parse_element(hip_xml_parser *p, struct hip_xml_state *state,
00549                       const hip_xml_char *name, const hip_xml_char **atts)
00550 {
00551     struct hip_xml_nspace *ns;
00552     const hip_xml_char *pnt;
00553     struct hip_xml_state *xmlt;
00554 
00555     DEBUG(DEBUG_XMLPARSE, "Parsing elm of name: [%s]\n", name);
00556     /* Parse the atts for namespace declarations... if we have any atts.
00557      * expat will never pass us atts == NULL, but libxml will. */
00558     if (atts != NULL) {
00559        int attn;
00560        for (attn = 0; atts[attn]!=NULL; attn+=2) {
00561            DEBUG(DEBUG_XMLPARSE, "Got attribute: [%s] = [%s]\n", atts[attn], atts[attn+1]);
00562            if (strcasecmp(atts[attn], "xmlns") == 0) {
00563               /* New default namespace */
00564               state->default_ns = ne_strdup(atts[attn+1]);
00565               DEBUG(DEBUG_XMLPARSE, "New default namespace: %s\n", 
00566                      state->default_ns);
00567            } else if (strncasecmp(atts[attn], "xmlns:", 6) == 0) {
00568               /* New namespace scope */
00569               ns = ne_calloc(sizeof(struct hip_xml_nspace));
00570               ns->next = state->nspaces;
00571               state->nspaces = ns;
00572               ns->name = ne_strdup(atts[attn]+6); /* skip the xmlns= */
00573               ns->uri = ne_strdup(atts[attn+1]);
00574               DEBUG(DEBUG_XMLPARSE, "New namespace scope: %s -> %s\n",
00575                      ns->name, ns->uri);
00576            }
00577        }
00578     }
00579     /* Now check the elm name for a namespace scope */
00580     pnt = strchr(name, ':');
00581     if (pnt == NULL) {
00582        /* No namespace prefix - have we got a default? */
00583        state->real_name = ne_strdup(name);
00584        DEBUG(DEBUG_XMLPARSE, "No prefix found, searching for default.\n");
00585        for (xmlt = state; xmlt!=NULL; xmlt=xmlt->parent) {
00586            if (xmlt->default_ns != NULL) {
00587               state->elm_real.nspace = xmlt->default_ns;
00588               break;
00589            }
00590        }
00591        if (state->elm_real.nspace == NULL) {
00592            DEBUG(DEBUG_XMLPARSE, "No default namespace, using empty.\n");
00593            state->elm_real.nspace = "";
00594        }
00595     } else {
00596        DEBUG(DEBUG_XMLPARSE, "Got namespace scope. Trying to resolve...");
00597        /* Have a scope - resolve it */
00598        for (xmlt = state; state->elm_real.nspace==NULL && xmlt!=NULL; xmlt=xmlt->parent) {
00599            for (ns = xmlt->nspaces; ns!=NULL; ns=ns->next) {
00600               /* Just compare against the bit before the :
00601                * pnt points to the colon. */
00602               if (strncasecmp(ns->name, name, pnt-name) == 0) {
00603                   /* Scope matched! Hoorah */
00604                   state->elm_real.nspace = ns->uri;
00605                   /* end the search */
00606                   break;
00607               }
00608            }
00609        }
00610        if (state->elm_real.nspace != NULL) {
00611            DEBUG(DEBUG_XMLPARSE, "Resolved prefix to [%s]\n", state->elm_real.nspace);
00612            /* The name is everything after the ':' */
00613            if (pnt[1] == '\0') {
00614               snprintf(p->error, HIP_ERR_SIZE, 
00615                        "Element name missing in '%s' at line %d.",
00616                        name, hip_xml_currentline(p));
00617               DEBUG(DEBUG_XMLPARSE, "No element name after ':'. Failed.\n");
00618               return -1;
00619            }
00620            state->real_name = ne_strdup(pnt+1);
00621        } else {
00622            DEBUG(DEBUG_XMLPARSE, "Undeclared namespace.\n");
00623            snprintf(p->error, HIP_ERR_SIZE, 
00624                     "Undeclared namespace in '%s' at line %d.",
00625                     name, hip_xml_currentline(p));
00626            return -1;
00627        }
00628     }
00629     state->elm_real.name = state->real_name;
00630     return 0;
00631 }
00632 
00633 hip_xml_parser *hip_xml_create(void) 
00634 {
00635     hip_xml_parser *p = ne_calloc(sizeof *p);
00636     /* Initialize other stuff */
00637     p->valid = 1;
00638     /* Placeholder for the root element */
00639     p->current = p->root = ne_calloc(sizeof(struct hip_xml_state));
00640     p->root->elm = &root_element;
00641     /* Initialize the cdata buffer */
00642     p->buffer = sbuffer_create();
00643 #ifdef HAVE_EXPAT
00644     p->parser = XML_ParserCreate(NULL);
00645     if (p->parser == NULL) {
00646        abort();
00647     }
00648     XML_SetElementHandler(p->parser, start_element, end_element);
00649     XML_SetCharacterDataHandler(p->parser, char_data);
00650     XML_SetUserData(p->parser, (void *) p);
00651 #else
00652     p->parser = xmlCreatePushParserCtxt(&sax_handler, 
00653                                    (void *)p, NULL, 0, NULL);
00654     if (p->parser == NULL) {
00655        abort();
00656     }
00657 #endif
00658     return p;
00659 }
00660 
00661 static void push_handler(hip_xml_parser *p,
00662                       struct hip_xml_handler *handler)
00663 {
00664 
00665     /* If this is the first handler registered, update the
00666      * base pointer too. */
00667     if (p->top_handlers == NULL) {
00668        p->root->handler = handler;
00669        p->top_handlers = handler;
00670     } else {
00671        p->top_handlers->next = handler;
00672        p->top_handlers = handler;
00673     }
00674 }
00675 
00676 void hip_xml_push_handler(hip_xml_parser *p,
00677                        const struct hip_xml_elm *elements, 
00678                        hip_xml_validate_cb validate_cb, 
00679                        hip_xml_startelm_cb startelm_cb, 
00680                        hip_xml_endelm_cb endelm_cb,
00681                        void *userdata)
00682 {
00683     struct hip_xml_handler *hand = ne_calloc(sizeof(struct hip_xml_handler));
00684 
00685     hand->elements = elements;
00686     hand->validate_cb = validate_cb;
00687     hand->startelm_cb = startelm_cb;
00688     hand->endelm_cb = endelm_cb;
00689     hand->userdata = userdata;
00690 
00691     push_handler(p, hand);
00692 }
00693 
00694 void hip_xml_push_mixed_handler(hip_xml_parser *p,
00695                             const struct hip_xml_elm *elements,
00696                             hip_xml_validate_cb validate_cb,
00697                             hip_xml_startelm_cb startelm_cb,
00698                             hip_xml_cdata_cb cdata_cb,
00699                             hip_xml_endelm_cb endelm_cb,
00700                             void *userdata)
00701 {
00702     struct hip_xml_handler *hand = ne_calloc(sizeof *hand);
00703     
00704     hand->elements = elements;
00705     hand->validate_cb = validate_cb;
00706     hand->startelm_cb = startelm_cb;
00707     hand->cdata_cb = cdata_cb;
00708     hand->endelm_cb = endelm_cb;
00709     hand->userdata = userdata;
00710     
00711     push_handler(p, hand);
00712 }
00713 
00714 void hip_xml_parse_v(void *userdata, const char *block, size_t len) 
00715 {
00716     hip_xml_parser *p = userdata;
00717     /* FIXME: The two XML parsers break all our nice abstraction by
00718      * choosing different char *'s. The swine. This cast will come
00719      * back and bite us someday, no doubt. */
00720     hip_xml_parse(p, block, len);
00721 }
00722 
00723 /* Parse the given block of input of length len */
00724 void hip_xml_parse(hip_xml_parser *p, const char *block, size_t len) 
00725 {
00726     int ret, flag;
00727     /* duck out if it's broken */
00728     if (!p->valid) {
00729        DEBUG(DEBUG_XML, "Not parsing %d bytes.\n", len);
00730        return;
00731     }
00732     if (len == 0) {
00733        flag = -1;
00734        block = "";
00735        DEBUG(DEBUG_XML, "Got 0-length buffer, end of document.\n");
00736     } else {  
00737        DEBUG(DEBUG_XML, "Parsing %d length buffer.\n", len);
00738        flag = 0;
00739     }
00740     /* Note, don't write a parser error if !p->valid, since an error
00741      * will already have been written in that case. */
00742 #ifdef HAVE_EXPAT
00743     ret = XML_Parse(p->parser, block, len, flag);
00744     DEBUG(DEBUG_XMLPARSE, "XML_Parse returned %d\n", ret);
00745     if (ret == 0 && p->valid) {
00746        snprintf(p->error, HIP_ERR_SIZE,
00747                 "XML parse error at line %d: %s", 
00748                 XML_GetCurrentLineNumber(p->parser),
00749                 XML_ErrorString(XML_GetErrorCode(p->parser)));
00750        p->valid = 0;
00751     }
00752 #else
00753     ret = xmlParseChunk(p->parser, block, len, flag);
00754     DEBUG(DEBUG_XMLPARSE, "xmlParseChunk returned %d\n", ret);
00755     if (p->parser->errNo && p->valid) {
00756        /* FIXME: error handling */
00757        snprintf(p->error, HIP_ERR_SIZE, "XML parse error at line %d.", 
00758                 hip_xml_currentline(p));
00759        p->valid = 0;
00760     }
00761 #endif
00762 }
00763 
00764 int hip_xml_valid(hip_xml_parser *p)
00765 {
00766     return p->valid;
00767 }
00768 
00769 void hip_xml_destroy(hip_xml_parser *p) 
00770 {
00771     struct hip_xml_state *s, *parent;
00772     struct hip_xml_handler *hand, *next;
00773 
00774     sbuffer_destroy(p->buffer);
00775     /* Clean up any states which may remain.
00776      * If p.valid, then this should be only the root element. */
00777     for (s = p->current; s!=NULL; s=parent) {
00778        parent = s->parent;
00779        destroy_state(s);
00780     }
00781 
00782     /* Free up the handlers on the stack: the base of the stack
00783      * is the handler in the root element, so start there. */
00784     for (hand = p->root->handler; hand!=NULL; hand=next) {
00785        next = hand->next;
00786        free(hand);
00787     }
00788         
00789 #ifdef HAVE_EXPAT
00790     XML_ParserFree(p->parser);
00791 #else
00792     xmlFreeParserCtxt(p->parser);
00793 #endif
00794 
00795     free(p);
00796 }
00797 
00798 void hip_xml_set_error(hip_xml_parser *p, const char *msg)
00799 {
00800     snprintf(p->error, HIP_ERR_SIZE, msg);
00801 }
00802 
00803 const char *hip_xml_get_error(hip_xml_parser *p)
00804 {
00805     return p->error;
00806 }
00807