Back to index

citadel  8.12
rss_atom_parser.c
Go to the documentation of this file.
00001 /*
00002  * Bring external RSS feeds into rooms.
00003  *
00004  * Copyright (c) 2007-2012 by the citadel.org team
00005  *
00006  * This program is open source software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License version 3.
00008  * 
00009  * 
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * 
00017  * 
00018  * 
00019  */
00020 
00021 #include <stdlib.h>
00022 #include <unistd.h>
00023 #include <stdio.h>
00024 
00025 #if TIME_WITH_SYS_TIME
00026 # include <sys/time.h>
00027 # include <time.h>
00028 #else
00029 # if HAVE_SYS_TIME_H
00030 #  include <sys/time.h>
00031 # else
00032 #  include <time.h>
00033 # endif
00034 #endif
00035 
00036 #include <ctype.h>
00037 #include <string.h>
00038 #include <errno.h>
00039 #include <sys/types.h>
00040 #include <sys/stat.h>
00041 #include <expat.h>
00042 #include <curl/curl.h>
00043 #include <libcitadel.h>
00044 #include "citadel.h"
00045 #include "server.h"
00046 #include "citserver.h"
00047 #include "support.h"
00048 #include "config.h"
00049 #include "threads.h"
00050 #include "ctdl_module.h"
00051 #include "clientsocket.h"
00052 #include "msgbase.h"
00053 #include "parsedate.h"
00054 #include "database.h"
00055 #include "citadel_dirs.h"
00056 #include "md5.h"
00057 #include "context.h"
00058 #include "event_client.h"
00059 #include "rss_atom_parser.h"
00060 
00061 void rss_save_item(rss_item *ri, rss_aggregator *Cfg);
00062 
/* When non-zero, LOG_DEBUG messages emitted through the macros below are logged too. */
int RSSAtomParserDebugEnabled = 0;

/* QRnumber of the aggregator hanging off the AsyncIO named `IO` in the caller's scope. */
#define N ((rss_aggregator*)IO->Data)->QRnumber

/*
 * Statement prefix: the statement following it runs unless LEVEL is LOG_DEBUG
 * and RSSAtomParserDebugEnabled is zero.
 */
#define DBGLOG(LEVEL) if (((LEVEL) != LOG_DEBUG) || (RSSAtomParserDebugEnabled != 0))

/*
 * Logging helpers.  All of them expect a variable `IO` (AsyncIO *) in scope;
 * the first two additionally use CCID.  Each one is wrapped in
 * do { } while (0) so that it behaves as a single statement and cannot
 * capture an `else` that follows the macro invocation.
 */

/* Log with IO id, CCID and N; FORMAT must consume at least one extra argument. */
#define EVRSSATOM_syslog(LEVEL, FORMAT, ...)				\
	do {								\
		DBGLOG(LEVEL) syslog(LEVEL,				\
				     "IO[%ld]CC[%d][%ld]RSSP" FORMAT,	\
				     IO->ID, CCID, N, __VA_ARGS__);	\
	} while (0)

/* Same as EVRSSATOM_syslog, for messages without extra arguments. */
#define EVRSSATOMM_syslog(LEVEL, FORMAT)				\
	do {								\
		DBGLOG(LEVEL) syslog(LEVEL,				\
				     "IO[%ld]CC[%d][%ld]RSSP" FORMAT,	\
				     IO->ID, CCID, N);			\
	} while (0)

/* Log with IO id and N only (no CCID); FORMAT must consume extra arguments. */
#define EVRSSATOMCS_syslog(LEVEL, FORMAT, ...)				\
	do {								\
		DBGLOG(LEVEL) syslog(LEVEL, "IO[%ld][%ld]RSSP" FORMAT,	\
				     IO->ID, N, __VA_ARGS__);		\
	} while (0)

/* Same as EVRSSATOMCS_syslog, for messages without extra arguments. */
#define EVRSSATOMSM_syslog(LEVEL, FORMAT)				\
	do {								\
		DBGLOG(LEVEL) syslog(LEVEL, "IO[%ld][%ld]RSSP" FORMAT,	\
				     IO->ID, N);			\
	} while (0)
00086 
/*
 * Convert an RDF/RSS datestamp into a time_t.
 *
 * p	NUL-terminated datestamp text; may be NULL.
 *
 * Returns 0 when p is NULL or shorter than 10 characters.  Text of the form
 * YYYY-MM-DD[Thh:mm...] (W3C datetime, see
 * http://www.w3.org/TR/NOTE-datetime) is converted with mktime(); seconds
 * and any timezone designator are not examined.  Everything else is handed
 * to parsedate(); if that does not yield a positive value, the current time
 * is returned.
 *
 * This code, along with parsedate.c, is a potential candidate for moving
 * into libcitadel.
 */
time_t rdf_parsedate(const char *p)
{
	struct tm tm;
	size_t len;
	time_t t;

	if (p == NULL) return 0L;
	len = strlen(p);
	if (len < 10) return 0L;

	memset(&tm, 0, sizeof tm);

	/* W3C datetime: the date part is always present at this point. */
	if ( (p[4] == '-') && (p[7] == '-') ) {
		tm.tm_year = atoi(&p[0]) - 1900;
		tm.tm_mon = atoi(&p[5]) - 1;
		tm.tm_mday = atoi(&p[8]);
		/*
		 * The length test keeps p[13] and p[14] inside the string; a
		 * date followed by a short tail such as "2012-01-02T" must
		 * not be read past its terminator.
		 */
		if ( (len > 13) && (p[10] == 'T') && (p[13] == ':') ) {
			tm.tm_hour = atoi(&p[11]);
			tm.tm_min = atoi(&p[14]);
		}
		return mktime(&tm);
	}

	/* hmm... try RFC822 date stamp format */
	t = parsedate(p);
	if (t > 0) return(t);

	/* yeesh.  ok, just return the current date and time. */
	return(time(NULL));
}
00126 
00127 void flush_rss_item(rss_item *ri)
00128 {
00129        /* Initialize the feed item data structure */
00130        FreeStrBuf(&ri->guid);
00131        FreeStrBuf(&ri->title);
00132        FreeStrBuf(&ri->link);
00133        FreeStrBuf(&ri->author_or_creator);
00134        FreeStrBuf(&ri->author_email);
00135        FreeStrBuf(&ri->author_url);
00136        FreeStrBuf(&ri->description);
00137 
00138        FreeStrBuf(&ri->linkTitle);
00139        FreeStrBuf(&ri->reLink);
00140        FreeStrBuf(&ri->reLinkTitle);
00141        FreeStrBuf(&ri->channel_title);
00142 }
00143 
00144 
00145 /******************************************************************************
00146  *                              XML-Handler                                   *
00147  ******************************************************************************/
00148 
00149 
00150 void RSS_item_rss_start (StrBuf *CData,
00151                       rss_item *ri,
00152                       rss_aggregator *RSSAggr,
00153                       const char** Attr)
00154 {
00155        AsyncIO              *IO = &RSSAggr->IO;
00156        EVRSSATOMM_syslog(LOG_DEBUG, "RSS: This is an RSS feed.\n");
00157        RSSAggr->ItemType = RSS_RSS;
00158 }
00159 
00160 void RSS_item_rdf_start(StrBuf *CData,
00161                      rss_item *ri,
00162                      rss_aggregator *RSSAggr,
00163                      const char** Attr)
00164 {
00165        AsyncIO              *IO = &RSSAggr->IO;
00166        EVRSSATOMM_syslog(LOG_DEBUG, "RSS: This is an RDF feed.\n");
00167        RSSAggr->ItemType = RSS_RSS;
00168 }
00169 
00170 void ATOM_item_feed_start(StrBuf *CData,
00171                        rss_item *ri,
00172                        rss_aggregator *RSSAggr,
00173                        const char** Attr)
00174 {
00175        AsyncIO              *IO = &RSSAggr->IO;
00176        EVRSSATOMM_syslog(LOG_DEBUG, "RSS: This is an ATOM feed.\n");
00177        RSSAggr->ItemType = RSS_ATOM;
00178 }
00179 
00180 
00181 void RSS_item_item_start(StrBuf *CData,
00182                       rss_item *ri,
00183                       rss_aggregator *RSSAggr,
00184                       const char** Attr)
00185 {
00186        ri->item_tag_nesting ++;
00187        flush_rss_item(ri);
00188 }
00189 
00190 void ATOM_item_entry_start(StrBuf *CData,
00191                         rss_item *ri,
00192                         rss_aggregator *RSSAggr,
00193                         const char** Attr)
00194 {
00195 /* Atom feed... */
00196        ri->item_tag_nesting ++;
00197        flush_rss_item(ri);
00198 }
00199 
00200 void ATOM_item_link_start (StrBuf *CData,
00201                         rss_item *ri,
00202                         rss_aggregator *RSSAggr,
00203                         const char** Attr)
00204 {
00205        int i;
00206        const char *pHref = NULL;
00207        const char *pType = NULL;
00208        const char *pRel = NULL;
00209        const char *pTitle = NULL;
00210 
00211        for (i = 0; Attr[i] != NULL; i+=2)
00212        {
00213               if (!strcmp(Attr[i], "href"))
00214               {
00215                      pHref = Attr[i+1];
00216               }
00217               else if (!strcmp(Attr[i], "rel"))
00218               {
00219                      pRel = Attr[i+1];
00220               }
00221               else if (!strcmp(Attr[i], "type"))
00222               {
00223                      pType = Attr[i+1];
00224               }
00225               else if (!strcmp(Attr[i], "title"))
00226               {
00227                      pTitle = Attr[i+1];
00228               }
00229        }
00230        if (pHref == NULL)
00231               return; /* WHUT? Pointing... where? */
00232        if ((pType != NULL) && !strcasecmp(pType, "application/atom+xml"))
00233               return;
00234        /* these just point to other rss resources,
00235           we're not interested in them. */
00236        if (pRel != NULL)
00237        {
00238               if (!strcasecmp (pRel, "replies"))
00239               {
00240                      NewStrBufDupAppendFlush(&ri->reLink, NULL, pHref, -1);
00241                      StrBufTrim(ri->link);
00242                      NewStrBufDupAppendFlush(&ri->reLinkTitle,
00243                                           NULL,
00244                                           pTitle,
00245                                           -1);
00246               }
00247               else if (!strcasecmp(pRel, "alternate"))
00248               { /* Alternative representation of this Item... */
00249                      NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
00250                      StrBufTrim(ri->link);
00251                      NewStrBufDupAppendFlush(&ri->linkTitle,
00252                                           NULL,
00253                                           pTitle,
00254                                           -1);
00255 
00256               }
00257 #if 0 /* these are also defined, but dunno what to do with them.. */
00258               else if (!strcasecmp(pRel, "related"))
00259               {
00260               }
00261               else if (!strcasecmp(pRel, "self"))
00262               {
00263               }
00264               else if (!strcasecmp(pRel, "enclosure"))
00265               {/*...reference can get big, and is probably the full article*/
00266               }
00267               else if (!strcasecmp(pRel, "via"))
00268               {/* this article was provided via... */
00269               }
00270 #endif
00271        }
00272        else if (StrLength(ri->link) == 0)
00273        {
00274               NewStrBufDupAppendFlush(&ri->link, NULL, pHref, -1);
00275               StrBufTrim(ri->link);
00276               NewStrBufDupAppendFlush(&ri->linkTitle, NULL, pTitle, -1);
00277        }
00278 }
00279 
00280 
00281 
00282 
00283 void ATOMRSS_item_title_end(StrBuf *CData,
00284                          rss_item *ri,
00285                          rss_aggregator *RSSAggr,
00286                          const char** Attr)
00287 {
00288        if ((ri->item_tag_nesting == 0) && (StrLength(CData) > 0)) {
00289               NewStrBufDupAppendFlush(&ri->channel_title, CData, NULL, 0);
00290               StrBufTrim(ri->channel_title);
00291        }
00292 }
00293 
00294 void RSS_item_guid_end(StrBuf *CData,
00295                      rss_item *ri,
00296                      rss_aggregator *RSSAggr,
00297                      const char** Attr)
00298 {
00299        if (StrLength(CData) > 0) {
00300               NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
00301        }
00302 }
00303 
00304 void ATOM_item_id_end(StrBuf *CData,
00305                     rss_item *ri, rss_aggregator *RSSAggr, const char** Attr)
00306 {
00307        if (StrLength(CData) > 0) {
00308               NewStrBufDupAppendFlush(&ri->guid, CData, NULL, 0);
00309        }
00310 }
00311 
00312 
00313 void RSS_item_link_end (StrBuf *CData,
00314                      rss_item *ri,
00315                      rss_aggregator *RSSAggr,
00316                      const char** Attr)
00317 {
00318        if (StrLength(CData) > 0) {
00319               NewStrBufDupAppendFlush(&ri->link, CData, NULL, 0);
00320               StrBufTrim(ri->link);
00321        }
00322 }
00323 void RSS_item_relink_end(StrBuf *CData,
00324                       rss_item *ri,
00325                       rss_aggregator *RSSAggr,
00326                       const char** Attr)
00327 {
00328        if (StrLength(CData) > 0) {
00329               NewStrBufDupAppendFlush(&ri->reLink, CData, NULL, 0);
00330               StrBufTrim(ri->reLink);
00331        }
00332 }
00333 
00334 void RSSATOM_item_title_end (StrBuf *CData,
00335                           rss_item *ri,
00336                           rss_aggregator *RSSAggr,
00337                           const char** Attr)
00338 {
00339        if (StrLength(CData) > 0) {
00340               NewStrBufDupAppendFlush(&ri->title, CData, NULL, 0);
00341               StrBufTrim(ri->title);
00342        }
00343 }
00344 
00345 void ATOM_item_content_end (StrBuf *CData,
00346                          rss_item *ri,
00347                          rss_aggregator *RSSAggr,
00348                          const char** Attr)
00349 {
00350        long olen = StrLength (ri->description);
00351        long clen = StrLength (CData);
00352        if (clen > 0)
00353        {
00354               if (olen == 0) {
00355                      NewStrBufDupAppendFlush(&ri->description,
00356                                           CData,
00357                                           NULL,
00358                                           0);
00359                      StrBufTrim(ri->description);
00360               }
00361               else if (olen < clen) {
00362                      FlushStrBuf(ri->description);
00363                      NewStrBufDupAppendFlush(&ri->description,
00364                                           CData,
00365                                           NULL,
00366                                           0);
00367 
00368                      StrBufTrim(ri->description);
00369               }
00370        }
00371 }
00372 void ATOM_item_summary_end (StrBuf *CData,
00373                          rss_item *ri,
00374                          rss_aggregator *RSSAggr,
00375                          const char** Attr)
00376 {
00377        /*
00378         * this can contain an abstract of the article.
00379         * but we don't want to verwrite a full document if we already have it.
00380         */
00381        if ((StrLength(CData) > 0) && (StrLength(ri->description) == 0))
00382        {
00383               NewStrBufDupAppendFlush(&ri->description, CData, NULL, 0);
00384               StrBufTrim(ri->description);
00385        }
00386 }
00387 
00388 void RSS_item_description_end (StrBuf *CData,
00389                             rss_item *ri,
00390                             rss_aggregator *RSSAggr,
00391                             const char** Attr)
00392 {
00393        long olen = StrLength (ri->description);
00394        long clen = StrLength (CData);
00395        if (clen > 0)
00396        {
00397               if (olen == 0) {
00398                      NewStrBufDupAppendFlush(&ri->description,
00399                                           CData,
00400                                           NULL,
00401                                           0);
00402                      StrBufTrim(ri->description);
00403               }
00404               else if (olen < clen) {
00405                      FlushStrBuf(ri->description);
00406                      NewStrBufDupAppendFlush(&ri->description,
00407                                           CData,
00408                                           NULL,
00409                                           0);
00410                      StrBufTrim(ri->description);
00411               }
00412        }
00413 }
00414 
00415 void ATOM_item_published_end (StrBuf *CData,
00416                            rss_item *ri,
00417                            rss_aggregator *RSSAggr,
00418                            const char** Attr)
00419 {
00420        if (StrLength(CData) > 0) {
00421               StrBufTrim(CData);
00422               ri->pubdate = rdf_parsedate(ChrPtr(CData));
00423        }
00424 }
00425 
00426 void ATOM_item_updated_end (StrBuf *CData,
00427                          rss_item *ri,
00428                          rss_aggregator *RSSAggr,
00429                          const char** Attr)
00430 {
00431        if (StrLength(CData) > 0) {
00432               StrBufTrim(CData);
00433               ri->pubdate = rdf_parsedate(ChrPtr(CData));
00434        }
00435 }
00436 
00437 void RSS_item_pubdate_end (StrBuf *CData,
00438                         rss_item *ri,
00439                         rss_aggregator *RSSAggr,
00440                         const char** Attr)
00441 {
00442        if (StrLength(CData) > 0) {
00443               StrBufTrim(CData);
00444               ri->pubdate = rdf_parsedate(ChrPtr(CData));
00445        }
00446 }
00447 
00448 
00449 void RSS_item_date_end (StrBuf *CData,
00450                      rss_item *ri,
00451                      rss_aggregator *RSSAggr,
00452                      const char** Attr)
00453 {
00454        if (StrLength(CData) > 0) {
00455               StrBufTrim(CData);
00456               ri->pubdate = rdf_parsedate(ChrPtr(CData));
00457        }
00458 }
00459 
00460 
00461 
00462 void RSS_item_author_end(StrBuf *CData,
00463                       rss_item *ri,
00464                       rss_aggregator *RSSAggr,
00465                       const char** Attr)
00466 {
00467        if (StrLength(CData) > 0) {
00468               NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
00469               StrBufTrim(ri->author_or_creator);
00470        }
00471 }
00472 
00473 
00474 void ATOM_item_name_end(StrBuf *CData,
00475                      rss_item *ri,
00476                      rss_aggregator *RSSAggr,
00477                      const char** Attr)
00478 {
00479        if (StrLength(CData) > 0) {
00480               NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
00481               StrBufTrim(ri->author_or_creator);
00482        }
00483 }
00484 
00485 void ATOM_item_email_end(StrBuf *CData,
00486                       rss_item *ri,
00487                       rss_aggregator *RSSAggr,
00488                       const char** Attr)
00489 {
00490        if (StrLength(CData) > 0) {
00491               NewStrBufDupAppendFlush(&ri->author_email, CData, NULL, 0);
00492               StrBufTrim(ri->author_email);
00493        }
00494 }
00495 
00496 void RSS_item_creator_end(StrBuf *CData,
00497                        rss_item *ri,
00498                        rss_aggregator *RSSAggr,
00499                        const char** Attr)
00500 {
00501        if ((StrLength(CData) > 0) &&
00502            (StrLength(ri->author_or_creator) == 0))
00503        {
00504               NewStrBufDupAppendFlush(&ri->author_or_creator, CData, NULL, 0);
00505               StrBufTrim(ri->author_or_creator);
00506        }
00507 }
00508 
00509 
00510 void ATOM_item_uri_end(StrBuf *CData,
00511                      rss_item *ri,
00512                      rss_aggregator *RSSAggr,
00513                      const char** Attr)
00514 {
00515        if (StrLength(CData) > 0) {
00516               NewStrBufDupAppendFlush(&ri->author_url, CData, NULL, 0);
00517               StrBufTrim(ri->author_url);
00518        }
00519 }
00520 
00521 void RSS_item_item_end(StrBuf *CData,
00522                      rss_item *ri,
00523                      rss_aggregator *RSSAggr,
00524                      const char** Attr)
00525 {
00526        --ri->item_tag_nesting;
00527        rss_save_item(ri, RSSAggr);
00528 }
00529 
00530 
00531 void ATOM_item_entry_end(StrBuf *CData,
00532                       rss_item *ri,
00533                       rss_aggregator *RSSAggr,
00534                       const char** Attr)
00535 {
00536        --ri->item_tag_nesting;
00537        rss_save_item(ri, RSSAggr);
00538 }
00539 
00540 void RSS_item_rss_end(StrBuf *CData,
00541                     rss_item *ri,
00542                     rss_aggregator *RSSAggr,
00543                     const char** Attr)
00544 {
00545        AsyncIO              *IO = &RSSAggr->IO;
00546        EVRSSATOMM_syslog(LOG_DEBUG, "End of feed detected.  Closing parser.\n");
00547        ri->done_parsing = 1;
00548 }
00549 
00550 void RSS_item_rdf_end(StrBuf *CData,
00551                     rss_item *ri,
00552                     rss_aggregator *RSSAggr,
00553                     const char** Attr)
00554 {
00555        AsyncIO              *IO = &RSSAggr->IO;
00556        EVRSSATOMM_syslog(LOG_DEBUG, "End of feed detected.  Closing parser.\n");
00557        ri->done_parsing = 1;
00558 }
00559 
00560 
00561 void RSSATOM_item_ignore(StrBuf *CData,
00562                       rss_item *ri,
00563                       rss_aggregator *RSSAggr,
00564                       const char** Attr)
00565 {
00566 }
00567 
00568 
00569 
00570 /*
00571  * This callback stores up the data which appears in between tags.
00572  */
00573 void rss_xml_cdata_start(void *data)
00574 {
00575        rss_aggregator *RSSAggr = (rss_aggregator*) data;
00576 
00577        FlushStrBuf(RSSAggr->CData);
00578 }
00579 
/*
 * Counterpart of rss_xml_cdata_start(); nothing needs to be done here.
 */
void rss_xml_cdata_end(void *data)
{
	(void) data;
}
00583 void rss_xml_chardata(void *data, const XML_Char *s, int len)
00584 {
00585        rss_aggregator *RSSAggr = (rss_aggregator*) data;
00586 
00587        StrBufAppendBufPlain (RSSAggr->CData, s, len, 0);
00588 }
00589 
00590 
00591 /******************************************************************************
00592  *                            RSS parser logic                                *
00593  ******************************************************************************/
00594 
/* Mutex defined in another translation unit; not used in the part of the file shown here. */
extern pthread_mutex_t RSSQueueMutex;

/* Start-tag handlers, looked up by lower-cased element name in rss_xml_start(). */
HashList *StartHandlers = NULL;
/* NOTE(review): presumably the end-tag counterpart of StartHandlers - confirm against rss_xml_end(). */
HashList *EndHandlers = NULL;
/* Namespace prefixes accepted by rss_xml_start(); elements carrying any other prefix are ignored. */
HashList *KnownNameSpaces = NULL;
00600 
00601 void FreeNetworkSaveMessage (void *vMsg)
00602 {
00603        networker_save_message *Msg = (networker_save_message *) vMsg;
00604 
00605        CtdlFreeMessageContents(&Msg->Msg);
00606        FreeStrBuf(&Msg->Message);
00607        FreeStrBuf(&Msg->MsgGUID);
00608        free(Msg);
00609 }
00610 
00611 
00612 void AppendLink(StrBuf *Message,
00613               StrBuf *link,
00614               StrBuf *LinkTitle,
00615               const char *Title)
00616 {
00617        if (StrLength(link) > 0)
00618        {
00619               StrBufAppendBufPlain(Message, HKEY("<a href=\""), 0);
00620               StrBufAppendBuf(Message, link, 0);
00621               StrBufAppendBufPlain(Message, HKEY("\">"), 0);
00622               if (StrLength(LinkTitle) > 0)
00623                      StrBufAppendBuf(Message, LinkTitle, 0);
00624               else if ((Title != NULL) && !IsEmptyStr(Title))
00625                      StrBufAppendBufPlain(Message, Title, -1, 0);
00626               else
00627                      StrBufAppendBuf(Message, link, 0);
00628               StrBufAppendBufPlain(Message, HKEY("</a><br>\n"), 0);
00629        }
00630 }
00631 
00632 /*
00633  * Commit a fetched and parsed RSS item to disk
00634  */
00635 void rss_save_item(rss_item *ri, rss_aggregator *RSSAggr)
00636 {
00637        networker_save_message *SaveMsg;
00638        struct MD5Context md5context;
00639        u_char rawdigest[MD5_DIGEST_LEN];
00640        int msglen = 0;
00641        StrBuf *Message;
00642        StrBuf *guid;
00643        AsyncIO *IO = &RSSAggr->IO;
00644        int n;
00645 
00646 
00647        SaveMsg = (networker_save_message *) malloc(
00648               sizeof(networker_save_message));
00649        memset(SaveMsg, 0, sizeof(networker_save_message));
00650 
00651        /* Construct a GUID to use in the S_USETABLE table.
00652         * If one is not present in the item itself, make one up.
00653         */
00654        if (ri->guid != NULL) {
00655               StrBufSpaceToBlank(ri->guid);
00656               StrBufTrim(ri->guid);
00657               guid = NewStrBufPlain(HKEY("rss/"));
00658               StrBufAppendBuf(guid, ri->guid, 0);
00659        }
00660        else {
00661               MD5Init(&md5context);
00662               if (ri->title != NULL) {
00663                      MD5Update(&md5context,
00664                               (const unsigned char*)SKEY(ri->title));
00665               }
00666               if (ri->link != NULL) {
00667                      MD5Update(&md5context,
00668                               (const unsigned char*)SKEY(ri->link));
00669               }
00670               MD5Final(rawdigest, &md5context);
00671               guid = NewStrBufPlain(NULL,
00672                                   MD5_DIGEST_LEN * 2 + 12 /* _rss2ctdl*/);
00673               StrBufHexEscAppend(guid, NULL, rawdigest, MD5_DIGEST_LEN);
00674               StrBufAppendBufPlain(guid, HKEY("_rss2ctdl"), 0);
00675        }
00676 
00677        /* translate Item into message. */
00678        EVRSSATOMM_syslog(LOG_DEBUG, "RSS: translating item...\n");
00679        if (ri->description == NULL) ri->description = NewStrBufPlain(HKEY(""));
00680        StrBufSpaceToBlank(ri->description);
00681        SaveMsg->Msg.cm_magic = CTDLMESSAGE_MAGIC;
00682        SaveMsg->Msg.cm_anon_type = MES_NORMAL;
00683        SaveMsg->Msg.cm_format_type = FMT_RFC822;
00684 
00685        if (ri->guid != NULL) {
00686               SaveMsg->Msg.cm_fields['E'] = strdup(ChrPtr(ri->guid));
00687        }
00688 
00689        if (ri->author_or_creator != NULL) {
00690               char *From;
00691               StrBuf *Encoded = NULL;
00692               int FromAt;
00693 
00694               From = html_to_ascii(ChrPtr(ri->author_or_creator),
00695                                  StrLength(ri->author_or_creator),
00696                                  512, 0);
00697               StrBufPlain(ri->author_or_creator, From, -1);
00698               StrBufTrim(ri->author_or_creator);
00699               free(From);
00700 
00701               FromAt = strchr(ChrPtr(ri->author_or_creator), '@') != NULL;
00702               if (!FromAt && StrLength (ri->author_email) > 0)
00703               {
00704                      StrBufRFC2047encode(&Encoded, ri->author_or_creator);
00705                      SaveMsg->Msg.cm_fields['A'] = SmashStrBuf(&Encoded);
00706                      SaveMsg->Msg.cm_fields['P'] =
00707                             SmashStrBuf(&ri->author_email);
00708               }
00709               else
00710               {
00711                      if (FromAt)
00712                      {
00713                             SaveMsg->Msg.cm_fields['A'] =
00714                                    SmashStrBuf(&ri->author_or_creator);
00715                             SaveMsg->Msg.cm_fields['P'] =
00716                                    strdup(SaveMsg->Msg.cm_fields['A']);
00717                      }
00718                      else
00719                      {
00720                             StrBufRFC2047encode(&Encoded,
00721                                               ri->author_or_creator);
00722                             SaveMsg->Msg.cm_fields['A'] =
00723                                    SmashStrBuf(&Encoded);
00724                             SaveMsg->Msg.cm_fields['P'] =
00725                                    strdup("rss@localhost");
00726 
00727                      }
00728                      if (ri->pubdate <= 0) {
00729                             ri->pubdate = time(NULL);
00730                      }
00731               }
00732        }
00733        else {
00734               SaveMsg->Msg.cm_fields['A'] = strdup("rss");
00735        }
00736 
00737        SaveMsg->Msg.cm_fields['N'] = strdup(NODENAME);
00738        if (ri->title != NULL) {
00739               long len;
00740               char *Sbj;
00741               StrBuf *Encoded, *QPEncoded;
00742 
00743               QPEncoded = NULL;
00744               StrBufSpaceToBlank(ri->title);
00745               len = StrLength(ri->title);
00746               Sbj = html_to_ascii(ChrPtr(ri->title), len, 512, 0);
00747               len = strlen(Sbj);
00748               if ((len > 0) && (Sbj[len - 1] == '\n'))
00749               {
00750                      len --;
00751                      Sbj[len] = '\0';
00752               }
00753               Encoded = NewStrBufPlain(Sbj, len);
00754               free(Sbj);
00755 
00756               StrBufTrim(Encoded);
00757               StrBufRFC2047encode(&QPEncoded, Encoded);
00758 
00759               SaveMsg->Msg.cm_fields['U'] = SmashStrBuf(&QPEncoded);
00760               FreeStrBuf(&Encoded);
00761        }
00762        SaveMsg->Msg.cm_fields['T'] = malloc(64);
00763        snprintf(SaveMsg->Msg.cm_fields['T'], 64, "%ld", ri->pubdate);
00764        if (ri->channel_title != NULL) {
00765               if (StrLength(ri->channel_title) > 0) {
00766                      SaveMsg->Msg.cm_fields['O'] =
00767                             strdup(ChrPtr(ri->channel_title));
00768               }
00769        }
00770        if (ri->link == NULL)
00771               ri->link = NewStrBufPlain(HKEY(""));
00772 
00773 #if 0 /* temporarily disable shorter urls. */
00774        SaveMsg->Msg.cm_fields[TMP_SHORTER_URLS] =
00775               GetShorterUrls(ri->description);
00776 #endif
00777 
00778        msglen += 1024 + StrLength(ri->link) + StrLength(ri->description) ;
00779 
00780        Message = NewStrBufPlain(NULL, msglen);
00781 
00782        StrBufPlain(Message, HKEY(
00783                          "Content-type: text/html; charset=\"UTF-8\"\r\n\r\n"
00784                          "<html><body>\n"));
00785 #if 0 /* disable shorter url for now. */
00786        SaveMsg->Msg.cm_fields[TMP_SHORTER_URL_OFFSET] = StrLength(Message);
00787 #endif
00788        StrBufAppendBuf(Message, ri->description, 0);
00789        StrBufAppendBufPlain(Message, HKEY("<br><br>\n"), 0);
00790 
00791        AppendLink(Message, ri->link, ri->linkTitle, NULL);
00792        AppendLink(Message, ri->reLink, ri->reLinkTitle, "Reply to this");
00793        StrBufAppendBufPlain(Message, HKEY("</body></html>\n"), 0);
00794 
00795        SaveMsg->MsgGUID = guid;
00796        SaveMsg->Message = Message;
00797 
00798        n = GetCount(RSSAggr->Messages) + 1;
00799        Put(RSSAggr->Messages, IKEY(n), SaveMsg, FreeNetworkSaveMessage);
00800 }
00801 
00802 
00803 void rss_xml_start(void *data, const char *supplied_el, const char **attr)
00804 {
00805        rss_xml_handler *h;
00806        rss_aggregator  *RSSAggr = (rss_aggregator*) data;
00807        AsyncIO              *IO = &RSSAggr->IO;
00808        rss_item        *ri = RSSAggr->Item;
00809        void            *pv;
00810        const char      *pel;
00811        char            *sep = NULL;
00812 
00813        /* Axe the namespace, we don't care about it */
00814        /*
00815          syslog(LOG_DEBUG,
00816          "RSS: supplied el %d: %s\n", RSSAggr->RSSAggr->ItemType, supplied_el);
00817        */
00818        pel = supplied_el;
00819        while (sep = strchr(pel, ':'), sep) {
00820               pel = sep + 1;
00821        }
00822 
00823        if (pel != supplied_el)
00824        {
00825               void *v;
00826 
00827               if (!GetHash(KnownNameSpaces,
00828                           supplied_el,
00829                           pel - supplied_el - 1,
00830                           &v))
00831               {
00832                      EVRSSATOM_syslog(LOG_DEBUG,
00833                                     "RSS: START ignoring "
00834                                     "because of wrong namespace [%s]\n",
00835                                     supplied_el);
00836                      return;
00837               }
00838        }
00839 
00840        StrBufPlain(RSSAggr->Key, pel, -1);
00841        StrBufLowerCase(RSSAggr->Key);
00842        if (GetHash(StartHandlers, SKEY(RSSAggr->Key), &pv))
00843        {
00844               h = (rss_xml_handler*) pv;
00845 
00846               if (((h->Flags & RSS_UNSET) != 0) &&
00847                   (RSSAggr->ItemType == RSS_UNSET))
00848               {
00849                      h->Handler(RSSAggr->CData, ri, RSSAggr, attr);
00850               }
00851               else if (((h->Flags & RSS_RSS) != 0) &&
00852                   (RSSAggr->ItemType == RSS_RSS))
00853               {
00854                      h->Handler(RSSAggr->CData, ri, RSSAggr, attr);
00855               }
00856               else if (((h->Flags & RSS_ATOM) != 0) &&
00857                       (RSSAggr->ItemType == RSS_ATOM))
00858               {
00859                      h->Handler(RSSAggr->CData,
00860                                ri,
00861                                RSSAggr,
00862                                attr);
00863               }
00864               else
00865                      EVRSSATOM_syslog(LOG_DEBUG,
00866                                      "RSS: START unhandled: [%s] [%s]...\n",
00867                                     pel,
00868                                     supplied_el);
00869        }
00870        else
00871               EVRSSATOM_syslog(LOG_DEBUG,
00872                              "RSS: START unhandled: [%s] [%s]...\n",
00873                              pel,
00874                              supplied_el);
00875 }
00876 
00877 void rss_xml_end(void *data, const char *supplied_el)
00878 {
00879        rss_xml_handler *h;
00880        rss_aggregator  *RSSAggr = (rss_aggregator*) data;
00881        AsyncIO              *IO = &RSSAggr->IO;
00882        rss_item        *ri = RSSAggr->Item;
00883        const char      *pel;
00884        char            *sep = NULL;
00885        void            *pv;
00886 
00887        /* Axe the namespace, we don't care about it */
00888        pel = supplied_el;
00889        while (sep = strchr(pel, ':'), sep) {
00890               pel = sep + 1;
00891        }
00892        EVRSSATOM_syslog(LOG_DEBUG, "RSS: END %s...\n", supplied_el);
00893        if (pel != supplied_el)
00894        {
00895               void *v;
00896 
00897               if (!GetHash(KnownNameSpaces,
00898                           supplied_el,
00899                           pel - supplied_el - 1,
00900                           &v))
00901               {
00902                      EVRSSATOM_syslog(LOG_DEBUG,
00903                                     "RSS: END ignoring because of wrong namespace"
00904                                     "[%s] = [%s]\n",
00905                                     supplied_el,
00906                                     ChrPtr(RSSAggr->CData));
00907                      FlushStrBuf(RSSAggr->CData);
00908                      return;
00909               }
00910        }
00911 
00912        StrBufPlain(RSSAggr->Key, pel, -1);
00913        StrBufLowerCase(RSSAggr->Key);
00914        if (GetHash(EndHandlers, SKEY(RSSAggr->Key), &pv))
00915        {
00916               h = (rss_xml_handler*) pv;
00917 
00918               if (((h->Flags & RSS_UNSET) != 0) &&
00919                   (RSSAggr->ItemType == RSS_UNSET))
00920               {
00921                      h->Handler(RSSAggr->CData, ri, RSSAggr, NULL);
00922               }
00923               else if (((h->Flags & RSS_RSS) != 0) &&
00924                   (RSSAggr->ItemType == RSS_RSS))
00925               {
00926                      h->Handler(RSSAggr->CData, ri, RSSAggr, NULL);
00927               }
00928               else if (((h->Flags & RSS_ATOM) != 0) &&
00929                       (RSSAggr->ItemType == RSS_ATOM))
00930               {
00931                      h->Handler(RSSAggr->CData, ri, RSSAggr, NULL);
00932               }
00933               else
00934                      EVRSSATOM_syslog(LOG_DEBUG,
00935                                     "RSS: END   unhandled: [%s]  [%s] = [%s]...\n",
00936                                     pel,
00937                                     supplied_el,
00938                                     ChrPtr(RSSAggr->CData));
00939        }
00940        else
00941               EVRSSATOM_syslog(LOG_DEBUG,
00942                              "RSS: END   unhandled: [%s]  [%s] = [%s]...\n",
00943                              pel,
00944                              supplied_el,
00945                              ChrPtr(RSSAggr->CData));
00946        FlushStrBuf(RSSAggr->CData);
00947 }
00948 
00949 /*
00950  * Callback function for passing libcurl's output to expat for parsing
00951  * we don't do streamed parsing so expat can handle non-utf8 documents
00952 size_t rss_libcurl_callback(void *ptr, size_t size, size_t nmemb, void *stream)
00953 {
00954        XML_Parse((XML_Parser)stream, ptr, (size * nmemb), 0);
00955        return (size*nmemb);
00956 }
00957  */
00958 
00959 eNextState RSSAggregator_ParseReply(AsyncIO *IO)
00960 {
00961        StrBuf *Buf;
00962        rss_aggregator *RSSAggr;
00963        rss_item *ri;
00964        const char *at;
00965        char *ptr;
00966        long len;
00967        const char *Key;
00968 
00969 
00970        if (IO->HttpReq.httpcode != 200)
00971        {
00972 
00973               EVRSSATOM_syslog(LOG_ALERT, "need a 200, got a %ld !\n",
00974                              IO->HttpReq.httpcode);
00975 // TODO: aide error message with rate limit
00976               return eAbort;
00977        }
00978 
00979        RSSAggr = IO->Data;
00980        ri = RSSAggr->Item;
00981        RSSAggr->CData = NewStrBufPlain(NULL, SIZ);
00982        RSSAggr->Key = NewStrBuf();
00983        at = NULL;
00984        StrBufSipLine(RSSAggr->Key, IO->HttpReq.ReplyData, &at);
00985        ptr = NULL;
00986 
00987 #define encoding "encoding=\""
00988        ptr = strstr(ChrPtr(RSSAggr->Key), encoding);
00989        if (ptr != NULL)
00990        {
00991               char *pche;
00992 
00993               ptr += sizeof (encoding) - 1;
00994               pche = strchr(ptr, '"');
00995               if (pche != NULL)
00996                      StrBufCutAt(RSSAggr->Key, -1, pche);
00997               else
00998                      ptr = "UTF-8";
00999        }
01000        else
01001               ptr = "UTF-8";
01002 
01003        EVRSSATOM_syslog(LOG_DEBUG, "RSS: Now parsing [%s] \n", ChrPtr(RSSAggr->Url));
01004 
01005        RSSAggr->xp = XML_ParserCreateNS(ptr, ':');
01006        if (!RSSAggr->xp) {
01007               EVRSSATOMM_syslog(LOG_ALERT, "Cannot create XML parser!\n");
01008               return eAbort;
01009        }
01010        FlushStrBuf(RSSAggr->Key);
01011 
01012        RSSAggr->Messages = NewHash(1, Flathash);
01013        XML_SetElementHandler(RSSAggr->xp, rss_xml_start, rss_xml_end);
01014        XML_SetCharacterDataHandler(RSSAggr->xp, rss_xml_chardata);
01015        XML_SetUserData(RSSAggr->xp, RSSAggr);
01016        XML_SetCdataSectionHandler(RSSAggr->xp,
01017                                rss_xml_cdata_start,
01018                                rss_xml_cdata_end);
01019 
01020 
01021        len = StrLength(IO->HttpReq.ReplyData);
01022        ptr = SmashStrBuf(&IO->HttpReq.ReplyData);
01023        XML_Parse(RSSAggr->xp, ptr, len, 0);
01024        free (ptr);
01025        if (ri->done_parsing == 0)
01026               XML_Parse(RSSAggr->xp, "", 0, 1);
01027 
01028 
01029        EVRSSATOM_syslog(LOG_DEBUG, "RSS: XML Status [%s] \n",
01030                       XML_ErrorString(XML_GetErrorCode(RSSAggr->xp)));
01031 
01032        XML_ParserFree(RSSAggr->xp);
01033        flush_rss_item(ri);
01034 
01035        Buf = NewStrBufDup(RSSAggr->rooms);
01036        RSSAggr->recp.recp_room = SmashStrBuf(&Buf);
01037        RSSAggr->recp.num_room = RSSAggr->roomlist_parts;
01038        RSSAggr->recp.recptypes_magic = RECPTYPES_MAGIC;
01039 
01040        RSSAggr->Pos = GetNewHashPos(RSSAggr->Messages, 1);
01041 
01042 //RSSAggr->next_poll = time(NULL) + config.c_net_freq;
01043        if (GetNextHashPos(RSSAggr->Messages,
01044                         RSSAggr->Pos,
01045                         &len,
01046                         &Key,
01047                         (void**) &RSSAggr->ThisMsg))
01048               return QueueDBOperation(IO, RSS_FetchNetworkUsetableEntry);
01049        else
01050               return eAbort;
01051 }
01052 
01053 
01054 /******************************************************************************
01055  *                    RSS handler registering logic                           *
01056  ******************************************************************************/
01057 
01058 void AddRSSStartHandler(rss_handler_func Handler,
01059                      int Flags,
01060                      const char *key,
01061                      long len)
01062 {
01063        rss_xml_handler *h;
01064        h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
01065        h->Flags = Flags;
01066        h->Handler = Handler;
01067        Put(StartHandlers, key, len, h, NULL);
01068 }
01069 
01070 void AddRSSEndHandler(rss_handler_func Handler,
01071                     int Flags,
01072                     const char *key,
01073                     long len)
01074 {
01075        rss_xml_handler *h;
01076        h = (rss_xml_handler*) malloc(sizeof (rss_xml_handler));
01077        h->Flags = Flags;
01078        h->Handler = Handler;
01079        Put(EndHandlers, key, len, h, NULL);
01080 }
01081 
01082 void rss_parser_cleanup(void)
01083 {
01084        DeleteHash(&StartHandlers);
01085        DeleteHash(&EndHandlers);
01086        DeleteHash(&KnownNameSpaces);
01087 }
01088 
01089 void LogDebugEnableRSSATOMParser(const int n)
01090 {
01091        RSSAtomParserDebugEnabled = n;
01092 }
01093 
01094 CTDL_MODULE_INIT(rssparser)
01095 {
01096        if (!threading)
01097        {
01098               StartHandlers = NewHash(1, NULL);
01099               EndHandlers = NewHash(1, NULL);
01100 
01101               AddRSSStartHandler(RSS_item_rss_start,     RSS_UNSET, HKEY("rss"));
01102               AddRSSStartHandler(RSS_item_rdf_start,     RSS_UNSET, HKEY("rdf"));
01103               AddRSSStartHandler(ATOM_item_feed_start,   RSS_UNSET, HKEY("feed"));
01104               AddRSSStartHandler(RSS_item_item_start,    RSS_RSS, HKEY("item"));
01105               AddRSSStartHandler(ATOM_item_entry_start,  RSS_ATOM, HKEY("entry"));
01106               AddRSSStartHandler(ATOM_item_link_start,   RSS_ATOM, HKEY("link"));
01107 
01108               AddRSSEndHandler(ATOMRSS_item_title_end,   RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title"));
01109               AddRSSEndHandler(RSS_item_guid_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("guid"));
01110               AddRSSEndHandler(ATOM_item_id_end,         RSS_ATOM|RSS_REQUIRE_BUF, HKEY("id"));
01111               AddRSSEndHandler(RSS_item_link_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("link"));
01112 #if 0
01113 // hm, rss to the comments of that blog, might be interesting in future, but...
01114               AddRSSEndHandler(RSS_item_relink_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("commentrss"));
01115 // comment count...
01116               AddRSSEndHandler(RSS_item_relink_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("comments"));
01117 #endif
01118               AddRSSEndHandler(RSSATOM_item_title_end,   RSS_ATOM|RSS_RSS|RSS_REQUIRE_BUF, HKEY("title"));
01119               AddRSSEndHandler(ATOM_item_content_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("content"));
01120               AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_ATOM|RSS_REQUIRE_BUF, HKEY("encoded"));
01121               AddRSSEndHandler(ATOM_item_summary_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("summary"));
01122               AddRSSEndHandler(RSS_item_description_end, RSS_RSS|RSS_REQUIRE_BUF, HKEY("description"));
01123               AddRSSEndHandler(ATOM_item_published_end,  RSS_ATOM|RSS_REQUIRE_BUF, HKEY("published"));
01124               AddRSSEndHandler(ATOM_item_updated_end,    RSS_ATOM|RSS_REQUIRE_BUF, HKEY("updated"));
01125               AddRSSEndHandler(RSS_item_pubdate_end,     RSS_RSS|RSS_REQUIRE_BUF, HKEY("pubdate"));
01126               AddRSSEndHandler(RSS_item_date_end,        RSS_RSS|RSS_REQUIRE_BUF, HKEY("date"));
01127               AddRSSEndHandler(RSS_item_author_end,      RSS_RSS|RSS_REQUIRE_BUF, HKEY("author"));
01128               AddRSSEndHandler(RSS_item_creator_end,     RSS_RSS|RSS_REQUIRE_BUF, HKEY("creator"));
01129 /* <author> */
01130               AddRSSEndHandler(ATOM_item_email_end,      RSS_ATOM|RSS_REQUIRE_BUF, HKEY("email"));
01131               AddRSSEndHandler(ATOM_item_name_end,       RSS_ATOM|RSS_REQUIRE_BUF, HKEY("name"));
01132               AddRSSEndHandler(ATOM_item_uri_end,        RSS_ATOM|RSS_REQUIRE_BUF, HKEY("uri"));
01133 /* </author> */
01134               AddRSSEndHandler(RSS_item_item_end,        RSS_RSS, HKEY("item"));
01135               AddRSSEndHandler(RSS_item_rss_end,         RSS_RSS, HKEY("rss"));
01136               AddRSSEndHandler(RSS_item_rdf_end,         RSS_RSS, HKEY("rdf"));
01137               AddRSSEndHandler(ATOM_item_entry_end,      RSS_ATOM, HKEY("entry"));
01138 
01139 
01140 /* at the start of atoms: <seq> <li>link to resource</li></seq> ignore them. */
01141               AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("seq"));
01142               AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("seq"));
01143               AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("li"));
01144               AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("li"));
01145 
01146 /* links to other feed generators... */
01147               AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("feedflare"));
01148               AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("feedflare"));
01149               AddRSSStartHandler(RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("browserfriendly"));
01150               AddRSSEndHandler  (RSSATOM_item_ignore,      RSS_RSS|RSS_ATOM, HKEY("browserfriendly"));
01151 
01152               KnownNameSpaces = NewHash(1, NULL);
01153               Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearch/1.1/"), NULL, reference_free_handler);
01154               Put(KnownNameSpaces, HKEY("http://a9.com/-/spec/opensearchrss/1.0/"), NULL, reference_free_handler);
01155               Put(KnownNameSpaces, HKEY("http://backend.userland.com/creativeCommonsRssModule"), NULL, reference_free_handler);
01156               Put(KnownNameSpaces, HKEY("http://purl.org/atom/ns#"), NULL, reference_free_handler);
01157               Put(KnownNameSpaces, HKEY("http://purl.org/dc/elements/1.1/"), NULL, reference_free_handler);
01158               Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler);
01159               Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/content/"), NULL, reference_free_handler);
01160               Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/slash/"), NULL, reference_free_handler);
01161               Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/modules/syndication/"), NULL, reference_free_handler);
01162               Put(KnownNameSpaces, HKEY("http://purl.org/rss/1.0/"), NULL, reference_free_handler);
01163               Put(KnownNameSpaces, HKEY("http://purl.org/syndication/thread/1.0"), NULL, reference_free_handler);
01164               Put(KnownNameSpaces, HKEY("http://rssnamespace.org/feedburner/ext/1.0"), NULL, reference_free_handler);
01165               Put(KnownNameSpaces, HKEY("http://schemas.google.com/g/2005"), NULL, reference_free_handler);
01166               Put(KnownNameSpaces, HKEY("http://webns.net/mvcb/"), NULL, reference_free_handler);
01167               Put(KnownNameSpaces, HKEY("http://web.resource.org/cc/"), NULL, reference_free_handler);
01168               Put(KnownNameSpaces, HKEY("http://wellformedweb.org/CommentAPI/"), NULL, reference_free_handler);
01169               Put(KnownNameSpaces, HKEY("http://www.georss.org/georss"), NULL, reference_free_handler);
01170               Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/xhtml"), NULL, reference_free_handler);
01171               Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler);
01172               Put(KnownNameSpaces, HKEY("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), NULL, reference_free_handler);
01173               Put(KnownNameSpaces, HKEY("http://www.w3.org/2003/01/geo/wgs84_pos#"), NULL, reference_free_handler);
01174               Put(KnownNameSpaces, HKEY("http://www.w3.org/2005/Atom"), NULL, reference_free_handler);
01175               Put(KnownNameSpaces, HKEY("urn:flickr:"), NULL, reference_free_handler);
01176 #if 0
01177               /* we don't like these namespaces because of they shadow our usefull parameters. */
01178               Put(KnownNameSpaces, HKEY("http://search.yahoo.com/mrss/"), NULL, reference_free_handler);
01179 #endif
01180               CtdlRegisterDebugFlagHook(HKEY("RSSAtomParser"), LogDebugEnableRSSATOMParser, &RSSAtomParserDebugEnabled);
01181               CtdlRegisterCleanupHook(rss_parser_cleanup);
01182        }
01183        return "rssparser";
01184 }