Back to index

moin  1.9.0~rc2
text_docbook.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - DocBook Formatter
00004 
00005     @copyright: 2005,2008 by Mikko Virkkilä <mvirkkil@cc.hut.fi>
00006     @copyright: 2005 by MoinMoin:AlexanderSchremmer (small modifications)
00007     @copyright: 2005 by MoinMoin:Petr Pytelka <pyta@lightcomp.com> (small modifications)
00008 
00009     @license: GNU GPL, see COPYING for details.
00010 """
00011 
00012 import os
00013 
00014 from xml.dom import getDOMImplementation
00015 from xml.dom.ext.reader import Sax
00016 from xml.dom.ext import Node
00017 
00018 from MoinMoin.formatter import FormatterBase
00019 from MoinMoin import wikiutil
00020 from MoinMoin.error import CompositeError
00021 from MoinMoin.action import AttachFile
00022 
00023 #For revision history
00024 from MoinMoin.logfile import editlog
00025 from MoinMoin import user
00026 
00027 
class InternalError(CompositeError):
    """Error raised by the DocBook formatter itself.

    Raised at import time when the 4DOM implementation is unavailable.
    """
00030 
# Look up the "4DOM" DOM implementation once, at import time.  The module-level
# ``dom`` object is what startDocument() uses to create the DocBook document.
# An ImportError from the lookup is re-raised as InternalError with an
# installation hint.
try:
    dom = getDOMImplementation("4DOM")
except ImportError:
    raise InternalError("You need to install 4suite to use the DocBook formatter.")
00035 
00036 
00037 class Formatter(FormatterBase):
00038     #TODO: How to handle revision history and other meta-info from included files?
00039     #      The problem is that we don't know what the original page is, since
00040     #      the Include macro doesn't pass us the information.
00041 
00042     # this list is extended as the page is parsed. Could be optimized by adding them here?
00043     section_should_break = ['abstract', 'para', 'emphasis']
00044 
00045     blacklisted_macros = ('TableOfContents', 'ShowSmileys', 'Navigation')
00046 
00047     # If the current node is one of the following and we are about to emit
00048     # text, the text should be wrapped in a paragraph.
00049     wrap_text_in_para = ('listitem', 'glossdef', 'article', 'chapter', 'tip', 'warning', 'note', 'caution', 'important')
00050 
00051     # from dtd
00052     _can_contain_section = ("section", "appendix", "article", "chapter", "patintro", "preface")
00053 
00054     def __init__(self, request, doctype="article", **kw):
00055         FormatterBase.__init__(self, request, **kw)
00056         self.request = request
00057 
00058         '''
00059         If the formatter is used by the Include macro, it will set
00060         is_included=True in which case we know we need to call startDocument
00061         and endDocument from startContent and endContent respectively, since
00062         the Include macro will not be calling them, and the formatter doesn't
00063         work properly unless they are called.
00064         '''
00065         if kw.has_key("is_included") and kw["is_included"]:
00066             self.include_kludge = True
00067         else:
00068             self.include_kludge = False
00069 
00070         self.doctype = doctype
00071         self.curdepth = 0
00072         self.cur = None
00073 
00074     def startDocument(self, pagename):
00075         self.doc = dom.createDocument(None, self.doctype, dom.createDocumentType(
00076             self.doctype, "-//OASIS//DTD DocBook XML V4.4//EN",
00077             "http://www.docbook.org/xml/4.4/docbookx.dtd"))
00078 
00079         self.title = pagename
00080         self.root = self.doc.documentElement
00081 
00082         if not self.include_kludge and self.doctype == "article":
00083             info = self.doc.createElement("articleinfo")
00084             self.root.appendChild(info)
00085             self._addTitleElement(self.title, targetNode=info)
00086             self._addRevisionHistory(targetNode=info)
00087         else:
00088             self._addTitleElement(self.title, targetNode=self.root)
00089 
00090         self.cur = self.root
00091         return ""
00092 
00093     def startContent(self, content_id="content", **kw):
00094         if self.include_kludge and not self.cur:
00095             return self.startDocument("OnlyAnIdiotWouldCreateSuchaPage")
00096         return ""
00097 
00098     def endContent(self):
00099         if self.include_kludge:
00100             return self.endDocument()
00101         return ""
00102 
00103     def endDocument(self):
00104         from xml.dom.ext import PrettyPrint, Print
00105         import StringIO
00106 
00107         f = StringIO.StringIO()
00108         Print(self.doc, f)
00109         txt = f.getvalue()
00110         f.close()
00111 
00112         self.cur = None
00113         return txt
00114 
00115     def text(self, text, **kw):
00116         if text == "\\n":
00117             srcText = "\n"
00118         else:
00119             srcText = text
00120 
00121         if srcText and self._isInsidePreformatted():
00122 
00123             if self.cur.lastChild is not None and self.cur.lastChild.nodeType == Node.CDATA_SECTION_NODE:
00124                 # We can add it to a previous CDATA section
00125                 self.cur.lastChild.nodeValue = self.cur.lastChild.nodeValue + srcText
00126             else:
00127                 # We create a new cdata section
00128                 self.cur.appendChild(self.doc.createCDATASection(srcText))
00129 
00130         elif self.cur.nodeName in self.wrap_text_in_para:
00131             """
00132             If we already wrapped one text item in a para, we should add to that para
00133             and not create a new one. Another question is if we should add a space?
00134             """
00135             if self.cur.lastChild is not None and self.cur.lastChild.nodeName == 'para':
00136                 self.cur.lastChild.appendChild(self.doc.createTextNode(srcText))
00137             else:
00138                 self.paragraph(1)
00139                 self.text(text)
00140                 self.paragraph(0)
00141         else:
00142             self.cur.appendChild(self.doc.createTextNode(srcText))
00143         return ""
00144 
00145     def heading(self, on, depth, **kw):
00146         while self.cur.nodeName in self.section_should_break:
00147             self.cur = self.cur.parentNode
00148 
00149         if on:
00150             # try to go to higher level if needed
00151             if depth <= self.curdepth:
00152                 # number of levels we want to go higher
00153                 numberOfLevels = self.curdepth - depth + 1
00154                 for dummy in range(numberOfLevels):
00155                     # find first non section node
00156                     while not self.cur.nodeName in self._can_contain_section:
00157                         self.cur = self.cur.parentNode
00158 
00159                     if self.cur.nodeName == "section":
00160                         self.cur = self.cur.parentNode
00161 
00162             section = self.doc.createElement("section")
00163             self.cur.appendChild(section)
00164             self.cur = section
00165 
00166             title = self.doc.createElement("title")
00167             self.cur.appendChild(title)
00168             self.cur = title
00169             self.curdepth = depth
00170         else:
00171             self.cur = self.cur.parentNode
00172 
00173         return ""
00174 
00175     def paragraph(self, on, **kw):
00176         FormatterBase.paragraph(self, on)
00177 
00178         # Let's prevent empty paras
00179         if not on:
00180             if not self._hasContent(self.cur):
00181                 oldnode = self.cur
00182                 self.cur = oldnode.parentNode
00183                 self.cur.removeChild(oldnode)
00184                 return ""
00185 
00186         # Let's prevent para inside para
00187         if on and self.cur.nodeName == "para":
00188             return ""
00189         return self._handleNode("para", on)
00190 
00191     def linebreak(self, preformatted=1):
00192         """
00193         If preformatted, it will simply output a linebreak.
00194         If we are in a paragraph, we will close it, and open another one.
00195         """
00196         if preformatted:
00197             self.text('\\n')
00198         elif self.cur.nodeName == "para":
00199             self.paragraph(0)
00200             self.paragraph(1)
00201         else:
00202             self._emitComment("Warning: Probably not emitting right sort of linebreak")
00203             self.text('\n')
00204         return ""
00205 
00206 ### Inline ##########################################################
00207 
00208     def strong(self, on, **kw):
00209         return self._handleFormatting("emphasis", on, (('role', 'strong'), ))
00210 
00211     def emphasis(self, on, **kw):
00212         return self._handleFormatting("emphasis", on)
00213 
00214     def underline(self, on, **kw):
00215         return self._handleFormatting("emphasis", on, (('role', 'underline'), ))
00216 
00217     def highlight(self, on, **kw):
00218         return self._handleFormatting("emphasis", on, (('role', 'highlight'), ))
00219 
00220     def sup(self, on, **kw):
00221         return self._handleFormatting("superscript", on)
00222 
00223     def sub(self, on, **kw):
00224         return self._handleFormatting("subscript", on)
00225 
00226     def strike(self, on, **kw):
00227         # does not yield <strike> using the HTML XSLT files here ...
00228         # but seems to be correct
00229         return self._handleFormatting("emphasis", on,
00230                                       (('role', 'strikethrough'), ))
00231 
00232     def code(self, on, **kw):
00233         # Let's prevent empty code
00234         if not on:
00235             if not self._hasContent(self.cur):
00236                 oldnode = self.cur
00237                 self.cur = oldnode.parentNode
00238                 self.cur.removeChild(oldnode)
00239                 return ""
00240         return self._handleFormatting("code", on)
00241 
00242     def preformatted(self, on, **kw):
00243         return self._handleFormatting("screen", on)
00244 
00245 
00246 ### Lists ###########################################################
00247 
00248     def number_list(self, on, type=None, start=None, **kw):
00249         docbook_ol_types = {'1': "arabic",
00250                             'a': "loweralpha",
00251                             'A': "upperalpha",
00252                             'i': "lowerroman",
00253                             'I': "upperroman"}
00254 
00255         if type and docbook_ol_types.has_key(type):
00256             attrs = [("numeration", docbook_ol_types[type])]
00257         else:
00258             attrs = []
00259 
00260         return self._handleNode('orderedlist', on, attrs)
00261 
00262     def bullet_list(self, on, **kw):
00263         return self._handleNode("itemizedlist", on)
00264 
00265     def listitem(self, on, style=None, **kw):
00266         if self.cur.nodeName == "glosslist" or self.cur.nodeName == "glossentry":
00267             return self.definition_desc(on)
00268         if on and self.cur.nodeName == "listitem":
00269             """If we are inside a listitem, and someone wants to create a new one, it
00270             means they forgot to close the old one, and we need to do it for them."""
00271             self.listitem(0)
00272 
00273         args = []
00274         if on and style:
00275             styles = self._convertStylesToDict(style)
00276             if styles.has_key('list-style-type'):
00277                 args.append(('override', styles['list-style-type']))
00278 
00279         return self._handleNode("listitem", on, attributes=args)
00280 
00281     def definition_list(self, on, **kw):
00282         return self._handleNode("glosslist", on)
00283 
00284     def definition_term(self, on, compact=0, **kw):
00285         if on:
00286             self._handleNode("glossentry", on)
00287             self._handleNode("glossterm", on)
00288         else:
00289             if self._hasContent(self.cur):
00290                 self._handleNode("glossterm", on)
00291                 self._handleNode("glossentry", on)
00292             else:
00293                 # No term info :(
00294                 term = self.cur
00295                 entry = term.parentNode
00296                 self.cur = entry.parentNode
00297                 self.cur.removeChild(entry)
00298         return ""
00299 
00300     def definition_desc(self, on, **kw):
00301         if on:
00302             if self.cur.nodeName == "glossentry":
00303                 # Good, we can add it here.
00304                 self._handleNode("glossdef", on)
00305                 return ""
00306 
00307             # We are somewhere else, let's see...
00308             if self.cur.nodeName != "glosslist":
00309                 self._emitComment("Trying to add a definition, but we arent in a glosslist")
00310                 return ""
00311             if not self.cur.lastChild or self.cur.lastChild.nodeName != "glossentry":
00312                 self._emitComment("Trying to add a definition, but there is no entry")
00313                 return ""
00314 
00315             # Found it, calling again
00316             self.cur = self.cur.lastChild
00317             return self.definition_desc(on)
00318         else:
00319             if not self._hasContent(self.cur):
00320                 # Seems no valuable info was added
00321                 assert(self.cur.nodeName == "glossdef")
00322                 toRemove = self.cur
00323                 self.cur = toRemove.parentNode
00324                 self.cur.removeChild(toRemove)
00325 
00326             while self.cur.nodeName != "glosslist":
00327                 self.cur = self.cur.parentNode
00328         return ""
00329 
00330 ### Links ###########################################################
00331     # TODO: Fix anchors to documents which are included. Needs probably to be
00332     #       a postprocessing rule. Could be done by having the anchors have
00333     #       the "linkend" value of PageName#anchor. Then at post process the
00334     #       following would be done for all urls:
00335     #        - get all ulinks with an anchor part in their url
00336     #        - get the ulink's PageName#anchor -part by removing baseurl part
00337     #        - if any of our <anchor> elements have the same PageName#anchor
00338     #          value as our <ulink>, then replace the ulink with a link
00339     #          element.
00340     #       Note: This would make it impossible to link to a section on
00341     #             the original webpage. The link would instead point
00342     #             within the docbook page and not to the webpage.
00343 
00344 
00345     def pagelink(self, on, pagename='', page=None, **kw):
00346         FormatterBase.pagelink(self, on, pagename, page, **kw)
00347         return self.interwikilink(on, 'Self', pagename, **kw)
00348 
00349     def interwikilink(self, on, interwiki='', pagename='', **kw):
00350         if not on:
00351             return self.url(on, **kw)
00352 
00353         wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_interwiki(self.request, interwiki, pagename)
00354         wikiurl = wikiutil.mapURL(self.request, wikiurl)
00355         href = wikiutil.join_wiki(wikiurl, wikitail)
00356         if kw.has_key("anchor"):
00357             href="%s#%s"%(href, kw['anchor'])
00358 
00359         if pagename == self.page.page_name:
00360             kw['is_self']=True
00361 
00362         return self.url(on, href, **kw)
00363 
00364     def url(self, on, url=None, css=None, **kw):
00365         if url and url.startswith("/"):
00366             # convert to absolute path:
00367             url = "%s%s"%(self.request.base_url, url)
00368 
00369         if not on:
00370             self._cleanupUlinkNode()
00371 
00372         if kw.has_key("anchor") and kw.has_key("is_self") and kw["is_self"]:
00373             #handle the case where we are pointing to somewhere insidee our own document
00374             return self._handleNode("link", on, attributes=(('linkend', kw["anchor"]), ))
00375         else:
00376             return self._handleNode("ulink", on, attributes=(('url', url), ))
00377 
00378     def anchordef(self, name):
00379         self._handleNode("anchor", True, attributes=(('id', name), ))
00380         self._handleNode("anchor", False)
00381         return ""
00382 
00383     def anchorlink(self, on, name='', **kw):
00384         linkid = kw.get('id', None)
00385         attrs = []
00386         if name != '':
00387             attrs.append(('endterm', name))
00388         if id is not None:
00389             attrs.append(('linkend', linkid))
00390         elif name != '':
00391             attrs.append(('linkend', name))
00392 
00393         return self._handleNode("link", on, attrs)
00394 
00395 ### Attachments ######################################################
00396 
00397     def attachment_link(self, on, url=None, **kw):
00398         assert on in (0, 1, False, True) # make sure we get called the new way, not like the 1.5 api was
00399         # we do not output a "upload link" when outputting docbook
00400         if on:
00401             pagename, filename = AttachFile.absoluteName(url, self.page.page_name)
00402             fname = wikiutil.taintfilename(filename)
00403             target = AttachFile.getAttachUrl(pagename, filename, self.request)
00404             return self.url(1, target, title="attachment:%s" % url)
00405         else:
00406             return self.url(0)
00407 
00408     def attachment_image(self, url, **kw):
00409         """
00410         Figures out the absolute path to the image and then hands over to
00411         the image function. Any title is also handed over, and an additional
00412         title suggestion is made based on filename. The image function will
00413         use the suggestion if no other text alternative is found.
00414 
00415         If the file is not found, then a simple text will replace it.
00416         """
00417         _ = self.request.getText
00418         pagename, filename = AttachFile.absoluteName(url, self.page.page_name)
00419         fname = wikiutil.taintfilename(filename)
00420         fpath = AttachFile.getFilename(self.request, pagename, fname)
00421         if not os.path.exists(fpath):
00422             return self.text("[attachment:%s]" % url)
00423         else:
00424             return self.image(
00425                 src=AttachFile.getAttachUrl(pagename, filename, self.request, addts=1),
00426                 attachment_title=url,
00427                 **kw)
00428 
00429 
00430     def attachment_drawing(self, url, text, **kw):
00431         _ = self.request.getText
00432         pagename, filename = AttachFile.absoluteName(url, self.page.page_name)
00433         fname = wikiutil.taintfilename(filename)
00434         drawing = fname
00435         fname = fname + ".png"
00436         filename = filename + ".png"
00437         fpath = AttachFile.getFilename(self.request, pagename, fname)
00438         if not os.path.exists(fpath):
00439             return self.text("[drawing:%s]" % url)
00440         else:
00441             src = AttachFile.getAttachUrl(pagename, filename, self.request, addts=1)
00442             return self.image(alt=drawing, src=src, html_class="drawing")
00443 
00444 ### Images and Smileys ##############################################
00445 
00446     def image(self, src=None, **kw):
00447         if src:
00448             kw['src'] = src
00449         media = self.doc.createElement('inlinemediaobject')
00450 
00451         imagewrap = self.doc.createElement('imageobject')
00452         media.appendChild(imagewrap)
00453 
00454         image = self.doc.createElement('imagedata')
00455         if kw.has_key('src'):
00456             src = kw['src']
00457             if src.startswith("/"):
00458                 # convert to absolute path:
00459                 src = self.request.url_root + src
00460             image.setAttribute('fileref', src)
00461         if kw.has_key('width'):
00462             image.setAttribute('width', str(kw['width']))
00463         if kw.has_key('height'):
00464             image.setAttribute('depth', str(kw['height']))
00465         imagewrap.appendChild(image)
00466 
00467         # Look for any suitable title, order is important.
00468         title = ''
00469         for a in ('title', 'html_title', 'alt', 'html_alt', 'attachment_title'):
00470             if kw.has_key(a):
00471                 title = kw[a]
00472                 break
00473         if title:
00474             txtcontainer = self.doc.createElement('textobject')
00475             self._addTextElem(txtcontainer, "phrase", title)
00476             media.appendChild(txtcontainer)
00477 
00478         self.cur.appendChild(media)
00479         return ""
00480 
00481     def transclusion(self, on, **kw):
00482         # TODO, see text_html formatter
00483         self._emitComment('transclusion is not implemented in DocBook formatter')
00484         return ""
00485 
00486     def transclusion_param(self, **kw):
00487         # TODO, see text_html formatter
00488         self._emitComment('transclusion parameters are not implemented in DocBook formatter')
00489         return ""
00490 
00491     def smiley(self, text):
00492         return self.request.theme.make_icon(text)
00493 
00494     def icon(self, type):
00495         return '' # self.request.theme.make_icon(type)
00496 
00497 
00498 ### Code area #######################################################
00499 
00500     def code_area(self, on, code_id, code_type=None, show=0, start=-1, step=-1, msg=None):
00501         """Creates a formatted code region using screen or programlisting,
00502         depending on if a programming language was defined (code_type).
00503 
00504         The code_id is not used for anything in this formatter, but is just
00505         there to remain compatible with the HTML formatter's function.
00506 
00507         Line numbering is supported natively by DocBook so if linenumbering
00508         is requested the relevant attribute will be set.
00509 
00510         Call once with on=1 to start the region, and a second time
00511         with on=0 to end it.
00512         """
00513 
00514         if not on:
00515             return self._handleNode(None, on)
00516 
00517         show = show and 'numbered' or 'unnumbered'
00518         if start < 1:
00519             start = 1
00520 
00521         programming_languages = {"ColorizedJava": "java",
00522                                  "ColorizedPython": "python",
00523                                  "ColorizedCPlusPlus": "c++",
00524                                  "ColorizedPascal": "pascal",
00525                                 }
00526 
00527         if code_type is None:
00528             attrs = (('linenumbering', show),
00529                      ('startinglinenumber', str(start)),
00530                      ('format', 'linespecific'),
00531                      )
00532             return self._handleNode("screen", on, attributes=attrs)
00533         else:
00534             if programming_languages.has_key(code_type):
00535                 code_type = programming_languages[code_type]
00536 
00537             attrs = (('linenumbering', show),
00538                      ('startinglinenumber', str(start)),
00539                      ('language', code_type),
00540                      ('format', 'linespecific'),
00541                      )
00542             return self._handleNode("programlisting", on, attributes=attrs)
00543 
00544     def code_line(self, on):
00545         if on:
00546             self.cur.appendChild(self.doc.createTextNode('\n'))
00547         return ''
00548 
00549     def code_token(self, on, tok_type):
00550         """
00551         DocBook has some support for semantic annotation of code so the
00552         known tokens will be mapped to DocBook entities.
00553         """
00554         toks_map = {'ID': 'methodname',
00555                     'Operator': '',
00556                     'Char': '',
00557                     'Comment': 'lineannotation',
00558                     'Number': '',
00559                     'String': 'phrase',
00560                     'SPChar': '',
00561                     'ResWord': 'token',
00562                     'ConsWord': 'symbol',
00563                     'Error': 'errortext',
00564                     'ResWord2': 'type',
00565                     'Special': '',
00566                     'Preprc': '',
00567                     'Text': '',
00568                    }
00569         if toks_map.has_key(tok_type) and toks_map[tok_type]:
00570             return self._handleFormatting(toks_map[tok_type], on)
00571         else:
00572             return ""
00573 ### Macro ###########################################################
00574 
00575     def macro(self, macro_obj, name, args, markup=None):
00576         """As far as the DocBook formatter is conserned there are three
00577         kinds of macros: Bad, Handled and Unknown.
00578 
00579         The Bad ones are the ones that are known not to work, and are on its
00580         blacklist. They will be ignored and an XML comment will be written
00581         noting that the macro is not supported.
00582 
00583         Handled macros are such macros that code is written to handle them.
00584         For example for the FootNote macro it means that instead of executing
00585         the macro, a DocBook footnote entity is created, with the relevant
00586         pieces of information filles in.
00587 
00588         The Unknown are handled by executing the macro and capturing any
00589         textual output. There shouldn't be any textual output since macros
00590         should call formatter methods. This is unfortunately not always true,
00591         so the output it is then fed in to an xml parser and the
00592         resulting nodes copied to the DocBook-dom tree. If the output is not
00593         valid xml then a comment is written in the DocBook that the macro
00594         should be fixed.
00595 
00596         """
00597         # Another alternative would be to feed the output to rawHTML or even
00598         # combining these two approaches. The _best_ alternative would be to
00599         # fix the macros.
00600         excludes=("articleinfo", "title")
00601 
00602         if name in self.blacklisted_macros:
00603             self._emitComment("The macro %s doesn't work with the DocBook formatter." % name)
00604 
00605         elif name == "FootNote":
00606             footnote = self.doc.createElement('footnote')
00607             self._addTextElem(footnote, "para", str(args))
00608             self.cur.appendChild(footnote)
00609 
00610         elif name == "Include":
00611             was_in_para = self.cur.nodeName == "para"
00612             if was_in_para:
00613                 self.paragraph(0)
00614             text = FormatterBase.macro(self, macro_obj, name, args)
00615             if text.strip():
00616                 self._copyExternalNodes(Sax.FromXml(text).documentElement.childNodes, exclude=excludes)
00617             if was_in_para:
00618                 self.paragraph(1)
00619 
00620         else:
00621             text = FormatterBase.macro(self, macro_obj, name, args)
00622             if text:
00623                 from xml.parsers.expat import ExpatError
00624                 try:
00625                     xml_dom = Sax.FromXml(text).documentElement.childNodes
00626                     self._copyExternalNodes(xml_dom, exclude=excludes)
00627                 except ExpatError:
00628                     self._emitComment("The macro %s caused an error and should be blacklisted. It returned the data '%s' which caused the docbook-formatter to choke. Please file a bug." % (name, text))
00629 
00630         return u""
00631 
00632 ### Util functions ##################################################
00633 
00634     def _copyExternalNodes(self, nodes, deep=1, target=None, exclude=()):
00635         if not target:
00636             target = self.cur
00637 
00638         for node in nodes:
00639             if node.nodeName in exclude:
00640                 pass
00641             elif target.nodeName == "para" and node.nodeName == "para":
00642                 self._copyExternalNodes(node.childNodes, target=target)
00643                 self.cur = target.parentNode
00644             else:
00645                 target.appendChild(self.doc.importNode(node, deep))
00646 
00647     def _emitComment(self, text):
00648         text = text.replace("--", "- -") # There cannot be "--" in XML comment
00649         self.cur.appendChild(self.doc.createComment(text))
00650 
00651     def _handleNode(self, name, on, attributes=()):
00652         if on:
00653             node = self.doc.createElement(name)
00654             self.cur.appendChild(node)
00655             if len(attributes) > 0:
00656                 for name, value in attributes:
00657                     node.setAttribute(name, value)
00658             self.cur = node
00659         else:
00660             """
00661                 Because we prevent para inside para, we might get extra "please
00662                 exit para" when we are no longer inside one.
00663 
00664                 TODO: Maybe rethink the para in para case
00665             """
00666             if name == "para" and self.cur.nodeName != "para":
00667                 return ""
00668 
00669             self.cur = self.cur.parentNode
00670         return ""
00671 
00672     def _handleFormatting(self, name, on, attributes=()):
00673         # We add all the elements we create to the list of elements that should not contain a section
00674         if name not in self.section_should_break:
00675             self.section_should_break.append(name)
00676         return self._handleNode(name, on, attributes)
00677 
00678     def _isInsidePreformatted(self):
00679         """Walks all parents and checks if one is of a preformatted type, which
00680            means the child would need to be preformatted == embedded in a cdata
00681            section"""
00682         n = self.cur
00683         while n:
00684             if n.nodeName in ("screen", "programlisting"):
00685                 return True
00686             n = n.parentNode
00687         return False
00688 
00689     def _hasContent(self, node):
00690         if node.attributes and len(node.attributes):
00691             return True
00692         for child in node.childNodes:
00693             if child.nodeType == Node.TEXT_NODE and child.nodeValue.strip():
00694                 return True
00695             elif child.nodeType == Node.CDATA_SECTION_NODE and child.nodeValue.strip():
00696                 return True
00697 
00698             if self._hasContent(child):
00699                 return True
00700         return False
00701 
00702     def _addTitleElement(self, titleTxt, targetNode=None):
00703         if not targetNode:
00704             targetNode = self.cur
00705         self._addTextElem(targetNode, "title", titleTxt)
00706 
00707     def _convertStylesToDict(self, styles):
00708         '''Takes the CSS styling information and converts it to a dict'''
00709         attrs = {}
00710         for s in styles.split(";"):
00711             if s.strip(' "') == "":
00712                 continue
00713             if ":" not in s:
00714                 continue
00715             (key, value) = s.split(":", 1)
00716             key = key.strip(' "')
00717             value = value.strip(' "')
00718 
00719             if key == 'vertical-align':
00720                 key = 'valign'
00721             elif key == 'text-align':
00722                 key = 'align'
00723             elif key == 'background-color':
00724                 key = 'bgcolor'
00725 
00726             attrs[key] = value
00727         return attrs
00728 
00729     def _cleanupUlinkNode(self):
00730         """
00731         Moin adds the url as the text to a link, if no text is specified.
00732         Docbook does it when a docbook is rendered, so we don't want moin to
00733         do it and so if the url is exactly the same as the text node inside
00734         the ulink, we remove the text node.
00735         """
00736         if self.cur.nodeName == "ulink" and len(self.cur.childNodes) == 1 \
00737                 and self.cur.firstChild.nodeType == Node.TEXT_NODE \
00738                 and self.cur.firstChild.nodeValue.strip() == self.cur.getAttribute('url').strip():
00739             self.cur.removeChild(self.cur.firstChild)
00740 
00741     def _addTextElem(self, target, elemName, text):
00742         """
00743         Creates an element of the name elemName and adds a text node to it
00744         with the nodeValue of text. The new element is then added as a child
00745         to the element target.
00746         """
00747         newElement = self.doc.createElement(elemName)
00748         newElement.appendChild(self.doc.createTextNode(text))
00749         target.appendChild(newElement)
00750 
00751 
00752     def _addRevisionHistory(self, targetNode):
00753         """
00754         This will generate a revhistory element which it will populate with
00755         revision nodes. Each revision has the revnumber, date and author-
00756         initial elements, and if a comment was supplied, the comment element.
00757 
00758         The date elements format depends on the users settings, so it will
00759         be in the same format as the revision history as viewed in the
00760         page info on the wiki.
00761 
00762         The authorinitials will be the UserName or if it was an anonymous
00763         edit, then it will be the hostname/ip-address.
00764 
00765         The revision history of included documents is NOT included at the
00766         moment due to technical difficulties.
00767         """
00768         _ = self.request.getText
00769         log = editlog.EditLog(self.request, rootpagename=self.title)
00770         user_cache = {}
00771 
00772         history = self.doc.createElement("revhistory")
00773 
00774         # read in the complete log of this page
00775         for line in log.reverse():
00776             if not line.action in ('SAVE', 'SAVENEW', 'SAVE/REVERT', 'SAVE/RENAME', ):
00777                 #Let's ignore adding of attachments
00778                 continue
00779             revision = self.doc.createElement("revision")
00780 
00781             # Revision number (without preceeding zeros)
00782             self._addTextElem(revision, "revnumber", line.rev.lstrip('0'))
00783 
00784             # Date of revision
00785             date_text = self.request.user.getFormattedDateTime(
00786                 wikiutil.version2timestamp(line.ed_time_usecs))
00787             self._addTextElem(revision, "date", date_text)
00788 
00789             # Author or revision
00790             if not (line.userid in user_cache):
00791                 user_cache[line.userid] = user.User(self.request, line.userid, auth_method="text_docbook:740")
00792             author = user_cache[line.userid]
00793             if author and author.name:
00794                 self._addTextElem(revision, "authorinitials", author.name)
00795             else:
00796                 self._addTextElem(revision, "authorinitials", line.hostname)
00797 
00798             # Comment from author of revision
00799             comment = line.comment
00800             if not comment:
00801                 if '/REVERT' in line.action:
00802                     comment = _("Revert to revision %(rev)d.") % {'rev': int(line.extra)}
00803                 elif '/RENAME' in line.action:
00804                     comment = _("Renamed from '%(oldpagename)s'.") % {'oldpagename': line.extra}
00805             if comment:
00806                 self._addTextElem(revision, "revremark", comment)
00807 
00808             history.appendChild(revision)
00809 
00810         if history.firstChild:
00811             #only add revision history is there is history to add
00812             targetNode.appendChild(history)
00813 
00814 ### Not supported ###################################################
00815 
00816     def rule(self, size=0, **kw):
00817         self._emitComment('rule (<hr>) is not applicable to DocBook')
00818         return ""
00819 
00820     def small(self, on, **kw):
00821         if on:
00822             self._emitComment('"~-smaller-~" is not applicable to DocBook')
00823         return ""
00824 
00825     def big(self, on, **kw):
00826         if on:
00827             self._emitComment('"~+bigger+~" is not applicable to DocBook')
00828         return ""
00829 
00830     def rawHTML(self, markup):
00831         if markup.strip() == "":
00832             return ""
00833 
00834         if "<" not in markup and ">" not in markup:
00835             # Seems there are no tags.
00836             # Let's get all the "entity references".
00837             cleaned = markup
00838             import re
00839             entities = re.compile("&(?P<e>[a-zA-Z]+);").findall(cleaned)
00840             from htmlentitydefs import name2codepoint
00841             for ent in entities:
00842                 if name2codepoint.has_key(ent):
00843                     cleaned = cleaned.replace("&%s;" % ent, unichr(name2codepoint[ent]))
00844 
00845             # Then we replace all escaped unicodes.
00846             escapedunicodes = re.compile("&#(?P<h>[0-9]+);").findall(markup)
00847             for uni in escapedunicodes:
00848                 cleaned = cleaned.replace("&#%s;" % uni, unichr(int(uni)))
00849 
00850             self.text(cleaned)
00851 
00852         self._emitComment("RAW HTML: "+markup)
00853         return ""
00854 
00855     def div(self, on, **kw):
00856         """A div cannot really be supported in DocBook as it carries no
00857         semantic meaning, but the special cases can be handled when the class
00858         of the div carries the information.
00859 
00860         A dictionary is used for mapping between class names and the
00861         corresponding DocBook element.
00862 
00863         A MoinMoin comment is represented in DocBook by the remark element.
00864 
00865         The rest of the known classes are the admonitions in DocBook:
00866         warning, caution, important, note and hint
00867 
00868         Note: The remark entity can only contain inline elements, so it is
00869               very likely that the use of a comment div will produce invalid
00870               DocBook.
00871         """
00872         # Map your styles to docbook elements.
00873         # Even though comment is right now the only one that needs to be
00874         # mapped, having two different ways is more complicated than having
00875         # a single common way. Code clarity and generality first, especially
00876         # since we might want to do more div to docbook mappings in the future.
00877         class_to_docbook = {"warning":   "warning",
00878                             "caution":   "caution",
00879                             "important": "important",
00880                             "note":      "note",
00881                             "tip":       "tip",
00882                             "comment":   "remark"}
00883 
00884         if on and kw.get('css_class'):
00885             css_classes = kw.get('css_class').split()
00886             for style in class_to_docbook.keys():
00887                 if style in css_classes:
00888                     return self._handleNode(class_to_docbook[style], on)
00889 
00890         elif not on:
00891             if self.cur.nodeName in class_to_docbook.values():
00892                 return self._handleNode(self.cur.nodeName, on)
00893 
00894         return ""
00895 
00896     def span(self, on, **kw):
00897         """A span cannot really be supported in DocBook as it carries no
00898         semantic meaning, but the special case of a comment can be handled.
00899 
00900         A comment is represented in DocBook by the remark element.
00901 
00902         A comment span is recognized by the fact that it has the class
00903         "comment". Other cases of div use are ignored.
00904         """
00905         css_class = kw.get('css_class')
00906         if on and css_class and 'comment' in css_class.split():
00907             self._handleFormatting("remark", on)
00908         if not on and self.cur.nodeName == "remark":
00909             self._handleFormatting("remark", on)
00910         return ""
00911 
00912 
00913 
00914 ### Tables ##########################################################
00915 
00916     def table(self, on, attrs=(), **kw):
00917         if(on):
00918             if attrs:
00919                 self.curtable = Table(self, self.doc, self.cur, dict(attrs))
00920             else:
00921                 self.curtable = Table(self, self.doc, self.cur)
00922             self.cur = self.curtable.tableNode
00923         else:
00924             self.cur = self.curtable.finalizeTable()
00925             self.curtable = None
00926         return ""
00927 
00928     def table_row(self, on, attrs=(), **kw):
00929         if(on):
00930             if attrs:
00931                 self.curtable.addRow(dict(attrs))
00932             else:
00933                 self.cur = self.curtable.addRow()
00934         return ""
00935 
00936     def table_cell(self, on, attrs=(), **kw):
00937         if(on):
00938             if attrs:
00939                 self.cur = self.curtable.addCell(dict(attrs))
00940             else:
00941                 self.cur = self.curtable.addCell()
00942         return ""
00943 
class Table:
    '''Helper that collects what is needed to build a DocBook table.

    The table is created as an informaltable/tgroup below the given parent.
    Rows and cells are added one by one; while that happens the helper keeps
    track of column widths and of how many columns the widest row has. When
    all relevant data is gathered, finalizeTable() writes the column
    specifications and attaches the body.

    Note that it expects all cell/row arguments to be passed in a dict.
    '''

    def __init__(self, formatter, doc, parent, argsdict=None):
        """Create the informaltable and tgroup elements below parent.

        formatter is used for emitting comments and parsing style strings,
        doc is the DOM document used to create elements. argsdict is
        accepted for interface compatibility and is currently not used.
        """
        self.formatter = formatter
        self.doc = doc

        self.tableNode = self.doc.createElement('informaltable')
        parent.appendChild(self.tableNode)
        self.colWidths = {}  # str(column index) -> colwidth such as "30*"
        self.tgroup = self.doc.createElement('tgroup')
        # Bug in yelp, the two lines below don't affect rendering
        #self.tgroup.setAttribute('rowsep', '1')
        #self.tgroup.setAttribute('colsep', '1')
        self.curColumn = 0
        self.maxColumn = 0
        self.row = None
        self.tableNode.appendChild(self.tgroup)

        self.tbody = self.doc.createElement('tbody') # Note: This gets appended in finalizeTable

    def finalizeTable(self):
        """Write the column information and attach the table body.

        Sets the 'cols' attribute of the tgroup to the largest column count
        seen, adds one colspec element per column (with a colwidth where a
        width other than "1*" was recorded), appends the tbody to the tgroup
        and returns the parent node of the table.

        The widths are gathered from the style attributes passed to
        addCell().
        """
        self.tgroup.setAttribute('cols', str(self.maxColumn))
        for colnr in range(self.maxColumn):
            colspecElem = self.doc.createElement('colspec')
            colspecElem.setAttribute('colname', 'col_%s' % colnr)
            width = self.colWidths.get(str(colnr))
            if width is not None and width != "1*":
                colspecElem.setAttribute('colwidth', width)
            self.tgroup.appendChild(colspecElem)
        self.tgroup.appendChild(self.tbody)
        return self.tableNode.parentNode

    def addRow(self, argsdict=None):
        """Start a new row in the table body and return the row element.

        The column counter is reset. argsdict is accepted for interface
        compatibility and is currently not used.
        """
        self.curColumn = 0
        self.row = self.doc.createElement('row')
        # Bug in yelp, doesn't affect the outcome.
        self.row.setAttribute("rowsep", "1") #Rows should have lines between them
        self.tbody.appendChild(self.row)
        return self.row

    def addCell(self, argsdict=None):
        """Add an entry element to the current row and return it.

        argsdict may hold 'style' (a CSS declaration string, expanded
        through the formatter's _convertStylesToDict), 'rowspan', 'colspan',
        'align', 'valign' and 'width'. The dict passed in is not modified.
        A row must have been started with addRow() first.
        """
        # Work on a private copy: the style expansion below adds keys, and
        # neither the caller's dict nor a shared default may be altered.
        argsdict = dict(argsdict or {})
        if 'style' in argsdict:
            argsdict.update(self.formatter._convertStylesToDict(argsdict['style'].strip('"')))

        cell = self.doc.createElement('entry')
        cell.setAttribute('rowsep', '1')
        cell.setAttribute('colsep', '1')

        self.row.appendChild(cell)
        self._handleSimpleCellAttributes(cell, argsdict)
        self._handleColWidth(argsdict)
        self.curColumn += self._handleColSpan(cell, argsdict)

        self.maxColumn = max(self.curColumn, self.maxColumn)

        return cell

    def _handleColWidth(self, argsdict=None):
        """Record a percentage 'width' as the width of the current column.

        "30%" is stored as "30*". Widths that are not percentages are not
        supported; a comment saying so is emitted instead.
        """
        if not argsdict or "width" not in argsdict:
            return
        width = argsdict["width"].strip('"')
        if not width.endswith("%"):
            self.formatter._emitComment("Width %s not supported" % width)
            return

        self.colWidths[str(self.curColumn)] = width[:-1] + "*"

    def _handleColSpan(self, element, argsdict=None):
        """Returns the number of columns this entry spans.

        When a 'colspan' is given, the namest/nameend attributes of the
        entry are set to the first and last column it covers.
        """
        if not argsdict or 'colspan' not in argsdict:
            return 1
        assert element.nodeName == "entry"
        extracols = int(argsdict['colspan'].strip('"')) - 1
        element.setAttribute('namest', "col_" + str(self.curColumn))
        element.setAttribute('nameend', "col_" + str(self.curColumn + extracols))
        return 1 + extracols

    def _handleSimpleCellAttributes(self, element, argsdict=None):
        """Apply rowspan, align and valign from argsdict to the entry.

        'rowspan' becomes the 'morerows' attribute (rowspan - 1). 'align'
        and 'valign' are copied only when their value is one of the known
        safe values; otherwise a comment is emitted and the attribute is
        left out.
        """
        if not argsdict:
            return
        assert element.nodeName == "entry"

        safe_values_for = {'valign': ('top', 'middle', 'bottom'),
                           'align': ('left', 'center', 'right'),
                          }

        if 'rowspan' in argsdict:
            extrarows = int(argsdict['rowspan'].strip('"')) - 1
            element.setAttribute('morerows', str(extrarows))

        for attr, label in (('align', 'Alignment'),
                            ('valign', 'Vertical alignment')):
            if attr not in argsdict:
                continue
            value = argsdict[attr].strip('"')
            if value in safe_values_for[attr]:
                element.setAttribute(attr, value)
            else:
                self.formatter._emitComment("%s %s not supported" % (label, value))
01062 
01063