Back to index

moin  1.9.0~rc2
sitemap.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - "sitemap" action
00004 
00005     Generate a URL list of all your pages (using google's sitemap XML format).
00006 
00007     @copyright: 2006-2008 MoinMoin:ThomasWaldmann
00008     @license: GNU GPL, see COPYING for details.
00009 """
00010 import time
00011 from MoinMoin import wikiutil
00012 
# Timestamp layout with a hard-coded "+00:00" offset, so it must only be
# filled from UTC time.
datetime_fmt = "%Y-%m-%dT%H:%M:%S+00:00"


def now():
    """ return the current UTC time, rendered with datetime_fmt """
    utc_now = time.gmtime()
    return time.strftime(datetime_fmt, utc_now)
00017 
def make_url_xml(request, vars):
    """ assemble a single <url> xml fragment

    @param request: request object; only getQualifiedURL() is used here, to
                    turn the server-relative url into a complete one
    @param vars: mapping providing 'url' (path starting with /), 'lastmod',
                 'changefreq' and 'priority'. Additional keys are ignored
                 and the mapping itself is left unmodified.
    @return: the <url>..</url> fragment as a string, ending with a newline
    """
    from xml.sax.saxutils import escape

    # work on a copy, so the caller's mapping is not changed behind its back
    fields = dict(vars)
    # add protocol:server - url must be complete path starting with/from /
    # The result goes into XML text, so &, < and > have to be entity-escaped.
    fields['url'] = escape(request.getQualifiedURL(vars['url']))
    return """\
<url>
  <loc>%(url)s</loc>
  <lastmod>%(lastmod)s</lastmod>
  <changefreq>%(changefreq)s</changefreq>
  <priority>%(priority)s</priority>
</url>
""" % fields
00030 
def sitemap_url(request, page):
    """ return a sitemap <url>..</url> fragment for page object <page>

    changefreq and priority are chosen from the page's name / type. For
    the macro-driven pages (RecentChanges, TitleIndex) lastmod is replaced
    by the current time.

    @param request: request object (cfg.page_front_page is read from it)
    @param page: the page object to describe
    @return: xml fragment as built by make_url_xml()
    """
    url = page.url(request)
    pagename = page.page_name
    lastmod = page.mtime_printable(request)
    if lastmod == "0": # can happen in case of errors
        lastmod = now()

    # page's changefreq, priority and lastmod depends on page type / name
    if pagename in (u"RecentChanges", u"TitleIndex"):
        # important dynamic pages with macros
        changefreq = "hourly"
        priority = "0.9"
        lastmod = now() # the page text mtime never changes, but the macro output DOES

    elif pagename == request.cfg.page_front_page:
        # important user edited pages
        changefreq = "hourly"
        priority = "1.0"

    elif wikiutil.isSystemPage(request, pagename):
        # other system pages are rather boring
        changefreq = "yearly"
        priority = "0.1"

    else:
        # these are the content pages:
        changefreq = "daily"
        priority = "0.5"

    # hand over exactly the fields the template needs, instead of locals()
    return make_url_xml(request, {
        'url': url,
        'lastmod': lastmod,
        'changefreq': changefreq,
        'priority': priority,
    })
00062 
def execute(pagename, request):
    """ send the sitemap XML document for this wiki

    The document lists the wiki's root url first, followed by one <url>
    entry per page returned by request.rootpage.getPageDict(), sorted by
    page name.

    The 'underlay' request value is passed on as include_underlay; it
    defaults to 1, also when it is not a valid integer.

    @param pagename: name of the page the action was called on (not used)
    @param request: request object the document is written to
    """
    # NOTE(review): presumably this makes page.mtime_printable() (see
    # sitemap_url) render timestamps in the sitemap format - confirm.
    request.user.datetime_fmt = datetime_fmt

    request.mimetype = 'text/xml'

    # we emit a piece of data so other side doesn't get bored:
    request.write("""<?xml version="1.0" encoding="UTF-8"?>\r\n""")

    # we include the root url as an important and often changed URL
    rooturl = request.script_root + '/'
    result = [
        """<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">\n""",
        make_url_xml(request, {
            'url': rooturl,
            'lastmod': now(), # fake
            'changefreq': 'hourly',
            'priority': '1.0',
        }),
    ]

    # Get page dict readable by current user
    try:
        underlay = int(request.values.get('underlay', 1))
    except ValueError:
        underlay = 1
    pages = request.rootpage.getPageDict(include_underlay=underlay)
    # sorted() works no matter whether keys() gives a list or a view
    for name in sorted(pages):
        result.append(sitemap_url(request, pages[name]))

    result.append("""</urlset>\n""")

    result = "".join(result)
    result = result.replace("\n", "\r\n") # text/* requires CR/LF

    # emit all real data
    request.write(result)
00102