Back to index

plone3  3.1.7
html2captioned.py
Go to the documentation of this file.
00001 # Portal transform for images with captions
00002 #
00003 # We want to be able to support captions in images.
00004 # The easiest way to do this is to define a Portal Transform which is
00005 # applied to the HTML body text on output.
00006 #
00007 # The transform finds all the embedded images, and replaces them with
00008 # an appropriate chunk of HTML to include the caption.
00009 #
00010 try:
00011     from Products.PortalTransforms.z3.interfaces import ITransform
00012 except ImportError:
00013     ITransform = None
00014 from Products.PortalTransforms.interfaces import itransform
00015 
00016 from DocumentTemplate.DT_Util import html_quote
00017 from DocumentTemplate.DT_Var import newline_to_br
00018 from Products.CMFCore.utils import getToolByName
00019 import re
00020 from cgi import escape
00021 from urlparse import urlsplit, urljoin, urlunsplit
00022 from urllib import unquote_plus, quote_plus
00023 from Acquisition import aq_base
00024 from htmlentitydefs import name2codepoint
00025 from Products.kupu.plone.config import UID_PATTERN
00026 from zope.interface import implements
00027 
# Work on a private copy so that adding the 'apos' entry below does not
# modify the table exported by htmlentitydefs.
name2codepoint = name2codepoint.copy()
# Add an 'apos' entry that maps to the apostrophe character.
name2codepoint['apos']=ord("'")
00030 
00031 __revision__ = '$Id$'
00032 
# IMAGE_PATTERN matches an image tag on its own, or an image tag
# enclosed in a simple <p> or <div>. In the latter case we strip out
# the enclosing tag since we are going to insert our own.
#
# PATIMG: an <img> tag whose class attribute contains 'captioned'.
PATIMG = '\\<img[^>]+class\s*=[^=>]*captioned[^>]+\\>'
# PATA: the image wrapped in a link (the opening <a> tag is captured as
# group 'atag0'), or the bare image.
PATA = '(?:(?P<atag0>\\<a[^>]*\\>)'+PATIMG+'\\</a\\>)' + '|' + PATIMG
# PAT0: the (optionally linked) image captured as group 'pat0'.
PAT0 = '(?P<pat0>'+PATA+')'
# PAT1: PAT0 inside a <p>/<div>, or standing alone. The same sub-pattern is
# used twice in one expression, so the second copy has its group names
# renamed from '...0' to '...1' ('pat1', 'atag1').
PAT1 = '<(?:p|div)[^>]*>'+PAT0 + '</(?:p|div)>' + '|' + PAT0.replace('0>','1>')
IMAGE_PATTERN = re.compile(PAT1, re.IGNORECASE)
00041 
# Regex to match stupid IE attributes. In IE generated HTML an
# attribute may not be enclosed by quotes if it doesn't contain
# certain punctuation.
#
# ATTR_VALUE is a format string: its single %s is replaced by the name of the
# group that captures the attribute value. The value is either the text that
# follows an opening double quote, or an unquoted run of characters that
# contains no space, '/' or '>'.
ATTR_VALUE = '=(?:"?)(?P<%s>(?<=")[^"]*|[^ \/>]*)'
ATTR_CLASS = ATTR_VALUE % 'class'
ATTR_WIDTH = ATTR_VALUE % 'width'
ATTR_HEIGHT = ATTR_VALUE % 'height'
ATTR_ALT = ATTR_VALUE % 'alt'

# ATTR_PATTERN matches from '<' up to a closing '>' (group 'tag'). While
# scanning it captures the class, width, alt and height values and, for a
# src="resolveuid/..." attribute, the uid part (group 'src', which stops at
# '/', '"', '#', '?' or a space). Any other character is consumed by the
# final '.' alternative, which also matches newlines because of re.DOTALL.
ATTR_PATTERN = re.compile('''
    (?P<tag><
     ( class%s
     | src\s*=\s*"resolveuid/(?P<src>([^/"#? ]*))
     | width%s
     | alt%s
     | height%s
     | .
     )*>
    )''' % (ATTR_CLASS, ATTR_WIDTH, ATTR_ALT, ATTR_HEIGHT), re.VERBOSE | re.IGNORECASE | re.DOTALL)
# SRC_TAIL matches a '/' followed by one path segment. HTMLToCaptioned.convert
# applies it at the end of the 'src' group to pick up the part of the url
# that follows the uid.
SRC_TAIL = re.compile(r'/([^" \/>]+)')

# NOTE(review): CLASS_PATTERN, ALT_PATTERN, END_TAG_PATTERN and IMAGE_TEMPLATE
# are not referenced by the rest of this module as shown here; they may be
# used by other modules -- confirm before removing.
CLASS_PATTERN = re.compile('\s*class\s*=\s*("[^"]*captioned[^"]*"|[^" \/>]+)')
ALT_PATTERN = re.compile('\\balt\s*=\s*("[^"]*"|[^" \/>]+)')
END_TAG_PATTERN = re.compile('(<img[^>]*?)( */?>)')
# %-style template keyed on 'class', 'width', 'tag' and 'caption'.
IMAGE_TEMPLATE = '''\
<dl class="%(class)s" style="width:%(width)spx;">
 <dt style="width:%(width)spx;">
  %(tag)s
 </dt>
 <dd class="image-caption">
  %(caption)s
 </dd>
</dl>
'''
00076 
class HTMLToCaptioned:
    """Transform which adds captions to images embedded in HTML.

    Image tags matched by IMAGE_PATTERN whose src is a ``resolveuid/<uid>``
    reference are replaced by the output of the ``kupu_captioned_image``
    template looked up on the context. Afterwards every url matched by
    UID_PATTERN is rewritten to the url of the object the uid resolves to.
    """
    if ITransform is not None:
        implements(ITransform)
    __implements__ = itransform
    __name__ = "html_to_captioned"
    inputs = ('text/html',)
    output = "text/x-html-captioned"

    def __init__(self, name=None):
        """Create the transform, optionally overriding its registered name."""
        self.config_metadata = {
            'inputs' : ('list', 'Inputs', 'Input(s) MIME type. Change with care.'),
            }
        if name is not None:
            self.__name__ = name

    def name(self):
        """Return the name of this transform."""
        return self.__name__

    def __getattr__(self, attr):
        """Fall back to ``self.config`` for 'inputs' and 'output'.

        Only invoked when normal attribute lookup fails; any other missing
        attribute raises AttributeError.
        """
        if attr == 'inputs':
            return self.config['inputs']
        if attr == 'output':
            return self.config['output']
        raise AttributeError(attr)

    def resolveuid(self, context, reference_catalog, uid):
        """Convert a uid to an object by looking it up in the reference catalog.
        If not found then tries to fallback to a possible hook (e.g. so you could
        resolve uids on another system).

        Returns the resolved object, or None when neither the catalog nor
        the ``kupu_resolveuid_hook`` found on the context yields one.
        """
        target = reference_catalog.lookupObject(uid)
        if target is not None:
            return target
        hook = getattr(context, 'kupu_resolveuid_hook', None)
        if hook is not None:
            target = hook(uid)
        return target

    def convert(self, data, idata, filename=None, **kwargs):
        """convert the data, store the result in idata and return that
        optional argument filename may give the original file name of received data
        additional arguments given to engine's convert, convertTo or __call__ are
        passed back to the transform

        The object on which the translation was invoked is available as context
        (default: None)

        Without a context, or when the context provides no archetype_tool,
        the data is stored in idata unchanged.
        """
        context = kwargs.get('context', None)
        at_tool = None
        if context is not None:
            # getattr with a default, so that a context without the tool
            # degrades to pass-through instead of raising.
            at_tool = getattr(context, 'archetype_tool', None)

        if context is None or at_tool is None:
            # No context to use for replacements, so don't bother trying.
            idata.setData(data)
            return idata

        rc = at_tool.reference_catalog
        # Looked up only once a context is known to exist; reading it
        # before the None check made context-less calls fail.
        template = context.kupu_captioned_image

        def replaceImage(match):
            """Return the captioned markup for one matched image."""
            tag = match.group('pat0') or match.group('pat1')
            atag = match.group('atag0') or match.group('atag1')
            attrs = ATTR_PATTERN.match(tag)
            src = attrs.group('src')
            if src is None:
                return match.group(0) # No change

            target = self.resolveuid(context, rc, src)
            if target is None:
                return match.group(0) # No change

            # Anything following the uid in the url, e.g. an image scale.
            srctail = None
            m = SRC_TAIL.match(tag, attrs.end('src'))
            if m is not None:
                srctail = m.group(1)

            subtarget = None
            d = attrs.groupdict()
            d['class'] = attrs.group('class')
            d['originalwidth'] = attrs.group('width')
            d['originalalt'] = attrs.group('alt')
            d['url_path'] = target.absolute_url_path()
            d['caption'] = newline_to_br(html_quote(target.Description()))
            d['image'] = d['fullimage'] = target
            d['tag'] = None
            d['isfullsize'] = True
            d['width'] = target.width
            if srctail:
                if isinstance(srctail, unicode):
                    srctail = srctail.encode('utf8') # restrictedTraverse doesn't accept unicode
                try:
                    subtarget = target.restrictedTraverse(srctail)
                except:
                    # Deliberately broad: a transform for end user output
                    # should not fail on a bad path, so fall back to a
                    # plain attribute lookup.
                    subtarget = getattr(target, srctail, None)
                if subtarget is not None:
                    d['image'] = subtarget

                if srctail.startswith('image_'):
                    d['tag'] = target.getField('image').tag(target, scale=srctail[6:])
                elif subtarget:
                    d['tag'] = subtarget.tag()

            if d['tag'] is None:
                d['tag'] = target.tag()

            if subtarget is not None:
                d['isfullsize'] = subtarget.width == target.width and subtarget.height == target.height
                d['width'] = subtarget.width

            # strings that may contain non-ascii characters need to be decoded to unicode
            for key in ('caption', 'tag'):
                if isinstance(d[key], str):
                    d[key] = d[key].decode('utf8', 'replace')

            if atag is not None: # Must preserve original link, don't overwrite with a link to the image
                d['isfullsize'] = True
                d['tag'] = "%s%s</a>" % (atag, d['tag'])

            result = template(**d)
            if isinstance(result, str):
                result = result.decode('utf8')
            return result

        # Replace urls that use UIDs with human friendly urls.
        def replaceUids(match):
            """Return the matched tag prefix followed by the resolved url."""
            tag = match.group('tag')
            uid = match.group('uid')
            target = self.resolveuid(context, rc, uid)
            if target is None:
                return match.group(0)
            if getattr(aq_base(target), 'getRemoteUrl', None) is not None:
                url = target.getRemoteUrl()
            else:
                url = target.absolute_url_path()
            return tag + url

        if isinstance(data, str):
            # Transform for end user output should avoid erroring
            # if it can, so use 'replace' on decode.
            data = data.decode('utf8', 'replace')
        html = IMAGE_PATTERN.sub(replaceImage, data)
        html = UID_PATTERN.sub(replaceUids, html)
        if isinstance(html, unicode):
            html = html.encode('utf8') # Indexing requires a string result.
        idata.setData(html)
        return idata
00224 
def register():
    """Return a new HTMLToCaptioned transform instance."""
    transform = HTMLToCaptioned()
    return transform
00227 
def initialize(portal=None):
    """Register the captioning transform with the portal_transforms tool.

    portal -- the object from which 'portal_transforms' is looked up.

    The function used to reference an undefined global named ``portal``
    and so could never succeed; the portal is now passed in explicitly.
    Raises ValueError when no portal is supplied.
    """
    if portal is None:
        raise ValueError(
            'initialize() needs the portal object to locate portal_transforms')
    engine = getToolByName(portal, 'portal_transforms')
    engine.registerTransform(register())
00231 
# Attribute-value pattern capturing an href value in group 'href'.
# NOTE(review): ATTR_HREF is not referenced again in this module as shown.
ATTR_HREF = ATTR_VALUE % 'href'
# LINK_PATTERN matches the start of an <img ... src= or <a ... href=
# attribute. Group 'prefix' is everything up to and including the optional
# opening quote; group 'href' is the url itself.
LINK_PATTERN = re.compile(
    r'(?P<prefix><(?:img\s[^>]*src|a\s[^>]*href)\s*=\s*(?:"?))(?P<href>(?<=")[^"]*|[^ \/>]*)',
    re.IGNORECASE)
# Portal type names; sanitize_portal_type maps FRAGMENT_TYPE to
# NAVIGATION_PAGE before a catalog query.
FRAGMENT_TYPE = 'CompositePack Fragments'
NAVIGATION_PAGE = 'Navigation Page'

# Matches a complete <a>...</a> element or a single <img> tag; used by
# Migration.link_summary to extract the text surrounding a link.
SUMMARY_PATTERN = re.compile(r'(<a[^>]*>.*?</a>)|(<img[^>]*>)', re.IGNORECASE|re.DOTALL)
00240 
class Migration:
    """Batch checker/converter for the links stored in one HTML field.

    Depending on ``action`` the links found in each object are reported when
    broken ('check'), rewritten to ``resolveuid/<uid>`` form ('touid') or
    rewritten to relative paths ('topath'). Work is done in batches and the
    state listed in FIELDS is kept in the session between requests.
    """
    # Attributes saved to / restored from the session.
    FIELDS = ('portal_type', 'typename', 'fieldname',
        'fieldlabel', 'position', 'action', 'commit_changes',
        'image_tails', 'paths', 'pathuids', 'uids', 'found',
        'batch_size',
    )

    def __init__(self, tool):
        """Look up the tools and portal information used by the migration."""
        self.tool = tool
        self.url_tool = getToolByName(tool, 'portal_url')
        self.portal = self.url_tool.getPortalObject()
        self.portal_base = self.url_tool.getPortalPath()
        self.portal_base_url = self.portal.absolute_url()
        # Length of the portal path plus the '/' that follows it.
        self.prefix_length = len(self.portal_base)+1
        self.uid_catalog = getToolByName(tool, 'uid_catalog')
        self.reference_tool = getToolByName(tool, 'reference_catalog')
        self.portal_catalog = getToolByName(tool, 'portal_catalog')
        self._continue = True
        self._firstoutput = False
        self.commit_changes = False
        self._objects = []
        self.image_tails = []

    def initImageSizes(self):
        """Load the url tails that select an image size from the tool."""
        self.image_tails = self.tool._getImageSizes()

    def initFromRequest(self):
        """Initialise a new run from the submitted request form."""
        self.initImageSizes()
        self.uids = None
        self.found = 0
        request = self.tool.REQUEST
        rfg = request.form.get
        fields = [f for f in rfg('fields',()) if f.get('selected',0)]
        if fields:
            # Only the first selected field is migrated.
            f = fields[0]
            self.portal_type = f.portal_type
            self.typename = f.type.decode('utf-8')
            self.fieldname = f.name.decode('utf-8')
            self.fieldlabel = f.label.decode('utf-8')
        else:
            self.portal_type = rfg('portal_type', None)
            self.fieldname = None
            self.fieldlabel = None
            self.typename = None

        self.position = 0
        self.action = rfg('button', None)
        self.commit_changes = rfg('commit', False)
        self.batch_size = 10
        if self.commit_changes:
            self.uids = rfg('uids', [])

        pathuids = rfg('folderpaths', [])
        self.paths = self.tool.convertUidsToPaths(pathuids)
        self.pathuids = pathuids

    def initCommit(self):
        """Reset counters for a commit pass"""
        self.restoreState()
        request = self.tool.REQUEST
        rfg = request.form.get
        self.commit_changes = True
        self._firstoutput = True
        self.found = 0
        self.position = 0
        self.batch_size = 5
        self.uids = rfg('uids')

    def saveState(self):
        """Store the attributes named in FIELDS in the session."""
        SESSION = self.tool.REQUEST.SESSION
        SESSION['kupu_migrator'] = dict([(f, getattr(self, f, None)) for f in self.FIELDS])

    def restoreState(self):
        """Restore the attributes named in FIELDS from the session."""
        SESSION = self.tool.REQUEST.SESSION
        state = SESSION['kupu_migrator']
        for f in self.FIELDS:
            setattr(self, f, state[f])

    def status(self):
        """Return one 'name=value' line for every attribute in FIELDS."""
        s = [ '%s=%s' % (f,getattr(self, f, 'unset')) for f in
            self.FIELDS ]
        return '\n'.join(s)

    def mkQuery(self):
        """Build the catalog query selecting the objects to examine."""
        query = {}
        if self.portal_type:
            query['portal_type'] = sanitize_portal_type(self.portal_type)
        if self.paths:
            query['path'] = self.paths
        query['Language'] = 'all'
        return query

    def getInfo(self, saveState=True):
        """Return a dict describing the progress of the current run.

        The state is also written to the session unless saveState is false.
        """
        info = {}
        if self._continue:
            info['nexturi'] = self.tool.absolute_url_path()+'/kupu_migration.xml?button=continue'
            if self.commit_changes and self._objects and self.position < getattr(self, '_total', -1):
                info['delay'] = 5 # Avoid killing everyone else with conflict errors.
        else:
            info['nexturi'] = None

        info['firstoutput'] = self._firstoutput

        # _total only exists once docontinue has processed a batch.
        if hasattr(self, '_total'):
            info['total'] = self._total
            info['position'] = self.position
            if self._total==0:
                info['percent'] = '100%'
            else:
                info['percent'] = '%d%%' % ((100.*self.position)/self._total)

        info['objects'] = self._objects
        info['action'] = action = self.action
        info['action_check'] = action=='check'
        info['action_touid'] = action=='touid'
        info['action_topath'] = action=='topath'
        info['typename'] = self.typename
        info['fieldlabel'] = self.fieldlabel
        info['checkboxes'] = action != 'check' and not self.commit_changes
        info['commit_changes'] = self.commit_changes
        info['dryrun'] = not (self.action == 'check' or self.commit_changes)
        info['found'] = self.found

        if saveState:
            self.saveState()
        return info

    def docontinue(self):
        """Scan selected documents looking for convertible links

        The first call (uids is None) only collects the uids to process.
        Each later call handles the next batch_size uids. Returns False
        once nothing is left to do, True otherwise.
        """
        uids = self.uids
        if uids is None:
            self.uids = uids = []
            brains = self.portal_catalog.searchResults(self.mkQuery())
            for b in brains:
                uid = self.UIDfromBrain(b)
                if uid:
                    uids.append(uid)
            self._firstoutput = True
            self._continue = True
            return True

        pos = self.position
        self._total = len(uids)

        uids = uids[pos:pos+self.batch_size]
        self.position = pos + len(uids)
        if not uids:
            self._continue = False
            return False # Done

        self._objects = res = []
        for uid in uids:
            obj = self.reference_tool.lookupObject(uid)
            if obj is None:
                # The uid no longer resolves (e.g. the object was deleted
                # since the uids were collected); nothing to check.
                continue
            if self.portal_type==FRAGMENT_TYPE and obj.portal_type!=FRAGMENT_TYPE:
                try:
                    fldr = obj.cp_container.titles
                except (AttributeError, KeyError):
                    # No fragment container on this object. Kept narrow so
                    # that database conflict errors are not swallowed.
                    continue
                for o in fldr.objectValues([FRAGMENT_TYPE]):
                    objinfo = self.object_check(o)
                    if objinfo:
                        res.append(objinfo)
            else:
                objinfo = self.object_check(obj)
                if objinfo:
                    res.append(objinfo)

        self._continue = True
        return True

    def brain_check(self, brain):
        """Run object_check on the object a catalog brain refers to."""
        obj = brain.getObject()
        return self.object_check(obj)

    def link_summary(self, data, start, link):
        """Summary information for a link

        Returns a two item list: the text before and the text after the
        first occurrence of link within the element matched by
        SUMMARY_PATTERN at start (or within the next 200 characters when
        nothing matches there).
        """
        m = SUMMARY_PATTERN.match(data, start)
        if m:
            text = m.group(0)
        else:
            text = data[start:start+200]
        bits = text.split(link, 1)
        if len(bits)==1:
            bits.append('')
        return bits

    def object_check(self, object):
        """Check the relative links within this object.

        Returns a dict with the keys 'title', 'uid', 'info', 'url' and
        'diffs' when bad links or changes were found, otherwise None. The
        field is only modified when commit_changes is set.
        """
        def checklink(match):
            matched = match.group(0)
            newlink = link = decodeEntities(match.group('href'))
            classification, uid, relpath, tail = self.classifyLink(link, base)

            if self.action=='check':
                if classification=='bad':
                    abslink = urljoin(base, link)
                    before, after = self.link_summary(data, match.start(), link)
                    summary = {'text':link, 'url':abslink,
                        'before': before,
                        'after': after, }
                    info.append(summary)
            elif self.action=='touid':
                if classification=='internal':
                    if uid and uid==objuid:
                        newlink = tail
                    elif uid:
                        newlink = 'resolveuid/%s%s' % (uid, tail)
                    else:
                        newlink = relpath+tail

            elif self.action=='topath':
                if classification=='internal':
                    newlink = relpath+tail

            if newlink != link:
                prefix = match.group('prefix')
                newlink = html_quote(newlink).encode('ascii', 'xmlcharrefreplace')
                changes.append((match.start()+len(prefix), match.end(), newlink))
                return prefix + newlink
            return matched

        info = []
        changes = []
        try:
            # NOTE(review): this stores the UID attribute itself, it is not
            # called here. Confirm that the comparison in checklink and the
            # consumers of the returned 'uid' expect that.
            objuid = aq_base(object).UID
        except AttributeError:
            return None  # only archetypes objects

        baseobj = object
        if object.portal_type==FRAGMENT_TYPE:
            baseobj = object.aq_parent.aq_parent.aq_parent
        base = baseobj.absolute_url()
        if getattr(baseobj.aq_explicit, 'isPrincipiaFolderish', 0):
            # Relative links inside a folderish object resolve below it.
            base += '/'

        field = object.getField(self.fieldname)
        if field is None:
            return None

        content_type = field.getContentType(object)
        if content_type != 'text/html':
            # Don't attempt to modify non-html
            return None

        data = field.getEditAccessor(object)()
        if isinstance(data, str):
            # Only byte strings need decoding; decoding a unicode value
            # would implicitly ascii-encode it first and fail on any
            # non-ascii character.
            data = data.decode('utf8')
        __traceback_info__ = (object, data)
        newdata = LINK_PATTERN.sub(checklink, data)
        if data != newdata and self.commit_changes:
            mutator = field.getMutator(object)
            if mutator:
                mutator(newdata.encode('utf8'), mimetype='text/html')
                object.reindexObject() # Need to flag update

        if not (info or changes):
            return None

        self.found += 1
        title = object.Title()
        if not title:
            title = object.getId()
        if not title:
            title = '<object>'
        if object.portal_type == FRAGMENT_TYPE:
            title = "%s (%s)" % (baseobj.title_or_id(), title)
        if data != newdata:
            diffs = htmlchanges(data, changes)
        else:
            diffs = None
        return dict(title=title, uid = objuid, info=info, url=baseobj.absolute_url_path(),
            diffs=diffs)

    def UIDfromBrain(self, brain):
        """Convert a brain to a UID without hitting the object"""
        path = brain.getPath()
        if not path.startswith(self.portal_base):
            return None
        try:
            metadata = self.uid_catalog.getMetadataForUID(path[self.prefix_length:])
        except KeyError:
            return None
        return metadata.get('UID', None)

    def UIDfromURL(self, url):
        """Convert an absolute URL to a UID"""
        if not url.startswith(self.portal_base_url):
            return None
        path = url[len(self.portal_base_url)+1:]
        if not path:
            return None
        try:
            metadata = self.uid_catalog.getMetadataForUID(path)
        except KeyError:
            return None
        return metadata.get('UID', None)

    def brainfromurl(self, url):
        """convert a url to a catalog brain"""
        if not url.startswith(self.portal_base_url):
            return None
        url = unquote_plus(url)
        url = self.portal_base + url[len(self.portal_base_url):]
        if isinstance(url, unicode):
            url = url.encode('utf8') # ExtendedPathIndex can't cope with unicode paths
        brains = self.portal_catalog.searchResults(path=url, Language='all')
        if len(brains) != 1:
            # Happens on Plone 2.0 :(
            for b in brains:
                if b.getPath()==url:
                    return b
            return None
        return brains[0]

    def resolveToPath(self, absurl):
        """Resolve a 'resolveuid/<uid>[/tail]' url.

        Returns (uid, url, tail) when the uid resolves to an object,
        (None, target, '') when only the fallback hook produced a target,
        and (None, None, None) otherwise.
        """
        if 'resolveuid/' in absurl:
            bits = absurl.split('resolveuid/', 1)
            bits = bits[1].split('/',1)
            uid = bits[0]
            if len(bits)==1:
                tail = ''
            else:
                tail = '/' + bits[1]

            # TODO: should be able to convert uid to brain without
            # touching the actual object.
            obj = self.reference_tool.lookupObject(uid)
            if obj is not None:
                newurl = obj.absolute_url()
                return uid, newurl, tail
            # If the uid doesn't exist then we can try the fallback
            # script. Even if the fallback works though we'll assume
            # an external link for simplicity.
            hook = getattr(self.tool, 'kupu_resolveuid_hook', None)
            if hook is not None:
                target = hook(uid)
                return None, target, ''
        return None, None, None

    def classifyLink(self, url, base, first=True):
        """Classify a link as:
        internal, external, bad

        Returns a tuple:
        (classification, uid, relpath, tail)
        giving potential urls: resolveuid/<uid><tail>
        or: <relpath><tail>

        first is False on the recursive call that retries the url with a
        trailing image size segment removed.
        """
        if url.startswith('portal_factory'):
            url = url[14:]

        absurl = urljoin(base, url)
        if not absurl.startswith(self.portal_base_url):
            return 'external', None, url, ''

        scheme, netloc, path, query, fragment = urlsplit(absurl)
        path = path.strip('/')
        tail = urlunsplit(('','','',query,fragment))
        absurl = urlunsplit((scheme,netloc,path,'',''))

        if 'resolveuid/' in absurl:
            UID, newurl, ntail = self.resolveToPath(absurl)
            if UID is None:
                if newurl:
                    return 'external', None, newurl, ntail
                return 'bad', None, url, ''
            absurl = newurl
            tail = ntail + tail
        else:
            UID = self.UIDfromURL(absurl)

        brain = self.brainfromurl(absurl)
        if not brain:
            if first:
                # Allow image size modifiers on the end of urls.
                p = absurl.split('/')
                absurl = '/'.join(p[:-1])
                if '/'+p[-1] in self.image_tails:
                    tail = '/'+p[-1]+tail
                    c, uid, url, _ = self.classifyLink(absurl, base, first=False)
                    return c, uid, url, tail
            return 'bad', None, url, ''

        relative, _ = makeUrlRelative(absurl, base)
        # Don't convert page-internal links to uids.
        # Also fix up spurious portal_factory references
        if not relative:
            return 'internal', None, '', tail
        return 'internal', UID, relative, tail
00634 
def makeUrlRelative(url, base):
    """Express url relative to base.

    The caller must already have checked that url and base share a common
    prefix. Returns a tuple (relative_path, tail) where tail is the query
    string and fragment of url. The relative path is '.' when url is the
    directory base lives in, and '' when url is base itself.
    """
    path, query, fragment = urlsplit(url)[2:5]
    basepath = urlsplit(base)[2]

    base_segments = basepath.split('/')
    # The last segment of base names the document itself, not a directory.
    base_leaf = base_segments.pop()
    target_segments = path.split('/')

    # Count the leading directory segments both paths have in common.
    shared = 0
    limit = min(len(base_segments), len(target_segments))
    while shared < limit and base_segments[shared] == target_segments[shared]:
        shared += 1

    # One '..' for each base directory that is left, then the remainder.
    relative = ['..'] * (len(base_segments) - shared) + target_segments[shared:]
    if not relative:
        relative = ['.']
    elif relative == [base_leaf]:
        relative = []

    return '/'.join(relative), urlunsplit(('', '', '', query, fragment))
00662 
def htmlchanges(data, changes):
    """Render an HTML summary of the replacements made in data.

    changes is a sequence of (start, end, new) tuples. For each one the
    output shows up to 10 characters of preceding text, the replaced text
    inside <del> and the new text inside <ins>; skipped text is shown as
    '...'. All text taken from data or from the changes is html-quoted.
    """
    pieces = []
    emit = pieces.append
    cursor = 0
    for begin, finish, replacement in changes:
        context_start = max(cursor, begin - 10)
        if context_start != cursor:
            if context_start - 10 > cursor:
                # Large gap: show the text right after the previous change
                # and elide the rest.
                emit(html_quote(data[cursor:cursor + 10]))
                emit('...')
            else:
                emit(html_quote(data[cursor:context_start]))
        emit(html_quote(data[context_start:begin]))
        emit('<del>%s</del>' % html_quote(data[begin:finish]))
        emit('<ins>%s</ins>' % html_quote(replacement))
        cursor = finish
    if cursor:
        # Trailing context after the last change.
        emit(html_quote(data[cursor:cursor + 10]))
        if cursor + 10 < len(data):
            emit('...')
    return ''.join(pieces)
00684 
def sanitize_portal_type(pt):
    """Map a portal type to the type used in the database query.

    FRAGMENT_TYPE is replaced by NAVIGATION_PAGE (needed for CompositePack
    pages); every other value is returned unchanged.
    """
    mapped = pt
    if pt==FRAGMENT_TYPE:
        mapped = NAVIGATION_PAGE
    return mapped
00691 
# Matches a decimal (&#65;), hexadecimal (&#x41; or &#X41;) or named (&amp;)
# character reference. Entity names may contain digits after the first
# letter (e.g. sup2, frac12).
EntityPattern = re.compile(r'&(?:#(\d+)|(?:#[xX]([\da-fA-F]+))|([a-zA-Z][a-zA-Z0-9]*));')
def decodeEntities(s, encoding='utf-8'):
    """Return s as unicode with character references replaced.

    s        -- text to decode; anything that is not already unicode is
                first decoded using encoding.
    encoding -- encoding of s when it is a byte string.

    Named entities are looked up in name2codepoint. References that cannot
    be converted (unknown names, code points outside the supported range)
    are left in the text unchanged.
    """
    def unescape(match):
        decimal, hexadecimal, entity = match.groups()
        try:
            if decimal:
                return unichr(int(decimal, 10))
            if hexadecimal:
                return unichr(int(hexadecimal, 16))
        except (ValueError, OverflowError):
            # Code point too large for unichr: keep the reference as is
            # rather than failing the whole conversion.
            return match.group(0)
        if entity in name2codepoint:
            return unichr(name2codepoint[entity])
        return match.group(0)

    if not isinstance(s, unicode):
        s = s.decode(encoding)
    return EntityPattern.sub(unescape, s)