Back to index

plone3  3.1.7
ExtendedPathIndex.py
Go to the documentation of this file.
00001 # Copyright (c) 2004 Zope Corporation and Plone Solutions
00002 
00003 # ZPL 2.1 license
00004 
00005 import logging
00006 
00007 from Globals import DTMLFile
00008 from BTrees.IIBTree import IISet, intersection, union, multiunion
00009 from BTrees.OOBTree import OOBTree
00010 from BTrees.OIBTree import OIBTree
00011 
00012 from Products.PluginIndexes.common.util import parseIndexRequest
00013 from Products.PluginIndexes.common import safe_callable
00014 from Products.PluginIndexes.PathIndex.PathIndex import PathIndex
00015 
# Module-level sentinel list; nothing in the visible part of this module
# references it.
_marker = []
# Logger used by ExtendedPathIndex to report problems while unindexing.
logger = logging.getLogger('ExtendedPathIndex')
00018 
class ExtendedPathIndex(PathIndex):
    """Path index with extra structures for navtree and breadcrumb queries.

    Internal datastructure (regular pathindex):

    - a physical path of an object is split into its components

    - every component is kept as a key of an OOBTree in self._index

    - the value is a mapping 'level of the path component' to
      'all docids with this path component on this level'

    In addition

    - there is a terminator (None) signifying the last component in the
      path; it is stored in self._index at the level of the last component

    - self._index_parents maps a parent path to the IISet of docids whose
      path has that parent

    - self._index_items maps a full object path to its docid
    """

    meta_type = "ExtendedPathIndex"

    manage_options = (
        {'label': 'Settings',
         'action': 'manage_main',
         'help': ('ExtendedPathIndex', 'ExtendedPathIndex_Settings.stx')},
    )

    # Names of the query parameters handed to parseIndexRequest.
    query_options = ("query", "level", "operator", "depth", "navtree",
                     "navtree_start")

    def __init__(self, id, extra=None, caller=None):
        """Create the index; ExtendedPathIndex supports indexed_attrs.

        'extra' may be a dict or any object carrying an 'indexed_attrs'
        attribute.  The value may be a comma separated string or a
        sequence of names.  Only the first non-empty name is kept.
        """
        PathIndex.__init__(self, id, caller)

        if isinstance(extra, dict):
            attrs = extra.get('indexed_attrs', None)
        else:
            # Also covers extra=None: getattr falls back to the default.
            attrs = getattr(extra, 'indexed_attrs', None)

        if attrs is None:
            return
        # basestring (not str) so that a unicode value is split on commas
        # instead of being iterated character by character.
        if isinstance(attrs, basestring):
            attrs = attrs.split(',')
        attrs = [name.strip() for name in attrs]
        attrs = [name for name in attrs if name]

        if attrs:
            # We only index the first attribute so snip off the rest
            self.indexed_attrs = tuple(attrs[:1])

    def clear(self):
        """Reset the base index and the two extra lookup structures."""
        PathIndex.clear(self)
        self._index_parents = OOBTree()
        self._index_items = OIBTree()

    def insertEntry(self, comp, id, level, parent_path=None, object_path=None):
        """Insert an entry.

           comp is the path component (None for the terminator)

           id is the docid

           level is the level of the component inside the path

           parent_path is the path of the parent object; when given, the
           docid is added to the set of children of that parent

           object_path is the object path, it is assumed to be unique, i.e.
           there is a one to one mapping between physical paths and docids.
           This will be large, and is only used for breadcrumbs.
        """

        PathIndex.insertEntry(self, comp, id, level)

        if parent_path is not None:
            if parent_path not in self._index_parents:
                self._index_parents[parent_path] = IISet()

            self._index_parents[parent_path].insert(id)

        # We make the assumption that a full path corresponds one and only
        # one object.

        if object_path is not None:
            self._index_items[object_path] = id

    def index_object(self, docid, obj, threshold=100):
        """ hook for (Z)Catalog

        Returns 1 when the object was indexed and 0 when no path could be
        obtained from it.  Raises TypeError when the indexed attribute
        yields something other than a string or a tuple.  'threshold' is
        accepted but not used by this method.
        """

        # PathIndex first checks for an attribute matching its id and
        # falls back to getPhysicalPath only when failing to get one.
        # The presence of 'indexed_attrs' overrides this behavior and
        # causes indexing of the custom attribute.

        attrs = getattr(self, 'indexed_attrs', None)
        if attrs:
            index = attrs[0]
        else:
            index = self.id

        f = getattr(obj, index, None)
        if f is not None:
            if safe_callable(f):
                try:
                    path = f()
                except AttributeError:
                    return 0
            else:
                path = f

            if not isinstance(path, (str, tuple)):
                raise TypeError('path value must be string or tuple '
                                'of strings: (%r, %s)' % (index, repr(path)))
        else:
            try:
                path = obj.getPhysicalPath()
            except AttributeError:
                return 0

        if isinstance(path, (list, tuple)):
            # The first element is dropped and replaced by a leading '/'.
            path = '/' + '/'.join(path[1:])
        comps = [comp for comp in path.split('/') if comp]
        parent_path = '/' + '/'.join(comps[:-1])

        # Make sure we reindex properly when path change
        if docid in self._unindex and self._unindex.get(docid) != path:
            self.unindex_object(docid)

        if docid not in self._unindex:
            self._length.change(1)

        for level, comp in enumerate(comps):
            self.insertEntry(comp, docid, level)

        # Add terminator
        self.insertEntry(None, docid, len(comps) - 1, parent_path, path)

        self._unindex[docid] = path
        return 1

    def unindex_object(self, docid):
        """ hook for (Z)Catalog

        Removes every entry index_object created for 'docid'.  Components,
        levels, parent path and object path are derived from the stored
        path exactly the way index_object derives them, so that the keys
        looked up here are the keys that were written there.
        """

        if docid not in self._unindex:
            logger.info('Attempt to unindex nonexistent object with id %s',
                        docid)
            return

        path = self._unindex[docid]
        # Same derivation as in index_object: empty components are dropped
        # and the parent of a top-level object is '/'.
        comps = [comp for comp in path.split('/') if comp]
        parent_path = '/' + '/'.join(comps[:-1])

        def log_failure():
            logger.info('Attempt to unindex object with id %s failed', docid)

        def remove_component(comp, level):
            # Drop docid from self._index[comp][level] and prune whatever
            # became empty.
            try:
                self._index[comp][level].remove(docid)

                if not self._index[comp][level]:
                    del self._index[comp][level]

                if not self._index[comp]:
                    del self._index[comp]
            except KeyError:
                log_failure()

        for level, comp in enumerate(comps):
            remove_component(comp, level)

        # Remove the terminator; it lives at the level of the last component
        remove_component(None, len(comps) - 1)

        # Remove parent_path and object path elements.  Each structure is
        # cleaned up on its own so that a miss in one of them does not
        # leave a stale entry in the other.
        try:
            self._index_parents[parent_path].remove(docid)
            if not self._index_parents[parent_path]:
                del self._index_parents[parent_path]
        except KeyError:
            log_failure()

        try:
            del self._index_items[path]
        except KeyError:
            log_failure()

        self._length.change(-1)
        del self._unindex[docid]

    def search(self, path, default_level=0, depth=-1, navtree=0,
               navtree_start=0):
        """
        path is either a string representing a
        relative URL or a part of a relative URL or
        a tuple (path,level).

        level >= 0  starts searching at the given level
        level <  0  tries the components at every level up to self._depth

        depth limits how many levels below the path are returned; -1 means
        'not given' and is replaced by the value of navtree below.

        navtree switches to navigation-tree mode, navtree_start is the
        number of leading path elements skipped in the optimized navtree
        and breadcrumb branches.

        Returns a set of docids.
        """

        if isinstance(path, basestring):
            startlevel = default_level
        else:
            startlevel = int(path[1])
            path = path[0]

        absolute_path = isinstance(path, basestring) and path.startswith('/')

        comps = [comp for comp in path.split('/') if comp]

        # Components including the leading '' so that joining a slice
        # with '/' yields an absolute path.
        orig_comps = [''] + comps[:]
        # Optimization - avoid using the root set
        # as it is common for all objects anyway and add overhead
        # There is an assumption about catalog/index having
        # the same container as content
        if default_level == 0:
            indexpath = [comp for comp in self.getPhysicalPath() if comp]
            while indexpath and comps:
                if indexpath[0] == comps[0]:
                    del indexpath[0]
                    del comps[0]
                    startlevel += 1
                else:
                    break

        if len(comps) == 0:
            if depth == -1 and not navtree:
                return IISet(self._unindex.keys())

        # Make sure that we get depth = 1 if in navtree mode
        # unless specified otherwise

        orig_depth = depth
        if depth == -1:
            depth = navtree

        # Optimized navtree starting with absolute path
        if absolute_path and navtree and depth == 1 and default_level == 0:
            set_list = []
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i]) or '/'
                try:
                    set_list.append(self._index_parents[parent_path])
                except KeyError:
                    pass
            return multiunion(set_list)
        # Optimized breadcrumbs
        elif absolute_path and navtree and depth == 0 and default_level == 0:
            item_list = IISet()
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i]) or '/'
                try:
                    item_list.insert(self._index_items[parent_path])
                except KeyError:
                    pass
            return item_list
        # Specific object search
        elif absolute_path and orig_depth == 0 and default_level == 0:
            try:
                return IISet([self._index_items[path]])
            except KeyError:
                return IISet()
        # Single depth search
        elif absolute_path and orig_depth == 1 and default_level == 0:
            # only get objects contained in requested folder
            # NOTE: this hands out the set stored in the index itself,
            # not a copy.
            try:
                return self._index_parents[path]
            except KeyError:
                return IISet()
        # Sitemaps, relative paths, and depth queries
        elif startlevel >= 0:

            pathset = None   # Same as pathindex
            navset = None    # For collecting siblings along the way
            depthset = None  # For limiting depth

            if navtree and depth and \
                   None in self._index and \
                   startlevel in self._index[None]:
                navset = self._index[None][startlevel]

            for level in range(startlevel, startlevel + len(comps) + depth):
                if level - startlevel < len(comps):
                    comp = comps[level - startlevel]
                    if comp not in self._index or \
                           level not in self._index[comp]:
                        # Navtree is inverse, keep going even for
                        # nonexisting paths
                        if navtree:
                            pathset = IISet()
                        else:
                            return IISet()
                    else:
                        pathset = intersection(pathset,
                                               self._index[comp][level])
                    if navtree and depth and \
                           None in self._index and \
                           (level + depth) in self._index[None]:
                        navset = union(navset, intersection(pathset,
                                          self._index[None][level + depth]))
                if level - startlevel >= len(comps) or navtree:
                    if None in self._index and level in self._index[None]:
                        depthset = union(depthset, intersection(pathset,
                                                self._index[None][level]))

            if navtree:
                return union(depthset, navset) or IISet()
            elif depth:
                return depthset or IISet()
            else:
                return pathset or IISet()

        else:
            # Negative start level: try to match the components starting
            # at every level and collect all matches.
            results = IISet()
            for level in range(0, self._depth + 1):
                ids = None
                error = 0
                for cn in range(0, len(comps)):
                    comp = comps[cn]
                    try:
                        ids = intersection(ids, self._index[comp][level + cn])
                    except KeyError:
                        error = 1
                if error == 0:
                    results = union(results, ids)
            return results

    def _apply_index(self, request, cid=''):
        """ hook for (Z)Catalog
            'request' --  mapping type (usually {"path": "..." }
             additionally the parameters named in query_options
             ("level", "operator", "depth", "navtree", "navtree_start")
             might be passed (see search())

            'cid' -- not used by this method

            Returns None when the request holds no keys for this index,
            otherwise a tuple (set of docids, (self.id,)).
        """

        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        level = record.get("level", 0)
        operator = record.get('operator', self.useOperator).lower()
        depth = getattr(record, 'depth', -1)  # Set to 0 or navtree later
                                              # use getattr to get 0 value
        navtree = record.get('navtree', 0)
        navtree_start = record.get('navtree_start', 0)

        # depending on the operator we use intersection or union
        if operator == "or":
            set_func = union
        else:
            set_func = intersection

        res = None
        for k in record.keys:
            rows = self.search(k, level, depth, navtree, navtree_start)
            res = set_func(res, rows)

        if res:
            return res, (self.id,)
        else:
            return IISet(), (self.id,)

    def getIndexSourceNames(self):
        """ return names of indexed attributes """

        # By default PathIndex advertises getPhysicalPath even
        # though the logic in index_object is different.

        try:
            return tuple(self.indexed_attrs)
        except AttributeError:
            return ('getPhysicalPath',)

    index_html = DTMLFile('dtml/index', globals())
    manage_workspace = DTMLFile('dtml/manageExtendedPathIndex', globals())
00398 
00399 
# DTML template 'dtml/addExtendedPathIndex'; presumably the management form
# used to add an ExtendedPathIndex (inferred from the name - confirm).
manage_addExtendedPathIndexForm = DTMLFile('dtml/addExtendedPathIndex', globals())
00401 
def manage_addExtendedPathIndex(self, id, extra=None, REQUEST=None, RESPONSE=None, URL3=None):
    """Add an extended path index

    Delegates to self.manage_addIndex with the index type
    'ExtendedPathIndex'; URL3 is handed over as the URL1 argument.
    """
    add_index = self.manage_addIndex
    return add_index(
        id,
        'ExtendedPathIndex',
        extra=extra,
        REQUEST=REQUEST,
        RESPONSE=RESPONSE,
        URL1=URL3,
    )