Back to index

plone3  3.1.7
AdvancedQuery.py
Go to the documentation of this file.
00001 # Copyright (C) 2003-2006 by Dr. Dieter Maurer, Eichendorffstr. 23, D-66386 St. Ingbert, Germany
00002 # see "LICENSE.txt" for details
00003 #       $Id: AdvancedQuery.py,v 1.13 2006/07/14 19:07:28 dieter Exp $
00004 '''Advanced Query
00005 
00006 see 'AdvancedQuery.html' for documentation.
00007 '''
00008 from copy import copy
00009 
00010 from ExtensionClass import Base
00011 
00012 from types import InstanceType
00013 
00014 from DateTime import DateTime
00015 from BTrees.IIBTree import IISet, IITreeSet, \
00016      difference, union, multiunion, intersection
00017 from BTrees.OOBTree import OOBTree
00018 
00019 _notPassed= []
00020 
00021 
00022 ##############################################################################
00023 ## Query classes
00024 
00025 class _BaseQuery(Base):
00026   ''''Query' base class.'''
00027   
00028   # Interface
00029   def __str__(self):
00030     raise NotImplementedError
00031 
00032   def _eval(self,catalog):
00033     raise NotImplementedError
00034 
00035   def __and__(self, other):
00036     '''self & other'''
00037     if isinstance(self,And): r = self._clone()
00038     else: r = And(self)
00039     r.addSubquery(other)
00040     return r
00041 
00042   def __or__(self, other):
00043     '''self | other'''
00044     if isinstance(self,Or): r = self._clone()
00045     else: r = Or(self)
00046     r.addSubquery(other)
00047     return r
00048 
00049   def __invert__(self):
00050     '''~ self'''
00051     return Not(self)
00052 
00053   def _clone(self):
00054     '''ATT: not a true clone operation.'''
00055     return self
00056 
00057 
00058 class _ElementaryQuery(_BaseQuery):
00059   # to be overridden by derived classes
00060   _functor= None # transform term into query ("None" means identity)
00061   _OP= None     # used for display
00062 
00063   def __init__(self, idx, term, filter=False):
00064     self._idx = idx
00065     self._term = term
00066     self._filter = filter
00067 
00068   def __str__(self):
00069     return '%s %s %r' % (self._idx, self._OP, self._term)
00070 
00071   def _getTerm(self, term = _notPassed):
00072     '''determine term to be used for querying.
00073     '''
00074     if term is _notPassed: term = self._term
00075     return term
00076 
00077 
00078   def _eval(self,context):
00079     functor = self._functor
00080     term = self._getTerm()
00081     if functor is not None: term = functor(term)
00082     return context._applyIndex(self, term)
00083 
00084 
00085 class Eq(_ElementaryQuery):
00086   '''idx = term'''
00087   _OP = '='
00088   def _functor(self,term): return (term,)
00089 
00090 class Le(_ElementaryQuery):
00091   ''' idx <= term'''
00092   _OP = '<='
00093   def _functor(self,term): return {'query':term, 'range':'max'}
00094 
00095 class Ge(_ElementaryQuery):
00096   ''' idx >= term'''
00097   _OP = '>='
00098   def _functor(self,term): return {'query':term, 'range':'min'}
00099 
00100 class MatchGlob(_ElementaryQuery):
00101   '''idx = term'''
00102   _OP = '=~'
00103   def _functor(self,term): return {'query':term, 'match':'glob'}
00104 
00105 class MatchRegexp(_ElementaryQuery):
00106   '''idx = term'''
00107   _OP = '=~~'
00108   def _functor(self,term): return {'query':term, 'match':'regexp'}
00109 
00110 class Generic(_ElementaryQuery):
00111   _OP = '~~'
00112 
00113 class In(Generic):
00114   _OP = 'in'
00115 
00116 class Between(_ElementaryQuery):
00117   '''lb <= idx <= ub'''
00118   def __init__(self, idx, lb, ub, filter=False):
00119     _ElementaryQuery.__init__(self, idx, (lb,ub), filter)
00120    
00121   def __str__(self):
00122     t = self._term
00123     return '%r <= %s <= %r' % (t[0], self._idx, t[1])
00124 
00125   def _functor(self, term): return {'query':term, 'range':'min:max',}
00126 
00127 
00128 class Indexed(_BaseQuery):
00129   def __init__(self, idx):
00130     self._idx = idx
00131 
00132   def __str__(self): return 'Indexed(%s)' % self._idx
00133 
00134   def _eval(self,context):
00135     return context._indexed(self._idx)
00136 
00137 
00138 class Not(_BaseQuery):
00139   '''~(query)'''
00140   def __init__(self,query):
00141     self._query = query
00142 
00143   def __str__(self):
00144     return '~(%s)' % str(self._query)
00145 
00146   def _eval(self,context):
00147     return difference(context._getObjectIds(),self._query._eval(context))
00148 
00149 
00150 class _CompositeQuery(_BaseQuery):
00151   # to be overridden
00152   _OP = None
00153 
00154   def __init__(self, *queries):
00155     self._subqueries= []
00156     for q in queries: self.addSubquery(q)
00157 
00158   def __str__(self):
00159     return '(%s)' % (' %s ' % self._OP).join([str(q) for q in self._subqueries])
00160 
00161   addSubquery__roles__ = None # Public
00162   def addSubquery(self,query):
00163     assert isinstance(query,_BaseQuery)
00164     self._subqueries.append(query)
00165     return self
00166 
00167   def _clone(self):
00168     return self.__class__(*self._subqueries)
00169 
00170   def _classifySubqueries(self):
00171     '''auxiliary method to classify subqueries into various categories:
00172 
00173     'empty' -- empty subquery; leading to a degenerated evaluation
00174 
00175     'index lookup' -- assumed to be fast and giving small results
00176 
00177     'complex' -- some complex subquery of different type (subqueries of
00178       the same type are included)
00179 
00180     'indexed' -- assumed to give rather large results
00181 
00182     'notQ' -- expensive, large results expected
00183     '''
00184     sqs = self._subqueries[:]
00185     empty = []; lookup = []; complex = []; indexed = []; notQ = []
00186     while sqs:
00187       q= sqs.pop()
00188       if isinstance(q,_ElementaryQuery): lookup.append(q); continue
00189       if q.__class__ is self.__class__:
00190         # swallow
00191         sqs.extend(q._subqueries)
00192         continue
00193       if isinstance(q,_CompositeQuery):
00194         if not q._subqueries: empty.append(q); break # degenerate
00195         complex.append(q)
00196         continue
00197       if isinstance(q,Not): notQ.append(q); continue
00198       indexed.append(q); continue
00199     if empty: return {'empty':1} # this is by purpose -- to get remembered when we should derive another class from "_CompositeQuery"
00200     return {'empty':empty, 'lookup':lookup, 'complex':complex,
00201             'indexed':indexed, 'notQ':notQ,
00202             }
00203 
00204       
00205 class And(_CompositeQuery):
00206   _OP = '&'
00207   __iand__ = _CompositeQuery.addSubquery
00208   def _eval(self,context):
00209     csq = self._classifySubqueries()
00210     if csq['empty']: return IISet() # empty result
00211     nsq = csq['lookup'] + csq['complex'] + csq['indexed']
00212     notsq = csq['notQ']
00213     if not nsq and not notsq:
00214       # an empty 'And' query
00215       return context._getObjectIds()
00216     if not nsq: nsq.append(notsq.pop())
00217     r = None
00218     for q in nsq: r = intersection(r, q._eval(context))
00219     for q in notsq: r = difference(r, q._query._eval(context))
00220     return r
00221 
00222 
00223 class Or(_CompositeQuery):
00224   _OP = '|'
00225   __ior__ = _CompositeQuery.addSubquery
00226   def _eval(self,context):
00227     csq = self._classifySubqueries()
00228     if csq['empty']: return context._getObjectIds()
00229     sqs= csq['lookup'] + csq['complex'] + csq['indexed'] + csq['notQ']
00230     if not sqs: return IISet()
00231     if len(sqs) >= 4: return multiunion([q._eval(context) for q in sqs])
00232     r = None
00233     for q in sqs: r = union(r,q._eval(context))
00234     return r
00235 
00236 
00237 class LiteralResultSet(_BaseQuery):
00238   '''Query given by its result set.
00239 
00240   Used to restrict previous query results.
00241   '''
00242   def __init__(self, set):
00243     '''query returning *set*.
00244 
00245     *set* must be an 'IISet' or 'IITreeSet' of catalog record ids.
00246     '''
00247     if not isinstance(set, (IISet, IITreeSet)): set = IITreeSet(set)
00248     self._set = set
00249 
00250   def __str__(self): return 'LiteralResultSet(%s)' % self._set
00251 
00252   def _eval(self,catalog):
00253     return _wrapLookup(self._set)
00254   
00255 
00256 
00257 #############################################################################
00258 ## Auxiliaries
00259 # overridden when IncrementalSearch is present
00260 def _wrapLookup(r):
00261   if not isinstance(r, (IISet, IITreeSet)): r = IITreeSet(r.keys())
00262   return r
00263 
00264 # overridden when IncrementalSearch is present
00265 def _prepareSpec(spec, query): return spec
00266 
00267 
00268 class _QueryContext:
00269   '''auxiliary class to encapsulate the catalog interface.'''
00270   def __init__(self, catalog):
00271     self._catalog = catalog
00272 
00273   def _applyIndex(self, query, spec):
00274     spec = _prepareSpec(spec, query)
00275     cat = self._catalog; index = query._idx
00276     return _wrapLookup(cat.indexes[index].__of__(cat)._apply_index({index:spec})[0])
00277 
00278   # exists to be overridden by derived classes
00279   def _prepareSpec(self, spec, query): return spec
00280 
00281   _objects= None
00282   def _getObjectIds(self):
00283     objs = self._objects
00284     if objs is None:
00285       objs = self._objects = IITreeSet(self._catalog.data.keys())
00286     return objs
00287 
00288   def _indexed(self, index):
00289     cat = self._catalog
00290     # simplified -- hopefully not wrong!
00291     #return _wrapLookup(IITreeSet(cat.indexes[index]._unindex.keys()))
00292     return _wrapLookup(cat.indexes[index]._unindex)
00293 
00294 
00295 #############################################################################
00296 ## Redefinitions when 'IncrementalSearch[2]' is available
00297 ISearch = None
00298 try: from IncrementalSearch2 import IAnd_int as IAnd, IOr_int as IOr, \
00299      INot, IBTree, Enumerator as EBTree, intersection_int as intersection, \
00300      ISearch
00301 except ImportError: pass
00302 
00303 if ISearch is None:
00304   try:
00305     from IncrementalSearch import IAnd, IOr, INot, ISearch, IBTree, EBTree, \
00306          intersection
00307     #raise ImportError # for testing purposes
00308   except ImportError: pass
00309 
00310 if ISearch is None:
00311   class ISearch: pass
00312   IBTree = ISearch
00313 else:
00314   class And(And):
00315     def _eval(self, context):
00316       subqueries = self._subqueries
00317       if not subqueries: return IBTree(context._getObjectIds()) # empty And
00318       if len(subqueries) == 1: return subqueries[0]._eval(context)
00319       search = IAnd(*[sq._eval(context) for sq in subqueries])
00320       search.complete()
00321       return search
00322 
00323   class Or(Or):
00324     def _eval(self, context):
00325       subqueries = self._subqueries
00326       if len(subqueries) == 1: return subqueries[0]._eval(context)
00327       search = IOr(*[sq._eval(context) for sq in subqueries])
00328       search.complete()
00329       return search
00330 
00331   class Not(Not):
00332     def _eval(self, context):
00333       return INot(self._query._eval(context), EBTree(context._getObjectIds()))
00334 
00335   def _prepareSpec(spec, query):
00336     filter = query._filter
00337     # add 'isearch' and 'isearch_filter' to *spec*
00338     # This is tricky -- we follow logic from
00339     # "Products.PluginIndexes.common.util.parseIndexRequest"
00340     if isinstance(spec, InstanceType) and not isinstance(spec, DateTime):
00341       spec = copy(spec)
00342       spec.isearch = True; spec.isearch_filter = filter
00343     elif isinstance(spec, dict):
00344       spec = spec.copy()
00345       spec['isearch'] = True; spec['isearch_filter'] = filter
00346     else: spec = {'query':spec, 'isearch':True, 'isearch_filter':filter}
00347     return spec
00348 
00349   def _wrapLookup(r):
00350     if r is None:
00351       # ATT: we could optimize this, but hopefully nobody specifies such
00352       # silly queries
00353       r = self._getObjectIds()
00354     if not isinstance(r, ISearch): r = IBTree(r)
00355     return r