Back to index

plone3  3.1.7
ranking.py
Go to the documentation of this file.
00001 # Copyright (C) 2003-2006 by Dr. Dieter Maurer, Eichendorffstr. 23, D-66386 St. Ingbert, Germany
00002 #       $Id: ranking.py,v 1.1 2006/06/25 19:11:24 dieter Exp $
00003 '''Ranking Support.'''
00004 
00005 from BTrees.IIBTree import difference
00006 
00007 from AdvancedQuery import _BaseQuery, LiteralResultSet, And, intersection
00008 from sorting import Sorter
00009 from eval import _eval
00010 
00011 
00012 class RankSpec(object):
00013   '''abstract ranking specification.'''
00014   def _prepare(self, cat):
00015     '''return a sorter using *cat*.'''
00016     raise NotImplementedError
00017 
00018 class _Ranker(Sorter):
00019   '''a sorter base class used for ranking.'''
00020   def __init__(self, spec, cat):
00021     self._spec = spec; self._cat = cat
00022 
00023   def group(self, seq):
00024     normalize = self._normalize
00025     for rank, subseq in self._group(seq):
00026       yield normalize(rank), subseq
00027 
00028   # implemented by derived classes
00029   def _group(self, seq):
00030     raise NotImplementedError
00031 
00032   # may be overridden by derived classes
00033   def _normalize(self, rank): return rank
00034 
00035 
00036 class _RankByQueries(RankSpec):
00037   '''Ranking specification base class for rankings based on a sequence of (*query*, *value*) pairs.
00038 
00039   All values must be non negative numbers.
00040   '''
00041   # defined by derived classes
00042   _RankerClass = None
00043 
00044   def __init__(self, *specs):
00045     '''each spec is a pair *query*, *value*.'''
00046     l = []; sum = 0
00047     for q,v in specs:
00048       if not isinstance(q, _BaseQuery):
00049         raise TypeError('Query must be an AdvancedQuery')
00050       if not isinstance(v, (int, float, long)):
00051         raise TypeError('Query value must be a float')
00052       if v < 0: raise ValueError('Query value must not be negative')
00053       if not v: continue
00054       l.append((v,q)); sum += v
00055     l.sort()
00056     self._specs = l; self._sum = sum
00057 
00058   def getQueryValueSum(self): return self._sum
00059   def _getValueQuerySequence(self): return self._specs
00060 
00061   def _prepare(self, cat):
00062     return self._RankerClass(self, cat)
00063 
00064 
00065 class _RankerByQueries_Sum(_Ranker):
00066   '''a sorter corresponding to 'RankByQueries_Sum'.'''
00067   def _group(self, seq):
00068     spec = self._spec; cat = self._cat
00069     mv = spec.getQueryValueSum(); vqs = spec._getValueQuerySequence()
00070     def generate(seq, vqs, mv):
00071       if not vqs: yield 0, seq; return
00072       vqs = vqs[:] # avoid side effects
00073       v,q = vqs.pop(); mv -= v
00074       q = And(LiteralResultSet(seq), q)
00075       qr = _eval(q, cat)
00076       if qr:
00077         feed1 = generate(qr, vqs, mv)
00078         seq = difference(seq, qr)
00079       else: feed1 = None
00080       feed2 = seq and generate(seq, vqs, mv) or None
00081       def fetch1():
00082         if feed1 is None: return None
00083         try: val, subseq = feed1.next(); return val + v, subseq
00084         except StopIteration: return None
00085       def fetch2():
00086         if feed2 is None: return None
00087         try: return feed2.next()
00088         except StopIteration: return None
00089       g1 = fetch1()
00090       # largest value from "feed1" only
00091       while g1 is not None and g1[0] > mv: yield g1; g1 = fetch1()
00092       # merge largest values from "feed1" and "feed2"
00093       g2 = fetch2()
00094       while g1 is not None and g2 is not None:
00095         v1 = g1[0]; v2 = g2[0]
00096         if v1 > v2: yield g1; g1 = fetch1()
00097         elif v2 > v1: yield g2; g2 = fetch2()
00098         # Note: g1[1] was copied (by the "intersection" above); therfore,
00099         #  we can destructively change it
00100         else: g1[1].update(g2[1]); yield g1; g1 = fetch1(); g2 = fetch2()
00101       # handle feed1
00102       while g1 is not None: yield g1; g1 = fetch1()
00103       # handle feed2
00104       while g2 is not None: yield g2; g2 = fetch2()
00105     for g in generate(seq, vqs, mv): yield g
00106 
00107 
00108 class RankByQueries_Sum(_RankByQueries):
00109   '''Rank by the sum of query values for matching queries.
00110 
00111   The rank of a document *d* is the sum the query values for those
00112   queries that match *d*.
00113   '''
00114   _RankerClass = _RankerByQueries_Sum
00115 
00116 
00117 class _RankerByQueries_Max(_Ranker):
00118   '''a sorter corresponding to 'RankByQueries_Max'.'''
00119   def _group(self, seq):
00120     spec = self._spec; cat = self._cat
00121     vqs = spec._getValueQuerySequence()
00122     for i in xrange(len(vqs)-1,-1,-1):
00123       v,q = vqs[i]
00124       q = And(LiteralResultSet(seq), q)
00125       qr = _eval(q, cat)
00126       if qr: yield v, qr; seq = difference(seq, qr)
00127       if not seq: return
00128     yield 0, seq
00129 
00130 
00131 class RankByQueries_Max(_RankByQueries):
00132   '''Rank be the maximum of query values for mathing queries.
00133 
00134   The rank of a document *d* is the maximal query value for
00135   those queries that match *d*.
00136   '''
00137   _RankerClass = _RankerByQueries_Max
00138 
00139   def __init__(self, *specs):
00140     _RankByQueries.__init__(self, *specs)
00141     # merge successive queries with the same value
00142     nspecs = []; cv = None
00143     for v,q in self._specs:
00144       if v == cv:
00145         ls = nspecs[-1]
00146         nspecs[-1] = (ls[0], ls[1] | q)
00147       else: nspecs.append((v,q)); cv = v
00148     self._specs = nspecs
00149 
00150   def getQueryValueMax(self): return self._spec[-1][0]
00151