Back to index

moin  1.9.0~rc2
__init__.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - search query parser
00004 
00005     @copyright: 2005 MoinMoin:FlorianFesti,
00006                 2005 MoinMoin:NirSoffer,
00007                 2005 MoinMoin:AlexanderSchremmer,
00008                 2006-2008 MoinMoin:ThomasWaldmann,
00009                 2006 MoinMoin:FranzPletz
00010     @license: GNU GPL, see COPYING for details
00011 """
00012 
00013 import re
00014 
00015 from MoinMoin import log
00016 logging = log.getLogger(__name__)
00017 
00018 from MoinMoin import config, wikiutil
00019 from MoinMoin.search.queryparser.expressions import AndExpression, OrExpression, TextSearch, TitleSearch, \
00020     LinkSearch, CategorySearch, DomainSearch, MimetypeSearch, LanguageSearch
00021 
00022 
class QueryError(ValueError):
    """ signals that a search query string could not be parsed """
00025 
00026 
class QueryParser(object):
    """
    Converts a String into a tree of Query objects.

    The query string is tokenized by wikiutil.parse_quoted_separated_ext()
    and the resulting items are combined into AndExpression / OrExpression
    nodes whose leaves are the *Search term objects.
    """

    # Known search prefixes, in matching order. A prefix used in a query may
    # be abbreviated: the FIRST name in this tuple that starts with the given
    # text wins (so "l" selects "linkto", not "language", and "c" selects
    # "case", not "category"). Do not reorder without intending to change that.
    _PREFIXES = ('title', 'regex', 'case', 'linkto', 'language', 'category',
                 'mimetype', 'domain')

    def __init__(self, **kw):
        """
        @keyword titlesearch: treat all terms as title searches
        @keyword case: do case sensitive search
        @keyword regex: treat all terms as regular expressions
        """
        self.titlesearch = kw.get('titlesearch', 0)
        self.case = kw.get('case', 0)
        self.regex = kw.get('regex', 0)
        # marker object the tokenizer emits for the "-" (negation) prefix
        self._M = wikiutil.ParserPrefix('-')

    def _simple_term(self, text):
        """ build the search term for a bare word (no prefixes given)

        @param text: the search word (unicode)
        @return: a TitleSearch if the parser was created with titlesearch,
                 else a TextSearch
        @raise QueryError: if text is empty
        """
        # odd workaround; we should instead ignore this term
        # and reject expressions that contain nothing after
        # being parsed rather than rejecting an empty string
        # before parsing...
        if not text:
            raise QueryError("Term too short")
        if self.titlesearch:
            return TitleSearch(text, use_re=self.regex, case=self.case)
        return TextSearch(text, use_re=self.regex, case=self.case)

    def _prefixed_term(self, item):
        """ build the search term for a tuple item "prefix(es) + text"

        All elements but the last one are prefixes (the negation marker or a
        possibly abbreviated name from _PREFIXES), the last element is the
        text to search for.

        @param item: tuple as found in the tokenizer output
        @return: the search term object, with its "negated" attribute set
        @raise QueryError: if a prefix is None or matches no known prefix
        """
        M = self._M
        negate = item[0] == M
        # parser-wide settings are the defaults, prefixes can only switch
        # additional flags on
        flags = {
            'title': self.titlesearch,
            'regex': self.regex,
            'case': self.case,
            'linkto': False,
            'language': False,
            'category': False,
            'mimetype': False,
            'domain': False,
        }
        for m in item[:-1]:
            if m is None:
                raise QueryError("Invalid search prefix")
            if m == M:
                negate = True
                continue
            # NOTE(review): an empty prefix string would match the first
            # entry ("title") here - confirm whether the tokenizer can
            # produce one.
            for name in self._PREFIXES:
                if name.startswith(m):
                    flags[name] = True
                    break
            else:
                raise QueryError("Invalid search prefix")

        text = item[-1]
        regex = flags['regex']
        case = flags['case']
        # If multiple search kinds were requested, the first match in this
        # chain decides. Mimetype, language and domain searches are always
        # created with case=False.
        if flags['category']:
            obj = CategorySearch(text, use_re=regex, case=case)
        elif flags['mimetype']:
            obj = MimetypeSearch(text, use_re=regex, case=False)
        elif flags['language']:
            obj = LanguageSearch(text, use_re=regex, case=False)
        elif flags['linkto']:
            obj = LinkSearch(text, use_re=regex, case=case)
        elif flags['domain']:
            obj = DomainSearch(text, use_re=regex, case=False)
        elif flags['title']:
            obj = TitleSearch(text, use_re=regex, case=case)
        else:
            obj = TextSearch(text, use_re=regex, case=case)
        obj.negated = negate
        return obj

    def _analyse_items(self, items):
        """ turn tokenizer items into an expression tree

        Items are handled depending on their type:
         - unicode: the keywords "or" / "and" (any case) or a bare search word
         - tuple: a search word with prefixes, see _prefixed_term
         - list: a parenthesized sub-expression (first element is the
           opening parenthesis), analysed recursively

        @param items: list of items
        @return: an AndExpression
        @raise QueryError: for an "or" without a left operand, an empty
                           search word or an invalid prefix
        """
        terms = AndExpression()
        for pos, item in enumerate(items):
            if isinstance(item, unicode):
                keyword = item.lower()
                if keyword == 'and':
                    # terms are ANDed anyway, nothing to do
                    continue
                if keyword != 'or':
                    terms.append(self._simple_term(item))
                    continue

                sub = terms.subterms()
                if len(sub) < 1:
                    raise QueryError('Nothing to OR')
                last = sub[-1]
                if last.__class__ == OrExpression:
                    orexpr = last
                else:
                    # Note: do NOT reduce "terms" when it has a single subterm only!
                    # Doing that would break "-someterm" searches as we rely on AndExpression
                    # doing a "MatchAll AND_NOT someterm" for that case!
                    orexpr = OrExpression(terms)
                terms = AndExpression(orexpr)
                # everything right of the "or" becomes the other operand,
                # so this loop is done afterwards
                remaining = self._analyse_items(items[pos + 1:])
                # NOTE(review): this method only ever returns objects it
                # created via AndExpression(...), so the OrExpression case
                # below looks unreachable - confirm before removing.
                if remaining.__class__ == OrExpression:
                    for sub in remaining.subterms():
                        orexpr.append(sub)
                else:
                    orexpr.append(remaining)
                break
            elif isinstance(item, tuple):
                terms.append(self._prefixed_term(item))
            elif isinstance(item, list):
                # strip off the opening parenthesis
                terms.append(self._analyse_items(item[1:]))

        # Note: do NOT reduce "terms" when it has a single subterm only!
        # Doing that would break "-someterm" searches as we rely on AndExpression
        # doing a "MatchAll AND_NOT someterm" for that case!
        return terms

    def parse_query(self, query):
        """ transform a string into a tree of Query objects

        @param query: the query string; a str is decoded using config.charset
        @return: the expression tree built by _analyse_items
        @raise QueryError: if the query has unbalanced brackets or can not
                           be analysed
        """
        import sys

        if isinstance(query, str):
            query = query.decode(config.charset)
        try:
            items = wikiutil.parse_quoted_separated_ext(query,
                                                        name_value_separator=':',
                                                        prefixes='-',
                                                        multikey=True,
                                                        brackets=('()', ),
                                                        quotes='\'"')
        except wikiutil.BracketError:
            # fetched via sys.exc_info() so this clause parses with old and
            # new Python versions alike
            err = sys.exc_info()[1]
            raise QueryError(str(err))
        # let the logger do the formatting, so nothing gets formatted
        # when debug logging is off
        logging.debug("parse_quoted_separated items: %r", items)
        query = self._analyse_items(items)
        logging.debug("analyse_items query: %r", query)
        return query