Back to index

moin  1.9.0~rc2
test_search.py
Go to the documentation of this file.
00001 # -*- coding: iso-8859-1 -*-
00002 """
00003     MoinMoin - MoinMoin.search Tests
00004 
00005     @copyright: 2005 by Nir Soffer <nirs@freeshell.org>,
00006                 2007-2009 by MoinMoin:ThomasWaldmann
00007     @license: GNU GPL, see COPYING for details.
00008 """
00009 
00010 
00011 import os, StringIO, time
00012 
00013 import py
00014 
00015 from MoinMoin.search import QueryError, _get_searcher
00016 from MoinMoin.search.queryparser import QueryParser
00017 from MoinMoin.search.builtin import MoinSearch
00018 from MoinMoin._tests import nuke_xapian_index, wikiconfig, become_trusted, create_page, nuke_page, append_page
00019 from MoinMoin.wikiutil import Version
00020 from MoinMoin.action import AttachFile
00021 
# Oldest py lib release this module is willing to run under.
PY_MIN_VERSION = '1.0.0'

if Version(version=py.version) < Version(version=PY_MIN_VERSION):
    # Several tests below are generative (they yield their checks), which
    # older py versions cannot run.
    # XXX Refactor these tests so they also run with older versions of py.
    py.test.skip('Currently py version %s is needed' % (PY_MIN_VERSION, ))
00027 
00028 
class TestQueryParsing(object):
    """ search: query parser tests """

    def testQueryParser(self):
        """ search: parse valid queries and compare their string form """
        # Each entry is (query text, expected str() of the parsed query).
        # Even a single term is an AND expression (needed for xapian, which only
        # has AND_NOT but no simple NOT) - hence the many brackets.
        expectations = [
            ("a", '["a"]'),
            ("-a", '[-"a"]'),
            ("a b", '["a" "b"]'),
            ("a -b c", '["a" -"b" "c"]'),
            ("aaa bbb -ccc", '["aaa" "bbb" -"ccc"]'),
            ("title:aaa title:bbb -title:ccc", '[title:"aaa" title:"bbb" -title:"ccc"]'),
            ("title:case:aaa title:re:bbb -title:re:case:ccc", '[title:case:"aaa" title:re:"bbb" -title:re:case:"ccc"]'),
            ("linkto:aaa", '[linkto:"aaa"]'),
            ("category:aaa", '[category:"aaa"]'),
            ("domain:aaa", '[domain:"aaa"]'),
            ("re:case:title:aaa", '[title:re:case:"aaa"]'),
            ("(aaa or bbb) and (ccc or ddd)", '[[[["aaa"] or ["bbb"]]] [[["ccc"] or ["ddd"]]]]'),
            ("(aaa or bbb) (ccc or ddd)", '[[[["aaa"] or ["bbb"]]] [[["ccc"] or ["ddd"]]]]'),
            ("aaa or bbb", '[[["aaa"] or ["bbb"]]]'),
            ("aaa or bbb or ccc", '[[["aaa"] or [[["bbb"] or ["ccc"]]]]]'),
            ("aaa or bbb and ccc", '[[["aaa"] or ["bbb" "ccc"]]]'),
            ("aaa and bbb or ccc", '[[["aaa" "bbb"] or ["ccc"]]]'),
            ("aaa and bbb and ccc", '["aaa" "bbb" "ccc"]'),
            ("aaa or bbb and ccc or ddd", '[[["aaa"] or [[["bbb" "ccc"] or ["ddd"]]]]]'),
            ("aaa or bbb ccc or ddd", '[[["aaa"] or [[["bbb" "ccc"] or ["ddd"]]]]]'),
            ("(HelpOn) (Administration)", '[["HelpOn"] ["Administration"]]'),
            ("(HelpOn) (-Administration)", '[["HelpOn"] [-"Administration"]]'),
            ("(HelpOn) and (-Administration)", '[["HelpOn"] [-"Administration"]]'),
            ("(HelpOn) and (Administration) or (Configuration)", '[[[["HelpOn"] ["Administration"]] or [["Configuration"]]]]'),
            ("(a) and (b) or (c) or -d", '[[[["a"] ["b"]] or [[[["c"]] or [-"d"]]]]]'),
            ("a b c d e or f g h", '[[["a" "b" "c" "d" "e"] or ["f" "g" "h"]]]'),
            ('"no', '[""no"]'),
            ('no"', '["no""]'),
            ("'no", "[\"'no\"]"),
            ("no'", "[\"no'\"]"),
            ('"no\'', '[""no\'"]'),
        ]

        query_parser = QueryParser()
        for query_text, expected_repr in expectations:
            parsed = query_parser.parse_query(query_text)
            assert str(parsed) == expected_repr

    def testQueryParserExceptions(self):
        """ search: malformed queries must make the parser raise QueryError """
        query_parser = QueryParser()

        def _expect_query_error(bad_query):
            py.test.raises(QueryError, query_parser.parse_query, bad_query)

        # generative test: one check is yielded per malformed query
        malformed_queries = ['""', '(', ')', '(a or b']
        for bad_query in malformed_queries:
            yield _expect_query_error, bad_query
00081 
00082 
00083 class BaseSearchTest(object):
00084     """ search: test search """
00085     doesnotexist = u'jfhsdaASDLASKDJ'
00086 
00087     # key - page name, value - page content. If value is None page
00088     # will not be created but will be used for a search. None should
00089     # be used for pages which already exist.
00090     pages = {u'SearchTestPage': u'this is a test page',
00091              u'SearchTestLinks': u'SearchTestPage',
00092              u'SearchTestLinksLowerCase': u'searchtestpage',
00093              u'SearchTestOtherLinks': u'SearchTestLinks',
00094              u'TestEdit': u'TestEdit',
00095              u'LanguageSetup': None,
00096              u'CategoryHomepage': None,
00097              u'HomePageWiki': None,
00098              u'FrontPage': None,
00099              u'RecentChanges': None,
00100              u'HelpOnCreoleSyntax': None,
00101              u'HelpOnEditing': None,
00102              u'HelpIndex': None}
00103 
00104     searcher_class = None
00105 
00106     def _index_update(self):
00107         pass
00108 
00109     @classmethod
00110     def setup_class(cls):
00111         request = cls.request
00112         become_trusted(request)
00113 
00114         for page, text in cls.pages.iteritems():
00115             if text:
00116                 create_page(request, page, text)
00117 
00118     def teardown_class(self):
00119         for page, text in self.pages.iteritems():
00120             if text:
00121                 nuke_page(self.request, page)
00122 
00123     def get_searcher(self, query):
00124         raise NotImplementedError
00125 
00126     def search(self, query):
00127         if isinstance(query, str) or isinstance(query, unicode):
00128             query = QueryParser().parse_query(query)
00129 
00130         return self.get_searcher(query).run()
00131 
00132     def test_title_search_simple(self):
00133         searches = {u'title:SearchTestPage': 1,
00134                     u'title:LanguageSetup': 1,
00135                     u'title:HelpIndex': 1,
00136                     u'title:Help': 3,
00137                     u'title:HelpOn': 2,
00138                     u'title:SearchTestNotExisting': 0,
00139                     u'title:FrontPage': 1,
00140                     u'title:HelpOnEditing': 1}
00141 
00142         def test(query, res_count):
00143             result = self.search(query)
00144             assert len(result.hits) == res_count
00145 
00146         for query, res_count in searches.iteritems():
00147             yield query, test, query, res_count
00148 
00149     def test_title_search_re(self):
00150         result = self.search(ur'title:re:\bSearchTest')
00151         assert len(result.hits) == 4
00152 
00153         result = self.search(ur'title:re:\bSearchTest\b')
00154         assert not result.hits
00155 
00156     def test_title_search_case(self):
00157         result = self.search(u'title:case:SearchTestPage')
00158         assert len(result.hits) == 1
00159 
00160         result = self.search(u'title:case:searchtestpage')
00161         assert not result.hits
00162 
00163     def test_title_search_case_re(self):
00164         result = self.search(ur'title:case:re:\bSearchTestPage\b')
00165         assert len(result.hits) == 1
00166 
00167         result = self.search(ur'title:case:re:\bsearchtestpage\b')
00168         assert not result.hits
00169 
00170     def test_linkto_search_simple(self):
00171         result = self.search(u'linkto:SearchTestPage')
00172         assert len(result.hits) == 1
00173 
00174         result = self.search(u'linkto:SearchTestNotExisting')
00175         assert not result.hits
00176 
00177     def test_linkto_search_re(self):
00178         result = self.search(ur'linkto:re:\bSearchTest')
00179         assert len(result.hits) == 2
00180 
00181         result = self.search(ur'linkto:re:\bSearchTest\b')
00182         assert not result.hits
00183 
00184     def test_linkto_search_case(self):
00185         result = self.search(u'linkto:case:SearchTestPage')
00186         assert len(result.hits) == 1
00187 
00188         result = self.search(u'linkto:case:searchtestpage')
00189         assert not result.hits
00190 
00191     def test_linkto_search_case_re(self):
00192         result = self.search(ur'linkto:case:re:\bSearchTestPage\b')
00193         assert len(result.hits) == 1
00194 
00195         result = self.search(ur'linkto:case:re:\bsearchtestpage\b')
00196         assert not result.hits
00197 
00198     def test_category_search_simple(self):
00199         result = self.search(u'category:CategoryHomepage')
00200         assert len(result.hits) == 1
00201 
00202         result = self.search(u'category:CategorySearchTestNotExisting')
00203         assert not result.hits
00204 
00205     def test_category_search_re(self):
00206         result = self.search(ur'category:re:\bCategoryHomepage\b')
00207         assert len(result.hits) == 1
00208 
00209         result = self.search(ur'category:re:\bCategoryHomepa\b')
00210         assert not result.hits
00211 
00212     def test_category_search_case(self):
00213         result = self.search(u'category:case:CategoryHomepage')
00214         assert len(result.hits) == 1
00215 
00216         result = self.search(u'category:case:categoryhomepage')
00217         assert not result.hits
00218 
00219     def test_category_search_case_re(self):
00220         result = self.search(ur'category:case:re:\bCategoryHomepage\b')
00221         assert len(result.hits) == 1
00222 
00223         result = self.search(ur'category:case:re:\bcategoryhomepage\b')
00224         assert not result.hits
00225 
00226     def test_mimetype_search_simple(self):
00227         result = self.search(u'mimetype:text/wiki')
00228         assert len(result.hits) == 12
00229 
00230     def test_mimetype_search_re(self):
00231         result = self.search(ur'mimetype:re:\btext/wiki\b')
00232         assert len(result.hits) == 12
00233 
00234         result = self.search(ur'category:re:\bCategoryHomepa\b')
00235         assert not result.hits
00236 
00237     def test_language_search_simple(self):
00238         result = self.search(u'language:en')
00239         assert len(result.hits) == 12
00240 
00241     def test_domain_search_simple(self):
00242         result = self.search(u'domain:system')
00243         assert result.hits
00244 
00245     def test_search_and(self):
00246         """ search: title search with AND expression """
00247         result = self.search(u"title:HelpOnCreoleSyntax lang:en")
00248         assert len(result.hits) == 1
00249 
00250         result = self.search(u"title:HelpOnCreoleSyntax lang:de")
00251         assert len(result.hits) == 0
00252 
00253         result = self.search(u"title:Help title:%s" % self.doesnotexist)
00254         assert not result.hits
00255 
00256     def testTitleSearchOR(self):
00257         """ search: title search with OR expression """
00258         result = self.search(u"title:FrontPage or title:RecentChanges")
00259         assert len(result.hits) == 2
00260 
00261     def testTitleSearchNegatedFindAll(self):
00262         """ search: negated title search for some pagename that does not exist results in all pagenames """
00263         result = self.search(u"-title:%s" % self.doesnotexist)
00264         assert len(result.hits) == len(self.pages)
00265 
00266     def testTitleSearchNegativeTerm(self):
00267         """ search: title search for a AND expression with a negative term """
00268         result = self.search(u"-title:FrontPage")
00269         assert len(result.hits) == len(self.pages) - 1
00270 
00271         result = self.search(u"-title:HelpOn")
00272         assert len(result.hits) == len(self.pages) - 2
00273 
00274     def testFullSearchNegatedFindAll(self):
00275         """ search: negated full search for some string that does not exist results in all pages """
00276         result = self.search(u"-%s" % self.doesnotexist)
00277         assert len(result.hits) == len(self.pages)
00278 
00279     def test_title_search(self):
00280         query = QueryParser(titlesearch=True).parse_query('FrontPage')
00281         result = self.search(query)
00282         assert len(result.hits) == 1
00283 
00284     def test_create_page(self):
00285         self.pages['TestCreatePage'] = 'some text' # Moin search must search this page
00286         try:
00287             create_page(self.request, 'TestCreatePage', self.pages['TestCreatePage'])
00288             self._index_update()
00289             result = self.search(u'TestCreatePage')
00290             assert len(result.hits) == 1
00291         finally:
00292             nuke_page(self.request, 'TestCreatePage')
00293             self._index_update()
00294             del self.pages['TestCreatePage']
00295             result = self.search(u'TestCreatePage')
00296             assert len(result.hits) == 0
00297 
00298     def test_attachment(self):
00299         page_name = u'TestAttachment'
00300         self.pages[page_name] = 'some text' # Moin search must search this page
00301 
00302         filename = "AutoCreatedSillyAttachmentForSearching.png"
00303         data = "Test content"
00304         filecontent = StringIO.StringIO(data)
00305 
00306         result = self.search(filename)
00307         assert len(result.hits) == 0
00308 
00309         try:
00310             create_page(self.request, page_name, self.pages[page_name])
00311             AttachFile.add_attachment(self.request, page_name, filename, filecontent, True)
00312             append_page(self.request, page_name, '[[attachment:%s]]' % filename)
00313             self._index_update()
00314             result = self.search(filename)
00315             assert len(result.hits) > 0
00316         finally:
00317             nuke_page(self.request, page_name)
00318             del self.pages[page_name]
00319             self._index_update()
00320             result = self.search(filename)
00321             assert len(result.hits) == 0
00322 
00323     def test_get_searcher(self):
00324         assert isinstance(_get_searcher(self.request, ''), self.searcher_class)
00325 
00326 
class TestMoinSearch(BaseSearchTest):
    """ search: run the shared search tests with the builtin MoinSearch """

    searcher_class = MoinSearch

    def get_searcher(self, query):
        """ Return a MoinSearch that gets the names in self.pages as its `pages` argument. """
        candidates = []
        for name in self.pages:
            candidates.append({'pagename': name, 'attachment': '', 'wikiname': 'Self', })
        return MoinSearch(self.request, query, pages=candidates)

    def test_stemming(self):
        """ search: hit counts for title:edit and title:editing """
        hits_for_edit = self.search(u"title:edit").hits
        assert len(hits_for_edit) == 2

        hits_for_editing = self.search(u"title:editing").hits
        assert len(hits_for_editing) == 1
00341 
00342 
00343 class TestXapianSearch(BaseSearchTest):
00344     """ search: test Xapian indexing """
00345 
00346     class Config(wikiconfig.Config):
00347 
00348         xapian_search = True
00349 
00350     def _index_update(self):
00351         # for xapian, we queue index updates so they can get indexed later.
00352         # here we make sure the queue will be processed completely,
00353         # before we continue:
00354         from MoinMoin.search.Xapian import XapianIndex
00355         XapianIndex(self.request).do_queued_updates()
00356 
00357     def get_searcher(self, query):
00358         from MoinMoin.search.Xapian.search import XapianSearch
00359         return XapianSearch(self.request, query)
00360 
00361     def get_moin_search_connection(self):
00362         from MoinMoin.search.Xapian import XapianIndex
00363         return XapianIndex(self.request).get_search_connection()
00364 
00365     def setup_class(self):
00366 
00367         try:
00368             from MoinMoin.search.Xapian import XapianIndex
00369             from MoinMoin.search.Xapian.search import XapianSearch
00370             self.searcher_class = XapianSearch
00371 
00372         except ImportError, error:
00373             if not str(error).startswith('Xapian '):
00374                 raise
00375 
00376             py.test.skip('xapian is not installed')
00377 
00378         nuke_xapian_index(self.request)
00379         index = XapianIndex(self.request)
00380         # Additionally, pages which were not created but supposed to be searched
00381         # are indexed.
00382         pages_to_index = [page for page in self.pages if not self.pages[page]]
00383         index.indexPages(mode='add', pages=pages_to_index)
00384 
00385         super(TestXapianSearch, self).setup_class()
00386 
00387     def teardown_class(self):
00388         nuke_xapian_index(self.request)
00389 
00390     def test_get_all_documents(self):
00391         connection = self.get_moin_search_connection()
00392         documents = connection.get_all_documents()
00393 
00394         assert len(self.pages) == len(documents)
00395         for document in documents:
00396             assert document.data['pagename'][0] in self.pages.keys()
00397 
00398     def test_xapian_term(self):
00399         parser = QueryParser()
00400         connection = self.get_moin_search_connection()
00401 
00402         prefixes = {u'': ([u'', u're:', u'case:', u'case:re:'], u'SearchTestPage'),
00403                     u'title:': ([u'', u're:', u'case:', u'case:re:'], u'SearchTestPage'),
00404                     u'linkto:': ([u'', u're:', u'case:', u'case:re:'], u'FrontPage'),
00405                     u'category:': ([u'', u're:', u'case:', u'case:re:'], u'CategoryHomepage'),
00406                     u'mimetype:': ([u'', u're:'], u'text/wiki'),
00407                     u'language:': ([u''], u'en'),
00408                     u'domain:': ([u''], u'system')}
00409 
00410         def test_query(query):
00411             query_ = parser.parse_query(query).xapian_term(self.request, connection)
00412             print str(query_)
00413             assert not query_.empty()
00414 
00415         for prefix, data in prefixes.iteritems():
00416             modifiers, term = data
00417             for modifier in modifiers:
00418                 query = ''.join([prefix, modifier, term])
00419                 yield query, test_query, query
00420 
00421     def test_stemming(self):
00422         result = self.search(u"title:edit")
00423         assert len(result.hits) == 1
00424 
00425         result = self.search(u"title:editing")
00426         assert len(result.hits) == 1
00427 
00428 
class TestXapianSearchStemmed(TestXapianSearch):
    """ search: Xapian tests with xapian_stemming switched on in the wiki config """

    class Config(wikiconfig.Config):

        xapian_stemming = True
        xapian_search = True

    def test_stemming(self):
        """ search: hit counts with stemming (currently skipped) """
        py.test.skip("TODO fix TestXapianSearchStemmed - strange effects with stemming")

        # Not reached while the skip above is in place.
        for query, expected_hits in [(u"title:edit", 2), (u"title:editing", 2)]:
            result = self.search(query)
            assert len(result.hits) == expected_hits
00443 
00444 
class TestGetSearcher(object):
    """ search: searcher selection when xapian_search is configured """

    class Config(wikiconfig.Config):

        xapian_search = True

    def test_get_searcher(self):
        """ search: _get_searcher() must hand out a MoinSearch here """
        searcher = _get_searcher(self.request, '')
        message = 'Xapian index is not created, despite the configuration, MoinSearch must be used!'
        assert isinstance(searcher, MoinSearch), message
00453 
# Modules named for coverage measurement of this test module.
coverage_modules = [
    'MoinMoin.search',
]
00455