Back to index

plone3  3.1.7
testUnicodeSplitter.py
Go to the documentation of this file.
00001 #
00002 # Tests the UnicodeSplitter
00003 #
00004 
00005 from Products.CMFPlone.tests import PloneTestCase
00006 from Products.CMFPlone.tests import dummy
00007 
00008 from Products.CMFPlone.UnicodeSplitter import Splitter
00009 from Products.CMFPlone.UnicodeSplitter import CaseNormalizer
00010 
00011 import locale
00012 LATIN1 = ('de_DE.ISO8859-15', 'de_DE.ISO8859-15@euro', 'nl_NL.iso8859-1')
00013 
def _setlocale(*names):
    """Switch LC_ALL to the first of *names* that can be set.

    Returns the LC_ALL setting that was in effect before the call, so
    that it can be handed back to this function to restore it.  Returns
    None when none of the names could be set (or no names were given).
    """
    previous = locale.setlocale(locale.LC_ALL)
    for candidate in names:
        try:
            locale.setlocale(locale.LC_ALL, candidate)
        except locale.Error:
            # Not accepted by setlocale; try the next candidate.
            continue
        return previous
    return None
00025 
00026 
class TestSplitter(PloneTestCase.PloneTestCase):
    # Exercises Splitter.process and Splitter.processGlob.  Every test
    # expects both methods to return the same list of tokens.

    def afterSetUp(self):
        # Keep the splitter and its two bound methods on the test case.
        splitter = Splitter()
        self.splitter = splitter
        self.process = splitter.process
        self.processGlob = splitter.processGlob

    def _assertSplits(self, words, expected):
        # process() is checked first, then processGlob().
        self.assertEqual(self.process(words), expected)
        self.assertEqual(self.processGlob(words), expected)

    def _assertSplitsUnicodeAndUtf8(self, words, tokens):
        # Both arguments are lists of unicode strings.  The expected
        # result is always the UTF-8 encoded tokens, first for unicode
        # input and then for the same input encoded as UTF-8.
        expected = [token.encode('utf-8') for token in tokens]
        self._assertSplits(words, expected)
        encoded = [word.encode('utf-8') for word in words]
        self._assertSplits(encoded, expected)

    def testProcessGerman(self):
        # German letters
        self._assertSplitsUnicodeAndUtf8(
            [u"\xc4ffin foo"],
            [u"\xc4ffin", u"foo"])

    def testProcessGreek(self):
        # Greek letters
        self._assertSplitsUnicodeAndUtf8(
            [u'\u039d\u03af\u03ba\u03bf\u03c2 \u03a4\u03b6\u03ac\u03bd\u03bf\u03c2 foo'],
            [u'\u039d\u03af\u03ba\u03bf\u03c2',
             u'\u03a4\u03b6\u03ac\u03bd\u03bf\u03c2',
             u'foo'])

    def testProcessTurkish(self):
        # Turkish letters
        self._assertSplitsUnicodeAndUtf8(
            [u"\xdc\u011f\xfcr foo"],
            [u"\xdc\u011f\xfcr", u"foo"])

    def testProcessLatin1(self):
        #
        # Plain (non-unicode) string literals, checked with one of the
        # LATIN1 locales active.  Test passes because plone_lexicon
        # pipeline elements are coded defensively.
        #
        words = ["\xc4ffin foo"]
        expected = ["\xc4ffin", "foo"]

        # May still fail if none of the locales is available
        previous = _setlocale(*LATIN1)
        try:
            self._assertSplits(words, expected)
        finally:
            # Hand the earlier setting back, whatever the outcome.
            _setlocale(previous)
00089 
00090 
class TestCaseNormalizer(PloneTestCase.PloneTestCase):
    # Exercises CaseNormalizer.process on unicode, UTF-8 and plain
    # string input.

    def afterSetUp(self):
        # Keep the normalizer and its bound process method on the test case.
        normalizer = CaseNormalizer()
        self.normalizer = normalizer
        self.process = normalizer.process

    def testNormalizeGerman(self):
        # The expected result is UTF-8 encoded for both unicode input
        # and UTF-8 encoded input.
        words = [u"\xc4ffin"]
        expected = [token.encode('utf-8') for token in [u"\xe4ffin"]]

        self.assertEqual(self.process(words), expected)

        encoded = [word.encode('utf-8') for word in words]
        self.assertEqual(self.process(encoded), expected)

    def testNormalizeLatin1(self):
        #
        # Plain (non-unicode) string literals, checked with one of the
        # LATIN1 locales active.  Test passes because plone_lexicon
        # pipeline elements are coded defensively.
        #
        words = ["\xc4ffin"]
        expected = ["\xe4ffin"]

        # May still fail if none of the locales is available
        previous = _setlocale(*LATIN1)
        try:
            self.assertEqual(self.process(words), expected)
        finally:
            # Hand the earlier setting back, whatever the outcome.
            _setlocale(previous)
00121 
00122 
class TestQuery(PloneTestCase.PloneTestCase):
    # Indexes dummy items in portal_catalog and queries them back by
    # SearchableText using UTF-8, unicode and plain string values.

    def afterSetUp(self):
        self.catalog = self.portal.portal_catalog
        # Create doc1 and doc2 in the test folder and keep a reference
        # to each on the test case.
        for name in ('doc1', 'doc2'):
            self.folder._setObject(name, dummy.Item(name))
            setattr(self, name, getattr(self.folder, name))

    def _indexText(self, doc, text):
        # Set the item's SearchableText and (re)index it in the catalog.
        doc.SearchableText = text
        self.catalog.indexObject(doc)

    def _assertHits(self, query, count):
        # A SearchableText query must return exactly `count` brains.
        brains = self.catalog(SearchableText=query)
        self.assertEqual(len(brains), count)

    def testQueryByUmlaut(self):
        self._indexText(self.doc1, '\303\204ffin')
        self._assertHits('\303\204ffin', 1)

    def testQueryByUmlautLower(self):
        self._indexText(self.doc1, '\303\204ffin')
        self._assertHits('\303\244ffin', 1)

    def testQueryDifferentiatesUmlauts(self):
        self._indexText(self.doc1, '\303\204ffin')
        self._indexText(self.doc2, '\303\226ffin')
        self._assertHits('\303\226ffin', 1)

    def testQueryDifferentiatesUmlautsLower(self):
        self._indexText(self.doc1, '\303\204ffin')
        self._indexText(self.doc2, '\303\226ffin')
        self._assertHits('\303\266ffin', 1)

    def testQueryByLatin1(self):
        #
        # Test passes because plone_lexicon pipeline elements
        # are coded defensively.
        #
        previous = _setlocale(*LATIN1)
        try:
            self._indexText(self.doc1, '\xc4ffin')
            self._assertHits('\xc4ffin', 1)
        finally:
            _setlocale(previous)

    def testQueryByLatin1Lower(self):
        #
        # Test passes because plone_lexicon pipeline elements
        # are coded defensively.
        #
        previous = _setlocale(*LATIN1)
        try:
            self._indexText(self.doc1, '\xc4ffin')
            self._assertHits('\xe4ffin', 1)
        finally:
            _setlocale(previous)

    def testMixedModeQuery(self):
        #
        # Test passes because plone_lexicon pipeline elements
        # are coded defensively.
        #
        previous = _setlocale(*LATIN1)
        try:
            # Index Latin-1
            self._indexText(self.doc1, '\xc4ffin')
            # Query by UTF-8.
            # We get no results, but at least we don't break
            self._assertHits('\303\204ffin', 0)
        finally:
            _setlocale(previous)

    def testQueryByUnicode(self):
        self._indexText(self.doc1, '\303\204ffin')
        self._assertHits(u'\xc4ffin', 1)

    def testQueryByUnicodeLower(self):
        self._indexText(self.doc1, '\303\204ffin')
        self._assertHits(u'\xe4ffin', 1)

    def testIndexUnicode(self):
        self._indexText(self.doc1, u'\xc4ffin')
        self._assertHits('\303\204ffin', 1)

    def testIndexUnicodeLower(self):
        self._indexText(self.doc1, u'\xc4ffin')
        self._assertHits('\303\244ffin', 1)
00228 
00229 
def test_suite():
    """Return a TestSuite holding the tests of all three test classes."""
    from unittest import TestSuite, makeSuite
    suite = TestSuite()
    # Same order as the classes appear in this module.
    for case in (TestSplitter, TestCaseNormalizer, TestQuery):
        suite.addTest(makeSuite(case))
    return suite