Back to index

moin  1.9.0~rc2
Classes | Functions | Variables
MoinMoin.support.parsedatetime.parsedatetime_consts Namespace Reference

Classes

class  pdtLocale_en
class  pdtLocale_au
class  pdtLocale_es
class  pdtLocale_de
class  Constants

Functions

def _initLocale
def _initSymbols
def _initPatterns
def _initConstants

Variables

string __license__
 pyicu = None
dictionary pdtLocales

Function Documentation

_initConstants(ptc): Create localized versions of the units, week and month names

Definition at line 841 of file parsedatetime_consts.py.

00841 
def _initConstants(ptc):
    """
    Create the weekday and month offset lookup tables

    Builds two dictionaries on ptc from the name lists already stored
    there:

      ptc.WeekdayOffsets - maps every entry of ptc.Weekdays and of
                           ptc.shortWeekdays to its position in its
                           own list, counting from 0
      ptc.MonthOffsets   - maps every entry of ptc.Months and of
                           ptc.shortMonths to its position in its
                           own list, counting from 1

    Nothing is returned; ptc is modified in place.
    """
    # build weekday offsets - yes, it assumes the Weekday and shortWeekday
    # lists are in the same order and Mon..Sun (Python style)
    ptc.WeekdayOffsets = {}

    for names in (ptc.Weekdays, ptc.shortWeekdays):
        for offset, name in enumerate(names):
            ptc.WeekdayOffsets[name] = offset

    # build month offsets - yes, it assumes the Months and shortMonths
    # lists are in the same order and Jan..Dec
    ptc.MonthOffsets = {}

    for names in (ptc.Months, ptc.shortMonths):
        for offset, name in enumerate(names):
            # months are numbered from 1, unlike the weekdays
            ptc.MonthOffsets[name] = offset + 1

    # ptc.DaySuffixes = ptc.re_consts['daysuffix'].split('|')
00873 

_initLocale(ptc): Helper function to initialize the different lists and
strings from either PyICU or one of the internal pdt Locales and store
them into ptc.

Definition at line 479 of file parsedatetime_consts.py.

00479 
def _initLocale(ptc):
    """
    Helper function to initialize the different lists and strings
    from either PyICU or one of the internal pdt Locales and store
    them into ptc.

    Reads ptc.localeID, ptc.usePyICU and ptc.fallbackLocales.

    When PyICU is available and ptc.usePyICU is set, the weekday and
    month names and the date/time format patterns come from ICU
    (ptc.icuLocale, ptc.icuSymbols, ptc.icu_df and ptc.icu_tf are set
    as well).  Otherwise they are copied from the entry of pdtLocales
    selected by ptc.localeID - or by the first matching entry of
    ptc.fallbackLocales - and ptc.usePyICU is forced to False.

    In both cases the following are then set:

      ptc.re_sources, ptc.Modifiers, ptc.dayOffsets
          taken from the matching pdtLocales entry, en_US if none
      ptc.re_values
          a private copy of that entry's re_consts, completed with any
          keys only present in the en_US set and with the 'months',
          'shortmonths', 'days', 'shortdays' and 'units' alternations
      ptc.Units
          the flat list of all unit words

    Nothing is returned; ptc is modified in place.  A KeyError is
    raised in the non-ICU case when neither ptc.localeID nor any of the
    fallback locales is present in pdtLocales.
    """
    if pyicu and ptc.usePyICU:
        ptc.icuLocale = None

        if ptc.localeID is not None:
            ptc.icuLocale = pyicu.Locale(ptc.localeID)

        if ptc.icuLocale is None:
            for fallbackID in ptc.fallbackLocales:
                ptc.localeID  = fallbackID
                ptc.icuLocale = pyicu.Locale(ptc.localeID)

                if ptc.icuLocale is not None:
                    break

        ptc.icuSymbols = pyicu.DateFormatSymbols(ptc.icuLocale)

        # grab ICU list of weekdays, skipping first entry which
        # is always blank
        # (real lists are built because they are sliced just below)
        wd  = [ name.lower() for name in ptc.icuSymbols.getWeekdays()[1:] ]
        swd = [ name.lower() for name in ptc.icuSymbols.getShortWeekdays()[1:] ]

        # store them in our list with Monday first (ICU puts Sunday first)
        ptc.Weekdays      = wd[1:] + wd[0:1]
        ptc.shortWeekdays = swd[1:] + swd[0:1]
        ptc.Months        = [ name.lower() for name in ptc.icuSymbols.getMonths() ]
        ptc.shortMonths   = [ name.lower() for name in ptc.icuSymbols.getShortMonths() ]

        # not quite sure how to init this so for now
        # set it to none so it will be set to the en_US defaults for now
        ptc.re_consts   = None

        # one ICU formatter, and its pattern string, per style
        ptc.icu_df      = {}
        ptc.icu_tf      = {}
        ptc.dateFormats = {}
        ptc.timeFormats = {}

        for style, icuStyle in (('full',   pyicu.DateFormat.kFull),
                                ('long',   pyicu.DateFormat.kLong),
                                ('medium', pyicu.DateFormat.kMedium),
                                ('short',  pyicu.DateFormat.kShort)):
            ptc.icu_df[style]      = pyicu.DateFormat.createDateInstance(icuStyle, ptc.icuLocale)
            ptc.icu_tf[style]      = pyicu.DateFormat.createTimeInstance(icuStyle, ptc.icuLocale)
            ptc.dateFormats[style] = ptc.icu_df[style].toPattern()
            ptc.timeFormats[style] = ptc.icu_tf[style].toPattern()
    else:
        if ptc.localeID not in pdtLocales:
            for fallbackID in ptc.fallbackLocales:
                ptc.localeID = fallbackID

                if ptc.localeID in pdtLocales:
                    break

        ptc.locale   = pdtLocales[ptc.localeID]
        ptc.usePyICU = False

        ptc.Weekdays      = ptc.locale.Weekdays
        ptc.shortWeekdays = ptc.locale.shortWeekdays
        ptc.Months        = ptc.locale.Months
        ptc.shortMonths   = ptc.locale.shortMonths
        ptc.dateFormats   = ptc.locale.dateFormats
        ptc.timeFormats   = ptc.locale.timeFormats

    # these values are used to setup the various bits
    # of the regex values used to parse
    #
    # check if a local set of constants has been
    # provided, if not use en_US as the default
    if ptc.localeID in pdtLocales:
        source = pdtLocales[ptc.localeID]
    else:
        source = pdtLocales['en_US']

    ptc.re_sources = source.re_sources
    ptc.Modifiers  = source.modifiers
    ptc.dayOffsets = source.dayoffsets
    units          = source.units

    # work on a copy: re_consts belongs to the locale definition itself,
    # and the keys added below must not leak into it (and from there
    # into every other user of that locale)
    ptc.re_values = dict(source.re_consts)

    # for now, pull over any missing keys from the US set
    for key, value in pdtLocales['en_US'].re_consts.items():
        ptc.re_values.setdefault(key, value)

    # escape any regex special characters that may be found
    ptc.re_values['months']      = '|'.join([ re.escape(name) for name in ptc.Months ])
    ptc.re_values['shortmonths'] = '|'.join([ re.escape(name) for name in ptc.shortMonths ])
    ptc.re_values['days']        = '|'.join([ re.escape(name) for name in ptc.Weekdays ])
    ptc.re_values['shortdays']   = '|'.join([ re.escape(name) for name in ptc.shortWeekdays ])

    # every spelling of every unit, as one alternation and as a flat list
    ptc.re_values['units'] = '|'.join([ '|'.join(units[unit]) for unit in units ])
    ptc.Units              = ptc.re_values['units'].split('|')
00600 

_initPatterns(ptc): Helper function to take the different localized bits
from ptc and create the regex strings.

Definition at line 710 of file parsedatetime_consts.py.

00710 
def _initPatterns(ptc):
    """
    Helper function to take the different localized bits from ptc and
    create the regex strings.

    Reads ptc.re_values - the mapping of regex fragments, of which the
    keys 'months', 'shortmonths', 'days', 'shortdays', 'daysuffix',
    'specials', 'units', 'qunits', 'timeseperator', 'rangeseperator'
    and (optionally) 'meridian' are used - and ptc.dateSep, a sequence
    of date separator characters.

    Stores the resulting pattern strings on ptc as the RE_*, DATERNG*
    and TIMERNG* attributes.  The patterns are only assembled here, not
    compiled; most of them are laid out over several lines.
    NOTE(review): presumably they are compiled with re.VERBOSE by the
    caller - confirm.

    Nothing is returned; ptc is modified in place.
    """
    # TODO add code to parse the date formats and build the regexes up from sub-parts
    # TODO find all hard-coded uses of date/time separators

    ptc.RE_DATE4     = r'''(?P<date>(((?P<day>\d\d?)(?P<suffix>%(daysuffix)s)?(,)?(\s)?)
                                      (?P<mthname>(%(months)s|%(shortmonths)s))\s?
                                      (?P<year>\d\d(\d\d)?)?
                                    )
                           )''' % ptc.re_values

    # I refactored DATE3 to fix Issue 16 http://code.google.com/p/parsedatetime/issues/detail?id=16
    # I suspect the final line was for a trailing time - but testing shows it's not needed
    # ptc.RE_DATE3     = r'''(?P<date>((?P<mthname>(%(months)s|%(shortmonths)s))\s?
    #                                  ((?P<day>\d\d?)(\s?|%(daysuffix)s|$)+)?
    #                                  (,\s?(?P<year>\d\d(\d\d)?))?))
    #                        (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values
    ptc.RE_DATE3     = r'''(?P<date>(
                                     (((?P<mthname>(%(months)s|%(shortmonths)s))|
                                     ((?P<day>\d\d?)(?P<suffix>%(daysuffix)s)?))(\s)?){1,2}
                                     ((,)?(\s)?(?P<year>\d\d(\d\d)?))?
                                    )
                           )''' % ptc.re_values
    ptc.RE_MONTH     = r'''(\s?|^)
                           (?P<month>(
                                      (?P<mthname>(%(months)s|%(shortmonths)s))
                                      (\s?(?P<year>(\d\d\d\d)))?
                                     ))
                           (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values
    ptc.RE_WEEKDAY   = r'''(\s?|^)
                           (?P<weekday>(%(days)s|%(shortdays)s))
                           (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values

    ptc.RE_SPECIAL   = r'(?P<special>^[%(specials)s]+)\s+' % ptc.re_values
    ptc.RE_UNITS     = r'''(?P<qty>(-?\d+\s*
                                    (?P<units>((%(units)s)s?))
                                   ))''' % ptc.re_values
    ptc.RE_QUNITS    = r'''(?P<qty>(-?\d+\s?
                                    (?P<qunits>%(qunits)s)
                                    (\s?|,|$)
                                   ))''' % ptc.re_values
    ptc.RE_MODIFIER  = r'''(\s?|^)
                           (?P<modifier>
                            (previous|prev|last|next|eod|eo|(end\sof)|(in\sa)))''' % ptc.re_values
    ptc.RE_MODIFIER2 = r'''(\s?|^)
                           (?P<modifier>
                            (from|before|after|ago|prior))
                           (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values
    ptc.RE_TIMEHMS   = r'''(\s?|^)
                           (?P<hours>\d\d?)
                           (?P<tsep>%(timeseperator)s|)
                           (?P<minutes>\d\d)
                           (?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?''' % ptc.re_values
    ptc.RE_TIMEHMS2  = r'''(?P<hours>(\d\d?))
                           ((?P<tsep>%(timeseperator)s|)
                            (?P<minutes>(\d\d?))
                            (?:(?P=tsep)
                               (?P<seconds>\d\d?
                                (?:[.,]\d+)?))?)?''' % ptc.re_values

    if 'meridian' in ptc.re_values:
        ptc.RE_TIMEHMS2 += r'\s?(?P<meridian>(%(meridian)s))' % ptc.re_values

    # the separators end up inside a [...] character class, so each one
    # is escaped - otherwise a '-' that is not listed first (or a ']',
    # '^' or '\') corrupts the class or keeps it from compiling at all.
    # '.' is always accepted as a date separator as well.
    dateSeps = ''.join([ re.escape(sep) for sep in ptc.dateSep ]) + '.'

    ptc.RE_DATE      = r'''(\s?|^)
                           (?P<date>(\d\d?[%s]\d\d?([%s]\d\d(\d\d)?)?))
                           (\s?|$|[^0-9a-zA-Z])''' % (dateSeps, dateSeps)
    ptc.RE_DATE2     = r'[%s]' % dateSeps
    ptc.RE_DAY       = r'''(\s?|^)
                           (?P<day>(today|tomorrow|yesterday))
                           (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values
    ptc.RE_DAY2      = r'''(?P<day>\d\d?)|(?P<suffix>%(daysuffix)s)
                        ''' % ptc.re_values
    ptc.RE_TIME      = r'''(\s?|^)
                           (?P<time>(morning|breakfast|noon|lunch|evening|midnight|tonight|dinner|night|now))
                           (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values
    ptc.RE_REMAINING = r'\s+'

    # Regex for date/time ranges
    ptc.RE_RTIMEHMS  = r'''(\s?|^)
                           (\d\d?)%(timeseperator)s
                           (\d\d)
                           (%(timeseperator)s(\d\d))?
                           (\s?|$)''' % ptc.re_values
    ptc.RE_RTIMEHMS2 = r'''(\s?|^)
                           (\d\d?)
                           (%(timeseperator)s(\d\d?))?
                           (%(timeseperator)s(\d\d?))?''' % ptc.re_values

    if 'meridian' in ptc.re_values:
        ptc.RE_RTIMEHMS2 += r'\s?(%(meridian)s)' % ptc.re_values

    ptc.RE_RDATE  = r'(\d+([%s]\d+)+)' % dateSeps
    ptc.RE_RDATE3 = r'''((((%(months)s))\s?
                         ((\d\d?)
                          (\s?|%(daysuffix)s|$)+)?
                         (,\s?\d\d\d\d)?))''' % ptc.re_values

    # the piece that joins the two halves of every range pattern; it is
    # the only part still holding a placeholder, so it alone is formatted
    # (the RE_R* halves above have already been through '%' once)
    rangeSep = r'\s?%(rangeseperator)s\s?' % ptc.re_values

    # "06/07/06 - 08/09/06"
    ptc.DATERNG1 = ptc.RE_RDATE + rangeSep + ptc.RE_RDATE

    # "march 31 - june 1st, 2006"
    ptc.DATERNG2 = ptc.RE_RDATE3 + rangeSep + ptc.RE_RDATE3

    # "march 1rd -13th"
    ptc.DATERNG3 = ptc.RE_RDATE3 + rangeSep + r'(\d\d?)\s?(rd|st|nd|th)?'

    # "4:00:55 pm - 5:90:44 am", '4p-5p'
    ptc.TIMERNG1 = ptc.RE_RTIMEHMS2 + rangeSep + ptc.RE_RTIMEHMS2

    # "4:00 - 5:90 ", "4:55:55-3:44:55"
    ptc.TIMERNG2 = ptc.RE_RTIMEHMS + rangeSep + ptc.RE_RTIMEHMS

    # "4-5pm "
    ptc.TIMERNG3 = r'\d\d?' + rangeSep + ptc.RE_RTIMEHMS2

    # "4:30-5pm "
    ptc.TIMERNG4 = ptc.RE_RTIMEHMS + rangeSep + ptc.RE_RTIMEHMS2
00840 

_initSymbols(ptc): Helper function to initialize the single character
constants and other symbols needed.

Definition at line 601 of file parsedatetime_consts.py.

00601 
def _initSymbols(ptc):
    """
    Helper function to initialize the single character constants
    and other symbols needed.

    Sets on ptc:

      ptc.timeSep, ptc.dateSep  - one-element lists in the PyICU case,
                                  the locale's own lists otherwise
      ptc.meridian              - the [ am, pm ] texts
      ptc.usesMeridian, ptc.uses24
      ptc.dp_order              - order of the date parts
      ptc.am, ptc.pm            - the meridian text followed by its first
                                  character, its lowercase form and the
                                  first character of the lowercase form

    When PyICU is available and ptc.usePyICU is set the values are
    worked out from ptc.icu_tf['short'], ptc.icu_df['short'],
    ptc.timeFormats['short'] and ptc.dateFormats['short']; otherwise
    they are copied from ptc.locale.

    Nothing is returned; ptc is modified in place.
    """
    ptc.timeSep  = [ u':' ]
    ptc.dateSep  = [ u'/' ]
    ptc.meridian = [ u'AM', u'PM' ]

    ptc.usesMeridian = True
    ptc.uses24       = False

    if pyicu and ptc.usePyICU:
        am = u''
        pm = u''
        ts = ''

        # ICU doesn't seem to provide directly the
        # date or time separator - so we have to
        # figure it out by formatting a known date
        # and stripping the digits back out
        timeFormatter = ptc.icu_tf['short']
        timePattern   = ptc.timeFormats['short']

        ptc.usesMeridian = u'a' in timePattern
        ptc.uses24       = u'H' in timePattern

        # '11:45 AM' or '11:45'
        s = timeFormatter.format(datetime.datetime(2003, 10, 30, 11, 45))

        # ': AM' or ':'
        s = s.replace('11', '').replace('45', '')

        if len(s) > 0:
            ts = s[0]

        if ptc.usesMeridian:
            am = s[1:].strip()

            # '11:45 PM' or '23:45 PM'
            s  = timeFormatter.format(datetime.datetime(2003, 10, 30, 23, 45))

            if ptc.uses24:
                s = s.replace('23', '')
            else:
                s = s.replace('11', '')

            # 'PM' or ''
            pm = s.replace('45', '').replace(ts, '').strip()

        ptc.timeSep  = [ ts ]
        ptc.meridian = [ am, pm ]

        dateFormatter = ptc.icu_df['short']
        s = dateFormatter.format(datetime.datetime(2003, 10, 30, 11, 45))

        # the four digit year has to go first: once '03' has been removed
        # '2003' can no longer match, and the '20' left behind would be
        # taken for the separator
        s = s.replace('2003', '').replace('10', '').replace('30', '').replace('03', '')

        if len(s) > 0:
            ds = s[0]
        else:
            ds = '/'

        ptc.dateSep = [ ds ]

        # first letter of each part of the short date pattern, in order
        dp_order = []

        for part in ptc.dateFormats['short'].lower().split(ds):
            if len(part) > 0:
                dp_order.append(part[:1])

        ptc.dp_order = dp_order
    else:
        ptc.timeSep      = ptc.locale.timeSep
        ptc.dateSep      = ptc.locale.dateSep
        ptc.meridian     = ptc.locale.meridian
        ptc.usesMeridian = ptc.locale.usesMeridian
        ptc.uses24       = ptc.locale.uses24
        ptc.dp_order     = ptc.locale.dp_order

    # build am and pm lists to contain
    # original case, lowercase and first-char
    # versions of the meridian text

    def meridianForms(index):
        # no such entry in ptc.meridian: two empty placeholders
        if len(ptc.meridian) <= index:
            return [ '', '' ]

        text  = ptc.meridian[index]
        forms = [ text ]

        # an empty meridian text has no first character to add
        if len(text) > 0:
            lowered = text.lower()
            forms.extend([ text[0], lowered, lowered[0] ])

        return forms

    ptc.am = meridianForms(0)
    ptc.pm = meridianForms(1)
00709 


Variable Documentation

__license__ - initial value:
00001 """
00002 Copyright (c) 2004-2008 Mike Taylor
00003 Copyright (c) 2006-2008 Darshana Chhajed
00004 Copyright (c)      2007 Bernd Zeimetz <bzed@debian.org>
00005 All rights reserved.
00006 
00007 Licensed under the Apache License, Version 2.0 (the "License");
00008 you may not use this file except in compliance with the License.
00009 You may obtain a copy of the License at
00010 
00011    http://www.apache.org/licenses/LICENSE-2.0
00012 
00013 Unless required by applicable law or agreed to in writing, software
00014 distributed under the License is distributed on an "AS IS" BASIS,
00015 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 See the License for the specific language governing permissions and
00017 limitations under the License.
00018 """

Definition at line 11 of file parsedatetime_consts.py.

pdtLocales - initial value:
00001 { 'en_US': pdtLocale_en,
00002                'en_AU': pdtLocale_au,
00003                'es_ES': pdtLocale_es,
00004                'de_DE': pdtLocale_de,
00005              }

Definition at line 472 of file parsedatetime_consts.py.

pyicu - definition at line 33 of file parsedatetime_consts.py.