Back to index

moin  1.9.0~rc2
__init__.py
Go to the documentation of this file.
00001 # -*- coding: utf-8 -*-
00002 """
00003     pygments.filters
00004     ~~~~~~~~~~~~~~~~
00005 
00006     Module containing filter lookup functions and default
00007     filters.
00008 
00009     :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
00010     :license: BSD, see LICENSE for details.
00011 """
# Compatibility shim: `set` became a builtin in Python 2.4; on older
# interpreters fall back to the `sets` module's Set class under the same name.
try:
    set
except NameError:
    from sets import Set as set
00016 
00017 import re
00018 from pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
00019     string_to_tokentype
00020 from pygments.filter import Filter
00021 from pygments.util import get_list_opt, get_int_opt, get_bool_opt, get_choice_opt, \
00022      ClassNotFound, OptionError
00023 from pygments.plugin import find_plugin_filters
00024 
00025 
def find_filter_class(filtername):
    """
    Look up a filter class by its registered name.

    Builtin filters (the module-level ``FILTERS`` mapping) take precedence;
    plugin-provided filters are searched afterwards.  Returns ``None`` when
    no filter of that name exists.
    """
    builtin = FILTERS.get(filtername)
    if builtin is not None:
        return builtin
    for plugin_name, plugin_cls in find_plugin_filters():
        if plugin_name == filtername:
            return plugin_cls
    return None
00036 
00037 
def get_filter_by_name(filtername, **options):
    """
    Return an instantiated filter for *filtername*.

    Keyword *options* are forwarded to the filter's initializer.
    Raises `ClassNotFound` if no filter of that name exists.
    """
    cls = find_filter_class(filtername)
    if cls is None:
        raise ClassNotFound('filter %r not found' % filtername)
    return cls(**options)
00048 
00049 
def get_all_filters():
    """
    Yield the names of all known filters: first the builtin ones,
    then those registered by plugins.
    """
    for builtin_name in FILTERS:
        yield builtin_name
    for plugin_name, _cls in find_plugin_filters():
        yield plugin_name
00058 
00059 
00060 def _replace_special(ttype, value, regex, specialttype,
00061                      replacefunc=lambda x: x):
00062     last = 0
00063     for match in regex.finditer(value):
00064         start, end = match.start(), match.end()
00065         if start != last:
00066             yield ttype, value[last:start]
00067         yield specialttype, replacefunc(value[start:end])
00068         last = end
00069     if last != len(value):
00070         yield ttype, value[last:]
00071 
00072 
class CodeTagFilter(Filter):
    """
    Highlight special code tags in comments and docstrings.

    Options accepted:

    `codetags` : list of strings
       A list of strings that are flagged as code tags.  The default is to
       highlight ``XXX``, ``TODO``, ``BUG`` and ``NOTE``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        # Drop falsy entries so they cannot end up in the pattern.
        tags = [tag for tag in get_list_opt(options, 'codetags',
                                            ['XXX', 'TODO', 'BUG', 'NOTE'])
                if tag]
        if tags:
            self.tag_re = re.compile(r'\b(%s)\b' % '|'.join(
                [re.escape(tag) for tag in tags]))
        else:
            # Previously an empty tag list produced the pattern \b()\b,
            # which matches the empty string at every word boundary and
            # floods the output with zero-length Comment.Special tokens.
            # Disable tag highlighting entirely instead.
            self.tag_re = None

    def filter(self, lexer, stream):
        """Split comment/docstring tokens around code tags, emitting the
        tags as ``Comment.Special``; all other tokens pass through."""
        regex = self.tag_re
        if regex is None:
            # No tags configured: pass the stream through untouched.
            for ttype, value in stream:
                yield ttype, value
            return
        for ttype, value in stream:
            # Docstrings, plus comments that are not preprocessor directives
            # (note: `and` binds tighter than `or` here, as intended).
            if ttype in String.Doc or \
               ttype in Comment and \
               ttype not in Comment.Preproc:
                for sttype, svalue in _replace_special(ttype, value, regex,
                                                       Comment.Special):
                    yield sttype, svalue
            else:
                yield ttype, value
00103 
00104 
class KeywordCaseFilter(Filter):
    """
    Convert keywords to a fixed casing: lowercase, uppercase, or
    capitalized (first letter upper, rest lower).

    Useful e.g. when highlighting Pascal code that should follow a
    particular styleguide.

    Options accepted:

    `case` : string
       The casing to convert keywords to. Must be one of ``'lower'``,
       ``'upper'`` or ``'capitalize'``.  The default is ``'lower'``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        case = get_choice_opt(options, 'case',
                              ['lower', 'upper', 'capitalize'], 'lower')
        # Bind the matching unbound string method once; `unicode` reflects
        # this module's Python 2 target.
        self.convert = getattr(unicode, case)

    def filter(self, lexer, stream):
        convert = self.convert
        for ttype, value in stream:
            if ttype in Keyword:
                yield ttype, convert(value)
            else:
                yield ttype, value
00131 
00132 
class NameHighlightFilter(Filter):
    """
    Re-type selected plain ``Name`` tokens.

    Example::

        filter = NameHighlightFilter(
            names=['foo', 'bar', 'baz'],
            tokentype=Name.Function,
        )

    This would highlight the names "foo", "bar" and "baz"
    as functions. `Name.Function` is the default token type.

    Options accepted:

    `names` : list of strings
      A list of names that should be given the different token type.
      There is no default.
    `tokentype` : TokenType or string
      A token type or a string containing a token type name that is
      used for highlighting the strings in `names`.  The default is
      `Name.Function`.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.names = set(get_list_opt(options, 'names', []))
        requested = options.get('tokentype')
        if requested:
            # Accept either a TokenType or its string name.
            self.tokentype = string_to_tokentype(requested)
        else:
            self.tokentype = Name.Function

    def filter(self, lexer, stream):
        names = self.names
        hl_ttype = self.tokentype
        for ttype, value in stream:
            if ttype is Name and value in names:
                yield hl_ttype, value
            else:
                yield ttype, value
00173 
00174 
class ErrorToken(Exception):
    # Default exception class raised by `RaiseOnErrorTokenFilter`;
    # the offending token's value is passed as the exception argument.
    pass
00177 
class RaiseOnErrorTokenFilter(Filter):
    """
    Raise an exception when the lexer generates an error token.

    Options accepted:

    `excclass` : Exception class
      The exception class to raise.
      The default is `pygments.filters.ErrorToken`.

    *New in Pygments 0.8.*
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        excclass = options.get('excclass', ErrorToken)
        # issubclass() raises TypeError when given a non-class; treat that
        # the same as a class that is not an Exception subclass.
        try:
            valid = issubclass(excclass, Exception)
        except TypeError:
            valid = False
        if not valid:
            raise OptionError('excclass option is not an exception class')
        self.exception = excclass

    def filter(self, lexer, stream):
        exception = self.exception
        for ttype, value in stream:
            if ttype is Error:
                raise exception(value)
            yield ttype, value
00206 
00207 
class VisibleWhitespaceFilter(Filter):
    """
    Make tabs, newlines and/or spaces visible by replacing them with
    marker characters.

    Options accepted:

    `spaces` : string or bool
      If this is a one-character string, spaces will be replaces by this string.
      If it is another true value, spaces will be replaced by ``·`` (unicode
      MIDDLE DOT).  If it is a false value, spaces will not be replaced.  The
      default is ``False``.
    `tabs` : string or bool
      The same as for `spaces`, but the default replacement character is ``»``
      (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK).  The default value
      is ``False``.  Note: this will not work if the `tabsize` option for the
      lexer is nonzero, as tabs will already have been expanded then.
    `tabsize` : int
      If tabs are to be replaced by this filter (see the `tabs` option), this
      is the total number of characters that a tab should be expanded to.
      The default is ``8``.
    `newlines` : string or bool
      The same as for `spaces`, but the default replacement character is ``¶``
      (unicode PILCROW SIGN).  The default value is ``False``.
    `wstokentype` : bool
      If true, give whitespace the special `Whitespace` token type.  This allows
      styling the visible whitespace differently (e.g. greyed out), but it can
      disrupt background colors.  The default is ``True``.

    *New in Pygments 0.8.*
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        defaults = {'spaces': u'·', 'tabs': u'»', 'newlines': u'¶'}
        for name in ('spaces', 'tabs', 'newlines'):
            opt = options.get(name, False)
            if isinstance(opt, basestring) and len(opt) == 1:
                # A one-character string is used verbatim as the marker.
                setattr(self, name, opt)
            elif opt:
                # Any other true value selects the default marker character.
                setattr(self, name, defaults[name])
            else:
                setattr(self, name, '')
        tabsize = get_int_opt(options, 'tabsize', 8)
        if self.tabs:
            # Pad so a replaced tab still spans `tabsize` columns.
            self.tabs += ' ' * (tabsize - 1)
        if self.newlines:
            # Keep the real newline so line structure is preserved.
            self.newlines += '\n'
        self.wstt = get_bool_opt(options, 'wstokentype', True)

    def filter(self, lexer, stream):
        if not self.wstt:
            # Simple mode: substitute in place, keep original token types.
            spaces, tabs, newlines = self.spaces, self.tabs, self.newlines
            for ttype, value in stream:
                if spaces:
                    value = value.replace(' ', spaces)
                if tabs:
                    value = value.replace('\t', tabs)
                if newlines:
                    value = value.replace('\n', newlines)
                yield ttype, value
            return
        # Token-splitting mode: every whitespace run is re-emitted as a
        # `Whitespace` token, substituting markers where configured and
        # leaving unconfigured whitespace characters unchanged.
        markers = {
            ' ': self.spaces or ' ',
            '\t': self.tabs or '\t',
            '\n': self.newlines or '\n',
        }
        regex = re.compile(r'\s')
        def replacefunc(wschar):
            return markers.get(wschar, wschar)
        for ttype, value in stream:
            for sttype, svalue in _replace_special(ttype, value, regex,
                                                   Whitespace, replacefunc):
                yield sttype, svalue
00284 
00285 
# Registry of builtin filter names; plugin filters are discovered
# separately via find_plugin_filters().
FILTERS = {
    'codetagify':     CodeTagFilter,
    'keywordcase':    KeywordCaseFilter,
    'highlight':      NameHighlightFilter,
    'raiseonerror':   RaiseOnErrorTokenFilter,
    'whitespace':     VisibleWhitespaceFilter,
}