Back to index

moin  1.9.0~rc2
__init__.py
Go to the documentation of this file.
00001 # -*- coding: utf-8 -*-
00002 """
00003     pygments.lexers
00004     ~~~~~~~~~~~~~~~
00005 
00006     Pygments lexers.
00007 
00008     :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
00009     :license: BSD, see LICENSE for details.
00010 """
00011 import sys
00012 import fnmatch
00013 import types
00014 from os.path import basename
00015 
00016 try:
00017     set
00018 except NameError:
00019     from sets import Set as set
00020 
00021 from pygments.lexers._mapping import LEXERS
00022 from pygments.plugin import find_plugin_lexers
00023 from pygments.util import ClassNotFound, bytes
00024 
00025 
# Public names of this module: the lookup helpers listed here plus every
# key of the LEXERS mapping.
__all__ = [
    'get_lexer_by_name',
    'get_lexer_for_filename',
    'find_lexer_class',
    'guess_lexer',
] + LEXERS.keys()

# Lexer classes loaded so far, keyed by each class's ``name`` attribute.
_lexer_cache = {}
00030 
00031 
def _load_lexers(module_name):
    """
    Import `module_name` and put every object named in its ``__all__``
    into the lexer cache, keyed by the object's ``name`` attribute.
    """
    # A non-empty fromlist makes __import__ return the named module itself.
    module = __import__(module_name, None, None, ['__all__'])
    for exported_name in module.__all__:
        lexer_cls = getattr(module, exported_name)
        _lexer_cache[lexer_cls.name] = lexer_cls
00040 
00041 
def get_all_lexers():
    """
    Yield one ``(name, aliases, filenames, mimetypes)`` tuple for each
    known lexer: first the entries of ``LEXERS``, then the plugin lexers.
    """
    for entry in LEXERS.itervalues():
        # The first field of an entry is the module name; skip it.
        yield entry[1:]
    for plugin_cls in find_plugin_lexers():
        yield (plugin_cls.name, plugin_cls.aliases,
               plugin_cls.filenames, plugin_cls.mimetypes)
00051 
00052 
def find_lexer_class(name):
    """
    Return the lexer class whose name is `name`, or None when neither
    the builtin table nor the plugins provide one.
    """
    if name in _lexer_cache:
        return _lexer_cache[name]

    # Builtin lexers: import the defining module the first time it is needed.
    for module_name, lexer_name, _, _, _ in LEXERS.itervalues():
        if name == lexer_name:
            _load_lexers(module_name)
            return _lexer_cache[name]

    # Lexers registered through setuptools entrypoints.
    for plugin_cls in find_plugin_lexers():
        if plugin_cls.name == name:
            return plugin_cls

    return None
00068 
00069 
def get_lexer_by_name(_alias, **options):
    """
    Return an instance of the lexer that lists `_alias` among its aliases.

    `options` are passed to the lexer constructor.  Raises ClassNotFound
    if no builtin or plugin lexer has this alias.
    """
    # Builtin lexers first.
    for module_name, lexer_name, alias_list, _, _ in LEXERS.itervalues():
        if _alias not in alias_list:
            continue
        if lexer_name not in _lexer_cache:
            _load_lexers(module_name)
        return _lexer_cache[lexer_name](**options)

    # Then lexers from setuptools entrypoints.
    for plugin_cls in find_plugin_lexers():
        if _alias in plugin_cls.aliases:
            return plugin_cls(**options)

    raise ClassNotFound('no lexer for alias %r found' % _alias)
00085 
00086 
def get_lexer_for_filename(_fn, code=None, **options):
    """
    Return a lexer instance for the file name `_fn`.

    Only the basename of `_fn` is matched against the lexers' filename
    patterns.  When several lexers match and non-empty `code` is given,
    the candidates are ranked with ``analyse_text(code)`` and the highest
    rated one is used; without `code` the last candidate found is used.
    Raises ClassNotFound if no pattern matches.
    """
    base = basename(_fn)
    candidates = []

    # Builtin lexers: a module is only imported once one of its patterns hits.
    for module_name, lexer_name, _, patterns, _ in LEXERS.itervalues():
        for pattern in patterns:
            if not fnmatch.fnmatch(base, pattern):
                continue
            if lexer_name not in _lexer_cache:
                _load_lexers(module_name)
            candidates.append(_lexer_cache[lexer_name])

    # Plugin lexers are already classes, no loading step needed.
    for plugin_cls in find_plugin_lexers():
        for pattern in plugin_cls.filenames:
            if fnmatch.fnmatch(base, pattern):
                candidates.append(plugin_cls)

    if sys.version_info > (3,) and isinstance(code, bytes):
        # The analyse_text functions expect unicode, so decode first.
        code = code.decode('latin1')

    if code:
        # Ascending sort by rating, so the best candidate ends up last.
        # Per the original note, every lexer class has analyse_text
        # because the Lexer base class defines a default one.
        candidates.sort(key=lambda lexer_cls: lexer_cls.analyse_text(code))

    if not candidates:
        raise ClassNotFound('no lexer for filename %r found' % _fn)
    return candidates[-1](**options)
00125 
00126 
def get_lexer_for_mimetype(_mime, **options):
    """
    Return an instance of the lexer that lists `_mime` among its mimetypes.

    `options` are passed to the lexer constructor.  Raises ClassNotFound
    if no builtin or plugin lexer declares this mimetype.
    """
    for module_name, lexer_name, _, _, mime_list in LEXERS.itervalues():
        if _mime not in mime_list:
            continue
        if lexer_name not in _lexer_cache:
            _load_lexers(module_name)
        return _lexer_cache[lexer_name](**options)

    for plugin_cls in find_plugin_lexers():
        if _mime in plugin_cls.mimetypes:
            return plugin_cls(**options)

    raise ClassNotFound('no lexer for mimetype %r found' % _mime)
00140 
00141 
def _iter_lexerclasses():
    """
    Yield every lexer class: the builtin ones first, loading their
    modules on demand, followed by the plugin lexers.
    """
    for module_name, lexer_name, _, _, _ in LEXERS.itervalues():
        if lexer_name not in _lexer_cache:
            _load_lexers(module_name)
        lexer_cls = _lexer_cache[lexer_name]
        yield lexer_cls
    for plugin_cls in find_plugin_lexers():
        yield plugin_cls
00152 
00153 
def guess_lexer_for_filename(_fn, _text, **options):
    """
    Lookup all lexers that handle those filenames primary (``filenames``)
    or secondary (``alias_filenames``). Then run a text analysis for those
    lexers and choose the best result.

    Only the basename of `_fn` is matched.  `options` are passed to the
    constructor of the chosen lexer.  Raises ClassNotFound if no lexer
    matches the file name at all.

    usage::

        >>> from pygments.lexers import guess_lexer_for_filename
        >>> guess_lexer_for_filename('hello.html', '<%= @foo %>')
        <pygments.lexers.templates.RhtmlLexer object at 0xb7d2f32c>
        >>> guess_lexer_for_filename('hello.html', '<h1>{{ title|e }}</h1>')
        <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac>
        >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }')
        <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c>
    """
    fn = basename(_fn)
    # Last lexer seen whose primary ``filenames`` pattern matched; used as
    # the fallback when text analysis gives no positive score.
    primary = None
    matching_lexers = set()
    for lexer in _iter_lexerclasses():
        for filename in lexer.filenames:
            if fnmatch.fnmatch(fn, filename):
                matching_lexers.add(lexer)
                primary = lexer
        for filename in lexer.alias_filenames:
            if fnmatch.fnmatch(fn, filename):
                matching_lexers.add(lexer)
    if not matching_lexers:
        raise ClassNotFound('no lexer for filename %r found' % fn)
    if len(matching_lexers) == 1:
        return matching_lexers.pop()(**options)
    result = []
    for lexer in matching_lexers:
        rv = lexer.analyse_text(_text)
        if rv == 1.0:
            # a perfect score wins immediately
            return lexer(**options)
        result.append((rv, lexer))
    # Sort on the score, with the class name as tie-breaker.  Sorting the
    # bare (score, class) tuples would compare the lexer classes themselves
    # whenever two scores are equal, which has no meaningful order on
    # Python 2 and raises TypeError on Python 3.
    result.sort(key=lambda pair: (pair[0], pair[1].__name__))
    if not result[-1][0] and primary is not None:
        return primary(**options)
    return result[-1][1](**options)
00195 
00196 
def guess_lexer(_text, **options):
    """
    Return an instance of the lexer whose ``analyse_text`` rates `_text`
    highest (eg, because of a shebang line).

    A lexer rating the text 1.0 is returned immediately.  Raises
    ClassNotFound if no lexer gives a rating above zero.
    """
    top_score = 0.0
    top_cls = None
    for candidate in _iter_lexerclasses():
        score = candidate.analyse_text(_text)
        if score == 1.0:
            return candidate(**options)
        # strictly greater: on equal scores the earlier lexer is kept
        if score > top_score:
            top_score, top_cls = score, candidate
    if not top_score or top_cls is None:
        raise ClassNotFound('no lexer matching the text found')
    return top_cls(**options)
00211 
00212 
class _automodule(types.ModuleType):
    """Module type that imports lexer classes on first attribute access."""

    def __getattr__(self, name):
        # Python calls __getattr__ only after normal attribute lookup failed.
        entry = LEXERS.get(name)
        if not entry:
            raise AttributeError(name)
        module_name, lexer_name = entry[0], entry[1]
        _load_lexers(module_name)
        lexer_cls = _lexer_cache[lexer_name]
        # Store the class on the module so later accesses find it directly.
        setattr(self, name, lexer_cls)
        return lexer_cls
00224 
00225 
# Replace the module object registered as 'pygments.lexers' with an
# _automodule instance, so that attribute access on the package falls back
# to _automodule.__getattr__ and lexer classes are loaded on demand.
import sys
oldmod = sys.modules['pygments.lexers']
newmod = _automodule('pygments.lexers')
# Copy every name defined above into the replacement module.
newmod.__dict__.update(oldmod.__dict__)
sys.modules['pygments.lexers'] = newmod
# Drop the names that only served this swap from the new module's namespace.
# They are deleted from newmod only; the original module's globals keep them.
# NOTE(review): presumably ``oldmod`` must stay bound there so the original
# module (whose globals the functions above use) is not torn down -- confirm
# before renaming or removing any of these statements.
del newmod.newmod, newmod.oldmod, newmod.sys, newmod.types