Back to index

moin  1.9.0~rc2
util.py
Go to the documentation of this file.
00001 # -*- coding: utf-8 -*-
00002 """
00003     pygments.util
00004     ~~~~~~~~~~~~~
00005 
00006     Utility functions.
00007 
00008     :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
00009     :license: BSD, see LICENSE for details.
00010 """
00011 import re
00012 import sys
00013 
00014 
# Separators used to break a shebang line into path components and arguments.
split_path_re = re.compile(r'[/\\ ]')

# An optional processing instruction followed by a DOCTYPE declaration.
# Group 2 captures the first part of the DOCTYPE (two names and one quoted
# string).
doctype_lookup_re = re.compile(r'''(?smx)
    (<\?.*?\?>)?\s*
    <!DOCTYPE\s+(
     [a-zA-Z_][a-zA-Z0-9]*\s+
     [a-zA-Z_][a-zA-Z0-9]*\s+
     "[^"]*")
     [^>]*>
''')

# Something that looks like an opening tag followed later by a closing tag.
# The flags are passed as an argument: global inline flags placed at the end
# of a pattern ("...(?uism)") are rejected by newer Python versions.
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
                    re.UNICODE | re.IGNORECASE | re.DOTALL | re.MULTILINE)
00025 
00026 
class ClassNotFound(ValueError):
    """
    Raised when one of the get_*_by_* functions could not find a
    matching class.
    """
00031 
00032 
class OptionError(Exception):
    """
    Raised by the get_*_opt helpers when an option has an invalid type
    or value.
    """
00035 
00036 
def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """
    Return the value of option `optname`, which must be one of `allowed`.

    `options` is a mapping of option names to values; `default` is used
    when `optname` is missing.  If `normcase` is true, string values are
    lowercased before they are checked against `allowed`.

    Raises `OptionError` if the (possibly lowercased) value is not in
    `allowed`.
    """
    value = options.get(optname, default)
    # Only lowercase values that support it.  A missing option falls back to
    # `default` (possibly None); such a value must go through the membership
    # check below instead of crashing with AttributeError.
    if normcase and hasattr(value, 'lower'):
        value = value.lower()
    if value not in allowed:
        raise OptionError('Value for option %s must be one of %s' %
                          (optname, ', '.join(map(str, allowed))))
    return value
00045 
00046 
def get_bool_opt(options, optname, default=None):
    """
    Return the value of option `optname` as a boolean.

    Booleans are returned unchanged and integers are converted with
    ``bool()``.  Strings are compared case-insensitively: ``1``, ``yes``,
    ``true`` and ``on`` give True; ``0``, ``no``, ``false`` and ``off``
    give False.  `default` is used when `optname` is missing.

    Raises `OptionError` for any other type or string value.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` so that
    # string options do not raise NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    string = options.get(optname, default)
    if isinstance(string, bool):
        return string
    elif isinstance(string, int):
        return bool(string)
    elif not isinstance(string, string_types):
        raise OptionError('Invalid type %r for option %s; use '
                          '1/0, yes/no, true/false, on/off' % (
                          string, optname))
    elif string.lower() in ('1', 'yes', 'true', 'on'):
        return True
    elif string.lower() in ('0', 'no', 'false', 'off'):
        return False
    else:
        raise OptionError('Invalid value %r for option %s; use '
                          '1/0, yes/no, true/false, on/off' % (
                          string, optname))
00065 
00066 
def get_int_opt(options, optname, default=None):
    """
    Return the value of option `optname` converted with ``int()``.

    `default` is used when `optname` is missing.  Raises `OptionError`
    if the conversion fails with a TypeError or a ValueError.
    """
    raw = options.get(optname, default)
    try:
        number = int(raw)
    except TypeError:
        raise OptionError('Invalid type %r for option %s; you '
                          'must give an integer value' % (raw, optname))
    except ValueError:
        raise OptionError('Invalid value %r for option %s; you '
                          'must give an integer value' % (raw, optname))
    else:
        return number
00079 
00080 
def get_list_opt(options, optname, default=None):
    """
    Return the value of option `optname` as a list.

    A string is split on whitespace; a list or tuple is copied into a new
    list.  `default` is used when `optname` is missing.

    Raises `OptionError` for any other type.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` so that
    # string options do not raise NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    val = options.get(optname, default)
    if isinstance(val, string_types):
        return val.split()
    elif isinstance(val, (list, tuple)):
        return list(val)
    else:
        raise OptionError('Invalid type %r for option %s; you '
                          'must give a list value' % (
                          val, optname))
00091 
00092 
def docstring_headline(obj):
    """
    Return the first paragraph of the docstring of `obj` as one line.

    The lines up to the first blank line are stripped and joined with
    single spaces.  An empty string is returned if `obj` has no docstring.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    pieces = []
    for raw_line in doc.strip().splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # a blank line ends the headline paragraph
            break
        pieces.append(stripped)
    return ' '.join(pieces)
00103 
00104 
def make_analysator(f):
    """
    Wrap the text analysis function `f` in a static method.

    The wrapper returns 0.0 when `f` returns a false value; any other
    result is converted to float and clamped to the range 0.0 .. 1.0.
    The docstring of `f` is copied to the wrapper.
    """
    def text_analyse(text):
        score = f(text)
        return min(1.0, max(0.0, float(score))) if score else 0.0

    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)
00117 
00118 
def shebang_matches(text, regex):
    r"""
    Check if the given regular expression matches the last part of the
    shebang if one exists.

        >>> from pygments.util import shebang_matches
        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
        ...                 r'python(2\.\d)?')
        True

    It also checks for common windows executable file extensions::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does
    the same as ``'perl -e'``)

    Note that this method automatically searches the whole string (eg:
    the regular expression is wrapped in ``'^$'``)
    """
    # Only the first line can hold a shebang; compare it in lowercase.
    first_line = text.partition('\n')[0].lower()
    if not first_line.startswith('#!'):
        return False
    # The last path component or argument that is not an option.
    candidates = [x for x in split_path_re.split(first_line[2:].strip())
                  if x and not x.startswith('-')]
    if not candidates:
        return False
    # Raw string: '\.' is not a valid escape in an ordinary string literal.
    pattern = re.compile(r'^%s(\.(exe|cmd|bat|bin))?$' % regex, re.IGNORECASE)
    return pattern.search(candidates[-1]) is not None
00163 
00164 
def doctype_matches(text, regex):
    """
    Tell whether `text` starts with a DOCTYPE whose first part matches
    `regex`.  False is returned when no DOCTYPE is found.

    Only the first part of the DOCTYPE is checked,
    eg: 'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'
    """
    found = doctype_lookup_re.match(text)
    if found is not None:
        declared = found.group(2).strip()
        checker = re.compile(regex)
        return checker.match(declared) is not None
    return False
00176 
00177 
def html_doctype_matches(text):
    """
    Tell whether `text` appears to carry an HTML or XHTML doctype.
    """
    html_doctype = r'html\s+PUBLIC\s+"-//W3C//DTD X?HTML.*'
    return doctype_matches(text, html_doctype)
00183 
00184 
# Results of the tag search in looks_like_xml, keyed by hash(text).
_looks_like_xml_cache = {}
def looks_like_xml(text):
    """
    Tell whether `text` starts with a doctype or contains some tags.

    Only the first 1000 characters are searched for tags, and only the
    result of that tag search is cached.
    """
    cache_key = hash(text)
    if cache_key in _looks_like_xml_cache:
        return _looks_like_xml_cache[cache_key]
    if doctype_lookup_re.match(text) is not None:
        # a doctype hit is returned without being stored in the cache
        return True
    has_tags = tag_re.search(text[:1000]) is not None
    _looks_like_xml_cache[cache_key] = has_tags
    return has_tags
00200 
00201 # Python 2/3 compatibility
00202 
if sys.version_info[0] >= 3:
    import builtins
    import io

    bytes = builtins.bytes
    u_prefix = ''
    BytesIO = io.BytesIO
    StringIO = io.StringIO

    def b(s):
        # bytes pass through; text is converted one code point per byte
        if isinstance(s, bytes):
            return s
        if isinstance(s, str):
            return bytes(map(ord, s))
        raise TypeError("Invalid argument %r for b()" % (s,))
else:
    # On Python 2 the native str type already is the byte string type.
    import StringIO
    import cStringIO

    b = bytes = str
    u_prefix = 'u'
    BytesIO = cStringIO.StringIO
    StringIO = StringIO.StringIO