Back to index

moin  1.9.0~rc2
agile.py
Go to the documentation of this file.
00001 # -*- coding: utf-8 -*-
00002 """
00003     pygments.lexers.agile
00004     ~~~~~~~~~~~~~~~~~~~~~
00005 
00006     Lexers for agile languages.
00007 
00008     :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
00009     :license: BSD, see LICENSE for details.
00010 """
00011 
00012 import re
00013 try:
00014     set
00015 except NameError:
00016     from sets import Set as set
00017 
00018 from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \
00019      LexerContext, include, combined, do_insertions, bygroups, using
00020 from pygments.token import Error, Text, Other, \
00021      Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation
00022 from pygments.util import get_bool_opt, get_list_opt, shebang_matches
00023 from pygments import unistring as uni
00024 
00025 
# Names exported by this module when it is star-imported.
__all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
           'RubyLexer', 'RubyConsoleLexer', 'PerlLexer', 'LuaLexer',
           'MiniDLexer', 'IoLexer', 'TclLexer', 'ClojureLexer',
           'Python3Lexer', 'Python3TracebackLexer']

# b/w compatibility
from pygments.lexers.functional import SchemeLexer

# Matches a single line including its terminating newline (``.`` does not
# match a newline here, so the non-greedy match stops at the first ``\n``).
# A trailing fragment without a newline is not matched.
line_re  = re.compile('.*?\n')
00035 
00036 
class PythonLexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code.
    """

    name = 'Python'
    aliases = ['python', 'py']
    filenames = ['*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript']
    mimetypes = ['text/x-python', 'application/x-python']

    # State machine for the lexer.  Rules inside a state are tried in order,
    # so more specific patterns must stay ahead of more general ones.
    tokens = {
        'root': [
            (r'\n', Text),
            # a triple-quoted string that starts a line is a docstring
            (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
            (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
            include('keywords'),
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'import'),
            include('builtins'),
            include('backtick'),
            # raw strings: no 'stringescape' state is combined in
            ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            # ordinary (optionally unicode) strings: escapes are highlighted
            ('[uU]?"""', String, combined('stringescape', 'tdqs')),
            ("[uU]?'''", String, combined('stringescape', 'tsqs')),
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (r'(assert|break|continue|del|elif|else|except|exec|'
             r'finally|for|global|if|lambda|pass|print|raise|'
             r'return|try|while|yield|as|with)\b', Keyword),
        ],
        'builtins': [
            # the (?<!\.) lookbehind keeps attribute accesses such as
            # ``obj.open`` from being highlighted as builtins
            (r'(?<!\.)(__import__|abs|all|any|apply|basestring|bin|bool|buffer|'
             r'bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|'
             r'complex|delattr|dict|dir|divmod|enumerate|eval|execfile|exit|'
             r'file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|'
             r'input|int|intern|isinstance|issubclass|iter|len|list|locals|'
             r'long|map|max|min|next|object|oct|open|ord|pow|property|range|'
             r'raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|'
             r'sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|'
             r'vars|xrange|zip)\b', Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True'
             r')\b', Name.Builtin.Pseudo),
            (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|'
             r'BaseException|DeprecationWarning|EOFError|EnvironmentError|'
             r'Exception|FloatingPointError|FutureWarning|GeneratorExit|IOError|'
             r'ImportError|ImportWarning|IndentationError|IndexError|KeyError|'
             r'KeyboardInterrupt|LookupError|MemoryError|NameError|'
             r'NotImplemented|NotImplementedError|OSError|OverflowError|'
             r'OverflowWarning|PendingDeprecationWarning|ReferenceError|'
             r'RuntimeError|RuntimeWarning|StandardError|StopIteration|'
             r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|'
             r'TypeError|UnboundLocalError|UnicodeDecodeError|'
             r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|'
             r'UnicodeWarning|UserWarning|ValueError|VMSError|Warning|'
             r'WindowsError|ZeroDivisionError)\b', Name.Exception),
        ],
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+', Number.Float),
            (r'0\d+', Number.Oct),
            # binary literals; without this rule ``0b101`` would be split
            # into the integer ``0`` and the name ``b101``
            (r'0[bB][01]+', Number.Bin),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer)
        ],
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@[a-zA-Z0-9_.]+', Name.Decorator),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
        ],
        'funcname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')
        ],
        'classname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
        ],
        'import': [
            (r'((?:\s|\\\s)+)(as)((?:\s|\\\s)+)',
             bygroups(Text, Keyword.Namespace, Text)),
            (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
            (r'', Text, '#pop') # all else: go back
        ],
        'fromimport': [
            (r'((?:\s|\\\s)+)(import)\b', bygroups(Text, Keyword.Namespace), '#pop'),
            (r'[a-zA-Z_.][a-zA-Z0-9_.]*', Name.Namespace),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings': [
            # %-style format specifiers
            (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape), # included here again for raw strings
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape), # included here again for raw strings
            include('strings')
        ],
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            include('strings'),
            include('nl')
        ],
    }

    def analyse_text(text):
        # Accept ``python``, ``pythonw``, ``python2`` and ``python2.x``
        # interpreters in the shebang line.  The minor version is optional
        # so that a plain ``python2`` shebang is recognised as well.
        return shebang_matches(text, r'pythonw?(2(\.\d)?)?')
00181 
00182 
class Python3Lexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code (version 3.0).

    *New in Pygments 0.10.*
    """

    name = 'Python 3'
    aliases = ['python3', 'py3']
    filenames = []  # Nothing until Python 3 gets widespread
    mimetypes = ['text/x-python3', 'application/x-python3']

    flags = re.MULTILINE | re.UNICODE

    # identifier pattern built from the Unicode XID_Start/XID_Continue sets
    uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue)

    # Start from a shallow copy of the Python 2 rules; every state that
    # differs is replaced with a new list below, so the lists owned by
    # PythonLexer are never modified.
    tokens = PythonLexer.tokens.copy()
    tokens['keywords'] = [
        (r'(assert|break|continue|del|elif|else|except|'
         r'finally|for|global|if|lambda|pass|raise|'
         r'return|try|while|yield|as|with|True|False|None)\b', Keyword),
    ]
    tokens['builtins'] = [
        (r'(?<!\.)(__import__|abs|all|any|bin|bool|bytearray|bytes|'
         r'chr|classmethod|cmp|compile|complex|delattr|dict|dir|'
         r'divmod|enumerate|eval|filter|float|format|frozenset|getattr|'
         r'globals|hasattr|hash|hex|id|input|int|isinstance|issubclass|'
         r'iter|len|list|locals|map|max|memoryview|min|next|object|oct|'
         r'open|ord|pow|print|property|range|repr|reversed|round|'
         r'set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|'
         r'vars|zip)\b', Name.Builtin),
        (r'(?<!\.)(self|Ellipsis|NotImplemented)\b', Name.Builtin.Pseudo),
        (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|'
         r'BaseException|BufferError|BytesWarning|DeprecationWarning|'
         r'EOFError|EnvironmentError|Exception|FloatingPointError|'
         r'FutureWarning|GeneratorExit|IOError|ImportError|'
         r'ImportWarning|IndentationError|IndexError|KeyError|'
         r'KeyboardInterrupt|LookupError|MemoryError|NameError|'
         r'NotImplementedError|OSError|OverflowError|'
         r'PendingDeprecationWarning|ReferenceError|'
         r'RuntimeError|RuntimeWarning|StopIteration|'
         r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|'
         r'TypeError|UnboundLocalError|UnicodeDecodeError|'
         r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|'
         r'UnicodeWarning|UserWarning|ValueError|VMSError|Warning|'
         r'WindowsError|ZeroDivisionError)\b', Name.Exception),
    ]
    tokens['numbers'] = [
        (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
        # floats written with an exponent but no decimal point (``1e10``);
        # without this rule they would be split into an integer and a name
        (r'\d+[eE][+-]?[0-9]+', Number.Float),
        (r'0[oO][0-7]+', Number.Oct),
        (r'0[bB][01]+', Number.Bin),
        (r'0[xX][a-fA-F0-9]+', Number.Hex),
        (r'\d+', Number.Integer)
    ]
    # backtick repr syntax is not lexed for Python 3
    tokens['backtick'] = []
    tokens['name'] = [
        (r'@[a-zA-Z0-9_]+', Name.Decorator),
        (uni_name, Name),
    ]
    tokens['funcname'] = [
        (uni_name, Name.Function, '#pop')
    ]
    tokens['classname'] = [
        (uni_name, Name.Class, '#pop')
    ]
    tokens['import'] = [
        (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
        (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
        (r'', Text, '#pop') # all else: go back
    ]
    tokens['fromimport'] = [
        (r'(\s+)(import)\b', bygroups(Text, Keyword), '#pop'),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
    ]
    # don't highlight "%s" substitutions
    tokens['strings'] = [
        (r'[^\\\'"%\n]+', String),
        # quotes, percents and backslashes must be parsed one at a time
        (r'[\'"\\]', String),
        # unhandled string formatting sign
        (r'%', String)
        # newlines are an error (use "nl" state)
    ]

    def analyse_text(text):
        # recognise ``python3``/``pythonw3`` shebangs, optionally with a
        # minor version
        return shebang_matches(text, r'pythonw?3(\.\d)?')
00272 
00273 
class PythonConsoleLexer(Lexer):
    """
    For Python console output or doctests, such as:

    .. sourcecode:: pycon

        >>> a = 'foo'
        >>> print a
        foo
        >>> 1 / 0
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
        ZeroDivisionError: integer division or modulo by zero

    Additional options:

    `python3`
        Use Python 3 lexer for code.  Default is ``False``.
        *New in Pygments 1.0.*
    """
    name = 'Python console session'
    aliases = ['pycon']
    mimetypes = ['text/x-python-doctest']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` tuples for a console session.

        Lines starting with a ``>>> `` or ``... `` prompt are collected and
        lexed as code, tracebacks are collected and handed to the matching
        traceback lexer, and everything else is emitted as
        ``Generic.Output``.  Every input line ends up in exactly one token
        stream, in input order; an unfinished traceback is flushed when a
        prompt, a new traceback header or the end of the input is reached.
        """
        if self.python3:
            pylexer = Python3Lexer(**self.options)
            tblexer = Python3TracebackLexer(**self.options)
        else:
            pylexer = PythonLexer(**self.options)
            tblexer = PythonTracebackLexer(**self.options)

        curcode = ''        # code of the prompt lines collected so far
        insertions = []     # prompt tokens to splice back into the code
        curtb = ''          # text of the traceback collected so far
        tbindex = 0         # offset in ``text`` where ``curtb`` starts
        tb = 0              # true while inside a traceback

        def traceback_tokens():
            # Lex the traceback collected so far and shift the token
            # offsets to where the traceback starts inside ``text``.
            return [(tbindex + i, t, v)
                    for i, t, v in tblexer.get_tokens_unprocessed(curtb)]

        for match in line_re.finditer(text):
            line = match.group()
            is_prompt = line.startswith('>>> ') or line.startswith('... ')
            if is_prompt or line.rstrip() == '...':
                if curtb:
                    # a prompt cuts an unfinished traceback short; emit
                    # what was collected instead of silently dropping it
                    for item in traceback_tokens():
                        yield item
                    curtb = ''
                tb = 0
                if is_prompt:
                    prompt = line[:4]
                    code = line[4:]
                else:
                    # bare continuation prompt without a trailing space
                    prompt = '...'
                    code = line[3:]
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, prompt)]))
                curcode += code
            else:
                if curcode:
                    for item in do_insertions(insertions,
                                    pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                # A '  File "...", line N' line only *starts* a traceback
                # (SyntaxError style) when none is active; inside an active
                # traceback it is an ordinary frame line and must not
                # overwrite the text collected so far.
                if (line.startswith('Traceback (most recent call last):') or
                    (not tb and
                     re.match(r'  File "[^"]+", line \d+\n$', line))):
                    if curtb:
                        for item in traceback_tokens():
                            yield item
                    tb = 1
                    curtb = line
                    tbindex = match.start()
                elif tb:
                    # checked before the KeyboardInterrupt special case so
                    # that a traceback ending in KeyboardInterrupt is
                    # emitted as a whole and in order
                    curtb += line
                    if not (line.startswith(' ') or line.strip() == '...'):
                        # first unindented line: the exception message,
                        # which ends the traceback
                        tb = 0
                        for item in traceback_tokens():
                            yield item
                        curtb = ''
                elif line == 'KeyboardInterrupt\n':
                    yield match.start(), Name.Class, line
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item
        if curtb:
            # input ended in the middle of a traceback
            for item in traceback_tokens():
                yield item
00353 
00354 
class PythonTracebackLexer(RegexLexer):
    """
    For Python tracebacks.

    *New in Pygments 0.7.*
    """

    name = 'Python Traceback'
    aliases = ['pytb']
    filenames = ['*.pytb']
    mimetypes = ['text/x-python-traceback']

    tokens = {
        'root': [
            (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
            # SyntaxError starts with this.
            (r'^(?=  File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
            # anything outside a traceback is passed through untouched
            (r'^.*\n', Other),
        ],
        'intb': [
            # frame line with a function name
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name.Identifier, Text)),
            # frame line without a function name
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text)),
            # source line of the frame, lexed as Python code
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(PythonLexer), Text)),
            # The dots are escaped so that only a literal ellipsis matches;
            # unescaped, the rule would accept any three characters.
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Text)), # for doctests...
            # final "ExceptionName: message" line ends the traceback
            (r'^(.+)(: )(.+)(\n)',
             bygroups(Name.Class, Text, Name.Identifier, Text), '#pop'),
            # final line consisting of the exception name only
            (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
             bygroups(Name.Class, Text), '#pop')
        ],
    }
00389 
00390 
class Python3TracebackLexer(RegexLexer):
    """
    For Python 3.0 tracebacks, with support for chained exceptions.

    *New in Pygments 1.0.*
    """

    name = 'Python 3.0 Traceback'
    aliases = ['py3tb']
    filenames = ['*.py3tb']
    mimetypes = ['text/x-python3-traceback']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
            # separators between chained exceptions
            (r'^During handling of the above exception, another '
             r'exception occurred:\n\n', Generic.Traceback),
            (r'^The above exception was the direct cause of the '
             r'following exception:\n\n', Generic.Traceback),
            # SyntaxError starts with this (same rule as PythonTracebackLexer).
            (r'^(?=  File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
            # anything outside a traceback is passed through untouched
            # instead of being reported as an error
            (r'^.*\n', Other),
        ],
        'intb': [
            # frame line with a function name
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name.Identifier, Text)),
            # frame line without a function name, as in SyntaxError reports
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text)),
            # source line of the frame, lexed as Python 3 code
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(Python3Lexer), Text)),
            # The dots are escaped so that only a literal ellipsis matches;
            # unescaped, the rule would accept any three characters.
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Text)), # for doctests...
            # final "ExceptionName: message" line ends the traceback
            (r'^(.+)(: )(.+)(\n)',
             bygroups(Name.Class, Text, Name.Identifier, Text), '#pop'),
            # final line consisting of the exception name only
            (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
             bygroups(Name.Class, Text), '#pop')
        ],
    }
00425 
00426 
00427 class RubyLexer(ExtendedRegexLexer):
00428     """
00429     For `Ruby <http://www.ruby-lang.org>`_ source code.
00430     """
00431 
00432     name = 'Ruby'
00433     aliases = ['rb', 'ruby']
00434     filenames = ['*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx']
00435     mimetypes = ['text/x-ruby', 'application/x-ruby']
00436 
00437     flags = re.DOTALL | re.MULTILINE
00438 
    def heredoc_callback(self, match, ctx):
        """
        Rule callback for a heredoc opener (``<<NAME``, ``<<-NAME``, ...).

        Yields the tokens for the opener itself, lexes the rest of the
        opening line with the regular rules, and -- once the outermost
        heredoc on that line has been reached again -- consumes the following
        lines as heredoc bodies up to each terminator line.  ``ctx.pos`` and
        ``ctx.end`` are modified to steer the surrounding lexer.
        """
        # okay, this is the hardest part of parsing Ruby...
        # match: 1 = <<-?, 2 = quote? 3 = name 4 = quote? 5 = rest of line

        start = match.start(1)
        yield start, Operator, match.group(1)        # <<-?
        yield match.start(2), String.Heredoc, match.group(2)  # quote ", ', `
        yield match.start(3), Name.Constant, match.group(3)   # heredoc name
        yield match.start(4), String.Heredoc, match.group(4)  # quote again

        # The list of pending heredocs is kept on the context object so that
        # nested invocations of this callback (see below) share it.
        heredocstack = ctx.__dict__.setdefault('heredocstack', [])
        outermost = not bool(heredocstack)
        # entry: (terminator may be indented?, terminator name)
        heredocstack.append((match.group(1) == '<<-', match.group(3)))

        # restrict the context to the rest of the opening line and lex it
        ctx.pos = match.start(5)
        ctx.end = match.end(5)
        # this may find other heredocs
        for i, t, v in self.get_tokens_unprocessed(context=ctx):
            yield i, t, v
        ctx.pos = match.end()

        if outermost:
            # this is the outer heredoc again, now we can process them all
            for tolerant, hdname in heredocstack:
                lines = []
                # NOTE: from here on ``match`` is rebound to line matches and
                # no longer refers to the heredoc opener.
                for match in line_re.finditer(ctx.text, ctx.pos):
                    if tolerant:
                        # ``<<-``: the terminator may have leading whitespace
                        check = match.group().strip()
                    else:
                        check = match.group().rstrip()
                    if check == hdname:
                        # terminator found: emit the body, then the terminator
                        for amatch in lines:
                            yield amatch.start(), String.Heredoc, amatch.group()
                        yield match.start(), Name.Constant, match.group()
                        ctx.pos = match.end()
                        break
                    else:
                        lines.append(match)
                else:
                    # end of heredoc not found -- error!
                    # NOTE(review): ctx.pos is not advanced on this path, so
                    # the lines emitted here appear to be lexed again by the
                    # caller -- confirm whether that is intended.
                    for amatch in lines:
                        yield amatch.start(), Error, amatch.group()
            # lift the end-of-line restriction and clear the pending list
            ctx.end = len(ctx.text)
            del heredocstack[:]
00483 
00484 
    def gen_rubystrings_rules():
        """
        Build and return a dict of lexer states for Ruby string syntax.

        The result contains a ``'strings'`` state plus the helper states it
        pushes (``simple-*``, ``*-intp-string``, ``*-string``, ``*-regex``).
        It takes no ``self``; presumably it is called while the class body
        is executed and merged into ``tokens`` -- that use is outside this
        function.
        """
        def intp_regex_callback(self, match, ctx):
            # Lex the body of a ``%r`` regex (group 3) in a fresh context
            # starting in the 'interpolated-regex' state, shifting token
            # offsets back to their position in the original text.
            yield match.start(1), String.Regex, match.group(1)    # begin
            nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Regex, match.group(4)    # end[mixounse]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            # Same as above for interpolated ``%``-strings, using the
            # 'interpolated-string' state.
            yield match.start(1), String.Other, match.group(1)
            nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Other, match.group(4)    # end
            ctx.pos = match.end()

        states = {}
        states['strings'] = [
            # easy ones
            (r'\:([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r':"', String.Symbol, 'simple-sym'),
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol
        for name, ttype, end in ('string', String.Double, '"'), \
                                ('sym', String.Symbol, '"'), \
                                ('backtick', String.Backtick, '`'):
            states['simple-'+name] = [
                include('string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings
        # For every bracket pair three states are generated; '#push' on a
        # nested opening bracket and '#pop' on a closing one keep the
        # brackets balanced.
        for lbrace, rbrace, name in ('\\{', '\\}', 'cb'), \
                                    ('\\[', '\\]', 'sb'), \
                                    ('\\(', '\\)', 'pa'), \
                                    ('<', '>', 'ab'):
            states[name+'-intp-string'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Other),
                (r'(?<!\\)' + lbrace, String.Other, '#push'),
                (r'(?<!\\)' + rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + lbrace + rbrace + ']', String.Other),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
            ]
            states['strings'].append((r'%[QWx]?' + lbrace, String.Other,
                                      name+'-intp-string'))
            states[name+'-string'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Other),
                (r'(?<!\\)' + lbrace, String.Other, '#push'),
                (r'(?<!\\)' + rbrace, String.Other, '#pop'),
                (r'[\\#' + lbrace + rbrace + ']', String.Other),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
            ]
            states['strings'].append((r'%[qsw]' + lbrace, String.Other,
                                      name+'-string'))
            states[name+'-regex'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Regex),
                (r'(?<!\\)' + lbrace, String.Regex, '#push'),
                (r'(?<!\\)' + rbrace + '[mixounse]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + lbrace + rbrace + ']', String.Regex),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Regex),
            ]
            states['strings'].append((r'%r' + lbrace, String.Regex,
                                      name+'-regex'))

        # these must come after %<brace>!
        # NOTE(review): the patterns below use ``\1``/``\2``/``\3`` inside
        # character classes (e.g. ``[^\2\\]``).  Python's ``re`` does not
        # treat those as backreferences inside a class -- confirm that the
        # resulting matching behaviour is what is wanted.
        states['strings'] += [
            # %r regex
            (r'(%r([^a-zA-Z0-9]))([^\2\\]*(?:\\.[^\2\\]*)*)(\2[mixounse]*)',
             intp_regex_callback),
            # regular fancy strings with qsw
            (r'%[qsw]([^a-zA-Z0-9])([^\1\\]*(?:\\.[^\1\\]*)*)\1', String.Other),
            (r'(%[QWx]([^a-zA-Z0-9]))([^\2\\]*(?:\\.[^\2\\]*)*)(\2)',
             intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:[^\3\\]*(?:\\.[^\3\\]*)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:[^\3\\]*(?:\\.[^\3\\]*)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([^a-zA-Z0-9\s]))([^\2\\]*(?:\\.[^\2\\]*)*)(\2)',
             intp_string_callback),
        ]

        return states
00583 
00584     tokens = {
00585         'root': [
00586             (r'#.*?$', Comment.Single),
00587             (r'=begin\s.*?\n=end', Comment.Multiline),
00588             # keywords
00589             (r'(BEGIN|END|alias|begin|break|case|defined\?|'
00590              r'do|else|elsif|end|ensure|for|if|in|next|redo|'
00591              r'rescue|raise|retry|return|super|then|undef|unless|until|when|'
00592              r'while|yield)\b', Keyword),
00593             # start of function, class and module names
00594             (r'(module)(\s+)([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)',
00595              bygroups(Keyword, Text, Name.Namespace)),
00596             (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
00597             (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
00598             (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
00599             # special methods
00600             (r'(initialize|new|loop|include|extend|raise|attr_reader|'
00601              r'attr_writer|attr_accessor|attr|catch|throw|private|'
00602              r'module_function|public|protected|true|false|nil)\b', Keyword.Pseudo),
00603             (r'(not|and|or)\b', Operator.Word),
00604             (r'(autoload|block_given|const_defined|eql|equal|frozen|include|'
00605              r'instance_of|is_a|iterator|kind_of|method_defined|nil|'
00606              r'private_method_defined|protected_method_defined|'
00607              r'public_method_defined|respond_to|tainted)\?', Name.Builtin),
00608             (r'(chomp|chop|exit|gsub|sub)!', Name.Builtin),
00609             (r'(?<!\.)(Array|Float|Integer|String|__id__|__send__|abort|ancestors|'
00610              r'at_exit|autoload|binding|callcc|caller|'
00611              r'catch|chomp|chop|class_eval|class_variables|'
00612              r'clone|const_defined\?|const_get|const_missing|const_set|constants|'
00613              r'display|dup|eval|exec|exit|extend|fail|fork|'
00614              r'format|freeze|getc|gets|global_variables|gsub|'
00615              r'hash|id|included_modules|inspect|instance_eval|'
00616              r'instance_method|instance_methods|'
00617              r'instance_variable_get|instance_variable_set|instance_variables|'
00618              r'lambda|load|local_variables|loop|'
00619              r'method|method_missing|methods|module_eval|name|'
00620              r'object_id|open|p|print|printf|private_class_method|'
00621              r'private_instance_methods|'
00622              r'private_methods|proc|protected_instance_methods|'
00623              r'protected_methods|public_class_method|'
00624              r'public_instance_methods|public_methods|'
00625              r'putc|puts|raise|rand|readline|readlines|require|'
00626              r'scan|select|self|send|set_trace_func|singleton_methods|sleep|'
00627              r'split|sprintf|srand|sub|syscall|system|taint|'
00628              r'test|throw|to_a|to_s|trace_var|trap|type|untaint|untrace_var|'
00629              r'warn)\b', Name.Builtin),
00630             (r'__(FILE|LINE)__\b', Name.Builtin.Pseudo),
00631             # normal heredocs
00632             (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)', heredoc_callback),
00633             # empty string heredocs
00634             (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
00635             (r'__END__', Comment.Preproc, 'end-part'),
00636             # multiline regex (after keywords or assignments)
00637             (r'(?:^|(?<=[=<>~!])|'
00638                  r'(?<=(?:\s|;)when\s)|'
00639                  r'(?<=(?:\s|;)or\s)|'
00640                  r'(?<=(?:\s|;)and\s)|'
00641                  r'(?<=(?:\s|;|\.)index\s)|'
00642                  r'(?<=(?:\s|;|\.)scan\s)|'
00643                  r'(?<=(?:\s|;|\.)sub\s)|'
00644                  r'(?<=(?:\s|;|\.)sub!\s)|'
00645                  r'(?<=(?:\s|;|\.)gsub\s)|'
00646                  r'(?<=(?:\s|;|\.)gsub!\s)|'
00647                  r'(?<=(?:\s|;|\.)match\s)|'
00648                  r'(?<=(?:\s|;)if\s)|'
00649                  r'(?<=(?:\s|;)elsif\s)|'
00650                  r'(?<=^when\s)|'
00651                  r'(?<=^index\s)|'
00652                  r'(?<=^scan\s)|'
00653                  r'(?<=^sub\s)|'
00654                  r'(?<=^gsub\s)|'
00655                  r'(?<=^sub!\s)|'
00656                  r'(?<=^gsub!\s)|'
00657                  r'(?<=^match\s)|'
00658                  r'(?<=^if\s)|'
00659                  r'(?<=^elsif\s)'
00660              r')(\s*)(/)(?!=)', bygroups(Text, String.Regex), 'multiline-regex'),
00661             # multiline regex (in method calls)
00662             (r'(?<=\(|,)/', String.Regex, 'multiline-regex'),
00663             # multiline regex (this time the funny no whitespace rule)
00664             (r'(\s+)(/[^\s=])', String.Regex, 'multiline-regex'),
00665             # lex numbers and ignore following regular expressions which
00666             # are division operators in fact (grrrr. i hate that. any
00667             # better ideas?)
00668             # since pygments 0.7 we also eat a "?" operator after numbers
00669             # so that the char operator does not work. Chars are not allowed
00670             # there so that you can use the ternary operator.
00671             # stupid example:
00672             #   x>=0?n[x]:""
00673             (r'(0_?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?',
00674              bygroups(Number.Oct, Text, Operator)),
00675             (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*)(\s*)([/?])?',
00676              bygroups(Number.Hex, Text, Operator)),
00677             (r'(0b[01]+(?:_[01]+)*)(\s*)([/?])?',
00678              bygroups(Number.Bin, Text, Operator)),
00679             (r'([\d]+(?:_\d+)*)(\s*)([/?])?',
00680              bygroups(Number.Integer, Text, Operator)),
00681             # Names
00682             (r'@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class),
00683             (r'@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance),
00684             (r'\$[a-zA-Z0-9_]+', Name.Variable.Global),
00685             (r'\$[!@&`\'+~=/\\,;.<>_*$?:"]', Name.Variable.Global),
00686             (r'\$-[0adFiIlpvw]', Name.Variable.Global),
00687             (r'::', Operator),
00688             include('strings'),
00689             # chars
00690             (r'\?(\\[MC]-)*' # modifiers
00691              r'(\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
00692              r'(?!\w)',
00693              String.Char),
00694             (r'[A-Z][a-zA-Z0-9_]+', Name.Constant),
00695             # this is needed because ruby attributes can look
00696             # like keywords (class) or like this: ` ?!?
00697             (r'(\.|::)([a-zA-Z_]\w*[\!\?]?|[*%&^`~+-/\[<>=])',
00698              bygroups(Operator, Name)),
00699             (r'[a-zA-Z_][\w_]*[\!\?]?', Name),
00700             (r'(\[|\]|\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|'
00701              r'!~|&&?|\|\||\.{1,3})', Operator),
00702             (r'[-+/*%=<>&!^|~]=?', Operator),
00703             (r'[(){};,/?:\\]', Punctuation),
00704             (r'\s+', Text)
00705         ],
00706         'funcname': [
00707             (r'\(', Punctuation, 'defexpr'),
00708             (r'(?:([a-zA-Z_][a-zA-Z0-9_]*)(\.))?'
00709              r'([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|'
00710              r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
00711              bygroups(Name.Class, Operator, Name.Function), '#pop'),
00712             (r'', Text, '#pop')
00713         ],
00714         'classname': [
00715             (r'\(', Punctuation, 'defexpr'),
00716             (r'<<', Operator, '#pop'),
00717             (r'[A-Z_][\w_]*', Name.Class, '#pop'),
00718             (r'', Text, '#pop')
00719         ],
00720         'defexpr': [
00721             (r'(\))(\.|::)?', bygroups(Punctuation, Operator), '#pop'),
00722             (r'\(', Operator, '#push'),
00723             include('root')
00724         ],
00725         'in-intp': [
00726             ('}', String.Interpol, '#pop'),
00727             include('root'),
00728         ],
00729         'string-intp': [
00730             (r'#{', String.Interpol, 'in-intp'),
00731             (r'#@@?[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol),
00732             (r'#\$[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol)
00733         ],
00734         'string-intp-escaped': [
00735             include('string-intp'),
00736             (r'\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})', String.Escape)
00737         ],
00738         'interpolated-regex': [
00739             include('string-intp'),
00740             (r'[\\#]', String.Regex),
00741             (r'[^\\#]+', String.Regex),
00742         ],
00743         'interpolated-string': [
00744             include('string-intp'),
00745             (r'[\\#]', String.Other),
00746             (r'[^\\#]+', String.Other),
00747         ],
00748         'multiline-regex': [
00749             include('string-intp'),
00750             (r'\\\\', String.Regex),
00751             (r'\\/', String.Regex),
00752             (r'[\\#]', String.Regex),
00753             (r'[^\\/#]+', String.Regex),
00754             (r'/[mixounse]*', String.Regex, '#pop'),
00755         ],
00756         'end-part': [
00757             (r'.+', Comment.Preproc, '#pop')
00758         ]
00759     }
00760     tokens.update(gen_rubystrings_rules())
00761 
00762     def analyse_text(text):
00763         return shebang_matches(text, r'ruby(1\.\d)?')
00764 
00765 
class RubyConsoleLexer(Lexer):
    """
    For Ruby interactive console (**irb**) output like:

    .. sourcecode:: rbcon

        irb(main):001:0> a = 1
        => 1
        irb(main):002:0> puts a
        1
        => nil
    """
    name = 'Ruby irb session'
    aliases = ['rbcon', 'irb']
    mimetypes = ['text/x-ruby-shellsession']

    # Raw strings: ``\(``, ``\d`` and ``\?`` are regex escapes, not Python
    # string escapes, and must reach the regex engine untouched.
    _prompt_re = re.compile(r'irb\([a-zA-Z_][a-zA-Z0-9_]*\):\d{3}:\d+[>*"\'] '
                            r'|>> |\?> ')

    def get_tokens_unprocessed(self, text):
        """
        Split *text* into prompt, Ruby code and output tokens.

        Lines that start with an irb prompt contribute a ``Generic.Prompt``
        token and have their remainder buffered as Ruby code.  Every other
        line first flushes the buffered code through a ``RubyLexer`` (with
        the prompts re-inserted via ``do_insertions``) and is then yielded
        as ``Generic.Output``.
        """
        rblexer = RubyLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            m = self._prompt_re.match(line)
            if m is not None:
                end = m.end()
                # remember where in the buffered code this prompt belongs
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                if curcode:
                    for item in do_insertions(insertions,
                                    rblexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                yield match.start(), Generic.Output, line
        # flush code that was not followed by an output line
        if curcode:
            for item in do_insertions(insertions,
                                      rblexer.get_tokens_unprocessed(curcode)):
                yield item
00810 
00811 
class PerlLexer(RegexLexer):
    """
    For `Perl <http://www.perl.org>`_ source code.
    """

    name = 'Perl'
    aliases = ['perl', 'pl']
    filenames = ['*.pl', '*.pm']
    mimetypes = ['text/x-perl', 'application/x-perl']

    flags = re.DOTALL | re.MULTILINE
    # TODO: give this a perl guy who knows how to parse perl...
    tokens = {
        # second half of a substitution whose first half used balanced
        # delimiters, e.g. the ``/bar/`` in ``s{foo}/bar/``
        'balanced-regex': [
            (r'/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex, '#pop'),
            (r'!(\\\\|\\!|[^!])*![egimosx]*', String.Regex, '#pop'),
            (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
            (r'{(\\\\|\\}|[^}])*}[egimosx]*', String.Regex, '#pop'),
            (r'<(\\\\|\\>|[^>])*>[egimosx]*', String.Regex, '#pop'),
            (r'\[(\\\\|\\\]|[^\]])*\][egimosx]*', String.Regex, '#pop'),
            (r'\((\\\\|\\\)|[^\)])*\)[egimosx]*', String.Regex, '#pop'),
            (r'@(\\\\|\\\@|[^\@])*@[egimosx]*', String.Regex, '#pop'),
            (r'%(\\\\|\\\%|[^\%])*%[egimosx]*', String.Regex, '#pop'),
            (r'\$(\\\\|\\\$|[^\$])*\$[egimosx]*', String.Regex, '#pop'),
        ],
        'root': [
            (r'\#.*?$', Comment.Single),
            (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
            (r'(case|continue|do|else|elsif|for|foreach|if|last|my|'
             r'next|our|redo|reset|then|unless|until|while|use|'
             r'print|new|BEGIN|END|return)\b', Keyword),
            (r'(format)(\s+)([a-zA-Z0-9_]+)(\s*)(=)(\s*\n)',
             bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
            (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
            # common delimiters
            (r's/(\\\\|\\/|[^/])*/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex),
            (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
            (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
            (r's@(\\\\|\\@|[^@])*@(\\\\|\\@|[^@])*@[egimosx]*', String.Regex),
            (r's%(\\\\|\\%|[^%])*%(\\\\|\\%|[^%])*%[egimosx]*', String.Regex),
            # balanced delimiters
            (r's{(\\\\|\\}|[^}])*}\s*', String.Regex, 'balanced-regex'),
            (r's<(\\\\|\\>|[^>])*>\s*', String.Regex, 'balanced-regex'),
            (r's\[(\\\\|\\\]|[^\]])*\]\s*', String.Regex, 'balanced-regex'),
            (r's\((\\\\|\\\)|[^\)])*\)\s*', String.Regex, 'balanced-regex'),

            (r'm?/(\\\\|\\/|[^/\n])*/[gcimosx]*', String.Regex),
            (r'((?<==~)|(?<=\())\s*/(\\\\|\\/|[^/])*/[gcimosx]*', String.Regex),
            (r'\s+', Text),
            (r'(abs|accept|alarm|atan2|bind|binmode|bless|caller|chdir|'
             r'chmod|chomp|chop|chown|chr|chroot|close|closedir|connect|'
             r'continue|cos|crypt|dbmclose|dbmopen|defined|delete|die|'
             r'dump|each|endgrent|endhostent|endnetent|endprotoent|'
             r'endpwent|endservent|eof|eval|exec|exists|exit|exp|fcntl|'
             r'fileno|flock|fork|format|formline|getc|getgrent|getgrgid|'
             r'getgrnam|gethostbyaddr|gethostbyname|gethostent|getlogin|'
             r'getnetbyaddr|getnetbyname|getnetent|getpeername|getpgrp|'
             r'getppid|getpriority|getprotobyname|getprotobynumber|'
             r'getprotoent|getpwent|getpwnam|getpwuid|getservbyname|'
             r'getservbyport|getservent|getsockname|getsockopt|glob|gmtime|'
             r'goto|grep|hex|import|index|int|ioctl|join|keys|kill|last|'
             r'lc|lcfirst|length|link|listen|local|localtime|log|lstat|'
             r'map|mkdir|msgctl|msgget|msgrcv|msgsnd|my|next|no|oct|open|'
             r'opendir|ord|our|pack|package|pipe|pop|pos|printf|'
             r'prototype|push|quotemeta|rand|read|readdir|'
             r'readline|readlink|readpipe|recv|redo|ref|rename|require|'
             r'reverse|rewinddir|rindex|rmdir|scalar|seek|seekdir|'
             r'select|semctl|semget|semop|send|setgrent|sethostent|setnetent|'
             r'setpgrp|setpriority|setprotoent|setpwent|setservent|'
             r'setsockopt|shift|shmctl|shmget|shmread|shmwrite|shutdown|'
             r'sin|sleep|socket|socketpair|sort|splice|split|sprintf|sqrt|'
             r'srand|stat|study|substr|symlink|syscall|sysopen|sysread|'
             r'sysseek|system|syswrite|tell|telldir|tie|tied|time|times|tr|'
             r'truncate|uc|ucfirst|umask|undef|unlink|unpack|unshift|untie|'
             r'utime|values|vec|wait|waitpid|wantarray|warn|write'
             r')\b', Name.Builtin),
            (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
            (r'<<([\'"]?)([a-zA-Z_][a-zA-Z0-9_]*)\1;?\n.*?\n\2\n', String),
            (r'__END__', Comment.Preproc, 'end-part'),
            (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
            (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
            (r'[$@%#]+', Name.Variable, 'varname'),
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'\d+', Number.Integer),
            (r"'(\\\\|\\'|[^'])*'", String),
            (r'"(\\\\|\\"|[^"])*"', String),
            (r'`(\\\\|\\`|[^`])*`', String.Backtick),
            # String.Regex is the token used everywhere else in this lexer;
            # "String.Regexp" would create an unrelated token type.
            (r'<([^\s>]+)>', String.Regex),
            (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
            (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
            (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
            (r'(q|qq|qw|qr|qx)<', String.Other, 'lt-string'),
            # Any other delimiter: the string ends at the next occurrence of
            # the same delimiter (group 2, not the keyword in group 1).
            # Whitespace and closing brackets are not accepted as delimiters
            # so that e.g. ``$h{q}`` is not taken for the start of a quote.
            # ``.`` also matches newlines because of re.DOTALL above.
            (r'(q|qq|qw|qr|qx)([^\w\s)\]}>]).*?\2', String.Other),
            (r'package\s+', Keyword, 'modulename'),
            (r'sub\s+', Keyword, 'funcname'),
            (r'(\[\]|\*\*|::|<<|>>|>=|<=|<=>|={3}|!=|=~|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&^|!\\~]=?', Operator),
            (r'[\(\)\[\]:;,<>/\?\{\}]', Punctuation), # yes, there's no shortage
                                                      # of punctuation in Perl!
            (r'(?=\w)', Name, 'name'),
        ],
        'format': [
            (r'\.\n', String.Interpol, '#pop'),
            (r'[^\n]*\n', String.Interpol),
        ],
        'varname': [
            (r'\s+', Text),
            (r'\{', Punctuation, '#pop'), # hash syntax?
            (r'\)|,', Punctuation, '#pop'), # argument specifier
            (r'[a-zA-Z0-9_]+::', Name.Namespace),
            (r'[a-zA-Z0-9_:]+', Name.Variable, '#pop'),
        ],
        'name': [
            (r'[a-zA-Z0-9_]+::', Name.Namespace),
            (r'[a-zA-Z0-9_:]+', Name, '#pop'),
            (r'[A-Z_]+(?=[^a-zA-Z0-9_])', Name.Constant, '#pop'),
            (r'(?=[^a-zA-Z0-9_])', Text, '#pop'),
        ],
        'modulename': [
            (r'[a-zA-Z_][\w_]*', Name.Namespace, '#pop')
        ],
        'funcname': [
            (r'[a-zA-Z_][\w_]*[\!\?]?', Name.Function),
            (r'\s+', Text),
            # argument declaration
            (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Text)),
            (r'.*?{', Punctuation, '#pop'),
            (r';', Punctuation, '#pop'),
        ],
        # The four bracketed-quote states share one shape: escaped
        # delimiter, lone backslash, nested opener, closer, plain text.
        # The plain-text rule must not consume backslashes, otherwise an
        # escaped closer would be swallowed and terminate the string early.
        'cb-string': [
            (r'\\[\{\}\\]', String.Other),
            (r'\\', String.Other),
            (r'\{', String.Other, 'cb-string'),
            (r'\}', String.Other, '#pop'),
            (r'[^\{\}\\]+', String.Other)
        ],
        'rb-string': [
            (r'\\[\(\)\\]', String.Other),
            (r'\\', String.Other),
            (r'\(', String.Other, 'rb-string'),
            (r'\)', String.Other, '#pop'),
            (r'[^\(\)\\]+', String.Other)
        ],
        'sb-string': [
            (r'\\[\[\]\\]', String.Other),
            (r'\\', String.Other),
            (r'\[', String.Other, 'sb-string'),
            (r'\]', String.Other, '#pop'),
            (r'[^\[\]\\]+', String.Other)
        ],
        'lt-string': [
            (r'\\[<>\\]', String.Other),
            (r'\\', String.Other),
            (r'<', String.Other, 'lt-string'),
            (r'>', String.Other, '#pop'),
            (r'[^<>\\]+', String.Other)
        ],
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ]
    }

    def analyse_text(text):
        """
        Rate how likely *text* is Perl: ``True`` when ``shebang_matches``
        accepts a ``perl`` interpreter (optionally with an ``x.y.z``
        version), 0.9 when the text contains ``my $``, otherwise 0.1.
        """
        if shebang_matches(text, r'perl(\d\.\d\.\d)?'):
            return True
        if 'my $' in text:
            return 0.9
        return 0.1 # who knows, might still be perl!
00984 
00985 
class LuaLexer(RegexLexer):
    """
    For `Lua <http://www.lua.org>`_ source code.

    Additional options accepted:

    `func_name_highlighting`
        If given and ``True``, highlight builtin function names
        (default: ``True``).
    `disabled_modules`
        If given, must be a list of module names whose function names
        should not be highlighted. By default all modules are highlighted.

        To get a list of allowed modules have a look into the
        `_luabuiltins` module:

        .. sourcecode:: pycon

            >>> from pygments.lexers._luabuiltins import MODULES
            >>> MODULES.keys()
            ['string', 'coroutine', 'modules', 'io', 'basic', ...]
    """

    name = 'Lua'
    aliases = ['lua']
    filenames = ['*.lua']
    mimetypes = ['text/x-lua', 'application/x-lua']

    tokens = {
        'root': [
            (r'(?s)--\[(=*)\[.*?\]\1\]', Comment.Multiline),
            ('--.*$', Comment.Single),

            (r'(?i)(\d*\.\d+|\d+\.\d*)(e[+-]?\d+)?', Number.Float),
            (r'(?i)\d+e[+-]?\d+', Number.Float),
            ('(?i)0x[0-9a-f]*', Number.Hex),
            (r'\d+', Number.Integer),

            (r'\n', Text),
            (r'[^\S\n]', Text),
            # long-bracket (multiline) strings; must come before the
            # punctuation rule, which would otherwise take the first "["
            (r'(?s)\[(=*)\[.*?\]\1\]', String),

            # Operators are tried before punctuation, and "..." before
            # "..", so that the dotted operators are not split into single
            # "." punctuation tokens.
            (r'(==|~=|<=|>=|\.\.\.|\.\.|[=+\-*/%^<>#])', Operator),
            (r'[\[\]\{\}\(\)\.,:;]', Punctuation),
            (r'(and|or|not)\b', Operator.Word),

            ('(break|do|else|elseif|end|for|if|in|repeat|return|then|until|'
             r'while)\b', Keyword),
            (r'(local)\b', Keyword.Declaration),
            (r'(true|false|nil)\b', Keyword.Constant),

            (r'(function)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),

            (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name),

            ("'", String.Single, combined('stringescape', 'sqs')),
            ('"', String.Double, combined('stringescape', 'dqs'))
        ],

        'funcname': [
            ('[A-Za-z_][A-Za-z0-9_]*', Name.Function, '#pop'),
            # inline function
            (r'\(', Punctuation, '#pop'),
        ],

        'classname': [
            ('[A-Za-z_][A-Za-z0-9_]*', Name.Class, '#pop')
        ],

        # if I understand correctly, every character is valid in a lua string,
        # so this state is only for later corrections
        'string': [
            ('.', String)
        ],

        'stringescape': [
            (r'''\\([abfnrtv\\"']|\d{1,3})''', String.Escape)
        ],

        'sqs': [
            ("'", String, '#pop'),
            include('string')
        ],

        'dqs': [
            ('"', String, '#pop'),
            include('string')
        ]
    }

    def __init__(self, **options):
        """
        Read the ``func_name_highlighting`` and ``disabled_modules``
        options and collect the builtin function names of all modules
        that are not disabled into ``self._functions``.
        """
        self.func_name_highlighting = get_bool_opt(
            options, 'func_name_highlighting', True)
        self.disabled_modules = get_list_opt(options, 'disabled_modules', [])

        self._functions = set()
        if self.func_name_highlighting:
            from pygments.lexers._luabuiltins import MODULES
            # .items() works on every Python version; .iteritems() exists
            # only on Python 2 dictionaries
            for mod, func in MODULES.items():
                if mod not in self.disabled_modules:
                    self._functions.update(func)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Post-process the regex lexer's output: a ``Name`` token whose value
        is a known builtin becomes ``Name.Builtin``; any other dotted name
        is split into ``Name``, ``Punctuation`` (the dot) and ``Name``.
        All other tokens pass through unchanged.
        """
        for index, token, value in \
            RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if value in self._functions:
                    yield index, Name.Builtin, value
                    continue
                elif '.' in value:
                    # the Name rule allows at most one dot, so this
                    # always unpacks into exactly two parts
                    a, b = value.split('.')
                    yield index, Name, a
                    yield index + len(a), Punctuation, u'.'
                    yield index + len(a) + 1, Name, b
                    continue
            yield index, token, value
01106 
01107 
class MiniDLexer(RegexLexer):
    """
    For `MiniD <http://www.dsource.org/projects/minid>`_ (a D-like scripting
    language) source.
    """
    name = 'MiniD'
    filenames = ['*.md']
    aliases = ['minid']
    mimetypes = ['text/x-minidsrc']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            # Comments
            (r'//(.*?)\n', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            (r'/\+', Comment.Multiline, 'nestedcomment'),
            # Keywords
            (r'(as|assert|break|case|catch|class|continue|coroutine|default'
             r'|do|else|finally|for|foreach|function|global|namespace'
             r'|if|import|in|is|local|module|return|super|switch'
             r'|this|throw|try|vararg|while|with|yield)\b', Keyword),
            (r'(false|true|null)\b', Keyword.Constant),
            # FloatLiteral
            (r'([0-9][0-9_]*)?\.[0-9_]+([eE][+\-]?[0-9_]+)?', Number.Float),
            # IntegerLiteral
            # -- Binary
            (r'0[Bb][01_]+', Number),
            # -- Octal
            (r'0[Cc][0-7_]+', Number.Oct),
            # -- Hexadecimal
            (r'0[xX][0-9a-fA-F_]+', Number.Hex),
            # -- Decimal
            (r'(0|[1-9][0-9_]*)', Number.Integer),
            # CharacterLiteral
            (r"""'(\\['"?\\abfnrtv]|\\x[0-9a-fA-F]{2}|\\[0-9]{1,3}"""
             r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|.)'""",
             String.Char
            ),
            # StringLiteral
            # The two wysiwyg forms end at the first quote that is not
            # doubled; a bare "." here would run on to the last quote
            # character of the line.
            # -- WysiwygString
            (r'@"(""|[^"])*"', String),
            # -- AlternateWysiwygString
            (r'`(``|[^`])*`', String),
            # -- DoubleQuotedString
            (r'"(\\\\|\\"|[^"])*"', String),
            # Tokens
            (
             r'(~=|\^=|%=|\*=|==|!=|>>>=|>>>|>>=|>>|>=|<=>|\?=|->'
             r'|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.|/=)'
             r'|[-/.&$@|\+<>!()\[\]{}?,;:=*%^~#\\]', Punctuation
            ),
            # Identifier
            (r'[a-zA-Z_]\w*', Name),
        ],
        # "/+ ... +/" comments nest: "#push" on every opener, "#pop" on
        # every closer
        'nestedcomment': [
            (r'[^+/]+', Comment.Multiline),
            (r'/\+', Comment.Multiline, '#push'),
            (r'\+/', Comment.Multiline, '#pop'),
            (r'[+/]', Comment.Multiline),
        ],
    }
01171 
01172 
class IoLexer(RegexLexer):
    """
    For `Io <http://iolanguage.com/>`_ (a small, prototype-based
    programming language) source.

    *New in Pygments 0.10.*
    """
    name = 'Io'
    filenames = ['*.io']
    aliases = ['io']
    mimetypes = ['text/x-iosrc']
    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            # Comments
            (r'//(.*?)\n', Comment.Single),
            (r'#(.*?)\n', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            (r'/\+', Comment.Multiline, 'nestedcomment'),
            # DoubleQuotedString
            (r'"(\\\\|\\"|[^"])*"', String),
            # Operators
            (r'::=|:=|=|\(|\)|;|,|\*|-|\+|>|<|@|!|/|\||\^|\.|%|&|\[|\]|\{|\}',
             Operator),
            # keywords
            (r'(clone|do|doFile|doString|method|for|if|else|elseif|then)\b',
             Keyword),
            # constants
            (r'(nil|false|true)\b', Name.Constant),
            # names
            # (raw string required: in a plain string literal "\b" is a
            # backspace character, not a regex word boundary)
            (r'(Object|list|List|Map|args|Sequence|Coroutine|File)\b',
             Name.Builtin),
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
            # numbers
            (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+', Number.Integer)
        ],
        # "/+ ... +/" comments nest: "#push" on every opener, "#pop" on
        # every closer
        'nestedcomment': [
            (r'[^+/]+', Comment.Multiline),
            (r'/\+', Comment.Multiline, '#push'),
            (r'\+/', Comment.Multiline, '#pop'),
            (r'[+/]', Comment.Multiline),
        ]
    }
01218 
01219 
class TclLexer(RegexLexer):
    """
    For Tcl source code.

    *New in Pygments 0.10.*
    """

    keyword_cmds_re = (
        r'\b(after|apply|array|break|catch|continue|elseif|else|error|'
        r'eval|expr|for|foreach|global|if|namespace|proc|rename|return|'
        r'set|switch|then|trace|unset|update|uplevel|upvar|variable|'
        r'vwait|while)\b'
        )

    builtin_cmds_re = (
        r'\b(append|bgerror|binary|cd|chan|clock|close|concat|dde|dict|'
        r'encoding|eof|exec|exit|fblocked|fconfigure|fcopy|file|'
        r'fileevent|flush|format|gets|glob|history|http|incr|info|interp|'
        r'join|lappend|lassign|lindex|linsert|list|llength|load|loadTk|'
        r'lrange|lrepeat|lreplace|lreverse|lsearch|lset|lsort|mathfunc|'
        r'mathop|memory|msgcat|open|package|pid|pkg::create|pkg_mkIndex|'
        r'platform|platform::shell|puts|pwd|re_syntax|read|refchan|'
        r'regexp|registry|regsub|scan|seek|socket|source|split|string|'
        r'subst|tell|time|tm|unknown|unload)\b'
        )

    name = 'Tcl'
    aliases = ['tcl']
    filenames = ['*.tcl']
    mimetypes = ['text/x-tcl', 'text/x-script.tcl', 'application/x-tcl']

    def _gen_command_rules(keyword_cmds_re, builtin_cmds_re, context=""):
        """
        Build the rule list for a command position.  Each command rule
        enters the state ``'params' + context``; *context* selects the
        variant that also closes the enclosing brace, bracket or paren.
        """
        return [
            (keyword_cmds_re, Keyword, 'params' + context),
            (builtin_cmds_re, Name.Builtin, 'params' + context),
            (r'([\w\.\-]+)', Name.Variable, 'params' + context),
            (r'#', Comment, 'comment'),
        ]

    tokens = {
        'root': [
            include('command'),
            include('basic'),
            include('data'),
        ],
        'command': _gen_command_rules(keyword_cmds_re, builtin_cmds_re),
        'command-in-brace': _gen_command_rules(keyword_cmds_re,
                                               builtin_cmds_re,
                                               "-in-brace"),
        'command-in-bracket': _gen_command_rules(keyword_cmds_re,
                                                 builtin_cmds_re,
                                                 "-in-bracket"),
        'command-in-paren': _gen_command_rules(keyword_cmds_re,
                                               builtin_cmds_re,
                                               "-in-paren"),
        'basic': [
            (r'\(', Keyword, 'paren'),
            (r'\[', Keyword, 'bracket'),
            (r'\{', Keyword, 'brace'),
            (r'"', String.Double, 'string'),
            (r'(eq|ne|in|ni)\b', Operator.Word),
            (r'!=|==|<<|>>|<=|>=|&&|\|\||\*\*|[-+~!*/%<>&^|?:]', Operator),
        ],
        'data': [
            (r'\s+', Text),
            (r'0x[a-fA-F0-9]+', Number.Hex),
            (r'0[0-7]+', Number.Oct),
            (r'\d+\.\d+', Number.Float),
            (r'\d+', Number.Integer),
            (r'\$([\w\.\-\:]+)', Name.Variable),
            (r'([\w\.\-\:]+)', Text),
        ],
        'params': [
            (r';', Keyword, '#pop'),
            (r'\n', Text, '#pop'),
            # The word boundary keeps this rule from matching the start of
            # a longer word and lets "elseif" match as a whole instead of
            # as "else" followed by "if".
            (r'(else|elseif|then)\b', Keyword),
            include('basic'),
            include('data'),
        ],
        # The closing delimiter ends both the parameter list and the
        # enclosing brace/paren/bracket state, hence the double pop.
        'params-in-brace': [
            (r'}', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'params-in-paren': [
            (r'\)', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'params-in-bracket': [
            (r'\]', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'string': [
            (r'\[', String.Double, 'string-square'),
            (r'(\\\\|\\[0-7]+|\\.|[^"])', String.Double),
            (r'"', String.Double, '#pop')
        ],
        'string-square': [
            (r'\[', String.Double, 'string-square'),
            (r'(\\\\|\\[0-7]+|\\.|[^\]])', String.Double),
            (r'\]', String.Double, '#pop')
        ],
        'brace': [
            (r'}', Keyword, '#pop'),
            include('command-in-brace'),
            include('basic'),
            include('data'),
        ],
        'paren': [
            (r'\)', Keyword, '#pop'),
            include('command-in-paren'),
            include('basic'),
            include('data'),
        ],
        'bracket': [
            (r'\]', Keyword, '#pop'),
            include('command-in-bracket'),
            include('basic'),
            include('data'),
        ],
        'comment': [
            # a line ending in a backslash continues the comment
            (r'.*[^\\]\n', Comment, '#pop'),
            (r'.*\\\n', Comment),
        ],
    }

    def analyse_text(text):
        """
        Return the result of ``shebang_matches`` for an interpreter name
        of ``tcl``.
        """
        return shebang_matches(text, r'(tcl)')
01347 
01348 
class ClojureLexer(RegexLexer):
    """
    Lexer for `Clojure <http://clojure.org/>`_ source code.

    The lexer is a single-state regex lexer: every rule lives in the
    ``root`` state and rules are tried in the order they are listed.

    *New in Pygments 0.11.*
    """
    name = 'Clojure'
    aliases = ['clojure', 'clj']
    filenames = ['*.clj']
    mimetypes = ['text/x-clojure', 'application/x-clojure']

    # Names highlighted as ``Keyword``.  They are matched together with one
    # trailing space (see the keyword rule in ``tokens``).
    keywords = [
        'fn', 'def', 'defn', 'defmacro', 'defmethod', 'defmulti', 'defn-',
        'defstruct',
        'if', 'cond',
        'let', 'for'
    ]

    # Names highlighted as ``Name.Builtin`` when they directly follow an
    # opening parenthesis and are followed by one space.
    builtins = [
        '.', '..',
        '*', '+', '-', '->', '/', '<', '<=', '=', '==', '>', '>=',
        'accessor', 'agent', 'agent-errors', 'aget', 'alength', 'all-ns',
        'alter', 'and', 'append-child', 'apply', 'array-map', 'aset',
        'aset-boolean', 'aset-byte', 'aset-char', 'aset-double', 'aset-float',
        'aset-int', 'aset-long', 'aset-short', 'assert', 'assoc', 'await',
        'await-for', 'bean', 'binding', 'bit-and', 'bit-not', 'bit-or',
        'bit-shift-left', 'bit-shift-right', 'bit-xor', 'boolean', 'branch?',
        'butlast', 'byte', 'cast', 'char', 'children', 'class',
        'clear-agent-errors', 'comment', 'commute', 'comp', 'comparator',
        'complement', 'concat', 'conj', 'cons', 'constantly',
        'construct-proxy', 'contains?', 'count', 'create-ns', 'create-struct',
        'cycle', 'dec',  'deref', 'difference', 'disj', 'dissoc', 'distinct',
        'doall', 'doc', 'dorun', 'doseq', 'dosync', 'dotimes', 'doto',
        'double', 'down', 'drop', 'drop-while', 'edit', 'end?', 'ensure',
        'eval', 'every?', 'false?', 'ffirst', 'file-seq', 'filter', 'find',
        'find-doc', 'find-ns', 'find-var', 'first', 'float', 'flush',
        'fnseq', 'frest', 'gensym', 'get', 'get-proxy-class',
        'hash-map', 'hash-set', 'identical?', 'identity', 'if-let', 'import',
        'in-ns', 'inc', 'index', 'insert-child', 'insert-left', 'insert-right',
        'inspect-table', 'inspect-tree', 'instance?', 'int', 'interleave',
        'intersection', 'into', 'into-array', 'iterate', 'join', 'key', 'keys',
        'keyword', 'keyword?', 'last', 'lazy-cat', 'lazy-cons', 'left',
        'lefts', 'line-seq', 'list', 'list*', 'load', 'load-file',
        'locking', 'long', 'loop', 'macroexpand', 'macroexpand-1',
        'make-array', 'make-node', 'map', 'map-invert', 'map?', 'mapcat',
        'max', 'max-key', 'memfn', 'merge', 'merge-with', 'meta', 'min',
        'min-key', 'name', 'namespace', 'neg?', 'new', 'newline', 'next',
        'nil?', 'node', 'not', 'not-any?', 'not-every?', 'not=', 'ns-imports',
        'ns-interns', 'ns-map', 'ns-name', 'ns-publics', 'ns-refers',
        'ns-resolve', 'ns-unmap', 'nth', 'nthrest', 'or', 'parse', 'partial',
        'path', 'peek', 'pop', 'pos?', 'pr', 'pr-str', 'print', 'print-str',
        'println', 'println-str', 'prn', 'prn-str', 'project', 'proxy',
        'proxy-mappings', 'quot', 'rand', 'rand-int', 'range', 're-find',
        're-groups', 're-matcher', 're-matches', 're-pattern', 're-seq',
        'read', 'read-line', 'reduce', 'ref', 'ref-set', 'refer', 'rem',
        'remove', 'remove-method', 'remove-ns', 'rename', 'rename-keys',
        'repeat', 'replace', 'replicate', 'resolve', 'rest', 'resultset-seq',
        'reverse', 'rfirst', 'right', 'rights', 'root', 'rrest', 'rseq',
        'second', 'select', 'select-keys', 'send', 'send-off', 'seq',
        'seq-zip', 'seq?', 'set', 'short', 'slurp', 'some', 'sort',
        'sort-by', 'sorted-map', 'sorted-map-by', 'sorted-set',
        'special-symbol?', 'split-at', 'split-with', 'str', 'string?',
        'struct', 'struct-map', 'subs', 'subvec', 'symbol', 'symbol?',
        'sync', 'take', 'take-nth', 'take-while', 'test', 'time', 'to-array',
        'to-array-2d', 'tree-seq', 'true?', 'union', 'up', 'update-proxy',
        'val', 'vals', 'var-get', 'var-set', 'var?', 'vector', 'vector-zip',
        'vector?', 'when', 'when-first', 'when-let', 'when-not',
        'with-local-vars', 'with-meta', 'with-open', 'with-out-str',
        'xml-seq', 'xml-zip', 'zero?', 'zipmap', 'zipper']

    # Pattern for identifiers.
    # Strictly, a name must not consist solely of digits, but this
    # approximation is good enough for now (the number rules below are
    # listed before any rule that uses this pattern).
    valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~-]+'

    tokens = {
        'root' : [
            # comments always start with a semicolon and run to the end
            # of the line
            (r';.*$', Comment.Single),

            # whitespace - usually not relevant
            (r'\s+', Text),

            # numbers; the float rule must precede the integer rule so
            # that "1.5" is not split at the dot
            (r'-?\d+\.\d+', Number.Float),
            (r'-?\d+', Number.Integer),
            # TODO: support for uncommon kinds of numbers -
            # have to figure out what the prefix characters mean
            #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),

            # strings, symbols and characters
            (r'"(\\\\|\\"|[^"])*"', String),
            (r"'" + valid_name, String.Symbol),
            (r"\\([()/'\".'_!§$%& ?;=+-]{1}|[a-zA-Z0-9]+)", String.Char),

            # constants
            # NOTE(review): `#t` / `#f` look like Scheme-style booleans;
            # confirm they are intended for Clojure.
            (r'(#t|#f)', Name.Constant),

            # special operators
            (r"('|#|`|,@|,|\.)", Operator),

            # highlight the keywords; each alternative carries a trailing
            # space, so the space is part of the emitted Keyword token and
            # e.g. "def " cannot match the start of "defn "
            (r'(%s)' % '|'.join([
                re.escape(entry) + ' ' for entry in keywords]),
                Keyword
            ),

            # first variable in a quoted list like
            # '(this is syntactic sugar)
            (r"(?<='\()" + valid_name, Name.Variable),
            (r"(?<=#\()" + valid_name, Name.Variable),

            # highlight the builtins, only directly after an opening
            # parenthesis; as with the keywords, the trailing space is
            # part of the match.  The template is a raw string so that
            # `\(` reaches the regex engine without relying on Python
            # passing an unknown string escape through unchanged.
            (r"(?<=\()(%s)" % '|'.join([
                re.escape(entry) + ' ' for entry in builtins]),
                Name.Builtin
            ),

            # the remaining functions: any name directly after "("
            (r'(?<=\()' + valid_name, Name.Function),
            # find the remaining variables
            (valid_name, Name.Variable),

            # Clojure accepts vector notation
            (r'(\[|\])', Punctuation),

            # Clojure accepts map notation
            (r'(\{|\})', Punctuation),

            # the famous parentheses!
            (r'(\(|\))', Punctuation),
        ],
    }