Back to index

moin  1.9.0~rc2
compiled.py
Go to the documentation of this file.
00001 # -*- coding: utf-8 -*-
00002 """
00003     pygments.lexers.compiled
00004     ~~~~~~~~~~~~~~~~~~~~~~~~
00005 
00006     Lexers for compiled languages.
00007 
00008     :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
00009     :license: BSD, see LICENSE for details.
00010 """
00011 
00012 import re
00013 try:
00014     set
00015 except NameError:
00016     from sets import Set as set
00017 
00018 from pygments.scanner import Scanner
00019 from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
00020                            this, combined
00021 from pygments.util import get_bool_opt, get_list_opt
00022 from pygments.token import \
00023      Text, Comment, Operator, Keyword, Name, String, Number, Punctuation, \
00024      Error
00025 
00026 # backwards compatibility
00027 from pygments.lexers.functional import OcamlLexer
00028 
# Public names exported by this module. ``OcamlLexer`` is not defined here;
# it is imported above from ``pygments.lexers.functional`` and re-exported
# for backwards compatibility.
__all__ = ['CLexer', 'CppLexer', 'DLexer', 'DelphiLexer', 'JavaLexer',
           'ScalaLexer', 'DylanLexer', 'OcamlLexer', 'ObjectiveCLexer',
           'FortranLexer', 'GLShaderLexer', 'PrologLexer', 'CythonLexer',
           'ValaLexer', 'OocLexer']
00033 
00034 
class CLexer(RegexLexer):
    """
    Lexer for C source code, including preprocessor directives.

    Additional options accepted:

    `stdlibhighlighting`
        Report the names listed in `stdlib_types` (e.g. ``size_t``) as
        ``Keyword.Type`` instead of ``Name`` (default: ``True``).
    `c99highlighting`
        Report the names listed in `c99_types` (e.g. ``int32_t``) as
        ``Keyword.Type`` instead of ``Name`` (default: ``True``).
    """
    name = 'C'
    aliases = ['c']
    filenames = ['*.c', '*.h']
    mimetypes = ['text/x-chdr', 'text/x-csrc']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    #: shared head of the function definition/declaration rules: return
    #: type, function name, parenthesised signature, trailing whitespace
    _signature = (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))'    # return arguments
                  r'([a-zA-Z_][a-zA-Z0-9_]*)'             # method name
                  r'(\s*\([^;]*?\))'                      # signature
                  r'(' + _ws + r')')

    tokens = {
        # whitespace, comments, labels and the entry points into the
        # preprocessor states
        'whitespace': [
            (r'^\s*#if\s+0', Comment.Preproc, 'if0'),
            (r'^\s*#', Comment.Preproc, 'macro'),
            (r'^(\s*)([a-zA-Z_][a-zA-Z0-9_]*:(?!:))', bygroups(Text, Name.Label)),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text), # line continuation
            (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
        ],
        # literals, operators, keywords and identifiers
        'statements': [
            (r'L?"', String, 'string'),
            (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
            (r'0[0-7]+[Ll]?', Number.Oct),
            (r'\d+[Ll]?', Number.Integer),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (r'\b(case)(.+?)(:)', bygroups(Keyword, using(this), Text)),
            (r'(auto|break|case|const|continue|default|do|else|enum|extern|'
             r'for|goto|if|register|restricted|return|sizeof|static|struct|'
             r'switch|typedef|union|volatile|virtual|while)\b', Keyword),
            (r'(int|long|float|short|double|char|unsigned|signed|void)\b',
             Keyword.Type),
            (r'(_{0,2}inline|naked|restrict|thread|typename)\b', Keyword.Reserved),
            (r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|'
             r'declspec|finally|int64|try|leave)\b', Keyword.Reserved),
            (r'(true|false|NULL)\b', Name.Builtin),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions: signature followed by an opening brace
            (_signature + r'({)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation),
             'function'),
            # function declarations: signature followed by a semicolon
            (_signature + r'(;)',
             bygroups(using(this), Name.Function, using(this), using(this),
                      Punctuation)),
            # anything else: enter 'statement' without consuming input
            ('', Text, 'statement'),
        ],
        # a single top-level statement; left again at its semicolon
        'statement' : [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        # a function body; nested braces push/pop the same state
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            ('{', Punctuation, '#push'),
            ('}', Punctuation, '#pop'),
        ],
        # inside a double-quoted string literal
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String), # all other characters
            (r'\\\n', String), # line continuation
            (r'\\', String), # stray backslash
        ],
        # rest of a preprocessor line; a backslash-newline keeps the state
        'macro': [
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        # body of an "#if 0" block: emitted as Comment until the matching
        # #else/#elif/#endif; nested #if directives push the state again
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ]
    }

    # identifiers promoted to Keyword.Type when `stdlibhighlighting` is on
    stdlib_types = [
        'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t',
        'sig_atomic_t', 'fpos_t', 'clock_t', 'time_t', 'va_list',
        'jmp_buf', 'FILE', 'DIR', 'div_t', 'ldiv_t', 'mbstate_t',
        'wctrans_t', 'wint_t', 'wctype_t',
    ]
    # identifiers promoted to Keyword.Type when `c99highlighting` is on
    c99_types = [
        '_Bool', '_Complex', 'int8_t', 'int16_t', 'int32_t', 'int64_t',
        'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t',
        'int_least16_t', 'int_least32_t', 'int_least64_t',
        'uint_least8_t', 'uint_least16_t', 'uint_least32_t',
        'uint_least64_t', 'int_fast8_t', 'int_fast16_t', 'int_fast32_t',
        'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t', 'uint_fast32_t',
        'uint_fast64_t', 'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t',
    ]

    def __init__(self, **options):
        """
        Read the two boolean highlighting options (both default to
        ``True``) and pass all options on to `RegexLexer`.
        """
        for flag in ('stdlibhighlighting', 'c99highlighting'):
            setattr(self, flag, get_bool_opt(options, flag, True))
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Yield the ``(index, token, value)`` triples produced by
        `RegexLexer`, re-tagging plain ``Name`` tokens as ``Keyword.Type``
        when their value is a known type name and the corresponding
        option is enabled.
        """
        stream = RegexLexer.get_tokens_unprocessed(self, text)
        for index, token, value in stream:
            known_type = token is Name and (
                (self.stdlibhighlighting and value in self.stdlib_types) or
                (self.c99highlighting and value in self.c99_types))
            if known_type:
                token = Keyword.Type
            yield index, token, value
00163 
class CppLexer(RegexLexer):
    """
    For C++ source code with preprocessor directives.
    """
    name = 'C++'
    aliases = ['cpp', 'c++']
    filenames = ['*.cpp', '*.hpp', '*.c++', '*.h++', '*.cc', '*.hh', '*.cxx', '*.hxx']
    mimetypes = ['text/x-c++hdr', 'text/x-c++src']

    tokens = {
        'root': [
            # preprocessor: "#if 0" blocks are handled separately from
            # all other directives
            (r'^\s*#if\s+0', Comment.Preproc, 'if0'),
            (r'^\s*#', Comment.Preproc, 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text), # line continuation
            (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            (r'[{}]', Punctuation),
            (r'L?"', String, 'string'),
            (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
            (r'0[0-7]+[Ll]?', Number.Oct),
            (r'\d+[Ll]?', Number.Integer),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.;]', Punctuation),
            (r'(asm|auto|break|case|catch|const|const_cast|continue|'
             r'default|delete|do|dynamic_cast|else|enum|explicit|export|'
             r'extern|for|friend|goto|if|mutable|namespace|new|operator|'
             r'private|protected|public|register|reinterpret_cast|return|'
             r'restrict|sizeof|static|static_cast|struct|switch|template|'
             r'this|throw|throws|try|typedef|typeid|typename|union|using|'
             r'volatile|virtual|while)\b', Keyword),
            # the identifier following "class" is lexed in 'classname'
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(bool|int|long|float|short|double|char|unsigned|signed|'
             r'void|wchar_t)\b', Keyword.Type),
            (r'(_{0,2}inline|naked|thread)\b', Keyword.Reserved),
            (r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|'
             r'declspec|finally|int64|try|leave|wchar_t|w64|virtual_inheritance|'
             r'uuidof|unaligned|super|single_inheritance|raise|noop|'
             r'multiple_inheritance|m128i|m128d|m128|m64|interface|'
             r'identifier|forceinline|event|assume)\b', Keyword.Reserved),
            (r'(true|false)\b', Keyword.Constant),
            (r'NULL\b', Name.Builtin),
            # "name:" not followed by a second colon is a label, not a
            # scope-qualified name
            ('[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
        ],
        'classname': [
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop'),
            # template specification
            (r'\s*(?=>)', Text, '#pop'),
        ],
        # inside a double-quoted string literal
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String), # all other characters
            (r'\\\n', String), # line continuation
            (r'\\', String), # stray backslash
        ],
        # rest of a preprocessor line; a backslash-newline keeps the state
        'macro': [
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        # body of an "#if 0" block: emitted as Comment until the matching
        # #else/#elif/#endif; nested #if directives push the state again
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            # the #else/#elif branch of "#if 0" is live code, so leave the
            # comment state here (same rule as CLexer's 'if0' state)
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ]
    }
00239 
00240 
class DLexer(RegexLexer):
    """
    For D source.
    """
    name = 'D'
    filenames = ['*.d', '*.di']
    aliases = ['d']
    mimetypes = ['text/x-dsrc']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            #(r'\\\n', Text), # line continuations
            # Comments
            (r'//(.*?)\n', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            (r'/\+', Comment.Multiline, 'nested_comment'),
            # Keywords
            (r'(abstract|alias|align|asm|assert|auto|body|break|case|cast'
             r'|catch|class|const|continue|debug|default|delegate|delete'
             r'|deprecated|do|else|enum|export|extern|finally|final'
             r'|foreach_reverse|foreach|for|function|goto|if|import|inout'
             r'|interface|invariant|in|is|lazy|mixin|module|new|nothrow|out'
             r'|override|package|pragma|private|protected|public|pure|ref|return'
             r'|scope|static|struct|super|switch|synchronized|template|this'
             r'|throw|try|typedef|typeid|typeof|union|unittest|version|volatile'
             r'|while|with|__traits)\b', Keyword
            ),
            (r'(bool|byte|cdouble|cent|cfloat|char|creal|dchar|double|float'
             r'|idouble|ifloat|int|ireal|long|real|short|ubyte|ucent|uint|ulong'
             r'|ushort|void|wchar)\b', Keyword.Type
            ),
            (r'(false|true|null)\b', Keyword.Constant),
            (r'macro\b', Keyword.Reserved),
            (r'(string|wstring|dstring)\b', Name.Builtin),
            # FloatLiteral
            # -- HexFloat
            (r'0[xX]([0-9a-fA-F_]*\.[0-9a-fA-F_]+|[0-9a-fA-F_]+)'
             r'[pP][+\-]?[0-9_]+[fFL]?[i]?', Number.Float),
            # -- DecimalFloat
            (r'[0-9_]+(\.[0-9_]+[eE][+\-]?[0-9_]+|'
             r'\.[0-9_]*|[eE][+\-]?[0-9_]+)[fFL]?[i]?', Number.Float),
            (r'\.(0|[1-9][0-9_]*)([eE][+\-]?[0-9_]+)?[fFL]?[i]?', Number.Float),
            # IntegerLiteral
            # -- Binary
            (r'0[Bb][01_]+', Number),
            # -- Octal
            (r'0[0-7_]+', Number.Oct),
            # -- Hexadecimal
            (r'0[xX][0-9a-fA-F_]+', Number.Hex),
            # -- Decimal
            (r'(0|[1-9][0-9_]*)([LUu]|Lu|LU|uL|UL)?', Number.Integer),
            # CharacterLiteral
            (r"""'(\\['"?\\abfnrtv]|\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}"""
             r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|\\&\w+;|.)'""",
             String.Char
            ),
            # StringLiteral
            # -- WysiwygString
            (r'r"[^"]*"[cwd]?', String),
            # -- AlternateWysiwygString
            (r'`[^`]*`[cwd]?', String),
            # -- DoubleQuotedString
            (r'"(\\\\|\\"|[^"])*"[cwd]?', String),
            # -- EscapeSequence
            (r"""\\(['"?\\abfnrtv]|x[0-9a-fA-F]{2}|[0-7]{1,3}"""
             r"""|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|&\w+;)""",
             String
            ),
            # -- HexString
            (r'x"[0-9a-fA-F_\s]*"[cwd]?', String),
            # -- DelimitedString
            (r'q"\[', String, 'delimited_bracket'),
            (r'q"\(', String, 'delimited_parenthesis'),
            (r'q"<', String, 'delimited_angle'),
            (r'q"{', String, 'delimited_curly'),
            # heredoc form: q"IDENT <newline> body <newline> IDENT".
            # The body uses (?:.|\n) because '.' alone does not match a
            # newline, which would limit the body to a single line.
            (r'q"([a-zA-Z_]\w*)\n(?:.|\n)*?\n\1"', String),
            (r'q"(.).*?\1"', String),
            # -- TokenString
            (r'q{', String, 'token_string'),
            # Tokens
            (r'(~=|\^=|%=|\*=|==|!>=|!<=|!<>=|!<>|!<|!>|!=|>>>=|>>>|>>=|>>|>='
             r'|<>=|<>|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.\.|\.\.|/=)'
             r'|[/.&|\-+<>!()\[\]{}?,;:$=*%^~]', Punctuation
            ),
            # Identifier
            (r'[a-zA-Z_]\w*', Name),
        ],
        # inside a /+ ... +/ comment; nested openers push the state again
        'nested_comment': [
            (r'[^+/]+', Comment.Multiline),
            (r'/\+', Comment.Multiline, '#push'),
            (r'\+/', Comment.Multiline, '#pop'),
            (r'[+/]', Comment.Multiline),
        ],
        # inside q{ ... }: contents are lexed with the 'root' rules, and
        # the closing brace at this level ends the string
        'token_string': [
            (r'{', Punctuation, 'token_string_nest'),
            (r'}', String, '#pop'),
            include('root'),
        ],
        # balanced braces nested inside a token string
        'token_string_nest': [
            (r'{', Punctuation, '#push'),
            (r'}', Punctuation, '#pop'),
            include('root'),
        ],
        # The delimited_* states lex q"[...]", q"(...)", q"<...>" and
        # q"{...}". The outer state ends at the closer followed by a double
        # quote; the matching *_inside_* state tracks nested delimiter pairs.
        'delimited_bracket': [
            (r'[^\[\]]+', String),
            (r'\[', String, 'delimited_inside_bracket'),
            (r'\]"', String, '#pop'),
        ],
        'delimited_inside_bracket': [
            (r'[^\[\]]+', String),
            (r'\[', String, '#push'),
            (r'\]', String, '#pop'),
        ],
        'delimited_parenthesis': [
            (r'[^\(\)]+', String),
            (r'\(', String, 'delimited_inside_parenthesis'),
            (r'\)"', String, '#pop'),
        ],
        'delimited_inside_parenthesis': [
            (r'[^\(\)]+', String),
            (r'\(', String, '#push'),
            (r'\)', String, '#pop'),
        ],
        'delimited_angle': [
            (r'[^<>]+', String),
            (r'<', String, 'delimited_inside_angle'),
            (r'>"', String, '#pop'),
        ],
        'delimited_inside_angle': [
            (r'[^<>]+', String),
            (r'<', String, '#push'),
            (r'>', String, '#pop'),
        ],
        'delimited_curly': [
            (r'[^{}]+', String),
            (r'{', String, 'delimited_inside_curly'),
            (r'}"', String, '#pop'),
        ],
        'delimited_inside_curly': [
            (r'[^{}]+', String),
            (r'{', String, '#push'),
            (r'}', String, '#pop'),
        ],
    }
00387 
00388 
00389 class DelphiLexer(Lexer):
00390     """
00391     For `Delphi <http://www.borland.com/delphi/>`_ (Borland Object Pascal),
00392     Turbo Pascal and Free Pascal source code.
00393 
00394     Additional options accepted:
00395 
00396     `turbopascal`
00397         Highlight Turbo Pascal specific keywords (default: ``True``).
00398     `delphi`
00399         Highlight Borland Delphi specific keywords (default: ``True``).
00400     `freepascal`
00401         Highlight Free Pascal specific keywords (default: ``True``).
00402     `units`
00403         A list of units that should be considered builtin, supported are
00404         ``System``, ``SysUtils``, ``Classes`` and ``Math``.
00405         Default is to consider all of them builtin.
00406     """
00407     name = 'Delphi'
00408     aliases = ['delphi', 'pas', 'pascal', 'objectpascal']
00409     filenames = ['*.pas']
00410     mimetypes = ['text/x-pascal']
00411 
00412     TURBO_PASCAL_KEYWORDS = [
00413         'absolute', 'and', 'array', 'asm', 'begin', 'break', 'case',
00414         'const', 'constructor', 'continue', 'destructor', 'div', 'do',
00415         'downto', 'else', 'end', 'file', 'for', 'function', 'goto',
00416         'if', 'implementation', 'in', 'inherited', 'inline', 'interface',
00417         'label', 'mod', 'nil', 'not', 'object', 'of', 'on', 'operator',
00418         'or', 'packed', 'procedure', 'program', 'record', 'reintroduce',
00419         'repeat', 'self', 'set', 'shl', 'shr', 'string', 'then', 'to',
00420         'type', 'unit', 'until', 'uses', 'var', 'while', 'with', 'xor'
00421     ]
00422 
00423     DELPHI_KEYWORDS = [
00424         'as', 'class', 'except', 'exports', 'finalization', 'finally',
00425         'initialization', 'is', 'library', 'on', 'property', 'raise',
00426         'threadvar', 'try'
00427     ]
00428 
00429     FREE_PASCAL_KEYWORDS = [
00430         'dispose', 'exit', 'false', 'new', 'true'
00431     ]
00432 
00433     BLOCK_KEYWORDS = set([
00434         'begin', 'class', 'const', 'constructor', 'destructor', 'end',
00435         'finalization', 'function', 'implementation', 'initialization',
00436         'label', 'library', 'operator', 'procedure', 'program', 'property',
00437         'record', 'threadvar', 'type', 'unit', 'uses', 'var'
00438     ])
00439 
00440     FUNCTION_MODIFIERS = set([
00441         'alias', 'cdecl', 'export', 'inline', 'interrupt', 'nostackframe',
00442         'pascal', 'register', 'safecall', 'softfloat', 'stdcall',
00443         'varargs', 'name', 'dynamic', 'near', 'virtual', 'external',
00444         'override', 'assembler'
00445     ])
00446 
00447     # XXX: those aren't global. but currently we know no way for defining
00448     #      them just for the type context.
00449     DIRECTIVES = set([
00450         'absolute', 'abstract', 'assembler', 'cppdecl', 'default', 'far',
00451         'far16', 'forward', 'index', 'oldfpccall', 'private', 'protected',
00452         'published', 'public'
00453     ])
00454 
00455     BUILTIN_TYPES = set([
00456         'ansichar', 'ansistring', 'bool', 'boolean', 'byte', 'bytebool',
00457         'cardinal', 'char', 'comp', 'currency', 'double', 'dword',
00458         'extended', 'int64', 'integer', 'iunknown', 'longbool', 'longint',
00459         'longword', 'pansichar', 'pansistring', 'pbool', 'pboolean',
00460         'pbyte', 'pbytearray', 'pcardinal', 'pchar', 'pcomp', 'pcurrency',
00461         'pdate', 'pdatetime', 'pdouble', 'pdword', 'pextended', 'phandle',
00462         'pint64', 'pinteger', 'plongint', 'plongword', 'pointer',
00463         'ppointer', 'pshortint', 'pshortstring', 'psingle', 'psmallint',
00464         'pstring', 'pvariant', 'pwidechar', 'pwidestring', 'pword',
00465         'pwordarray', 'pwordbool', 'real', 'real48', 'shortint',
00466         'shortstring', 'single', 'smallint', 'string', 'tclass', 'tdate',
00467         'tdatetime', 'textfile', 'thandle', 'tobject', 'ttime', 'variant',
00468         'widechar', 'widestring', 'word', 'wordbool'
00469     ])
00470 
00471     BUILTIN_UNITS = {
00472         'System': [
00473             'abs', 'acquireexceptionobject', 'addr', 'ansitoutf8',
00474             'append', 'arctan', 'assert', 'assigned', 'assignfile',
00475             'beginthread', 'blockread', 'blockwrite', 'break', 'chdir',
00476             'chr', 'close', 'closefile', 'comptocurrency', 'comptodouble',
00477             'concat', 'continue', 'copy', 'cos', 'dec', 'delete',
00478             'dispose', 'doubletocomp', 'endthread', 'enummodules',
00479             'enumresourcemodules', 'eof', 'eoln', 'erase', 'exceptaddr',
00480             'exceptobject', 'exclude', 'exit', 'exp', 'filepos', 'filesize',
00481             'fillchar', 'finalize', 'findclasshinstance', 'findhinstance',
00482             'findresourcehinstance', 'flush', 'frac', 'freemem',
00483             'get8087cw', 'getdir', 'getlasterror', 'getmem',
00484             'getmemorymanager', 'getmodulefilename', 'getvariantmanager',
00485             'halt', 'hi', 'high', 'inc', 'include', 'initialize', 'insert',
00486             'int', 'ioresult', 'ismemorymanagerset', 'isvariantmanagerset',
00487             'length', 'ln', 'lo', 'low', 'mkdir', 'move', 'new', 'odd',
00488             'olestrtostring', 'olestrtostrvar', 'ord', 'paramcount',
00489             'paramstr', 'pi', 'pos', 'pred', 'ptr', 'pucs4chars', 'random',
00490             'randomize', 'read', 'readln', 'reallocmem',
00491             'releaseexceptionobject', 'rename', 'reset', 'rewrite', 'rmdir',
00492             'round', 'runerror', 'seek', 'seekeof', 'seekeoln',
00493             'set8087cw', 'setlength', 'setlinebreakstyle',
00494             'setmemorymanager', 'setstring', 'settextbuf',
00495             'setvariantmanager', 'sin', 'sizeof', 'slice', 'sqr', 'sqrt',
00496             'str', 'stringofchar', 'stringtoolestr', 'stringtowidechar',
00497             'succ', 'swap', 'trunc', 'truncate', 'typeinfo',
00498             'ucs4stringtowidestring', 'unicodetoutf8', 'uniquestring',
00499             'upcase', 'utf8decode', 'utf8encode', 'utf8toansi',
00500             'utf8tounicode', 'val', 'vararrayredim', 'varclear',
00501             'widecharlentostring', 'widecharlentostrvar',
00502             'widechartostring', 'widechartostrvar',
00503             'widestringtoucs4string', 'write', 'writeln'
00504         ],
00505         'SysUtils': [
00506             'abort', 'addexitproc', 'addterminateproc', 'adjustlinebreaks',
00507             'allocmem', 'ansicomparefilename', 'ansicomparestr',
00508             'ansicomparetext', 'ansidequotedstr', 'ansiextractquotedstr',
00509             'ansilastchar', 'ansilowercase', 'ansilowercasefilename',
00510             'ansipos', 'ansiquotedstr', 'ansisamestr', 'ansisametext',
00511             'ansistrcomp', 'ansistricomp', 'ansistrlastchar', 'ansistrlcomp',
00512             'ansistrlicomp', 'ansistrlower', 'ansistrpos', 'ansistrrscan',
00513             'ansistrscan', 'ansistrupper', 'ansiuppercase',
00514             'ansiuppercasefilename', 'appendstr', 'assignstr', 'beep',
00515             'booltostr', 'bytetocharindex', 'bytetocharlen', 'bytetype',
00516             'callterminateprocs', 'changefileext', 'charlength',
00517             'chartobyteindex', 'chartobytelen', 'comparemem', 'comparestr',
00518             'comparetext', 'createdir', 'createguid', 'currentyear',
00519             'currtostr', 'currtostrf', 'date', 'datetimetofiledate',
00520             'datetimetostr', 'datetimetostring', 'datetimetosystemtime',
00521             'datetimetotimestamp', 'datetostr', 'dayofweek', 'decodedate',
00522             'decodedatefully', 'decodetime', 'deletefile', 'directoryexists',
00523             'diskfree', 'disksize', 'disposestr', 'encodedate', 'encodetime',
00524             'exceptionerrormessage', 'excludetrailingbackslash',
00525             'excludetrailingpathdelimiter', 'expandfilename',
00526             'expandfilenamecase', 'expanduncfilename', 'extractfiledir',
00527             'extractfiledrive', 'extractfileext', 'extractfilename',
00528             'extractfilepath', 'extractrelativepath', 'extractshortpathname',
00529             'fileage', 'fileclose', 'filecreate', 'filedatetodatetime',
00530             'fileexists', 'filegetattr', 'filegetdate', 'fileisreadonly',
00531             'fileopen', 'fileread', 'filesearch', 'fileseek', 'filesetattr',
00532             'filesetdate', 'filesetreadonly', 'filewrite', 'finalizepackage',
00533             'findclose', 'findcmdlineswitch', 'findfirst', 'findnext',
00534             'floattocurr', 'floattodatetime', 'floattodecimal', 'floattostr',
00535             'floattostrf', 'floattotext', 'floattotextfmt', 'fmtloadstr',
00536             'fmtstr', 'forcedirectories', 'format', 'formatbuf', 'formatcurr',
00537             'formatdatetime', 'formatfloat', 'freeandnil', 'getcurrentdir',
00538             'getenvironmentvariable', 'getfileversion', 'getformatsettings',
00539             'getlocaleformatsettings', 'getmodulename', 'getpackagedescription',
00540             'getpackageinfo', 'gettime', 'guidtostring', 'incamonth',
00541             'includetrailingbackslash', 'includetrailingpathdelimiter',
00542             'incmonth', 'initializepackage', 'interlockeddecrement',
00543             'interlockedexchange', 'interlockedexchangeadd',
00544             'interlockedincrement', 'inttohex', 'inttostr', 'isdelimiter',
00545             'isequalguid', 'isleapyear', 'ispathdelimiter', 'isvalidident',
00546             'languages', 'lastdelimiter', 'loadpackage', 'loadstr',
00547             'lowercase', 'msecstotimestamp', 'newstr', 'nextcharindex', 'now',
00548             'outofmemoryerror', 'quotedstr', 'raiselastoserror',
00549             'raiselastwin32error', 'removedir', 'renamefile', 'replacedate',
00550             'replacetime', 'safeloadlibrary', 'samefilename', 'sametext',
00551             'setcurrentdir', 'showexception', 'sleep', 'stralloc', 'strbufsize',
00552             'strbytetype', 'strcat', 'strcharlength', 'strcomp', 'strcopy',
00553             'strdispose', 'strecopy', 'strend', 'strfmt', 'stricomp',
00554             'stringreplace', 'stringtoguid', 'strlcat', 'strlcomp', 'strlcopy',
00555             'strlen', 'strlfmt', 'strlicomp', 'strlower', 'strmove', 'strnew',
00556             'strnextchar', 'strpas', 'strpcopy', 'strplcopy', 'strpos',
00557             'strrscan', 'strscan', 'strtobool', 'strtobooldef', 'strtocurr',
00558             'strtocurrdef', 'strtodate', 'strtodatedef', 'strtodatetime',
00559             'strtodatetimedef', 'strtofloat', 'strtofloatdef', 'strtoint',
00560             'strtoint64', 'strtoint64def', 'strtointdef', 'strtotime',
00561             'strtotimedef', 'strupper', 'supports', 'syserrormessage',
00562             'systemtimetodatetime', 'texttofloat', 'time', 'timestamptodatetime',
00563             'timestamptomsecs', 'timetostr', 'trim', 'trimleft', 'trimright',
00564             'tryencodedate', 'tryencodetime', 'tryfloattocurr', 'tryfloattodatetime',
00565             'trystrtobool', 'trystrtocurr', 'trystrtodate', 'trystrtodatetime',
00566             'trystrtofloat', 'trystrtoint', 'trystrtoint64', 'trystrtotime',
00567             'unloadpackage', 'uppercase', 'widecomparestr', 'widecomparetext',
00568             'widefmtstr', 'wideformat', 'wideformatbuf', 'widelowercase',
00569             'widesamestr', 'widesametext', 'wideuppercase', 'win32check',
00570             'wraptext'
00571         ],
00572         'Classes': [
00573             'activateclassgroup', 'allocatehwnd', 'bintohex', 'checksynchronize',
00574             'collectionsequal', 'countgenerations', 'deallocatehwnd', 'equalrect',
00575             'extractstrings', 'findclass', 'findglobalcomponent', 'getclass',
00576             'groupdescendantswith', 'hextobin', 'identtoint',
00577             'initinheritedcomponent', 'inttoident', 'invalidpoint',
00578             'isuniqueglobalcomponentname', 'linestart', 'objectbinarytotext',
00579             'objectresourcetotext', 'objecttexttobinary', 'objecttexttoresource',
00580             'pointsequal', 'readcomponentres', 'readcomponentresex',
00581             'readcomponentresfile', 'rect', 'registerclass', 'registerclassalias',
00582             'registerclasses', 'registercomponents', 'registerintegerconsts',
00583             'registernoicon', 'registernonactivex', 'smallpoint', 'startclassgroup',
00584             'teststreamformat', 'unregisterclass', 'unregisterclasses',
00585             'unregisterintegerconsts', 'unregistermoduleclasses',
00586             'writecomponentresfile'
00587         ],
00588         'Math': [
00589             'arccos', 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec',
00590             'arcsech', 'arcsin', 'arcsinh', 'arctan2', 'arctanh', 'ceil',
00591             'comparevalue', 'cosecant', 'cosh', 'cot', 'cotan', 'coth', 'csc',
00592             'csch', 'cycletodeg', 'cycletograd', 'cycletorad', 'degtocycle',
00593             'degtograd', 'degtorad', 'divmod', 'doubledecliningbalance',
00594             'ensurerange', 'floor', 'frexp', 'futurevalue', 'getexceptionmask',
00595             'getprecisionmode', 'getroundmode', 'gradtocycle', 'gradtodeg',
00596             'gradtorad', 'hypot', 'inrange', 'interestpayment', 'interestrate',
00597             'internalrateofreturn', 'intpower', 'isinfinite', 'isnan', 'iszero',
00598             'ldexp', 'lnxp1', 'log10', 'log2', 'logn', 'max', 'maxintvalue',
00599             'maxvalue', 'mean', 'meanandstddev', 'min', 'minintvalue', 'minvalue',
00600             'momentskewkurtosis', 'netpresentvalue', 'norm', 'numberofperiods',
00601             'payment', 'periodpayment', 'poly', 'popnstddev', 'popnvariance',
00602             'power', 'presentvalue', 'radtocycle', 'radtodeg', 'radtograd',
00603             'randg', 'randomrange', 'roundto', 'samevalue', 'sec', 'secant',
00604             'sech', 'setexceptionmask', 'setprecisionmode', 'setroundmode',
00605             'sign', 'simpleroundto', 'sincos', 'sinh', 'slndepreciation', 'stddev',
00606             'sum', 'sumint', 'sumofsquares', 'sumsandsquares', 'syddepreciation',
00607             'tan', 'tanh', 'totalvariance', 'variance'
00608         ]
00609     }
00610 
00611     ASM_REGISTERS = set([
00612         'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cr0',
00613         'cr1', 'cr2', 'cr3', 'cr4', 'cs', 'cx', 'dh', 'di', 'dl', 'dr0',
00614         'dr1', 'dr2', 'dr3', 'dr4', 'dr5', 'dr6', 'dr7', 'ds', 'dx',
00615         'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'es', 'esi', 'esp',
00616         'fs', 'gs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
00617         'mm7', 'si', 'sp', 'ss', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5',
00618         'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5',
00619         'xmm6', 'xmm7'
00620     ])
00621 
00622     ASM_INSTRUCTIONS = set([
00623         'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bound',
00624         'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw',
00625         'cdq', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmova', 'cmovae',
00626         'cmovb', 'cmovbe', 'cmovc', 'cmovcxz', 'cmove', 'cmovg',
00627         'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', 'cmovnb',
00628         'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl',
00629         'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo',
00630         'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmp', 'cmpsb',
00631         'cmpsd', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', 'cpuid',
00632         'cwd', 'cwde', 'daa', 'das', 'dec', 'div', 'emms', 'enter', 'hlt',
00633         'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'insb', 'insd',
00634         'insw', 'int', 'int01', 'int03', 'int1', 'int3', 'into', 'invd',
00635         'invlpg', 'iret', 'iretd', 'iretw', 'ja', 'jae', 'jb', 'jbe',
00636         'jc', 'jcxz', 'jcxz', 'je', 'jecxz', 'jg', 'jge', 'jl', 'jle',
00637         'jmp', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', 'jne', 'jng', 'jnge',
00638         'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe',
00639         'jpo', 'js', 'jz', 'lahf', 'lar', 'lcall', 'lds', 'lea', 'leave',
00640         'les', 'lfs', 'lgdt', 'lgs', 'lidt', 'ljmp', 'lldt', 'lmsw',
00641         'loadall', 'loadall286', 'lock', 'lodsb', 'lodsd', 'lodsw',
00642         'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss', 'ltr',
00643         'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsw', 'movsx',
00644         'movzx', 'mul', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd',
00645         'outsw', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', 'popfw',
00646         'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfw',
00647         'rcl', 'rcr', 'rdmsr', 'rdpmc', 'rdshr', 'rdtsc', 'rep', 'repe',
00648         'repne', 'repnz', 'repz', 'ret', 'retf', 'retn', 'rol', 'ror',
00649         'rsdc', 'rsldt', 'rsm', 'sahf', 'sal', 'salc', 'sar', 'sbb',
00650         'scasb', 'scasd', 'scasw', 'seta', 'setae', 'setb', 'setbe',
00651         'setc', 'setcxz', 'sete', 'setg', 'setge', 'setl', 'setle',
00652         'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng',
00653         'setnge', 'setnl', 'setnle', 'setno', 'setnp', 'setns', 'setnz',
00654         'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', 'sgdt', 'shl',
00655         'shld', 'shr', 'shrd', 'sidt', 'sldt', 'smi', 'smint', 'smintold',
00656         'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosw', 'str',
00657         'sub', 'svdc', 'svldt', 'svts', 'syscall', 'sysenter', 'sysexit',
00658         'sysret', 'test', 'ud1', 'ud2', 'umov', 'verr', 'verw', 'wait',
00659         'wbinvd', 'wrmsr', 'wrshr', 'xadd', 'xbts', 'xchg', 'xlat',
00660         'xlatb', 'xor'
00661     ])
00662 
00663     def __init__(self, **options):
00664         Lexer.__init__(self, **options)
00665         self.keywords = set()
00666         if get_bool_opt(options, 'turbopascal', True):
00667             self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
00668         if get_bool_opt(options, 'delphi', True):
00669             self.keywords.update(self.DELPHI_KEYWORDS)
00670         if get_bool_opt(options, 'freepascal', True):
00671             self.keywords.update(self.FREE_PASCAL_KEYWORDS)
00672         self.builtins = set()
00673         for unit in get_list_opt(options, 'units', self.BUILTIN_UNITS.keys()):
00674             self.builtins.update(self.BUILTIN_UNITS[unit])
00675 
00676     def get_tokens_unprocessed(self, text):
00677         scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE)
00678         stack = ['initial']
00679         in_function_block = False
00680         in_property_block = False
00681         was_dot = False
00682         next_token_is_function = False
00683         next_token_is_property = False
00684         collect_labels = False
00685         block_labels = set()
00686         brace_balance = [0, 0]
00687 
00688         while not scanner.eos:
00689             token = Error
00690 
00691             if stack[-1] == 'initial':
00692                 if scanner.scan(r'\s+'):
00693                     token = Text
00694                 elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
00695                     if scanner.match.startswith('$'):
00696                         token = Comment.Preproc
00697                     else:
00698                         token = Comment.Multiline
00699                 elif scanner.scan(r'//.*?$'):
00700                     token = Comment.Single
00701                 elif scanner.scan(r'[-+*\/=<>:;,.@\^]'):
00702                     token = Operator
00703                     # stop label highlighting on next ";"
00704                     if collect_labels and scanner.match == ';':
00705                         collect_labels = False
00706                 elif scanner.scan(r'[\(\)\[\]]+'):
00707                     token = Punctuation
00708                     # abort function naming ``foo = Function(...)``
00709                     next_token_is_function = False
00710                     # if we are in a function block we count the open
00711                     # braces because ootherwise it's impossible to
00712                     # determine the end of the modifier context
00713                     if in_function_block or in_property_block:
00714                         if scanner.match == '(':
00715                             brace_balance[0] += 1
00716                         elif scanner.match == ')':
00717                             brace_balance[0] -= 1
00718                         elif scanner.match == '[':
00719                             brace_balance[1] += 1
00720                         elif scanner.match == ']':
00721                             brace_balance[1] -= 1
00722                 elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
00723                     lowercase_name = scanner.match.lower()
00724                     if lowercase_name == 'result':
00725                         token = Name.Builtin.Pseudo
00726                     elif lowercase_name in self.keywords:
00727                         token = Keyword
00728                         # if we are in a special block and a
00729                         # block ending keyword occours (and the parenthesis
00730                         # is balanced) we end the current block context
00731                         if (in_function_block or in_property_block) and \
00732                            lowercase_name in self.BLOCK_KEYWORDS and \
00733                            brace_balance[0] <= 0 and \
00734                            brace_balance[1] <= 0:
00735                             in_function_block = False
00736                             in_property_block = False
00737                             brace_balance = [0, 0]
00738                             block_labels = set()
00739                         if lowercase_name in ('label', 'goto'):
00740                             collect_labels = True
00741                         elif lowercase_name == 'asm':
00742                             stack.append('asm')
00743                         elif lowercase_name == 'property':
00744                             in_property_block = True
00745                             next_token_is_property = True
00746                         elif lowercase_name in ('procedure', 'operator',
00747                                                 'function', 'constructor',
00748                                                 'destructor'):
00749                             in_function_block = True
00750                             next_token_is_function = True
00751                     # we are in a function block and the current name
00752                     # is in the set of registered modifiers. highlight
00753                     # it as pseudo keyword
00754                     elif in_function_block and \
00755                          lowercase_name in self.FUNCTION_MODIFIERS:
00756                         token = Keyword.Pseudo
00757                     # if we are in a property highlight some more
00758                     # modifiers
00759                     elif in_property_block and \
00760                          lowercase_name in ('read', 'write'):
00761                         token = Keyword.Pseudo
00762                         next_token_is_function = True
00763                     # if the last iteration set next_token_is_function
00764                     # to true we now want this name highlighted as
00765                     # function. so do that and reset the state
00766                     elif next_token_is_function:
00767                         # Look if the next token is a dot. If yes it's
00768                         # not a function, but a class name and the
00769                         # part after the dot a function name
00770                         if scanner.test(r'\s*\.\s*'):
00771                             token = Name.Class
00772                         # it's not a dot, our job is done
00773                         else:
00774                             token = Name.Function
00775                             next_token_is_function = False
00776                     # same for properties
00777                     elif next_token_is_property:
00778                         token = Name.Property
00779                         next_token_is_property = False
00780                     # Highlight this token as label and add it
00781                     # to the list of known labels
00782                     elif collect_labels:
00783                         token = Name.Label
00784                         block_labels.add(scanner.match.lower())
00785                     # name is in list of known labels
00786                     elif lowercase_name in block_labels:
00787                         token = Name.Label
00788                     elif lowercase_name in self.BUILTIN_TYPES:
00789                         token = Keyword.Type
00790                     elif lowercase_name in self.DIRECTIVES:
00791                         token = Keyword.Pseudo
00792                     # builtins are just builtins if the token
00793                     # before isn't a dot
00794                     elif not was_dot and lowercase_name in self.builtins:
00795                         token = Name.Builtin
00796                     else:
00797                         token = Name
00798                 elif scanner.scan(r"'"):
00799                     token = String
00800                     stack.append('string')
00801                 elif scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
00802                     token = String.Char
00803                 elif scanner.scan(r'\$[0-9A-Fa-f]+'):
00804                     token = Number.Hex
00805                 elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
00806                     token = Number.Integer
00807                 elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
00808                     token = Number.Float
00809                 else:
00810                     # if the stack depth is deeper than once, pop
00811                     if len(stack) > 1:
00812                         stack.pop()
00813                     scanner.get_char()
00814 
00815             elif stack[-1] == 'string':
00816                 if scanner.scan(r"''"):
00817                     token = String.Escape
00818                 elif scanner.scan(r"'"):
00819                     token = String
00820                     stack.pop()
00821                 elif scanner.scan(r"[^']*"):
00822                     token = String
00823                 else:
00824                     scanner.get_char()
00825                     stack.pop()
00826 
00827             elif stack[-1] == 'asm':
00828                 if scanner.scan(r'\s+'):
00829                     token = Text
00830                 elif scanner.scan(r'end'):
00831                     token = Keyword
00832                     stack.pop()
00833                 elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
00834                     if scanner.match.startswith('$'):
00835                         token = Comment.Preproc
00836                     else:
00837                         token = Comment.Multiline
00838                 elif scanner.scan(r'//.*?$'):
00839                     token = Comment.Single
00840                 elif scanner.scan(r"'"):
00841                     token = String
00842                     stack.append('string')
00843                 elif scanner.scan(r'@@[A-Za-z_][A-Za-z_0-9]*'):
00844                     token = Name.Label
00845                 elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
00846                     lowercase_name = scanner.match.lower()
00847                     if lowercase_name in self.ASM_INSTRUCTIONS:
00848                         token = Keyword
00849                     elif lowercase_name in self.ASM_REGISTERS:
00850                         token = Name.Builtin
00851                     else:
00852                         token = Name
00853                 elif scanner.scan(r'[-+*\/=<>:;,.@\^]+'):
00854                     token = Operator
00855                 elif scanner.scan(r'[\(\)\[\]]+'):
00856                     token = Punctuation
00857                 elif scanner.scan(r'\$[0-9A-Fa-f]+'):
00858                     token = Number.Hex
00859                 elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
00860                     token = Number.Integer
00861                 elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
00862                     token = Number.Float
00863                 else:
00864                     scanner.get_char()
00865                     stack.pop()
00866 
00867             # save the dot!!!11
00868             if scanner.match.strip():
00869                 was_dot = scanner.match == '.'
00870             yield scanner.start_pos, token, scanner.match or ''
00871 
00872 
class JavaLexer(RegexLexer):
    """
    For `Java <http://www.sun.com/java/>`_ source code.
    """

    name = 'Java'
    aliases = ['java']
    filenames = ['*.java']
    mimetypes = ['text/x-java']

    flags = re.MULTILINE | re.DOTALL

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    tokens = {
        'root': [
            # method names
            (r'^(\s*(?:[a-zA-Z_][a-zA-Z0-9_\.\[\]]*\s+)+?)' # return arguments
             r'([a-zA-Z_][a-zA-Z0-9_]*)'                    # method name
             r'(\s*)(\()',                                  # signature start
             bygroups(using(this), Name.Function, Text, Operator)),
            (r'[^\S\n]+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'@[a-zA-Z_][a-zA-Z0-9_\.]*', Name.Decorator),
            (r'(assert|break|case|catch|continue|default|do|else|finally|for|'
             r'if|goto|instanceof|new|return|switch|this|throw|try|while)\b',
             Keyword),
            (r'(abstract|const|enum|extends|final|implements|native|private|'
             r'protected|public|static|strictfp|super|synchronized|throws|'
             r'transient|volatile)\b', Keyword.Declaration),
            (r'(boolean|byte|char|double|float|int|long|short|void)\b',
             Keyword.Type),
            (r'(package)(\s+)', bygroups(Keyword.Namespace, Text)),
            (r'(true|false|null)\b', Keyword.Constant),
            (r'(class|interface)(\s+)', bygroups(Keyword.Declaration, Text), 'class'),
            (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
            (r'"(\\\\|\\"|[^"])*"', String),
            # unicode escapes may use upper case hex digits ('\u00FF')
            (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
            (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)),
            (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label),
            # "$" is a legal identifier character in any position
            (r'[a-zA-Z_$][a-zA-Z0-9_$]*', Name),
            (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator),
            # the exponent may be signed, the type suffix may be upper case
            (r'[0-9][0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[fFdD]?', Number.Float),
            # hex prefix and digits are case-insensitive (0XFF, 0xCAFEl)
            (r'0[xX][0-9a-fA-F]+[lL]?', Number.Hex),
            (r'[0-9]+[lL]?', Number.Integer),
            (r'\n', Text)
        ],
        'class': [
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
        ],
        'import': [
            (r'[a-zA-Z0-9_.]+\*?', Name.Namespace, '#pop')
        ],
    }
00929 
00930 class ScalaLexer(RegexLexer):
00931     """
00932     For `Scala <http://www.scala-lang.org>`_ source code.
00933     """
00934 
00935     name = 'Scala'
00936     aliases = ['scala']
00937     filenames = ['*.scala']
00938     mimetypes = ['text/x-scala']
00939 
00940     flags = re.MULTILINE | re.DOTALL
00941 
00942     #: optional Comment or Whitespace
00943     _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'
00944 
00945     # don't use raw unicode strings!
00946     op = u'[-~\\^\\*!%&\\\\<>\\|+=:/?@\u00a6-\u00a7\u00a9\u00ac\u00ae\u00b0-\u00b1\u00b6\u00d7\u00f7\u03f6\u0482\u0606-\u0608\u060e-\u060f\u06e9\u06fd-\u06fe\u07f6\u09fa\u0b70\u0bf3-\u0bf8\u0bfa\u0c7f\u0cf1-\u0cf2\u0d79\u0f01-\u0f03\u0f13-\u0f17\u0f1a-\u0f1f\u0f34\u0f36\u0f38\u0fbe-\u0fc5\u0fc7-\u0fcf\u109e-\u109f\u1360\u1390-\u1399\u1940\u19e0-\u19ff\u1b61-\u1b6a\u1b74-\u1b7c\u2044\u2052\u207a-\u207c\u208a-\u208c\u2100-\u2101\u2103-\u2106\u2108-\u2109\u2114\u2116-\u2118\u211e-\u2123\u2125\u2127\u2129\u212e\u213a-\u213b\u2140-\u2144\u214a-\u214d\u214f\u2190-\u2328\u232b-\u244a\u249c-\u24e9\u2500-\u2767\u2794-\u27c4\u27c7-\u27e5\u27f0-\u2982\u2999-\u29d7\u29dc-\u29fb\u29fe-\u2b54\u2ce5-\u2cea\u2e80-\u2ffb\u3004\u3012-\u3013\u3020\u3036-\u3037\u303e-\u303f\u3190-\u3191\u3196-\u319f\u31c0-\u31e3\u3200-\u321e\u322a-\u3250\u3260-\u327f\u328a-\u32b0\u32c0-\u33ff\u4dc0-\u4dff\ua490-\ua4c6\ua828-\ua82b\ufb29\ufdfd\ufe62\ufe64-\ufe66\uff0b\uff1c-\uff1e\uff5c\uff5e\uffe2\uffe4\uffe8-\uffee\ufffc-\ufffd]+'
00947 
00948     letter = u'[a-zA-Z\\$_\u00aa\u00b5\u00ba\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02af\u0370-\u0373\u0376-\u0377\u037b-\u037d\u0386\u0388-\u03f5\u03f7-\u0481\u048a-\u0556\u0561-\u0587\u05d0-\u05f2\u0621-\u063f\u0641-\u064a\u066e-\u066f\u0671-\u06d3\u06d5\u06ee-\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u0904-\u0939\u093d\u0950\u0958-\u0961\u0972-\u097f\u0985-\u09b9\u09bd\u09ce\u09dc-\u09e1\u09f0-\u09f1\u0a05-\u0a39\u0a59-\u0a5e\u0a72-\u0a74\u0a85-\u0ab9\u0abd\u0ad0-\u0ae1\u0b05-\u0b39\u0b3d\u0b5c-\u0b61\u0b71\u0b83-\u0bb9\u0bd0\u0c05-\u0c3d\u0c58-\u0c61\u0c85-\u0cb9\u0cbd\u0cde-\u0ce1\u0d05-\u0d3d\u0d60-\u0d61\u0d7a-\u0d7f\u0d85-\u0dc6\u0e01-\u0e30\u0e32-\u0e33\u0e40-\u0e45\u0e81-\u0eb0\u0eb2-\u0eb3\u0ebd-\u0ec4\u0edc-\u0f00\u0f40-\u0f6c\u0f88-\u0f8b\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065-\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10fa\u1100-\u135a\u1380-\u138f\u13a0-\u166c\u166f-\u1676\u1681-\u169a\u16a0-\u16ea\u16ee-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u1770\u1780-\u17b3\u17dc\u1820-\u1842\u1844-\u18a8\u18aa-\u191c\u1950-\u19a9\u19c1-\u19c7\u1a00-\u1a16\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae-\u1baf\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c77\u1d00-\u1d2b\u1d62-\u1d77\u1d79-\u1d9a\u1e00-\u1fbc\u1fbe\u1fc2-\u1fcc\u1fd0-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ffc\u2071\u207f\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c7c\u2c80-\u2ce4\u2d00-\u2d65\u2d80-\u2dde\u3006-\u3007\u3021-\u3029\u3038-\u303a\u303c\u3041-\u3096\u309f\u30a1-\u30fa\u30ff-\u318e\u31a0-\u31b7\u31f0-\u31ff\u3400-\u4db5\u4e00-\ua014\ua016-\ua48c\ua500-\ua60b\ua610-\ua61f\ua62a-\ua66e\ua680-\ua697\ua722-\ua76f\ua771-\ua787\ua78b-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua90a-\ua925\ua930-\ua946\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uac00-\ud7a3\uf900-\ufb1d\ufb1f-\ufb28\ufb2a-\ufd3d\ufd50-\ufdfb\ufe70-\ufefc\uff21-\uff3a\u
ff41-\uff5a\uff66-\uff6f\uff71-\uff9d\uffa0-\uffdc]'
00949 
00950     upper = u'[A-Z\\$_\u00c0-\u00d6\u00d8-\u00de\u0100\u0102\u0104\u0106\u0108\u010a\u010c\u010e\u0110\u0112\u0114\u0116\u0118\u011a\u011c\u011e\u0120\u0122\u0124\u0126\u0128\u012a\u012c\u012e\u0130\u0132\u0134\u0136\u0139\u013b\u013d\u013f\u0141\u0143\u0145\u0147\u014a\u014c\u014e\u0150\u0152\u0154\u0156\u0158\u015a\u015c\u015e\u0160\u0162\u0164\u0166\u0168\u016a\u016c\u016e\u0170\u0172\u0174\u0176\u0178-\u0179\u017b\u017d\u0181-\u0182\u0184\u0186-\u0187\u0189-\u018b\u018e-\u0191\u0193-\u0194\u0196-\u0198\u019c-\u019d\u019f-\u01a0\u01a2\u01a4\u01a6-\u01a7\u01a9\u01ac\u01ae-\u01af\u01b1-\u01b3\u01b5\u01b7-\u01b8\u01bc\u01c4\u01c7\u01ca\u01cd\u01cf\u01d1\u01d3\u01d5\u01d7\u01d9\u01db\u01de\u01e0\u01e2\u01e4\u01e6\u01e8\u01ea\u01ec\u01ee\u01f1\u01f4\u01f6-\u01f8\u01fa\u01fc\u01fe\u0200\u0202\u0204\u0206\u0208\u020a\u020c\u020e\u0210\u0212\u0214\u0216\u0218\u021a\u021c\u021e\u0220\u0222\u0224\u0226\u0228\u022a\u022c\u022e\u0230\u0232\u023a-\u023b\u023d-\u023e\u0241\u0243-\u0246\u0248\u024a\u024c\u024e\u0370\u0372\u0376\u0386\u0388-\u038f\u0391-\u03ab\u03cf\u03d2-\u03d4\u03d8\u03da\u03dc\u03de\u03e0\u03e2\u03e4\u03e6\u03e8\u03ea\u03ec\u03ee\u03f4\u03f7\u03f9-\u03fa\u03fd-\u042f\u0460\u0462\u0464\u0466\u0468\u046a\u046c\u046e\u0470\u0472\u0474\u0476\u0478\u047a\u047c\u047e\u0480\u048a\u048c\u048e\u0490\u0492\u0494\u0496\u0498\u049a\u049c\u049e\u04a0\u04a2\u04a4\u04a6\u04a8\u04aa\u04ac\u04ae\u04b0\u04b2\u04b4\u04b6\u04b8\u04ba\u04bc\u04be\u04c0-\u04c1\u04c3\u04c5\u04c7\u04c9\u04cb\u04cd\u04d0\u04d2\u04d4\u04d6\u04d8\u04da\u04dc\u04de\u04e0\u04e2\u04e4\u04e6\u04e8\u04ea\u04ec\u04ee\u04f0\u04f2\u04f4\u04f6\u04f8\u04fa\u04fc\u04fe\u0500\u0502\u0504\u0506\u0508\u050a\u050c\u050e\u0510\u0512\u0514\u0516\u0518\u051a\u051c\u051e\u0520\u0522\u0531-\u0556\u10a0-\u10c5\u1e00\u1e02\u1e04\u1e06\u1e08\u1e0a\u1e0c\u1e0e\u1e10\u1e12\u1e14\u1e16\u1e18\u1e1a\u1e1c\u1e1e\u1e20\u1e22\u1e24\u1e26\u1e28\u1e2a\u1e2c\u1e2e\u1e30\u1e32\u1e34\u1e36\u1e38\u1e3a\u1e3c\u1e3e\u1e40\u1e42\u1e44\
u1e46\u1e48\u1e4a\u1e4c\u1e4e\u1e50\u1e52\u1e54\u1e56\u1e58\u1e5a\u1e5c\u1e5e\u1e60\u1e62\u1e64\u1e66\u1e68\u1e6a\u1e6c\u1e6e\u1e70\u1e72\u1e74\u1e76\u1e78\u1e7a\u1e7c\u1e7e\u1e80\u1e82\u1e84\u1e86\u1e88\u1e8a\u1e8c\u1e8e\u1e90\u1e92\u1e94\u1e9e\u1ea0\u1ea2\u1ea4\u1ea6\u1ea8\u1eaa\u1eac\u1eae\u1eb0\u1eb2\u1eb4\u1eb6\u1eb8\u1eba\u1ebc\u1ebe\u1ec0\u1ec2\u1ec4\u1ec6\u1ec8\u1eca\u1ecc\u1ece\u1ed0\u1ed2\u1ed4\u1ed6\u1ed8\u1eda\u1edc\u1ede\u1ee0\u1ee2\u1ee4\u1ee6\u1ee8\u1eea\u1eec\u1eee\u1ef0\u1ef2\u1ef4\u1ef6\u1ef8\u1efa\u1efc\u1efe\u1f08-\u1f0f\u1f18-\u1f1d\u1f28-\u1f2f\u1f38-\u1f3f\u1f48-\u1f4d\u1f59-\u1f5f\u1f68-\u1f6f\u1fb8-\u1fbb\u1fc8-\u1fcb\u1fd8-\u1fdb\u1fe8-\u1fec\u1ff8-\u1ffb\u2102\u2107\u210b-\u210d\u2110-\u2112\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u2130-\u2133\u213e-\u213f\u2145\u2183\u2c00-\u2c2e\u2c60\u2c62-\u2c64\u2c67\u2c69\u2c6b\u2c6d-\u2c6f\u2c72\u2c75\u2c80\u2c82\u2c84\u2c86\u2c88\u2c8a\u2c8c\u2c8e\u2c90\u2c92\u2c94\u2c96\u2c98\u2c9a\u2c9c\u2c9e\u2ca0\u2ca2\u2ca4\u2ca6\u2ca8\u2caa\u2cac\u2cae\u2cb0\u2cb2\u2cb4\u2cb6\u2cb8\u2cba\u2cbc\u2cbe\u2cc0\u2cc2\u2cc4\u2cc6\u2cc8\u2cca\u2ccc\u2cce\u2cd0\u2cd2\u2cd4\u2cd6\u2cd8\u2cda\u2cdc\u2cde\u2ce0\u2ce2\ua640\ua642\ua644\ua646\ua648\ua64a\ua64c\ua64e\ua650\ua652\ua654\ua656\ua658\ua65a\ua65c\ua65e\ua662\ua664\ua666\ua668\ua66a\ua66c\ua680\ua682\ua684\ua686\ua688\ua68a\ua68c\ua68e\ua690\ua692\ua694\ua696\ua722\ua724\ua726\ua728\ua72a\ua72c\ua72e\ua732\ua734\ua736\ua738\ua73a\ua73c\ua73e\ua740\ua742\ua744\ua746\ua748\ua74a\ua74c\ua74e\ua750\ua752\ua754\ua756\ua758\ua75a\ua75c\ua75e\ua760\ua762\ua764\ua766\ua768\ua76a\ua76c\ua76e\ua779\ua77b\ua77d-\ua77e\ua780\ua782\ua784\ua786\ua78b\uff21-\uff3a]'
00951 
00952     idrest = ur'%s(?:%s|[0-9])*(?:(?<=_)%s)?' % (letter, letter, op)
00953 
00954     tokens = {
00955         'root': [
00956             # method names
00957             (r'(class|trait|object)(\s+)', bygroups(Keyword, Text), 'class'),
00958             (ur"'%s" % idrest, Text.Symbol),
00959             (r'[^\S\n]+', Text),
00960             (r'//.*?\n', Comment.Single),
00961             (r'/\*', Comment.Multiline, 'comment'),
00962             (ur'@%s' % idrest, Name.Decorator),
00963             (ur'(abstract|ca(?:se|tch)|d(?:ef|o)|e(?:lse|xtends)|'
00964              ur'f(?:inal(?:ly)?|or(?:Some)?)|i(?:f|mplicit)|'
00965              ur'lazy|match|new|override|pr(?:ivate|otected)'
00966              ur'|re(?:quires|turn)|s(?:ealed|uper)|'
00967              ur't(?:h(?:is|row)|ry)|va[lr]|w(?:hile|ith)|yield)\b|'
00968              u'(<[%:-]|=>|>:|[#=@_\u21D2\u2190])(\b|(?=\\s)|$)', Keyword),
00969             (ur':(?!%s)' % op, Keyword, 'type'),
00970             (ur'%s%s\b' % (upper, idrest), Name.Class),
00971             (r'(true|false|null)\b', Keyword.Constant),
00972             (r'(import|package)(\s+)', bygroups(Keyword, Text), 'import'),
00973             (r'(type)(\s+)', bygroups(Keyword, Text), 'type'),
00974             (r'"""(?:.|\n)*?"""', String),
00975             (r'"(\\\\|\\"|[^"])*"', String),
00976             (ur"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char),
00977 #            (ur'(\.)(%s|%s|`[^`]+`)' % (idrest, op), bygroups(Operator,
00978 #             Name.Attribute)),
00979             (idrest, Name),
00980             (r'`[^`]+`', Name),
00981             (r'\[', Operator, 'typeparam'),
00982             (r'[\(\)\{\};,.]', Operator),
00983             (op, Operator),
00984             (ur'([0-9][0-9]*\.[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?',
00985              Number.Float),
00986             (r'0x[0-9a-f]+', Number.Hex),
00987             (r'[0-9]+L?', Number.Integer),
00988             (r'\n', Text)
00989         ],
00990         'class': [
00991             (ur'(%s|%s|`[^`]+`)(\s*)(\[)' % (idrest, op),
00992              bygroups(Name.Class, Text, Operator), 'typeparam'),
00993             (r'[\s\n]+', Text),
00994             (r'{', Operator, '#pop'),
00995             (r'\(', Operator, '#pop'),
00996             (ur'%s|%s|`[^`]+`' % (idrest, op), Name.Class, '#pop'),
00997         ],
00998         'type': [
00999             (r'\s+', Text),
01000             (u'<[%:]|>:|[#_\u21D2]|forSome|type', Keyword),
01001             (r'([,\);}]|=>|=)([\s\n]*)', bygroups(Operator, Text), '#pop'),
01002             (r'[\(\{]', Operator, '#push'),
01003             (ur'((?:%s|%s|`[^`]+`)(?:\.(?:%s|%s|`[^`]+`))*)(\s*)(\[)' %
01004              (idrest, op, idrest, op),
01005              bygroups(Keyword.Type, Text, Operator), ('#pop', 'typeparam')),
01006             (ur'((?:%s|%s|`[^`]+`)(?:\.(?:%s|%s|`[^`]+`))*)(\s*)$' %
01007              (idrest, op, idrest, op),
01008              bygroups(Keyword.Type, Text), '#pop'),
01009             (ur'\.|%s|%s|`[^`]+`' % (idrest, op), Keyword.Type)
01010         ],
01011         'typeparam': [
01012             (r'[\s\n,]+', Text),
01013             (u'<[%:]|=>|>:|[#_\u21D2]|forSome|type', Keyword),
01014             (r'([\]\)\}])', Operator, '#pop'),
01015             (r'[\(\[\{]', Operator, '#push'),
01016             (ur'\.|%s|%s|`[^`]+`' % (idrest, op), Keyword.Type)
01017         ],
01018         'comment': [
01019             (r'[^/\*]+', Comment.Multiline),
01020             (r'/\*', Comment.Multiline, '#push'),
01021             (r'\*/', Comment.Multiline, '#pop'),
01022             (r'[*/]', Comment.Multiline)
01023         ],
01024         'import': [
01025             (ur'(%s|\.)+' % idrest, Name.Namespace, '#pop')
01026         ],
01027     }
01028 
01029 
class DylanLexer(RegexLexer):
    """
    For the `Dylan <http://www.opendylan.org/>`_ language.

    *New in Pygments 0.7.*
    """

    name = 'Dylan'
    aliases = ['dylan']
    filenames = ['*.dylan']
    mimetypes = ['text/x-dylan']

    flags = re.DOTALL

    tokens = {
        'root': [
            # definition adjectives and definition kinds
            (r'\b(subclass|abstract|block|c(on(crete|stant)|lass)|domain'
             r'|ex(c(eption|lude)|port)|f(unction(|al))|generic|handler'
             r'|i(n(herited|line|stance|terface)|mport)|library|m(acro|ethod)'
             r'|open|primary|sealed|si(deways|ngleton)|slot'
             r'|v(ariable|irtual))\b', Name.Builtin),
            # names in angle brackets, e.g. <integer>
            (r'<\w+>', Keyword.Type),
            (r'#?"(?:\\.|[^"])+?"', String.Double),
            (r'//.*?\n', Comment.Single),
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            (r'\'.*?\'', String.Single),
            (r'=>|\b(a(bove|fterwards)|b(e(gin|low)|y)|c(ase|leanup|reate)'
             r'|define|else(|if)|end|f(inally|or|rom)|i[fn]|l(et|ocal)|otherwise'
             r'|rename|s(elect|ignal)|t(hen|o)|u(n(less|til)|se)|wh(en|ile))\b',
             Keyword),
            # names followed by a colon
            (r'([ \t])([!\$%&\*\/:<=>\?~_^a-zA-Z0-9.+\-]*:)',
             bygroups(Text, Name.Variable)),
            # a name directly followed by an opening parenthesis
            (r'([ \t]*)(\S+[^:])([ \t]*)(\()([ \t]*)',
             bygroups(Text, Name.Function, Text, Punctuation, Text)),
            # a number needs at least one digit; a lone "." (as in
            # ``a.b``) must fall through to the operator rule below
            (r'-?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)', Number),
            (r'[(),;]', Punctuation),
            (r'\$[a-zA-Z0-9-]+', Name.Constant),
            (r'[!$%&*/:<>=?~^.+\[\]{}-]+', Operator),
            (r'\s+', Text),
            (r'#[a-zA-Z0-9-]+', Keyword),
            (r'[a-zA-Z0-9-]+', Name.Variable),
        ],
    }
01073 
01074 
class ObjectiveCLexer(RegexLexer):
    """
    For Objective-C source code with preprocessor directives.

    The lexer starts in ``root``.  Function definitions push ``function``
    (balanced on braces); everything else is lexed one ``statement`` at a
    time, popping back to ``root`` on ``;``.
    """

    name = 'Objective-C'
    aliases = ['objective-c', 'objectivec', 'obj-c', 'objc']
    #XXX: objc has .h files too :-/
    filenames = ['*.m']
    mimetypes = ['text/x-objective-c']

    #: optional Comment or Whitespace
    _ws = r'(?:\s|//.*?\n|/[*].*?[*]/)+'

    tokens = {
        'whitespace': [
            (r'^(\s*)(#if\s+0)', bygroups(Text, Comment.Preproc), 'if0'),
            (r'^(\s*)(#)', bygroups(Text, Comment.Preproc), 'macro'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text), # line continuation
            (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
        ],
        'statements': [
            (r'(L|@)?"', String, 'string'),
            (r"(L|@)?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
            (r'0[0-7]+[Ll]?', Number.Oct),
            (r'\d+[Ll]?', Number.Integer),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (r'(auto|break|case|const|continue|default|do|else|enum|extern|'
             r'for|goto|if|register|restricted|return|sizeof|static|struct|'
             r'switch|typedef|union|volatile|virtual|while|in|@selector|'
             r'@private|@protected|@public|@encode|'
             r'@synchronized|@try|@throw|@catch|@finally|@end|@property|'
             r'@synthesize|@dynamic)\b', Keyword),
            (r'(int|long|float|short|double|char|unsigned|signed|void|'
             r'id|BOOL|IBOutlet|IBAction|SEL)\b', Keyword.Type),
            (r'(_{0,2}inline|naked|restrict|thread|typename)\b',
             Keyword.Reserved),
            (r'__(asm|int8|based|except|int16|stdcall|cdecl|fastcall|int32|'
             r'declspec|finally|int64|try|leave)\b', Keyword.Reserved),
            (r'(TRUE|FALSE|nil|NULL)\b', Name.Builtin),
            (r'[a-zA-Z_][a-zA-Z0-9_]*:(?!:)', Name.Label),
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
        ],
        'root': [
            include('whitespace'),
            # functions
            (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))'    # return arguments
             r'([a-zA-Z_][a-zA-Z0-9_]*)'             # method name
             r'(\s*\([^;]*?\))'                      # signature
             r'(' + _ws + r')({)',
             bygroups(using(this), Name.Function,
                      using(this), Text, Punctuation),
             'function'),
            # function declarations
            (r'((?:[a-zA-Z0-9_*\s])+?(?:\s|[*]))'    # return arguments
             r'([a-zA-Z_][a-zA-Z0-9_]*)'             # method name
             r'(\s*\([^;]*?\))'                      # signature
             r'(' + _ws + r')(;)',
             bygroups(using(this), Name.Function,
                      using(this), Text, Punctuation)),
            (r'(@interface|@implementation)(\s+)', bygroups(Keyword, Text),
             'classname'),
            (r'(@class|@protocol)(\s+)', bygroups(Keyword, Text),
             'forward_classname'),
            (r'(\s*)(@end)(\s*)', bygroups(Text, Keyword, Text)),
            # nothing above matched: lex a single statement
            ('', Text, 'statement'),
        ],
        'classname' : [
            # interface definition that inherits
            (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*:\s*)([a-zA-Z_][a-zA-Z0-9_]*)?',
             bygroups(Name.Class, Text, Name.Class), '#pop'),
            # interface definition for a category
            (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(\([a-zA-Z_][a-zA-Z0-9_]*\))',
             bygroups(Name.Class, Text, Name.Label), '#pop'),
            # simple interface / implementation
            (r'([a-zA-Z_][a-zA-Z0-9_]*)', Name.Class, '#pop')
        ],
        'forward_classname' : [
            # "Name," -- more names follow, stay in this state
            (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*,\s*)',
             bygroups(Name.Class, Text), 'forward_classname'),
            # last name, optionally followed by the terminating ";"
            (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*;?)',
             bygroups(Name.Class, Text), '#pop')
        ],
        'statement' : [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            # track brace nesting so the state ends with the function body
            ('{', Punctuation, '#push'),
            ('}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String), # all other characters
            (r'\\\n', String), # line continuation
            (r'\\', String), # stray backslash
        ],
        'macro': [
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            # a newline preceded by a backslash continues the directive
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'if0': [
            # nested conditionals inside the disabled region
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            # "#else" / "#elif" ends the disabled "#if 0" branch: what
            # follows is live code again and must not be shown as a comment
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ]
    }

    def analyse_text(text):
        """
        Return True if `text` looks like Objective-C: it contains an
        ``@"..."`` string literal or a keyword message send such as
        ``[receiver selector:``.
        """
        if '@"' in text: # strings
            return True
        # Search the whole text (a message send is rarely the very first
        # thing in a file) and require "receiver<space>selector:" so that
        # plain subscripts/slices like "a[i:j]" do not count.
        if re.search(r'\[\s*[a-zA-Z_]\w*\s+[a-zA-Z_]\w*\s*:', text): # message
            return True
        return False
01207 
class FortranLexer(RegexLexer):
    '''
    Lexer for FORTRAN 90 code.

    All rules are matched case-insensitively (see `flags`).

    *New in Pygments 0.10.*
    '''
    name = 'Fortran'
    aliases = ['fortran']
    filenames = ['*.f', '*.f90']
    mimetypes = ['text/x-fortran']
    flags = re.IGNORECASE

    # Data Types: INTEGER, REAL, COMPLEX, LOGICAL, CHARACTER and DOUBLE PRECISION
    # Operators: **, *, +, -, /, <, >, <=, >=, ==, /=
    # Logical (?): NOT, AND, OR, EQV, NEQV

    # Builtins:
    # http://gcc.gnu.org/onlinedocs/gcc-3.4.6/g77/Table-of-Intrinsic-Functions.html

    tokens = {
        'root': [
            (r'!.*\n', Comment),
            include('strings'),
            include('core'),
            (r'[a-z][a-z0-9_]*', Name.Variable),
            include('nums'),
            (r'[\s]+', Text),
        ],
        'core': [
            # Statements
            (r'\b(ACCEPT|ALLOCATABLE|ALLOCATE|ARRAY|ASSIGN|BACKSPACE|BLOCK DATA|'
             r'BYTE|CALL|CASE|CLOSE|COMMON|CONTAINS|CONTINUE|CYCLE|DATA|'
             r'DEALLOCATE|DECODE|DIMENSION|DO|ENCODE|END FILE|ENDIF|END|ENTRY|'
             r'EQUIVALENCE|EXIT|EXTERNAL|EXTRINSIC|FORALL|FORMAT|FUNCTION|GOTO|'
             r'IF|IMPLICIT|INCLUDE|INQUIRE|INTENT|INTERFACE|INTRINSIC|MODULE|'
             r'NAMELIST|NULLIFY|NONE|OPEN|OPTIONAL|OPTIONS|PARAMETER|PAUSE|'
             r'POINTER|PRINT|PRIVATE|PROGRAM|PUBLIC|PURE|READ|RECURSIVE|RETURN|'
             r'REWIND|SAVE|SELECT|SEQUENCE|STOP|SUBROUTINE|TARGET|TYPE|USE|'
             r'VOLATILE|WHERE|WRITE|WHILE|THEN|ELSE)\s*\b',
             Keyword),

            # Data Types
            (r'\b(CHARACTER|COMPLEX|DOUBLE PRECISION|DOUBLE COMPLEX|INTEGER|'
             r'LOGICAL|REAL)\s*\b',
             Keyword.Type),

            # Operators -- two-character operators are listed first, since
            # the first alternative that matches wins and "<" would
            # otherwise always pre-empt "<=" (likewise ">=" and "/=")
            (r'(\*\*|<=|>=|==|/=|\*|\+|-|/|<|>|=)', Operator),

            (r'(::)', Keyword.Declaration),

            (r'[(),:&%;]', Punctuation),

            # Intrinsics
            (r'\b(Abort|Abs|Access|AChar|ACos|AdjustL|AdjustR|AImag|AInt|Alarm|'
             r'All|Allocated|ALog|AMax|AMin|AMod|And|ANInt|Any|'
             r'ASin|Associated|ATan|BesJ|BesJN|BesY|BesYN|'
             r'Bit_Size|BTest|CAbs|CCos|Ceiling|CExp|Char|ChDir|ChMod|CLog|'
             r'Cmplx|Complex|Conjg|Cos|CosH|Count|CPU_Time|CShift|CSin|CSqRt|'
             r'CTime|DAbs|DACos|DASin|DATan|Date_and_Time|DbesJ|'
             r'DbesJN|DbesY|DbesYN|Dble|DCos|DCosH|DDiM|DErF|DErFC|'
             r'DExp|Digits|DiM|DInt|DLog|DMax|DMin|DMod|DNInt|Dot_Product|'
             r'DProd|DSign|DSinH|DSin|DSqRt|DTanH|DTan|DTime|EOShift|Epsilon|'
             r'ErF|ErFC|ETime|Exit|Exp|Exponent|FDate|FGet|FGetC|Float|'
             r'Floor|Flush|FNum|FPutC|FPut|Fraction|FSeek|FStat|FTell|'
             r'GError|GetArg|GetCWD|GetEnv|GetGId|GetLog|GetPId|GetUId|'
             r'GMTime|HostNm|Huge|IAbs|IAChar|IAnd|IArgC|IBClr|IBits|'
             r'IBSet|IChar|IDate|IDiM|IDInt|IDNInt|IEOr|IErrNo|IFix|Imag|'
             r'ImagPart|Index|Int|IOr|IRand|IsaTty|IShft|IShftC|ISign|'
             r'ITime|Kill|Kind|LBound|Len|Len_Trim|LGe|LGt|Link|LLe|LLt|LnBlnk|'
             r'Loc|Log|Logical|Long|LShift|LStat|LTime|MatMul|Max|'
             r'MaxExponent|MaxLoc|MaxVal|MClock|Merge|Min|MinExponent|MinLoc|'
             r'MinVal|Mod|Modulo|MvBits|Nearest|NInt|Not|Or|Pack|PError|'
             r'Precision|Present|Product|Radix|Rand|Random_Number|Random_Seed|'
             r'Range|Real|RealPart|Rename|Repeat|Reshape|RRSpacing|RShift|Scale|'
             r'Scan|Second|Selected_Int_Kind|Selected_Real_Kind|Set_Exponent|'
             r'Shape|Short|Sign|Signal|SinH|Sin|Sleep|Sngl|Spacing|Spread|SqRt|'
             r'SRand|Stat|Sum|SymLnk|System|System_Clock|Tan|TanH|Time|'
             r'Tiny|Transfer|Transpose|Trim|TtyNam|UBound|UMask|Unlink|Unpack|'
             r'Verify|XOr|ZAbs|ZCos|ZExp|ZLog|ZSin|ZSqRt)\s*\b',
             Name.Builtin),

            # Booleans
            (r'\.(true|false)\.', Name.Builtin),
            # Comparing Operators
            (r'\.(eq|ne|lt|le|gt|ge|not|and|or|eqv|neqv)\.', Operator.Word),
        ],

        'strings': [
            (r'"(\\\\|\\[0-7]+|\\.|[^"])*"', String.Double),
            (r"'(\\\\|\\[0-7]+|\\.|[^'])*'", String.Single),
        ],

        'nums': [
            # Floats are tried before integers.  An integer rule guarded
            # only by a negative lookahead backtracks and bites off a
            # prefix ("123.5" -> "12" + "3.5"), so the integer rule comes
            # last and needs no lookahead.
            # No leading sign here: "+" and "-" are consumed as Operator
            # by 'core', which 'root' includes before 'nums'.
            # The exponent letter may be "e" or "d" (double precision).
            (r'\d+\.\d*([ed][-+]?\d+)?', Number.Float),
            (r'\.\d+([ed][-+]?\d+)?', Number.Float),
            # exponent form without a decimal point, e.g. 1e5
            (r'\d+[ed][-+]?\d+', Number.Float),
            (r'\d+', Number.Integer),
        ],
    }
01307 
01308 
class GLShaderLexer(RegexLexer):
    """
    GLSL (OpenGL Shader) lexer.

    A single-state lexer: comments and preprocessor lines first, then
    operators, punctuation, numbers, keywords and identifiers.

    *New in Pygments 1.1.*
    """
    name = 'GLSL'
    aliases = ['glsl']
    filenames = ['*.vert', '*.frag', '*.geo']
    mimetypes = ['text/x-glslsrc']

    tokens = {
        'root': [
            (r'^#.*', Comment.Preproc),
            (r'//.*', Comment.Single),
            # non-greedy: stop at the first "*/" so that code between two
            # block comments is not swallowed into one giant comment
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            (r'\+|-|~|!=?|\*|/|%|<<|>>|<=?|>=?|==?|&&?|\^|\|\|?',
             Operator),
            (r'[?:]', Operator), # quick hack for ternary
            (r'\bdefined\b', Operator),
            (r'[;{}(),\[\]]', Punctuation),
            (r'[+-]?\d*\.\d+([eE][-+]?\d+)?', Number.Float),
            (r'[+-]?\d+\.\d*([eE][-+]?\d+)?', Number.Float),
            # when an exponent is present, no decimal point is needed
            (r'\d+[eE][-+]?\d+', Number.Float),
            (r'0[xX][0-9a-fA-F]*', Number.Hex),
            # Number.Oct is the token the other lexers in this module use
            # for octal literals
            (r'0[0-7]*', Number.Oct),
            (r'[1-9][0-9]*', Number.Integer),
            (r'\b(attribute|const|uniform|varying|centroid|break|continue|'
             r'do|for|while|if|else|in|out|inout|float|int|void|bool|true|'
             r'false|invariant|discard|return|mat[234]|mat[234]x[234]|'
             r'vec[234]|[ib]vec[234]|sampler[123]D|samplerCube|'
             r'sampler[12]DShadow|struct)\b', Keyword),
            (r'\b(asm|class|union|enum|typedef|template|this|packed|goto|'
             r'switch|default|inline|noinline|volatile|public|static|extern|'
             r'external|interface|long|short|double|half|fixed|unsigned|'
             r'lowp|mediump|highp|precision|input|output|hvec[234]|'
             r'[df]vec[234]|sampler[23]DRect|sampler2DRectShadow|sizeof|'
             r'cast|namespace|using)\b', Keyword), #future use
            (r'[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable),
            (r'\.', Punctuation),
            (r'\s+', Text),
        ],
    }
01352 
class PrologLexer(RegexLexer):
    """
    Lexer for Prolog files.

    Block comments may nest; they are handled by the ``nested-comment``
    state, which pushes itself on every inner ``/*``.
    """
    name = 'Prolog'
    aliases = ['prolog']
    filenames = ['*.prolog', '*.pro', '*.pl']
    mimetypes = ['text/x-prolog']

    flags = re.UNICODE

    tokens = {
        'root': [
            (r'^#.*', Comment.Single),
            (r'/\*', Comment.Multiline, 'nested-comment'),
            (r'%.*', Comment.Single),
            (r'[0-9]+', Number),
            (r'[\[\](){}|.,;!]', Punctuation),
            (r':-|-->', Punctuation),
            # "\\U" (escaped backslash + U), like "\\u" next to it.  A bare
            # "\U" is not a literal backslash-U: the regex engine treats it
            # as an escape of its own.
            (r'"(?:\\x[0-9a-fA-F]+\\|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|'
             r'\\[0-7]+\\|\\[\w\W]|[^"])*"', String.Double),
            (r"'(?:''|[^'])*'", String.Atom), # quoted atom
            # Needs to not be followed by an atom.
            #(r'=(?=\s|[a-zA-Z\[])', Operator),
            # "is" needs a word boundary, otherwise atoms that merely
            # start with "is" (e.g. "island") are split into the operator
            # plus the remaining letters.
            (r'(is\b|<|>|=<|>=|==|=:=|=|/|//|\*|\+|-)(?=\s|[a-zA-Z0-9\[])',
             Operator),
            (r'(mod|div|not)\b', Operator),
            # The don't-care variable: a lone underscore.  "_Foo" is left
            # for the Name.Variable rule further down.
            (r'_(?![a-zA-Z0-9_])', Keyword),
            (r'([a-z]+)(:)', bygroups(Name.Namespace, Punctuation)),
            (u'([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]'
             u'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)'
             u'(\\s*)(:-|-->)',
             bygroups(Name.Function, Text, Operator)), # function defn
            (u'([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]'
             u'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)'
             u'(\\s*)(\\()',
             bygroups(Name.Function, Text, Punctuation)),
            (u'[a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]'
             u'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*',
             String.Atom), # atom, characters
            # This one includes !
            (u'[#&*+\\-./:<=>?@\\\\^~\u00a1-\u00bf\u2010-\u303f]+',
             String.Atom), # atom, graphics
            (r'[A-Z_][A-Za-z0-9_]*', Name.Variable),
            (u'\\s+|[\u2000-\u200f\ufff0-\ufffe\uffef]', Text),
        ],
        'nested-comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            (r'/\*', Comment.Multiline, '#push'),
            (r'[^*/]+', Comment.Multiline),
            # a lone "*" or "/" that is not part of a delimiter
            (r'[*/]', Comment.Multiline),
        ],
    }

    def analyse_text(text):
        """Return True if `text` contains the clause operator ``:-``."""
        return ':-' in text
01409 
01410 
class CythonLexer(RegexLexer):
    """
    For Pyrex and `Cython <http://cython.org>`_ source code.

    String literals are lexed in the ``dqs``/``sqs``/``tdqs``/``tsqs``
    states; non-raw strings additionally get ``stringescape`` combined in.

    *New in Pygments 1.1.*
    """

    name = 'Cython'
    aliases = ['cython', 'pyx']
    filenames = ['*.pyx', '*.pxd', '*.pxi']
    mimetypes = ['text/x-cython', 'application/x-cython']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
            (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            # C-style cast, e.g. <int>x
            (r'(<)([a-zA-Z0-9.?]+)(>)',
             bygroups(Punctuation, Keyword.Type, Punctuation)),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator),
            # NOTE(review): presumably meant for "for i from 0 <= i < 10:"
            # loops, but as written it requires "from" to be directly
            # followed by digits -- confirm intent before changing.
            (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)',
             bygroups(Keyword, Number.Integer, Operator, Name, Operator,
                      Name, Punctuation)),
            include('keywords'),
            (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'),
            (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'),
            (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'),
            include('builtins'),
            include('backtick'),
            # raw strings: no escape processing
            ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            # ordinary strings: escapes are highlighted
            ('[uU]?"""', String, combined('stringescape', 'tdqs')),
            ("[uU]?'''", String, combined('stringescape', 'tsqs')),
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (r'(assert|break|by|continue|ctypedef|del|elif|else|except\??|exec|'
             r'finally|for|gil|global|if|include|lambda|nogil|pass|print|raise|'
             r'return|try|while|yield|as|with)\b', Keyword),
            (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc),
        ],
        'builtins': [
            (r'(?<!\.)(__import__|abs|all|any|apply|basestring|bin|bool|buffer|'
             r'bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|'
             r'complex|delattr|dict|dir|divmod|enumerate|eval|execfile|exit|'
             r'file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|'
             r'input|int|intern|isinstance|issubclass|iter|len|list|locals|'
             r'long|map|max|min|next|object|oct|open|ord|pow|property|range|'
             r'raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|'
             r'sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|'
             r'vars|xrange|zip)\b', Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|NULL'
             r')\b', Name.Builtin.Pseudo),
            (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|'
             r'BaseException|DeprecationWarning|EOFError|EnvironmentError|'
             r'Exception|FloatingPointError|FutureWarning|GeneratorExit|IOError|'
             r'ImportError|ImportWarning|IndentationError|IndexError|KeyError|'
             r'KeyboardInterrupt|LookupError|MemoryError|NameError|'
             r'NotImplemented|NotImplementedError|OSError|OverflowError|'
             r'OverflowWarning|PendingDeprecationWarning|ReferenceError|'
             r'RuntimeError|RuntimeWarning|StandardError|StopIteration|'
             r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|'
             r'TypeError|UnboundLocalError|UnicodeDecodeError|'
             r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|'
             r'UnicodeWarning|UserWarning|ValueError|Warning|ZeroDivisionError'
             r')\b', Name.Exception),
        ],
        'numbers': [
            # A float needs a decimal point or an exponent.  If the point
            # were optional this rule would match every plain integer and
            # none of the rules below could ever fire ("0x1F" would be
            # split into "0" and the name "x1F").
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+', Number.Float),
            (r'0\d+', Number.Oct),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer)
        ],
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@[a-zA-Z0-9_]+', Name.Decorator),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
        ],
        'funcname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')
        ],
        'cdef': [
            (r'(public|readonly|extern|api|inline)\b', Keyword.Reserved),
            (r'(struct|enum|union|class)\b', Keyword),
            # the declared name: an identifier followed by "(", ":", "#",
            # "=" or the end of the line
            (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(?=[(:#=]|$)',
             bygroups(Name.Function, Text), '#pop'),
            (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(,)',
             bygroups(Name.Function, Text, Punctuation)),
            (r'from\b', Keyword, '#pop'),
            (r'as\b', Keyword),
            (r':', Punctuation, '#pop'),
            # a string literal follows: leave without consuming it
            (r'(?=["\'])', Text, '#pop'),
            # any other identifier in a cdef is taken to be a type
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Keyword.Type),
            (r'.', Text),
        ],
        'classname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
        ],
        'import': [
            (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
            (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
            (r'', Text, '#pop') # all else: go back
        ],
        'fromimport': [
            (r'(\s+)(c?import)\b', bygroups(Text, Keyword), '#pop'),
            (r'[a-zA-Z_.][a-zA-Z0-9_.]*', Name.Namespace),
            # ``cdef foo from "header"``, or ``for foo from 0 < i < 10``
            (r'', Text, '#pop'),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings': [
            # %-style format specifiers
            (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             r'[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape), # included here again for raw strings
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape), # included here again for raw strings
            include('strings')
        ],
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            include('strings'),
            include('nl')
        ],
    }
01575 
01576 
class ValaLexer(RegexLexer):
    """
    For Vala source code with preprocessor directives.

    ``root`` lexes one ``statement`` at a time; a statement ends at ``;``.

    *New in Pygments 1.1.*
    """
    name = 'Vala'
    aliases = ['vala', 'vapi']
    filenames = ['*.vala', '*.vapi']
    mimetypes = ['text/x-vala']

    tokens = {
        'whitespace': [
            (r'^\s*#if\s+0', Comment.Preproc, 'if0'),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text), # line continuation
            (r'//(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
        ],
        'statements': [
            (r'L?"', String, 'string'),
            (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[lL]?', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'0x[0-9a-fA-F]+[Ll]?', Number.Hex),
            (r'0[0-7]+[Ll]?', Number.Oct),
            (r'\d+[Ll]?', Number.Integer),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'(\[)(Compact|Immutable|(?:Boolean|Simple)Type)(\])',
             bygroups(Punctuation, Name.Decorator, Punctuation)),
            # TODO: "correctly" parse complex code attributes
            (r'(\[)(CCode|(?:Integer|Floating)Type)',
             bygroups(Punctuation, Name.Decorator)),
            # Member access.  This has to be tried before the generic
            # punctuation rule below, which also matches "." and would
            # otherwise make this rule unreachable.
            (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)',
             bygroups(Operator, Name.Attribute)),
            (r'[()\[\],.]', Punctuation),
            (r'(as|base|break|case|catch|construct|continue|default|delete|do|'
             r'else|enum|finally|for|foreach|get|if|in|is|lock|new|out|params|'
             r'return|set|sizeof|switch|this|throw|try|typeof|while|yield)\b',
             Keyword),
            (r'(abstract|const|delegate|dynamic|ensures|extern|inline|internal|'
             r'override|owned|private|protected|public|ref|requires|signal|'
             r'static|throws|unowned|var|virtual|volatile|weak|yields)\b',
             Keyword.Declaration),
            (r'(namespace|using)(\s+)', bygroups(Keyword.Namespace, Text),
             'namespace'),
            (r'(class|errordomain|interface|struct)(\s+)',
             bygroups(Keyword.Declaration, Text), 'class'),
            # void is an actual keyword, others are in glib-2.0.vapi
            (r'(void|bool|char|double|float|int|int8|int16|int32|int64|long|'
             r'short|size_t|ssize_t|string|time_t|uchar|uint|uint8|uint16|'
             r'uint32|uint64|ulong|unichar|ushort)\b', Keyword.Type),
            (r'(true|false|null)\b', Name.Builtin),
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
        ],
        'root': [
            include('whitespace'),
            # anything that is not whitespace starts a statement
            ('', Text, 'statement'),
        ],
        'statement' : [
            include('whitespace'),
            include('statements'),
            ('[{}]', Punctuation),
            (';', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String), # all other characters
            (r'\\\n', String), # line continuation
            (r'\\', String), # stray backslash
        ],
        'if0': [
            # nested conditionals inside the disabled region
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            # "#else" / "#elif" ends the disabled branch
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ],
        'class': [
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
        ],
        'namespace': [
            (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace, '#pop')
        ],
    }
01664 
01665 
class OocLexer(RegexLexer):
    """
    For `Ooc <http://ooc-lang.org/>`_ source code.

    ``root`` is the only general state; ``include`` is entered after the
    ``include`` keyword and left at ``;`` or the end of the line.

    *New in Pygments 1.2.*
    """
    name = 'Ooc'
    aliases = ['ooc']
    filenames = ['*.ooc']
    mimetypes = ['text/x-ooc']

    tokens = {
        'root': [
            (r'\b(class|interface|implement|abstract|extends|from|'
             r'this|super|new|const|final|static|import|use|extern|'
             r'inline|proto|break|continue|fallthrough|operator|if|else|for|'
             r'while|do|switch|case|as|in|version|return|true|false|null)\b',
             Keyword),
            (r'include\b', Keyword, 'include'),
            (r'(cover)([ \t]+)(from)([ \t]+)([a-zA-Z0-9_]+[*@]?)',
             bygroups(Keyword, Text, Keyword, Text, Name.Class)),
            # "func ~suffix": a function definition with a name suffix
            (r'(func)((?:[ \t]|\\\n)+)(~[a-z_][a-zA-Z0-9_]*)',
             bygroups(Keyword, Text, Name.Function)),
            (r'\bfunc\b', Keyword),
            # Note: %= and ^= not listed on http://ooc-lang.org/syntax
            (r'//.*', Comment),
            (r'(?s)/\*.*?\*/', Comment.Multiline),
            (r'(==?|\+=?|-[=>]?|\*=?|/=?|:=|!=?|%=?|\?|>{1,3}=?|<{1,3}=?|\.\.|'
             r'&&?|\|\|?|\^=?)', Operator),
            # member call: ".name"
            (r'(\.)([ \t]*)([a-z]\w*)', bygroups(Operator, Text,
                                                 Name.Function)),
            (r'[A-Z][A-Z0-9_]+', Name.Constant),
            (r'[A-Z][a-zA-Z0-9_]*([@*]|\[[ \t]*\])?', Name.Class),

            # a lower-case name directly followed by "(" is a call
            (r'([a-z][a-zA-Z0-9_]*(?:~[a-z][a-zA-Z0-9_]*)?)((?:[ \t]|\\\n)*)(?=\()',
             bygroups(Name.Function, Text)),
            (r'[a-z][a-zA-Z0-9_]*', Name.Variable),

            # : introduces types
            (r'[:(){}\[\];,]', Punctuation),

            (r'0x[0-9a-fA-F]+', Number.Hex),
            # Number.Oct is the octal token used by the other lexers in
            # this module
            (r'0c[0-9]+', Number.Oct),
            # NOTE(review): no other lexer in this module emits a binary
            # number token; confirm Number.Binary is the intended type.
            (r'0b[01]+', Number.Binary),
            # one or more digits before the point ("12.5", not only
            # "1.5"); a following second dot means a range ("1..10"),
            # not a float
            (r'[0-9_]+\.[0-9_]*(?!\.)', Number.Float),
            (r'[0-9_]+', Number.Integer),

            (r'"(?:\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\"])*"',
             String.Double),
            (r"'(?:\\.|\\[0-9]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'",
             String.Char),
            (r'@', Punctuation), # pointer dereference
            (r'\.', Punctuation), # imports or chain operator

            (r'\\[ \t\n]', Text),
            (r'[ \t]+', Text),
        ],
        'include': [
            (r'[\w/]+', Name),
            (r',', Punctuation),
            (r'[ \t]', Text),
            (r'[;\n]', Text, '#pop'),
        ],
    }