Back to index

moin  1.9.0~rc2
functional.py
Go to the documentation of this file.
00001 # -*- coding: utf-8 -*-
00002 """
00003     pygments.lexers.functional
00004     ~~~~~~~~~~~~~~~~~~~~~~~~~~
00005 
00006     Lexers for functional languages.
00007 
00008     :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
00009     :license: BSD, see LICENSE for details.
00010 """
00011 
00012 import re
00013 try:
00014     set
00015 except NameError:
00016     from sets import Set as set
00017 
00018 from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
00019 from pygments.token import Text, Comment, Operator, Keyword, Name, \
00020      String, Number, Punctuation, Literal, Generic
00021 
00022 
00023 __all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer', 'LiterateHaskellLexer',
00024            'OcamlLexer', 'ErlangLexer', 'ErlangShellLexer']
00025 
00026 
class SchemeLexer(RegexLexer):
    """
    A Scheme lexer, parsing a stream and outputting the tokens
    needed to highlight scheme code.
    This lexer could be most probably easily subclassed to parse
    other LISP-Dialects like Common Lisp, Emacs Lisp or AutoLisp.

    This parser is checked with pastes from the LISP pastebin
    at http://paste.lisp.org/ to cover as much syntax as possible.

    It supports the full Scheme syntax as defined in R5RS.

    *New in Pygments 0.6.*
    """
    name = 'Scheme'
    aliases = ['scheme', 'scm']
    filenames = ['*.scm']
    mimetypes = ['text/x-scheme', 'application/x-scheme']

    # list of known keywords and builtins taken form vim 6.4 scheme.vim
    # syntax file.
    keywords = [
        'lambda', 'define', 'if', 'else', 'cond', 'and', 'or', 'case', 'let',
        'let*', 'letrec', 'begin', 'do', 'delay', 'set!', '=>', 'quote',
        'quasiquote', 'unquote', 'unquote-splicing', 'define-syntax',
        'let-syntax', 'letrec-syntax', 'syntax-rules'
    ]
    builtins = [
        '*', '+', '-', '/', '<', '<=', '=', '>', '>=', 'abs', 'acos', 'angle',
        'append', 'apply', 'asin', 'assoc', 'assq', 'assv', 'atan',
        'boolean?', 'caaaar', 'caaadr', 'caaar', 'caadar', 'caaddr', 'caadr',
        'caar', 'cadaar', 'cadadr', 'cadar', 'caddar', 'cadddr', 'caddr',
        'cadr', 'call-with-current-continuation', 'call-with-input-file',
        'call-with-output-file', 'call-with-values', 'call/cc', 'car',
        'cdaaar', 'cdaadr', 'cdaar', 'cdadar', 'cdaddr', 'cdadr', 'cdar',
        'cddaar', 'cddadr', 'cddar', 'cdddar', 'cddddr', 'cdddr', 'cddr',
        'cdr', 'ceiling', 'char->integer', 'char-alphabetic?', 'char-ci<=?',
        'char-ci<?', 'char-ci=?', 'char-ci>=?', 'char-ci>?', 'char-downcase',
        'char-lower-case?', 'char-numeric?', 'char-ready?', 'char-upcase',
        'char-upper-case?', 'char-whitespace?', 'char<=?', 'char<?', 'char=?',
        'char>=?', 'char>?', 'char?', 'close-input-port', 'close-output-port',
        'complex?', 'cons', 'cos', 'current-input-port', 'current-output-port',
        'denominator', 'display', 'dynamic-wind', 'eof-object?', 'eq?',
        'equal?', 'eqv?', 'eval', 'even?', 'exact->inexact', 'exact?', 'exp',
        'expt', 'floor', 'for-each', 'force', 'gcd', 'imag-part',
        'inexact->exact', 'inexact?', 'input-port?', 'integer->char',
        'integer?', 'interaction-environment', 'lcm', 'length', 'list',
        'list->string', 'list->vector', 'list-ref', 'list-tail', 'list?',
        'load', 'log', 'magnitude', 'make-polar', 'make-rectangular',
        'make-string', 'make-vector', 'map', 'max', 'member', 'memq', 'memv',
        'min', 'modulo', 'negative?', 'newline', 'not', 'null-environment',
        'null?', 'number->string', 'number?', 'numerator', 'odd?',
        'open-input-file', 'open-output-file', 'output-port?', 'pair?',
        'peek-char', 'port?', 'positive?', 'procedure?', 'quotient',
        'rational?', 'rationalize', 'read', 'read-char', 'real-part', 'real?',
        'remainder', 'reverse', 'round', 'scheme-report-environment',
        'set-car!', 'set-cdr!', 'sin', 'sqrt', 'string', 'string->list',
        'string->number', 'string->symbol', 'string-append', 'string-ci<=?',
        'string-ci<?', 'string-ci=?', 'string-ci>=?', 'string-ci>?',
        'string-copy', 'string-fill!', 'string-length', 'string-ref',
        'string-set!', 'string<=?', 'string<?', 'string=?', 'string>=?',
        'string>?', 'string?', 'substring', 'symbol->string', 'symbol?',
        'tan', 'transcript-off', 'transcript-on', 'truncate', 'values',
        'vector', 'vector->list', 'vector-fill!', 'vector-length',
        'vector-ref', 'vector-set!', 'vector?', 'with-input-from-file',
        'with-output-to-file', 'write', 'write-char', 'zero?'
    ]

    # valid names for identifiers
    # well, names can only not consist fully of numbers
    # but this should be good enough for now
    valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~|-]+'

    tokens = {
        'root' : [
            # the comments - always starting with semicolon
            # and going to the end of the line
            (r';.*$', Comment.Single),

            # whitespaces - usually not relevant
            (r'\s+', Text),

            # numbers
            (r'-?\d+\.\d+', Number.Float),
            (r'-?\d+', Number.Integer),
            # support for uncommon kinds of numbers -
            # have to figure out what the characters mean
            #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),

            # strings, symbols and characters
            (r'"(\\\\|\\"|[^"])*"', String),
            (r"'" + valid_name, String.Symbol),
            # FIX: the special-character alternative contained the mojibake
            # sequence "ยง" where the section sign "§" was intended.
            (r"#\\([()/'\".'_!§$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char),

            # constants
            (r'(#t|#f)', Name.Constant),

            # special operators
            (r"('|#|`|,@|,|\.)", Operator),

            # highlight the keywords; each alternative includes a trailing
            # space so only complete words match
            ('(%s)' % '|'.join([
                re.escape(entry) + ' ' for entry in keywords]),
                Keyword
            ),

            # first variable in a quoted string like
            # '(this is syntactic sugar)
            (r"(?<='\()" + valid_name, Name.Variable),
            (r"(?<=#\()" + valid_name, Name.Variable),

            # highlight the builtins (only directly after an opening paren)
            (r"(?<=\()(%s)" % '|'.join([
                re.escape(entry) + ' ' for entry in builtins]),
                Name.Builtin
            ),

            # the remaining functions
            (r'(?<=\()' + valid_name, Name.Function),
            # find the remaining variables
            (valid_name, Name.Variable),

            # the famous parentheses!
            (r'(\(|\))', Punctuation),
        ],
    }
00153 
00154 
class CommonLispLexer(RegexLexer):
    """
    A Common Lisp lexer.

    *New in Pygments 0.9.*
    """
    name = 'Common Lisp'
    aliases = ['common-lisp', 'cl']
    filenames = ['*.cl', '*.lisp', '*.el']  # use for Elisp too
    mimetypes = ['text/x-common-lisp']

    flags = re.IGNORECASE | re.MULTILINE

    ### couple of useful regexes

    # characters that are not macro-characters and can be used to begin a symbol
    nonmacro = r'\\.|[a-zA-Z0-9!$%&*+-/<=>?@\[\]^_{}~]'
    constituent = nonmacro + '|[#.:]'
    terminated = r'(?=[ "()\'\n,;`])' # whitespace or terminating macro characters

    ### symbol token, reverse-engineered from hyperspec
    # Take a deep breath...
    symbol = r'(\|[^|]+\||(?:%s)(?:%s)*)' % (nonmacro, constituent)

    def __init__(self, **options):
        # The builtin name tables are large, so they live in a separate
        # module and are only loaded when the lexer is instantiated.
        from pygments.lexers._clbuiltins import BUILTIN_FUNCTIONS, \
            SPECIAL_FORMS, MACROS, LAMBDA_LIST_KEYWORDS, DECLARATIONS, \
            BUILTIN_TYPES, BUILTIN_CLASSES
        self.builtin_function = BUILTIN_FUNCTIONS
        self.special_forms = SPECIAL_FORMS
        self.macros = MACROS
        self.lambda_list_keywords = LAMBDA_LIST_KEYWORDS
        self.declarations = DECLARATIONS
        self.builtin_types = BUILTIN_TYPES
        self.builtin_classes = BUILTIN_CLASSES
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Refine generic ``Name.Variable`` tokens into builtins, special
        forms, macros, keywords, types and classes by table lookup."""
        stack = ['root']
        # checked in order -- the first table that contains the value wins,
        # matching the original if/elif cascade
        categories = [
            (self.builtin_function, Name.Builtin),
            (self.special_forms, Keyword),
            (self.macros, Name.Builtin),
            (self.lambda_list_keywords, Keyword),
            (self.declarations, Keyword),
            (self.builtin_types, Keyword.Type),
            (self.builtin_classes, Name.Class),
        ]
        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name.Variable:
                for names, category_token in categories:
                    if value in names:
                        token = category_token
                        break
            yield index, token, value

    tokens = {
        'root' : [
            ('', Text, 'body'),
        ],
        'multiline-comment' : [
            (r'#\|', Comment.Multiline, '#push'), # (cf. Hyperspec 2.4.8.19)
            (r'\|#', Comment.Multiline, '#pop'),
            (r'[^|#]+', Comment.Multiline),
            (r'[|#]', Comment.Multiline),
        ],
        'commented-form' : [
            (r'\(', Comment.Preproc, '#push'),
            (r'\)', Comment.Preproc, '#pop'),
            (r'[^()]+', Comment.Preproc),
        ],
        'body' : [
            # whitespace
            (r'\s+', Text),

            # single-line comment
            (r';.*$', Comment.Single),

            # multi-line comment
            (r'#\|', Comment.Multiline, 'multiline-comment'),

            # encoding comment (?)
            (r'#\d*Y.*$', Comment.Special),

            # strings and characters
            (r'"(\\.|[^"])*"', String),
            # quoting
            (r":" + symbol, String.Symbol),
            (r"'" + symbol, String.Symbol),
            (r"'", Operator),
            (r"`", Operator),

            # decimal numbers
            (r'[-+]?\d+\.?' + terminated, Number.Integer),
            (r'[-+]?\d+/\d+' + terminated, Number),
            (r'[-+]?(\d*\.\d+([defls][-+]?\d+)?|\d+(\.\d*)?[defls][-+]?\d+)' \
                + terminated, Number.Float),

            # sharpsign strings and characters
            (r"#\\." + terminated, String.Char),
            (r"#\\" + symbol, String.Char),

            # vector
            (r'#\(', Operator, 'body'),

            # bitstring
            (r'#\d*\*[01]*', Literal.Other),

            # uninterned symbol
            (r'#:' + symbol, String.Symbol),

            # read-time and load-time evaluation
            (r'#[.,]', Operator),

            # function shorthand
            (r'#\'', Name.Function),

            # binary rational
            (r'#[bB][+-]?[01]+(/[01]+)?', Number),

            # octal rational
            (r'#[oO][+-]?[0-7]+(/[0-7]+)?', Number.Oct),

            # hex rational
            (r'#[xX][+-]?[0-9a-fA-F]+(/[0-9a-fA-F]+)?', Number.Hex),

            # radix rational
            (r'#\d+[rR][+-]?[0-9a-zA-Z]+(/[0-9a-zA-Z]+)?', Number),

            # complex
            (r'(#[cC])(\()', bygroups(Number, Punctuation), 'body'),

            # array
            (r'(#\d+[aA])(\()', bygroups(Literal.Other, Punctuation), 'body'),

            # structure
            (r'(#[sS])(\()', bygroups(Literal.Other, Punctuation), 'body'),

            # path
            (r'#[pP]?"(\\.|[^"])*"', Literal.Other),

            # reference
            (r'#\d+=', Operator),
            (r'#\d+#', Operator),

            # read-time comment
            # FIX: use a raw string for the '\s*\(' fragment -- the non-raw
            # form relied on '\s' being an unrecognized escape sequence
            # (a DeprecationWarning on modern Python).
            (r'#+nil' + terminated + r'\s*\(', Comment.Preproc, 'commented-form'),

            # read-time conditional
            (r'#[+-]', Operator),

            # special operators that should have been parsed already
            (r'(,@|,|\.)', Operator),

            # special constants
            (r'(t|nil)' + terminated, Name.Constant),

            # functions and variables
            # FIX: raw string for the trailing '\*' as well (same reason).
            (r'\*' + symbol + r'\*', Name.Variable.Global),
            (symbol, Name.Variable),

            # parentheses
            (r'\(', Punctuation, 'body'),
            (r'\)', Punctuation, '#pop'),
        ],
    }
00329 
00330 
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    *New in Pygments 0.8.*
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    # Reserved words; some entries are regex fragments ('infix[lr]?' covers
    # infix, infixl and infixr).
    reserved = ['case','class','data','default','deriving','do','else',
                'if','in','infix[lr]?','instance',
                'let','newtype','of','then','type','where','_']
    # Named ASCII control-character escapes allowed in literals; again some
    # entries are regex fragments (e.g. '[SE]TX' covers STX and ETX).
    ascii = ['NUL','SOH','[SE]TX','EOT','ENQ','ACK',
             'BEL','BS','HT','LF','VT','FF','CR','S[OI]','DLE',
             'DC[1-4]','NAK','SYN','ETB','CAN',
             'EM','SUB','ESC','[FGRU]S','SP','DEL']

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            #(r'--\s*|.*$', Comment.Doc),
            # line comment; the lookahead keeps operators like '-->' intact
            (r'--(?![!#$%&*+./<=>?@\^|_~]).*?$', Comment.Single),
            (r'{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            # an identifier at column 0 is taken to be a definition
            (r'^[_a-z][\w\']*', Name.Function),
            (r'[_a-z][\w\']*', Name),
            (r'[A-Z][\w\']*', Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(as)(\s+)([A-Z][a-zA-Z0-9_.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[a-zA-Z0-9_.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            # module header: "module Name (exports) where"
            (r'\s+', Text),
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][a-zA-Z0-9_.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            # parenthesized export/import lists
            (r'\s+', Text),
            (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type),
            (r'[_a-z][\w\']+', Name.Function),
            (r'--.*$', Comment.Single),
            (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments (nest, hence '#push'/'#pop')
            (r'[^-{}]+', Comment.Multiline),
            (r'{-', Comment.Multiline, '#push'),
            (r'-}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # everything that may follow a backslash in a literal
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@\^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # string gap: backslash-newline-whitespace-backslash
            (r'\n\s+\\', String.Escape, '#pop'),
        ],
    }
00449 
00450 
# Matches a single line including its trailing newline; used by the
# session/literate lexers below to walk the input line by line while
# keeping character offsets.
line_re = re.compile('.*?\n')
# Bird-style literate Haskell: group 1 is the '>' marker (plus padding),
# group 2 is the actual code on the line.
bird_re = re.compile(r'(>[ \t]*)(.*\n)')
00453 
class LiterateHaskellLexer(Lexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    *New in Pygments 0.9.*
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def get_tokens_unprocessed(self, text):
        # Strategy: collect all code into one string, lex it with a
        # HaskellLexer, and re-insert the non-code parts afterwards via
        # do_insertions().  ``insertions`` is a list of
        # (offset-into-code, token list) pairs.
        hslexer = HaskellLexer(**self.options)

        style = self.options.get('litstyle')
        if style is None:
            # autodetect; LaTeX sources start with '%' or '\'
            # NOTE(review): ``text.lstrip()[0]`` raises IndexError on empty
            # input -- presumably never hit in practice; confirm upstream.
            style = (text.lstrip()[0] in '%\\') and 'latex' or 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: lines starting with '>' are code, the '>' marker
            # itself becomes a Comment.Special insertion
            for match in line_re.finditer(text):
                line = match.group()
                m = bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code};
            # everything else is lexed as TeX and inserted around it
            from pygments.lexers.text import TexLexer
            lxlexer = TexLexer(**self.options)

            codelines = 0  # flag: currently inside a code environment
            latex = ''     # accumulated TeX text since the last insertion
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    # flush accumulated TeX (including the \begin line)
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # trailing TeX after the last code block
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        for item in do_insertions(insertions, hslexer.get_tokens_unprocessed(code)):
            yield item
00519 
00520 
class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    *New in Pygments 0.7.*
    """

    name = 'OCaml'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = [
      'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
      'downto', 'else', 'end', 'exception', 'external', 'false',
      'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
      'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
      'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
      'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
      'type', 'val', 'virtual', 'when', 'while', 'with'
    ]
    # pre-escaped regex fragments for punctuation-like keywords
    # (raw strings so the backslashes are not unrecognized escapes)
    keyopts = [
      '!=','#','&','&&',r'\(',r'\)',r'\*',r'\+',',','-',
      r'-\.','->',r'\.',r'\.\.',':','::',':=',':>',';',';;','<',
      '<-','=','>','>]','>}',r'\?',r'\?\?',r'\[',r'\[<',r'\[>',r'\[\|',
      ']','_','`','{','{<',r'\|',r'\|]','}','~'
    ]

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ['and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or']
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ['unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array']

    tokens = {
        'escape-sequence': [
            (r'\\[\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)',
             Name.Namespace, 'dotted'),
            (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class),
            (r'\(\*', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # FIX: the float rule must come before the integer rules (rules
            # are tried in order, so ``\d[\d_]*`` used to consume the integer
            # part first and the float rule could never match), and the
            # decimal point must be escaped -- the bare ``.`` matched any
            # character.
            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Binary),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword), # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            # labelled/optional arguments: ~label: / ?label:
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            # OCaml comments nest, hence '#push'/'#pop'
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            # Module.path.component chains
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'),
            (r'[a-z][a-z0-9_\']*', Name, '#pop'),
        ],
    }
00611 
00612 
class ErlangLexer(RegexLexer):
    """
    For the Erlang functional programming language.

    Blame Jeremy Thurgood (http://jerith.za.net/).

    *New in Pygments 0.9.*
    """

    name = 'Erlang'
    aliases = ['erlang']
    filenames = ['*.erl', '*.hrl']
    mimetypes = ['text/x-erlang']

    keywords = [
        'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if',
        'let', 'of', 'query', 'receive', 'try', 'when',
        ]

    builtins = [ # See erlang(3) man page
        'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list',
        'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions',
        'byte_size', 'cancel_timer', 'check_process_code', 'delete_module',
        'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit',
        'float', 'float_to_list', 'fun_info', 'fun_to_list',
        'function_exported', 'garbage_collect', 'get', 'get_keys',
        'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary',
        'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean',
        'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list',
        'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record',
        'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom',
        'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom',
        'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple',
        'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5',
        'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor',
        'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2',
        'pid_to_list', 'port_close', 'port_command', 'port_connect',
        'port_control', 'port_call', 'port_info', 'port_to_list',
        'process_display', 'process_flag', 'process_info', 'purge_module',
        'put', 'read_timer', 'ref_to_list', 'register', 'resume_process',
        'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie',
        'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor',
        'spawn_opt', 'split_binary', 'start_timer', 'statistics',
        'suspend_process', 'system_flag', 'system_info', 'system_monitor',
        'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered',
        'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list',
        'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
        ]

    operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!)'
    word_operators = [
        'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
        'div', 'not', 'or', 'orelse', 'rem', 'xor'
        ]

    # bare atom or single-quoted atom
    atom_re = r"(?:[a-z][a-zA-Z0-9_]*|'[^\n']*[^\\]')"

    variable_re = r'(?:[A-Z_][a-zA-Z0-9_]*)'

    escape_re = r'(?:\\(?:[bdefnrstv\'"\\/]|[0-7][0-7]?[0-7]?|\^[a-zA-Z]))'

    macro_re = r'(?:'+variable_re+r'|'+atom_re+r')'

    # radix prefix of based integers: 2..36
    base_re = r'(?:[2-9]|[12][0-9]|3[0-6])'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'%.*\n', Comment),
            ('(' + '|'.join(keywords) + r')\b', Keyword),
            ('(' + '|'.join(builtins) + r')\b', Name.Builtin),
            ('(' + '|'.join(word_operators) + r')\b', Operator.Word),
            (r'^-', Punctuation, 'directive'),
            (operators, Operator),
            (r'"', String, 'string'),
            (r'<<', Name.Label),
            (r'>>', Name.Label),
            (r'('+atom_re+')(:)', bygroups(Name.Namespace, Punctuation)),
            (r'^('+atom_re+r')(\s*)(\()', bygroups(Name.Function, Text, Punctuation)),
            (r'[+-]?'+base_re+r'#[0-9a-zA-Z]+', Number.Integer),
            # FIX: the float rule must precede the plain-integer rule (rules
            # are tried in order, so ``[+-]?\d+`` used to consume the integer
            # part of every float), and the decimal point must be escaped --
            # the bare ``.`` matched any character.
            (r'[+-]?\d+\.\d+', Number.Float),
            (r'[+-]?\d+', Number.Integer),
            (r'[][:_@\".{}()|;,]', Punctuation),
            (variable_re, Name.Variable),
            (atom_re, Name),
            # macro use: ?MACRO
            (r'\?'+macro_re, Name.Constant),
            # character literal: $c or $\escape
            (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char),
            # record access: #record or #record.field
            # FIX: ``(:?`` was a typo for the non-capturing group ``(?:`` --
            # it matched an optional literal colon before the dot.
            (r'#'+atom_re+r'(?:\.'+atom_re+r')?', Name.Label),
            ],
        'string': [
            (escape_re, String.Escape),
            (r'"', String, '#pop'),
            # io:format control sequences
            (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol),
            (r'[^"\\~]+', String),
            (r'~', String),
            ],
        'directive': [
            (r'(define)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'),
            (r'(record)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'),
            (atom_re, Name.Entity, '#pop'),
            ],
        }
00717 
00718 
class ErlangShellLexer(Lexer):
    """
    Shell sessions in erl (for Erlang code).

    *New in Pygments 1.1.*
    """
    name = 'Erlang erl session'
    aliases = ['erl']
    filenames = ['*.erl-sh']
    mimetypes = ['text/x-erl-shellsession']

    _prompt_re = re.compile(r'\d+>(?=\s|\Z)')

    def get_tokens_unprocessed(self, text):
        # Buffer the Erlang code typed at the prompts, lex it in one go with
        # an ErlangLexer, and splice the prompt tokens back in afterwards
        # via do_insertions().
        erlexer = ErlangLexer(**self.options)

        pending_code = ''
        pending_prompts = []
        for match in line_re.finditer(text):
            line = match.group()
            prompt = self._prompt_re.match(line)
            if prompt is not None:
                # Prompt line: record the prompt token at the current code
                # offset and append the rest of the line to the code buffer.
                cut = prompt.end()
                pending_prompts.append((len(pending_code),
                                        [(0, Generic.Prompt, line[:cut])]))
                pending_code += line[cut:]
                continue
            # Output line: flush any buffered code first.
            if pending_code:
                for item in do_insertions(pending_prompts,
                                erlexer.get_tokens_unprocessed(pending_code)):
                    yield item
                pending_code = ''
                pending_prompts = []
            # Lines starting with '*' are error reports from the shell.
            if line.startswith('*'):
                yield match.start(), Generic.Traceback, line
            else:
                yield match.start(), Generic.Output, line
        # Flush code left over at the end of the session.
        if pending_code:
            for item in do_insertions(pending_prompts,
                            erlexer.get_tokens_unprocessed(pending_code)):
                yield item
00760