
moin  1.9.0~rc2
text.py
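The lexers defined in this file plug into the standard Pygments API. A minimal, illustrative sketch (the sample diff text below is made up for demonstration; the imports and calls are the regular Pygments entry points):

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from pygments.lexers.text import DiffLexer

    # highlight() runs the lexer over the text and renders it with the formatter
    sample = '--- a/old.txt\n+++ b/new.txt\n@@ -1 +1 @@\n-old line\n+new line\n'
    print(highlight(sample, DiffLexer(), TerminalFormatter()))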
00001 # -*- coding: utf-8 -*-
00002 """
00003     pygments.lexers.text
00004     ~~~~~~~~~~~~~~~~~~~~
00005 
00006     Lexers for non-source code file types.
00007 
00008     :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
00009     :license: BSD, see LICENSE for details.
00010 """
00011 
00012 import re
00013 try:
00014     set
00015 except NameError:
00016     from sets import Set as set
00017 from bisect import bisect
00018 
00019 from pygments.lexer import Lexer, LexerContext, RegexLexer, ExtendedRegexLexer, \
00020      bygroups, include, using, this, do_insertions
00021 from pygments.token import Punctuation, Text, Comment, Keyword, Name, String, \
00022      Generic, Operator, Number, Whitespace, Literal
00023 from pygments.util import get_bool_opt
00024 from pygments.lexers.other import BashLexer
00025 
00026 __all__ = ['IniLexer', 'SourcesListLexer', 'BaseMakefileLexer',
00027            'MakefileLexer', 'DiffLexer', 'IrcLogsLexer', 'TexLexer',
00028            'GroffLexer', 'ApacheConfLexer', 'BBCodeLexer', 'MoinWikiLexer',
00029            'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer',
00030            'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer',
00031            'LighttpdConfLexer', 'NginxConfLexer', 'CMakeLexer']
00032 
00033 
00034 class IniLexer(RegexLexer):
00035     """
00036     Lexer for configuration files in INI style.
00037     """
00038 
00039     name = 'INI'
00040     aliases = ['ini', 'cfg']
00041     filenames = ['*.ini', '*.cfg', '*.properties']
00042     mimetypes = ['text/x-ini']
00043 
00044     tokens = {
00045         'root': [
00046             (r'\s+', Text),
00047             (r'[;#].*?$', Comment),
00048             (r'\[.*?\]$', Keyword),
00049             (r'(.*?)([ \t]*)(=)([ \t]*)(.*?)$',
00050              bygroups(Name.Attribute, Text, Operator, Text, String))
00051         ]
00052     }
00053 
00054     def analyse_text(text):
00055         npos = text.find('\n')
00056         if npos < 3:
00057             return False
00058         return text[0] == '[' and text[npos-1] == ']'
00059 
00060 
00061 class SourcesListLexer(RegexLexer):
00062     """
00063     Lexer that highlights debian sources.list files.
00064 
00065     *New in Pygments 0.7.*
00066     """
00067 
00068     name = 'Debian Sourcelist'
00069     aliases = ['sourceslist', 'sources.list']
00070     filenames = ['sources.list']
00071     mimetypes = ['application/x-debian-sourceslist']
00072 
00073     tokens = {
00074         'root': [
00075             (r'\s+', Text),
00076             (r'#.*?$', Comment),
00077             (r'^(deb(?:-src)?)(\s+)',
00078              bygroups(Keyword, Text), 'distribution')
00079         ],
00080         'distribution': [
00081             (r'#.*?$', Comment, '#pop'),
00082             (r'\$\(ARCH\)', Name.Variable),
00083             (r'[^\s$[]+', String),
00084             (r'\[', String.Other, 'escaped-distribution'),
00085             (r'\$', String),
00086             (r'\s+', Text, 'components')
00087         ],
00088         'escaped-distribution': [
00089             (r'\]', String.Other, '#pop'),
00090             (r'\$\(ARCH\)', Name.Variable),
00091             (r'[^\]$]+', String.Other),
00092             (r'\$', String.Other)
00093         ],
00094         'components': [
00095             (r'#.*?$', Comment, '#pop:2'),
00096             (r'$', Text, '#pop:2'),
00097             (r'\s+', Text),
00098             (r'\S+', Keyword.Pseudo),
00099         ]
00100     }
00101 
00102     def analyse_text(text):
00103         for line in text.split('\n'):
00104             line = line.strip()
00105             if not (line.startswith('#') or line.startswith('deb ') or
00106                     line.startswith('deb-src ') or not line):
00107                 return False
00108         return True
00109 
00110 
00111 class MakefileLexer(Lexer):
00112     """
00113     Lexer for BSD and GNU Make extensions (lenient enough to handle both,
00114     even in the same file).
00115 
00116     *Rewritten in Pygments 0.10.*
00117     """
00118 
00119     name = 'Makefile'
00120     aliases = ['make', 'makefile', 'mf', 'bsdmake']
00121     filenames = ['*.mak', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile']
00122     mimetypes = ['text/x-makefile']
00123 
00124     r_special = re.compile(r'^(?:'
00125         # BSD Make
00126         r'\.\s*(include|undef|error|warning|if|else|elif|endif|for|endfor)|'
00127         # GNU Make
00128         r'\s*(ifeq|ifneq|ifdef|ifndef|else|endif|-?include|define|endef|:))(?=\s)')
00129     r_comment = re.compile(r'^\s*@?#')
00130 
00131     def get_tokens_unprocessed(self, text):
00132         ins = []
00133         lines = text.splitlines(True)
00134         done = ''
00135         lex = BaseMakefileLexer(**self.options)
00136         backslashflag = False
00137         for line in lines:
00138             if self.r_special.match(line) or backslashflag:
00139                 ins.append((len(done), [(0, Comment.Preproc, line)]))
00140                 backslashflag = line.strip().endswith('\\')
00141             elif self.r_comment.match(line):
00142                 ins.append((len(done), [(0, Comment, line)]))
00143             else:
00144                 done += line
00145         for item in do_insertions(ins, lex.get_tokens_unprocessed(done)):
00146             yield item
00147 
00148 
00149 class BaseMakefileLexer(RegexLexer):
00150     """
00151     Lexer for simple Makefiles (no preprocessing).
00152 
00153     *New in Pygments 0.10.*
00154     """
00155 
00156     name = 'Makefile'
00157     aliases = ['basemake']
00158     filenames = []
00159     mimetypes = []
00160 
00161     tokens = {
00162         'root': [
00163             (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)),
00164             (r'\$\((?:.*\\\n|.*\n)+', using(BashLexer)),
00165             (r'\s+', Text),
00166             (r'#.*?\n', Comment),
00167             (r'(export)(\s+)(?=[a-zA-Z0-9_${}\t -]+\n)',
00168              bygroups(Keyword, Text), 'export'),
00169             (r'export\s+', Keyword),
00170             # assignment
00171             (r'([a-zA-Z0-9_${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n|.*\n)+)',
00172              bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))),
00173             # strings
00174             (r'"(\\\\|\\"|[^"])*"', String.Double),
00175             (r"'(\\\\|\\'|[^'])*'", String.Single),
00176             # targets
00177             (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text),
00178              'block-header'),
00179             # TODO: add paren handling (grr)
00180         ],
00181         'export': [
00182             (r'[a-zA-Z0-9_${}-]+', Name.Variable),
00183             (r'\n', Text, '#pop'),
00184             (r'\s+', Text),
00185         ],
00186         'block-header': [
00187             (r'[^,\\\n#]+', Number),
00188             (r',', Punctuation),
00189             (r'#.*?\n', Comment),
00190             (r'\\\n', Text), # line continuation
00191             (r'\\.', Text),
00192             (r'(?:[\t ]+.*\n|\n)+', using(BashLexer), '#pop'),
00193         ],
00194     }
00195 
00196 
00197 class DiffLexer(RegexLexer):
00198     """
00199     Lexer for unified or context-style diffs or patches.
00200     """
00201 
00202     name = 'Diff'
00203     aliases = ['diff', 'udiff']
00204     filenames = ['*.diff', '*.patch']
00205     mimetypes = ['text/x-diff', 'text/x-patch']
00206 
00207     tokens = {
00208         'root': [
00209             (r' .*\n', Text),
00210             (r'\+.*\n', Generic.Inserted),
00211             (r'-.*\n', Generic.Deleted),
00212             (r'!.*\n', Generic.Strong),
00213             (r'@.*\n', Generic.Subheading),
00214             (r'([Ii]ndex|diff).*\n', Generic.Heading),
00215             (r'=.*\n', Generic.Heading),
00216             (r'.*\n', Text),
00217         ]
00218     }
00219 
00220     def analyse_text(text):
00221         if text[:7] == 'Index: ':
00222             return True
00223         if text[:5] == 'diff ':
00224             return True
00225         if text[:4] == '--- ':
00226             return 0.9
00227 
00228 
00229 DPATCH_KEYWORDS = ['hunk', 'addfile', 'adddir', 'rmfile', 'rmdir', 'move',
00230     'replace']
00231 
00232 class DarcsPatchLexer(RegexLexer):
00233     """
00234     DarcsPatchLexer is a lexer for the various versions of the darcs patch
00235     format.  Examples of this format are produced by commands such as
00236     ``darcs annotate --patch`` and ``darcs send``.
00237 
00238     *New in Pygments 0.10.*
00239     """
00240     name = 'Darcs Patch'
00241     aliases = ['dpatch']
00242     filenames = ['*.dpatch', '*.darcspatch']
00243 
00244     tokens = {
00245         'root': [
00246             (r'<', Operator),
00247             (r'>', Operator),
00248             (r'{', Operator),
00249             (r'}', Operator),
00250             (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])',
00251              bygroups(Operator, Keyword, Name, Text, Name, Operator,
00252                       Literal.Date, Text, Operator)),
00253             (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)',
00254              bygroups(Operator, Keyword, Name, Text, Name, Operator,
00255                       Literal.Date, Text), 'comment'),
00256             (r'New patches:', Generic.Heading),
00257             (r'Context:', Generic.Heading),
00258             (r'Patch bundle hash:', Generic.Heading),
00259             (r'(\s*)(%s)(.*\n)' % '|'.join(DPATCH_KEYWORDS),
00260                 bygroups(Text, Keyword, Text)),
00261             (r'\+', Generic.Inserted, "insert"),
00262             (r'-', Generic.Deleted, "delete"),
00263             (r'.*\n', Text),
00264         ],
00265         'comment': [
00266             (r'[^\]].*\n', Comment),
00267             (r'\]', Operator, "#pop"),
00268         ],
00269         'specialText': [ # darcs adds [_CODE_] special operators for clarity
00270             (r'\n', Text, "#pop"), # line-based
00271             (r'\[_[^_]*_]', Operator),
00272         ],
00273         'insert': [
00274             include('specialText'),
00275             (r'\[', Generic.Inserted),
00276             (r'[^\n\[]*', Generic.Inserted),
00277         ],
00278         'delete': [
00279             include('specialText'),
00280             (r'\[', Generic.Deleted),
00281             (r'[^\n\[]*', Generic.Deleted),
00282         ],
00283     }
00284 
00285 
00286 class IrcLogsLexer(RegexLexer):
00287     """
00288     Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
00289     """
00290 
00291     name = 'IRC logs'
00292     aliases = ['irc']
00293     filenames = ['*.weechatlog']
00294     mimetypes = ['text/x-irclog']
00295 
00296     flags = re.VERBOSE | re.MULTILINE
00297     timestamp = r"""
00298         (
00299           # irssi / xchat and others
00300           (?: \[|\()?                  # Opening bracket or paren for the timestamp
00301             (?:                        # Timestamp
00302                 (?: (?:\d{1,4} [-/]?)+ # Date as - or /-separated groups of digits
00303                  [T ])?                # Date/time separator: T or space
00304                 (?: \d?\d [:.]?)+      # Time as :/.-separated groups of 1 or 2 digits
00305             )
00306           (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
00307         |
00308           # weechat
00309           \d{4}\s\w{3}\s\d{2}\s        # Date
00310           \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
00311         |
00312           # xchat
00313           \w{3}\s\d{2}\s               # Date
00314           \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
00315         )?
00316     """
00317     tokens = {
00318         'root': [
00319             # log start/end
00320             (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
00321             # hack
00322             ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
00323             # normal msgs
00324             ("^" + timestamp + r"""
00325                 (\s*<.*?>\s*)          # Nick """,
00326              bygroups(Comment.Preproc, Name.Tag), 'msg'),
00327             # /me msgs
00328             ("^" + timestamp + r"""
00329                 (\s*[*]\s+)            # Star
00330                 ([^\s]+\s+.*?\n)       # Nick + rest of message """,
00331              bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
00332             # join/part msgs
00333             ("^" + timestamp + r"""
00334                 (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
00335                 ([^\s]+\s+)                     # Nick + Space
00336                 (.*?\n)                         # Rest of message """,
00337              bygroups(Comment.Preproc, Keyword, String, Comment)),
00338             (r"^.*?\n", Text),
00339         ],
00340         'msg': [
00341             (r"[^\s]+:(?!//)", Name.Attribute),  # Prefix
00342             (r".*\n", Text, '#pop'),
00343         ],
00344     }
00345 
00346 
00347 class BBCodeLexer(RegexLexer):
00348     """
00349     A lexer that highlights BBCode(-like) syntax.
00350 
00351     *New in Pygments 0.6.*
00352     """
00353 
00354     name = 'BBCode'
00355     aliases = ['bbcode']
00356     mimetypes = ['text/x-bbcode']
00357 
00358     tokens = {
00359         'root': [
00360             (r'[^[]+', Text),
00361             # tag/end tag begin
00362             (r'\[/?\w+', Keyword, 'tag'),
00363             # stray bracket
00364             (r'\[', Text),
00365         ],
00366         'tag': [
00367             (r'\s+', Text),
00368             # attribute with value
00369             (r'(\w+)(=)("?[^\s"\]]+"?)',
00370              bygroups(Name.Attribute, Operator, String)),
00371             # tag argument (a la [color=green])
00372             (r'(=)("?[^\s"\]]+"?)',
00373              bygroups(Operator, String)),
00374             # tag end
00375             (r'\]', Keyword, '#pop'),
00376         ],
00377     }
00378 
00379 
00380 class TexLexer(RegexLexer):
00381     """
00382     Lexer for the TeX and LaTeX typesetting languages.
00383     """
00384 
00385     name = 'TeX'
00386     aliases = ['tex', 'latex']
00387     filenames = ['*.tex', '*.aux', '*.toc']
00388     mimetypes = ['text/x-tex', 'text/x-latex']
00389 
00390     tokens = {
00391         'general': [
00392             (r'%.*?\n', Comment),
00393             (r'[{}]', Name.Builtin),
00394             (r'[&_^]', Name.Builtin),
00395         ],
00396         'root': [
00397             (r'\\\[', String.Backtick, 'displaymath'),
00398             (r'\\\(', String, 'inlinemath'),
00399             (r'\$\$', String.Backtick, 'displaymath'),
00400             (r'\$', String, 'inlinemath'),
00401             (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
00402             include('general'),
00403             (r'[^\\$%&_^{}]+', Text),
00404         ],
00405         'math': [
00406             (r'\\([a-zA-Z]+|.)', Name.Variable),
00407             include('general'),
00408             (r'[0-9]+', Number),
00409             (r'[-=!+*/()\[\]]', Operator),
00410             (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
00411         ],
00412         'inlinemath': [
00413             (r'\\\)', String, '#pop'),
00414             (r'\$', String, '#pop'),
00415             include('math'),
00416         ],
00417         'displaymath': [
00418             (r'\\\]', String, '#pop'),
00419             (r'\$\$', String, '#pop'),
00420             (r'\$', Name.Builtin),
00421             include('math'),
00422         ],
00423         'command': [
00424             (r'\[.*?\]', Name.Attribute),
00425             (r'\*', Keyword),
00426             (r'', Text, '#pop'),
00427         ],
00428     }
00429 
00430     def analyse_text(text):
00431         for start in ("\\documentclass", "\\input", "\\documentstyle",
00432                       "\\relax"):
00433             if text[:len(start)] == start:
00434                 return True
00435 
00436 
00437 class GroffLexer(RegexLexer):
00438     """
00439     Lexer for the (g)roff typesetting language, supporting groff
00440     extensions. Mainly useful for highlighting manpage sources.
00441 
00442     *New in Pygments 0.6.*
00443     """
00444 
00445     name = 'Groff'
00446     aliases = ['groff', 'nroff', 'man']
00447     filenames = ['*.[1234567]', '*.man']
00448     mimetypes = ['application/x-troff', 'text/troff']
00449 
00450     tokens = {
00451         'root': [
00452             (r'(?i)(\.)(\w+)', bygroups(Text, Keyword), 'request'),
00453             (r'\.', Punctuation, 'request'),
00454             # Regular characters, slurp till we find a backslash or newline
00455             (r'[^\\\n]*', Text, 'textline'),
00456         ],
00457         'textline': [
00458             include('escapes'),
00459             (r'[^\\\n]+', Text),
00460             (r'\n', Text, '#pop'),
00461         ],
00462         'escapes': [
00463             # groff has many ways to write escapes.
00464             (r'\\"[^\n]*', Comment),
00465             (r'\\[fn]\w', String.Escape),
00466             (r'\\\(..', String.Escape),
00467             (r'\\.\[.*\]', String.Escape),
00468             (r'\\.', String.Escape),
00469             (r'\\\n', Text, 'request'),
00470         ],
00471         'request': [
00472             (r'\n', Text, '#pop'),
00473             include('escapes'),
00474             (r'"[^\n"]+"', String.Double),
00475             (r'\d+', Number),
00476             (r'\S+', String),
00477             (r'\s+', Text),
00478         ],
00479     }
00480 
00481     def analyse_text(text):
00482         if text[:1] != '.':
00483             return False
00484         if text[:3] == '.\\"':
00485             return True
00486         if text[:4] == '.TH ':
00487             return True
00488         if text[1:3].isalnum() and text[3].isspace():
00489             return 0.9
00490 
00491 
00492 class ApacheConfLexer(RegexLexer):
00493     """
00494     Lexer for configuration files following the Apache config file
00495     format.
00496 
00497     *New in Pygments 0.6.*
00498     """
00499 
00500     name = 'ApacheConf'
00501     aliases = ['apacheconf', 'aconf', 'apache']
00502     filenames = ['.htaccess', 'apache.conf', 'apache2.conf']
00503     mimetypes = ['text/x-apacheconf']
00504     flags = re.MULTILINE | re.IGNORECASE
00505 
00506     tokens = {
00507         'root': [
00508             (r'\s+', Text),
00509             (r'(#.*?)$', Comment),
00510             (r'(<[^\s>]+)(?:(\s+)(.*?))?(>)',
00511              bygroups(Name.Tag, Text, String, Name.Tag)),
00512             (r'([a-zA-Z][a-zA-Z0-9]*)(\s+)',
00513              bygroups(Name.Builtin, Text), 'value'),
00514             (r'\.+', Text),
00515         ],
00516         'value': [
00517             (r'$', Text, '#pop'),
00518             (r'[^\S\n]+', Text),
00519             (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number),
00520             (r'\d+', Number),
00521             (r'/([a-zA-Z0-9][a-zA-Z0-9_./-]+)', String.Other),
00522             (r'(on|off|none|any|all|double|email|dns|min|minimal|'
00523              r'os|productonly|full|emerg|alert|crit|error|warn|'
00524              r'notice|info|debug|registry|script|inetd|standalone|'
00525              r'user|group)\b', Keyword),
00526             (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double),
00527             (r'[^\s"]+', Text)
00528         ]
00529     }
00530 
00531 
00532 class MoinWikiLexer(RegexLexer):
00533     """
00534     For MoinMoin (and Trac) Wiki markup.
00535 
00536     *New in Pygments 0.7.*
00537     """
00538 
00539     name = 'MoinMoin/Trac Wiki markup'
00540     aliases = ['trac-wiki', 'moin']
00541     filenames = []
00542     mimetypes = ['text/x-trac-wiki']
00543     flags = re.MULTILINE | re.IGNORECASE
00544 
00545     tokens = {
00546         'root': [
00547             (r'^#.*$', Comment),
00548             (r'(!)(\S+)', bygroups(Keyword, Text)), # Ignore-next
00549             # Titles
00550             (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
00551              bygroups(Generic.Heading, using(this), Generic.Heading, String)),
00552             # Literal code blocks, with optional shebang
00553             (r'({{{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
00554             (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting
00555             # Lists
00556             (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
00557             (r'^( +)([a-zivx]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
00558             # Other Formatting
00559             (r'\[\[\w+.*?\]\]', Keyword), # Macro
00560             (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
00561              bygroups(Keyword, String, Keyword)), # Link
00562             (r'^----+$', Keyword), # Horizontal rules
00563             (r'[^\n\'\[{!_~^,|]+', Text),
00564             (r'\n', Text),
00565             (r'.', Text),
00566         ],
00567         'codeblock': [
00568             (r'}}}', Name.Builtin, '#pop'),
00569             # these blocks are allowed to be nested in Trac, but not MoinMoin
00570             (r'{{{', Text, '#push'),
00571             (r'[^{}]+', Comment.Preproc), # slurp boring text
00572             (r'.', Comment.Preproc), # allow loose { or }
00573         ],
00574     }
00575 
00576 
00577 class RstLexer(RegexLexer):
00578     """
00579     For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.
00580 
00581     *New in Pygments 0.7.*
00582 
00583     Additional options accepted:
00584 
00585     `handlecodeblocks`
00586         Highlight the contents of ``.. sourcecode:: language`` and
00587         ``.. code:: language`` directives with a lexer for the given
00588         language (default: ``True``). *New in Pygments 0.8.*
00589     """
00590     name = 'reStructuredText'
00591     aliases = ['rst', 'rest', 'restructuredtext']
00592     filenames = ['*.rst', '*.rest']
00593     mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
00594     flags = re.MULTILINE
00595 
00596     def _handle_sourcecode(self, match):
00597         from pygments.lexers import get_lexer_by_name
00598         from pygments.util import ClassNotFound
00599 
00600         # section header
00601         yield match.start(1), Punctuation, match.group(1)
00602         yield match.start(2), Text, match.group(2)
00603         yield match.start(3), Operator.Word, match.group(3)
00604         yield match.start(4), Punctuation, match.group(4)
00605         yield match.start(5), Text, match.group(5)
00606         yield match.start(6), Keyword, match.group(6)
00607         yield match.start(7), Text, match.group(7)
00608 
00609         # lookup lexer if wanted and existing
00610         lexer = None
00611         if self.handlecodeblocks:
00612             try:
00613                 lexer = get_lexer_by_name(match.group(6).strip())
00614             except ClassNotFound:
00615                 pass
00616         indention = match.group(8)
00617         indention_size = len(indention)
00618         code = (indention + match.group(9) + match.group(10) + match.group(11))
00619 
00620         # no lexer for this language. handle it like it was a code block
00621         if lexer is None:
00622             yield match.start(8), String, code
00623             return
00624 
00625         # highlight the lines with the lexer.
00626         ins = []
00627         codelines = code.splitlines(True)
00628         code = ''
00629         for line in codelines:
00630             if len(line) > indention_size:
00631                 ins.append((len(code), [(0, Text, line[:indention_size])]))
00632                 code += line[indention_size:]
00633             else:
00634                 code += line
00635         for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
00636             yield item
00637 
00638     tokens = {
00639         'root': [
00640             # Heading with overline
00641             (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)(.+)(\n)(\1)(\n)',
00642              bygroups(Generic.Heading, Text, Generic.Heading,
00643                       Text, Generic.Heading, Text)),
00644             # Plain heading
00645             (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
00646              r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
00647              bygroups(Generic.Heading, Text, Generic.Heading, Text)),
00648             # Bulleted lists
00649             (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
00650              bygroups(Text, Number, using(this, state='inline'))),
00651             # Numbered lists
00652             (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
00653              bygroups(Text, Number, using(this, state='inline'))),
00654             (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
00655              bygroups(Text, Number, using(this, state='inline'))),
00656             # Numbered, but keep words at BOL from becoming lists
00657             (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
00658              bygroups(Text, Number, using(this, state='inline'))),
00659             (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
00660              bygroups(Text, Number, using(this, state='inline'))),
00661             # Sourcecode directives
00662             (r'^( *\.\.)(\s*)((?:source)?code)(::)([ \t]*)([^\n]+)'
00663              r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
00664              _handle_sourcecode),
00665             # A directive
00666             (r'^( *\.\.)(\s*)([\w-]+)(::)(?:([ \t]*)(.+))?',
00667              bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, Keyword)),
00668             # A reference target
00669             (r'^( *\.\.)(\s*)([\w\t ]+:)(.*?)$',
00670              bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
00671             # A footnote target
00672             (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
00673              bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
00674             # Comments
00675             (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
00676             # Field list
00677             (r'^( *)(:.*?:)([ \t]+)(.*?)$', bygroups(Text, Name.Class, Text,
00678                                                      Name.Function)),
00679             # Definition list
00680             (r'^([^ ].*(?<!::)\n)((?:(?: +.*)\n)+)',
00681              bygroups(using(this, state='inline'), using(this, state='inline'))),
00682             # Code blocks
00683             (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
00684              bygroups(String.Escape, Text, String, String, Text, String)),
00685             include('inline'),
00686         ],
00687         'inline': [
00688             (r'\\.', Text), # escape
00689             (r'``', String, 'literal'), # code
00690             (r'(`)(.+?)(`__?)',
00691              bygroups(Punctuation, using(this), Punctuation)), # reference
00692             (r'(`.+?`)(:[a-zA-Z0-9-]+?:)?',
00693              bygroups(Name.Variable, Name.Attribute)), # role
00694             (r'(:[a-zA-Z0-9-]+?:)(`.+?`)',
00695              bygroups(Name.Attribute, Name.Variable)), # user-defined role
00696             (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis
00697             (r'\*.+?\*', Generic.Emph), # Emphasis
00698             (r'\[.*?\]_', String), # Footnote or citation
00699             (r'<.+?>', Name.Tag), # Hyperlink
00700             (r'[^\\\n\[*`:]+', Text),
00701             (r'.', Text),
00702         ],
00703         'literal': [
00704             (r'[^`\\]+', String),
00705             (r'\\.', String),
00706             (r'``', String, '#pop'),
00707             (r'[`\\]', String),
00708         ]
00709     }
00710 
00711     def __init__(self, **options):
00712         self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
00713         RegexLexer.__init__(self, **options)
00714 
00715     def analyse_text(text):
00716         if text[:2] == '..' and text[2:3] != '.':
00717             return 0.3
00718         p1 = text.find("\n")
00719         p2 = text.find("\n", p1 + 1)
00720         if (p2 > -1 and              # has two lines
00721             p1 * 2 + 1 == p2 and     # they are the same length
00722             text[p1+1] in '-=' and   # the next line both starts and ends with
00723             text[p1+1] == text[p2-1]): # ...a sufficiently high header
00724             return 0.5
00725 
00726 
00727 class VimLexer(RegexLexer):
00728     """
00729     Lexer for VimL script files.
00730 
00731     *New in Pygments 0.8.*
00732     """
00733     name = 'VimL'
00734     aliases = ['vim']
00735     filenames = ['*.vim', '.vimrc']
00736     mimetypes = ['text/x-vim']
00737     flags = re.MULTILINE
00738 
00739     tokens = {
00740         'root': [
00741             # Who decided that doublequote was a good comment character??
00742             (r'^\s*".*', Comment),
00743             (r'(?<=\s)"[^\-:.%#=*].*', Comment),
00744 
00745             (r'[ \t]+', Text),
00746             # TODO: regexes can have other delims
00747             (r'/(\\\\|\\/|[^\n/])*/', String.Regex),
00748             (r'"(\\\\|\\"|[^\n"])*"', String.Double),
00749             (r"'(\\\\|\\'|[^\n'])*'", String.Single),
00750             (r'-?\d+', Number),
00751             (r'#[0-9a-f]{6}', Number.Hex),
00752             (r'^:', Punctuation),
00753             (r'[()<>+=!|,~-]', Punctuation), # Inexact list.  Looks decent.
00754             (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
00755              Keyword),
00756             (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
00757             (r'\b\w+\b', Name.Other), # These are postprocessed below
00758             (r'.', Text),
00759         ],
00760     }
00761     def __init__(self, **options):
00762         from pygments.lexers._vimbuiltins import command, option, auto
00763         self._cmd = command
00764         self._opt = option
00765         self._aut = auto
00766 
00767         RegexLexer.__init__(self, **options)
00768 
00769     def is_in(self, w, mapping):
00770         r"""
00771         It's kind of difficult to decide if something might be a keyword
00772         in VimL because it allows you to abbreviate them.  In fact,
00773         'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
00774         valid ways to call it so rather than making really awful regexps
00775         like::
00776 
00777             \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b
00778 
00779         we match `\b\w+\b` and then call is_in() on those tokens.  See
00780         `scripts/get_vimkw.py` for how the lists are extracted.
00781         """
00782         p = bisect(mapping, (w,))
00783         if p > 0:
00784             if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
00785                mapping[p-1][1][:len(w)] == w: return True
00786         if p < len(mapping):
00787             return mapping[p][0] == w[:len(mapping[p][0])] and \
00788                    mapping[p][1][:len(w)] == w
00789         return False
00790 
00791     def get_tokens_unprocessed(self, text):
00792         # TODO: builtins are only subsequent tokens on lines
00793         #       and 'keywords' only happen at the beginning except
00794         #       for :au ones
00795         for index, token, value in \
00796             RegexLexer.get_tokens_unprocessed(self, text):
00797             if token is Name.Other:
00798                 if self.is_in(value, self._cmd):
00799                     yield index, Keyword, value
00800                 elif self.is_in(value, self._opt) or \
00801                      self.is_in(value, self._aut):
00802                     yield index, Name.Builtin, value
00803                 else:
00804                     yield index, Text, value
00805             else:
00806                 yield index, token, value
00807 
00808 
00809 class GettextLexer(RegexLexer):
00810     """
00811     Lexer for Gettext catalog files.
00812 
00813     *New in Pygments 0.9.*
00814     """
00815     name = 'Gettext Catalog'
00816     aliases = ['pot', 'po']
00817     filenames = ['*.pot', '*.po']
00818     mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']
00819 
00820     tokens = {
00821         'root': [
00822             (r'^#,\s.*?$', Keyword.Type),
00823             (r'^#:\s.*?$', Keyword.Declaration),
00824             #(r'^#$', Comment),
00825             (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
00826             (r'^(")([\w-]*:)(.*")$',
00827              bygroups(String, Name.Property, String)),
00828             (r'^".*"$', String),
00829             (r'^(msgid|msgid_plural|msgstr)(\s+)(".*")$',
00830              bygroups(Name.Variable, Text, String)),
00831             (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
00832              bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
00833         ]
00834     }
00835 
00836 
00837 class SquidConfLexer(RegexLexer):
00838     """
00839     Lexer for `squid <http://www.squid-cache.org/>`_ configuration files.
00840 
00841     *New in Pygments 0.9.*
00842     """
00843 
00844     name = 'SquidConf'
00845     aliases = ['squidconf', 'squid.conf', 'squid']
00846     filenames = ['squid.conf']
00847     mimetypes = ['text/x-squidconf']
00848     flags = re.IGNORECASE
00849 
00850     keywords = [ "acl", "always_direct", "announce_host",
00851                  "announce_period", "announce_port", "announce_to",
00852                  "anonymize_headers", "append_domain", "as_whois_server",
00853                  "auth_param_basic", "authenticate_children",
00854                  "authenticate_program", "authenticate_ttl", "broken_posts",
00855                  "buffered_logs", "cache_access_log", "cache_announce",
00856                  "cache_dir", "cache_dns_program", "cache_effective_group",
00857                  "cache_effective_user", "cache_host", "cache_host_acl",
00858                  "cache_host_domain", "cache_log", "cache_mem",
00859                  "cache_mem_high", "cache_mem_low", "cache_mgr",
00860                  "cachemgr_passwd", "cache_peer", "cache_peer_access",
00861                  "cahce_replacement_policy", "cache_stoplist",
00862                  "cache_stoplist_pattern", "cache_store_log", "cache_swap",
00863                  "cache_swap_high", "cache_swap_log", "cache_swap_low",
00864                  "client_db", "client_lifetime", "client_netmask",
00865                  "connect_timeout", "coredump_dir", "dead_peer_timeout",
00866                  "debug_options", "delay_access", "delay_class",
00867                  "delay_initial_bucket_level", "delay_parameters",
00868                  "delay_pools", "deny_info", "dns_children", "dns_defnames",
00869                  "dns_nameservers", "dns_testnames", "emulate_httpd_log",
00870                  "err_html_text", "fake_user_agent", "firewall_ip",
00871                  "forwarded_for", "forward_snmpd_port", "fqdncache_size",
00872                  "ftpget_options", "ftpget_program", "ftp_list_width",
00873                  "ftp_passive", "ftp_user", "half_closed_clients",
00874                  "header_access", "header_replace", "hierarchy_stoplist",
00875                  "high_response_time_warning", "high_page_fault_warning",
00876                  "htcp_port", "http_access", "http_anonymizer", "httpd_accel",
00877                  "httpd_accel_host", "httpd_accel_port",
00878                  "httpd_accel_uses_host_header", "httpd_accel_with_proxy",
00879                  "http_port", "http_reply_access", "icp_access",
00880                  "icp_hit_stale", "icp_port", "icp_query_timeout",
00881                  "ident_lookup", "ident_lookup_access", "ident_timeout",
00882                  "incoming_http_average", "incoming_icp_average",
00883                  "inside_firewall", "ipcache_high", "ipcache_low",
00884                  "ipcache_size", "local_domain", "local_ip", "logfile_rotate",
00885                  "log_fqdn", "log_icp_queries", "log_mime_hdrs",
00886                  "maximum_object_size", "maximum_single_addr_tries",
00887                  "mcast_groups", "mcast_icp_query_timeout", "mcast_miss_addr",
00888                  "mcast_miss_encode_key", "mcast_miss_port", "memory_pools",
00889                  "memory_pools_limit", "memory_replacement_policy",
00890                  "mime_table", "min_http_poll_cnt", "min_icp_poll_cnt",
00891                  "minimum_direct_hops", "minimum_object_size",
00892                  "minimum_retry_timeout", "miss_access", "negative_dns_ttl",
00893                  "negative_ttl", "neighbor_timeout", "neighbor_type_domain",
00894                  "netdb_high", "netdb_low", "netdb_ping_period",
00895                  "netdb_ping_rate", "never_direct", "no_cache",
00896                  "passthrough_proxy", "pconn_timeout", "pid_filename",
00897                  "pinger_program", "positive_dns_ttl", "prefer_direct",
00898                  "proxy_auth", "proxy_auth_realm", "query_icmp", "quick_abort",
00899                  "quick_abort", "quick_abort_max", "quick_abort_min",
00900                  "quick_abort_pct", "range_offset_limit", "read_timeout",
00901                  "redirect_children", "redirect_program",
00902                  "redirect_rewrites_host_header", "reference_age",
00903                  "reference_age", "refresh_pattern", "reload_into_ims",
00904                  "request_body_max_size", "request_size", "request_timeout",
00905                  "shutdown_lifetime", "single_parent_bypass",
00906                  "siteselect_timeout", "snmp_access", "snmp_incoming_address",
00907                  "snmp_port", "source_ping", "ssl_proxy",
00908                  "store_avg_object_size", "store_objects_per_bucket",
00909                  "strip_query_terms", "swap_level1_dirs", "swap_level2_dirs",
00910                  "tcp_incoming_address", "tcp_outgoing_address",
00911                  "tcp_recv_bufsize", "test_reachability", "udp_hit_obj",
00912                  "udp_hit_obj_size", "udp_incoming_address",
00913                  "udp_outgoing_address", "unique_hostname", "unlinkd_program",
00914                  "uri_whitespace", "useragent_log", "visible_hostname",
00915                  "wais_relay", "wais_relay_host", "wais_relay_port",
00916                  ]
00917 
00918     opts = [ "proxy-only", "weight", "ttl", "no-query", "default",
00919              "round-robin", "multicast-responder", "on", "off", "all",
00920              "deny", "allow", "via", "parent", "no-digest", "heap", "lru",
00921              "realm", "children", "credentialsttl", "none", "disable",
00922              "offline_toggle", "diskd", "q1", "q2",
00923              ]
00924 
00925     actions = [ "shutdown", "info", "parameter", "server_list",
00926                 "client_list", r'squid\.conf',
00927                 ]
00928 
00929     actions_stats = [ "objects", "vm_objects", "utilization",
00930                       "ipcache", "fqdncache", "dns", "redirector", "io",
00931                       "reply_headers", "filedescriptors", "netdb",
00932                       ]
00933 
00934     actions_log = [ "status", "enable", "disable", "clear"]
00935 
00936     acls = [ "url_regex", "urlpath_regex", "referer_regex", "port",
00937              "proto", "req_mime_type", "rep_mime_type", "method",
00938              "browser", "user", "src", "dst", "time", "dstdomain", "ident",
00939              "snmp_community",
00940              ]
00941 
00942     ip_re = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
00943 
00944     def makelistre(list):
00945         return r'\b(?:'+'|'.join(list)+r')\b'
00946 
00947     tokens = {
00948         'root': [
00949             (r'\s+', Text),
00950             (r'#', Comment, 'comment'),
00951             (makelistre(keywords), Keyword),
00952             (makelistre(opts), Name.Constant),
00953             # Actions
00954             (makelistre(actions), String),
00955             (r'stats/'+makelistre(actions), String),
00956             (r'log/'+makelistre(actions)+r'=', String),
00957             (makelistre(acls), Keyword),
00958             (ip_re+r'(?:/(?:'+ip_re+r')|\d+)?', Number),
00959             (r'\b\d+\b', Number),
00960             (r'\S+', Text),
00961         ],
00962         'comment': [
00963             (r'\s*TAG:.*', String.Escape, '#pop'),
00964             (r'.*', Comment, '#pop'),
00965         ],
00966     }
00967 
00968 
00969 class DebianControlLexer(RegexLexer):
00970     """
00971     Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs.
00972 
00973     *New in Pygments 0.9.*
00974     """
00975     name = 'Debian Control file'
00976     aliases = ['control']
00977     filenames = ['control']
00978 
00979     tokens = {
00980         'root': [
00981             (r'^(Description)', Keyword, 'description'),
00982             (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'),
00983             (r'^((Build-)?Depends)', Keyword, 'depends'),
00984             (r'^((?:Python-)?Version)(:\s*)([^\s]+)$',
00985              bygroups(Keyword, Text, Number)),
00986             (r'^((?:Installed-)?Size)(:\s*)([^\s]+)$',
00987              bygroups(Keyword, Text, Number)),
00988             (r'^(MD5Sum|SHA1|SHA256)(:\s*)([^\s]+)$',
00989              bygroups(Keyword, Text, Number)),
00990             (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$',
00991              bygroups(Keyword, Whitespace, String)),
00992         ],
00993         'maintainer': [
00994             (r'<[^>]+>', Generic.Strong),
00995             (r'<[^>]+>$', Generic.Strong, '#pop'),
00996             (r',\n?', Text),
00997             (r'.', Text),
00998         ],
00999         'description': [
01000             (r'(.*)(Homepage)(: )([^\s]+)', bygroups(Text, String, Name, Name.Class)),
01001             (r':.*\n', Generic.Strong),
01002             (r' .*\n', Text),
01003             ('', Text, '#pop'),
01004         ],
01005         'depends': [
01006             (r':\s*', Text),
01007             (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)),
01008             (r'\(', Text, 'depend_vers'),
01009             (r',', Text),
01010             (r'\|', Operator),
01011             (r'[\s]+', Text),
01012             (r'[}\)]\s*$', Text, '#pop'),
01013             (r'[}]', Text),
01014             (r'[^,]$', Name.Function, '#pop'),
01015             (r'([\+\.a-zA-Z0-9-][\s\n]*)', Name.Function),
01016         ],
01017         'depend_vers': [
01018             (r'\),', Text, '#pop'),
01019             (r'\)[^,]', Text, '#pop:2'),
01020             (r'([><=]+)(\s*)([^\)]+)', bygroups(Operator, Text, Number))
01021         ]
01022     }
01023 
01024 
01025 class YamlLexerContext(LexerContext):
01026     """Indentation context for the YAML lexer."""
01027 
01028     def __init__(self, *args, **kwds):
01029         super(YamlLexerContext, self).__init__(*args, **kwds)
01030         self.indent_stack = []
01031         self.indent = -1
01032         self.next_indent = 0
01033         self.block_scalar_indent = None
01034 
01035 
01036 class YamlLexer(ExtendedRegexLexer):
01037     """
01038     Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
01039     language.
01040 
01041     *New in Pygments 0.11.*
01042     """
01043 
01044     name = 'YAML'
01045     aliases = ['yaml']
01046     filenames = ['*.yaml', '*.yml']
01047     mimetypes = ['text/x-yaml']
01048 
01049 
01050     def something(token_class):
01051         """Do not produce empty tokens."""
01052         def callback(lexer, match, context):
01053             text = match.group()
01054             if not text:
01055                 return
01056             yield match.start(), token_class, text
01057             context.pos = match.end()
01058         return callback
01059 
01060     def reset_indent(token_class):
01061         """Reset the indentation levels."""
01062         def callback(lexer, match, context):
01063             text = match.group()
01064             context.indent_stack = []
01065             context.indent = -1
01066             context.next_indent = 0
01067             context.block_scalar_indent = None
01068             yield match.start(), token_class, text
01069             context.pos = match.end()
01070         return callback
01071 
01072     def save_indent(token_class, start=False):
01073         """Save a possible indentation level."""
01074         def callback(lexer, match, context):
01075             text = match.group()
01076             extra = ''
01077             if start:
01078                 context.next_indent = len(text)
01079                 if context.next_indent < context.indent:
01080                     while context.next_indent < context.indent:
01081                         context.indent = context.indent_stack.pop()
01082                     if context.next_indent > context.indent:
01083                         extra = text[context.indent:]
01084                         text = text[:context.indent]
01085             else:
01086                 context.next_indent += len(text)
01087             if text:
01088                 yield match.start(), token_class, text
01089             if extra:
01090                 yield match.start()+len(text), token_class.Error, extra
01091             context.pos = match.end()
01092         return callback
01093 
01094     def set_indent(token_class, implicit=False):
01095         """Set the previously saved indentation level."""
01096         def callback(lexer, match, context):
01097             text = match.group()
01098             if context.indent < context.next_indent:
01099                 context.indent_stack.append(context.indent)
01100                 context.indent = context.next_indent
01101             if not implicit:
01102                 context.next_indent += len(text)
01103             yield match.start(), token_class, text
01104             context.pos = match.end()
01105         return callback
01106 
01107     def set_block_scalar_indent(token_class):
01108         """Set an explicit indentation level for a block scalar."""
01109         def callback(lexer, match, context):
01110             text = match.group()
01111             context.block_scalar_indent = None
01112             if not text:
01113                 return
01114             increment = match.group(1)
01115             if increment:
01116                 current_indent = max(context.indent, 0)
01117                 increment = int(increment)
01118                 context.block_scalar_indent = current_indent + increment
01119             if text:
01120                 yield match.start(), token_class, text
01121                 context.pos = match.end()
01122         return callback
01123 
01124     def parse_block_scalar_empty_line(indent_token_class, content_token_class):
01125         """Process an empty line in a block scalar."""
01126         def callback(lexer, match, context):
01127             text = match.group()
01128             if (context.block_scalar_indent is None or
01129                     len(text) <= context.block_scalar_indent):
01130                 if text:
01131                     yield match.start(), indent_token_class, text
01132             else:
01133                 indentation = text[:context.block_scalar_indent]
01134                 content = text[context.block_scalar_indent:]
01135                 yield match.start(), indent_token_class, indentation
01136                 yield (match.start()+context.block_scalar_indent,
01137                         content_token_class, content)
01138             context.pos = match.end()
01139         return callback
01140 
01141     def parse_block_scalar_indent(token_class):
01142         """Process indentation spaces in a block scalar."""
01143         def callback(lexer, match, context):
01144             text = match.group()
01145             if context.block_scalar_indent is None:
01146                 if len(text) <= max(context.indent, 0):
01147                     context.stack.pop()
01148                     context.stack.pop()
01149                     return
01150                 context.block_scalar_indent = len(text)
01151             else:
01152                 if len(text) < context.block_scalar_indent:
01153                     context.stack.pop()
01154                     context.stack.pop()
01155                     return
01156             if text:
01157                 yield match.start(), token_class, text
01158                 context.pos = match.end()
01159         return callback
01160 
01161     def parse_plain_scalar_indent(token_class):
01162         """Process indentation spaces in a plain scalar."""
01163         def callback(lexer, match, context):
01164             text = match.group()
01165             if len(text) <= context.indent:
01166                 context.stack.pop()
01167                 context.stack.pop()
01168                 return
01169             if text:
01170                 yield match.start(), token_class, text
01171                 context.pos = match.end()
01172         return callback
01173 
01174 
01175 
01176     tokens = {
01177         # the root rules
01178         'root': [
01179             # ignored whitespaces
01180             (r'[ ]+(?=#|$)', Text),
01181             # line breaks
01182             (r'\n+', Text),
01183             # a comment
01184             (r'#[^\n]*', Comment.Single),
01185             # the '%YAML' directive
01186             (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
01187             # the %TAG directive
01188             (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
01189             # document start and document end indicators
01190             (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
01191              'block-line'),
01192             # indentation spaces
01193             (r'[ ]*(?![ \t\n\r\f\v]|$)', save_indent(Text, start=True),
01194              ('block-line', 'indentation')),
01195         ],
01196 
01197         # trailing whitespaces after directives or a block scalar indicator
01198         'ignored-line': [
01199             # ignored whitespaces
01200             (r'[ ]+(?=#|$)', Text),
01201             # a comment
01202             (r'#[^\n]*', Comment.Single),
01203             # line break
01204             (r'\n', Text, '#pop:2'),
01205         ],
01206 
01207         # the %YAML directive
01208         'yaml-directive': [
01209             # the version number
01210             (r'([ ]+)([0-9]+\.[0-9]+)',
01211              bygroups(Text, Number), 'ignored-line'),
01212         ],
01213 
01214         # the %TAG directive
01215         'tag-directive': [
01216             # a tag handle and the corresponding prefix
01217             (r'([ ]+)(!|![0-9A-Za-z_-]*!)'
01218              r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)',
01219              bygroups(Text, Keyword.Type, Text, Keyword.Type),
01220              'ignored-line'),
01221         ],
01222 
01223         # block scalar indicators and indentation spaces
01224         'indentation': [
01225             # trailing whitespaces are ignored
01226             (r'[ ]*$', something(Text), '#pop:2'),
01227             # whitespaces preceding block collection indicators
01228             (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
01229             # block collection indicators
01230             (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
01231             # the beginning of a block line
01232             (r'[ ]*', save_indent(Text), '#pop'),
01233         ],
01234 
01235         # an indented line in the block context
01236         'block-line': [
01237             # the line end
01238             (r'[ ]*(?=#|$)', something(Text), '#pop'),
01239             # whitespaces separating tokens
01240             (r'[ ]+', Text),
01241             # tags, anchors and aliases,
01242             include('descriptors'),
01243             # block collections and scalars
01244             include('block-nodes'),
01245             # flow collections and quoted scalars
01246             include('flow-nodes'),
01247             # a plain scalar
01248             (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])',
01249              something(Name.Variable),
01250              'plain-scalar-in-block-context'),
01251         ],
01252 
01253         # tags, anchors, aliases
01254         'descriptors' : [
01255             # a full-form tag
01256             (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Keyword.Type),
01257             # a tag in the form '!', '!suffix' or '!handle!suffix'
01258             (r'!(?:[0-9A-Za-z_-]+)?'
01259              r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Keyword.Type),
01260             # an anchor
01261             (r'&[0-9A-Za-z_-]+', Name.Label),
01262             # an alias
01263             (r'\*[0-9A-Za-z_-]+', Name.Variable),
01264         ],
01265 
01266         # block collections and scalars
01267         'block-nodes': [
01268             # implicit key
01269             (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
01270             # literal and folded scalars
01271             (r'[|>]', Punctuation.Indicator,
01272              ('block-scalar-content', 'block-scalar-header')),
01273         ],
01274 
01275         # flow collections and quoted scalars
01276         'flow-nodes': [
01277             # a flow sequence
01278             (r'\[', Punctuation.Indicator, 'flow-sequence'),
01279             # a flow mapping
01280             (r'\{', Punctuation.Indicator, 'flow-mapping'),
01281             # a single-quoted scalar
01282             (r'\'', String, 'single-quoted-scalar'),
01283             # a double-quoted scalar
01284             (r'\"', String, 'double-quoted-scalar'),
01285         ],
01286 
01287         # the content of a flow collection
01288         'flow-collection': [
01289             # whitespaces
01290             (r'[ ]+', Text),
01291             # line breaks
01292             (r'\n+', Text),
01293             # a comment
01294             (r'#[^\n]*', Comment.Single),
01295             # simple indicators
01296             (r'[?:,]', Punctuation.Indicator),
01297             # tags, anchors and aliases
01298             include('descriptors'),
01299             # nested collections and quoted scalars
01300             include('flow-nodes'),
01301             # a plain scalar
01302             (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])',
01303              something(Name.Variable),
01304              'plain-scalar-in-flow-context'),
01305         ],
01306 
01307         # a flow sequence indicated by '[' and ']'
01308         'flow-sequence': [
01309             # include flow collection rules
01310             include('flow-collection'),
01311             # the closing indicator
01312             (r'\]', Punctuation.Indicator, '#pop'),
01313         ],
01314 
01315         # a flow mapping indicated by '{' and '}'
01316         'flow-mapping': [
01317             # include flow collection rules
01318             include('flow-collection'),
01319             # the closing indicator
01320             (r'\}', Punctuation.Indicator, '#pop'),
01321         ],
01322 
01323         # block scalar lines
01324         'block-scalar-content': [
01325             # line break
01326             (r'\n', Text),
01327             # empty line
01328             (r'^[ ]+$',
01329              parse_block_scalar_empty_line(Text, Name.Constant)),
01330             # indentation spaces (we may leave the state here)
01331             (r'^[ ]*', parse_block_scalar_indent(Text)),
01332             # line content
01333             (r'[^\n\r\f\v]+', Name.Constant),
01334         ],
01335 
01336         # the content of a literal or folded scalar
01337         'block-scalar-header': [
01338             # indentation indicator followed by chomping flag
01339             (r'([1-9])?[+-]?(?=[ ]|$)',
01340              set_block_scalar_indent(Punctuation.Indicator),
01341              'ignored-line'),
01342             # chomping flag followed by indentation indicator
01343             (r'[+-]?([1-9])?(?=[ ]|$)',
01344              set_block_scalar_indent(Punctuation.Indicator),
01345              'ignored-line'),
01346         ],
01347 
01348         # ignored and regular whitespaces in quoted scalars
01349         'quoted-scalar-whitespaces': [
01350             # leading and trailing whitespaces are ignored
01351             (r'^[ ]+|[ ]+$', Text),
01352             # line breaks are ignored
01353             (r'\n+', Text),
01354             # other whitespaces are a part of the value
01355             (r'[ ]+', Name.Variable),
01356         ],
01357 
01358         # single-quoted scalars
01359         'single-quoted-scalar': [
01360             # include whitespace and line break rules
01361             include('quoted-scalar-whitespaces'),
01362             # escaping of the quote character
01363             (r'\'\'', String.Escape),
01364             # regular non-whitespace characters
01365             (r'[^ \t\n\r\f\v\']+', String),
01366             # the closing quote
01367             (r'\'', String, '#pop'),
01368         ],
01369 
01370         # double-quoted scalars
01371         'double-quoted-scalar': [
01372             # include whitespace and line break rules
01373             include('quoted-scalar-whitespaces'),
01374             # escaping of special characters
01375             (r'\\[0abt\tn\nvfre "\\N_LP]', String),
01376             # escape codes
01377             (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
01378              String.Escape),
01379             # regular non-whitespace characters
01380             (r'[^ \t\n\r\f\v\"\\]+', String),
01381             # the closing quote
01382             (r'"', String, '#pop'),
01383         ],
01384 
01385         # the beginning of a new line while scanning a plain scalar
01386         'plain-scalar-in-block-context-new-line': [
01387             # empty lines
01388             (r'^[ ]+$', Text),
01389             # line breaks
01390             (r'\n+', Text),
01391             # document start and document end indicators
01392             (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
01393             # indentation spaces (we may leave the block line state here)
01394             (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
01395         ],
01396 
01397         # a plain scalar in the block context
01398         'plain-scalar-in-block-context': [
01399             # the scalar ends with the ':' indicator
01400             (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
01401             # the scalar ends with whitespaces followed by a comment
01402             (r'[ ]+(?=#)', Text, '#pop'),
01403             # trailing whitespaces are ignored
01404             (r'[ ]+$', Text),
01405             # line breaks are ignored
01406             (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
01407             # other whitespaces are a part of the value
01408             (r'[ ]+', Literal.Scalar.Plain),
01409             # regular non-whitespace characters
01410             (r'(?::(?![ \t\n\r\f\v])|[^ \t\n\r\f\v:])+', Literal.Scalar.Plain),
01411         ],
01412 
01413         # a plain scalar in the flow context
01414         'plain-scalar-in-flow-context': [
01415             # the scalar ends with an indicator character
01416             (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
01417             # the scalar ends with a comment
01418             (r'[ ]+(?=#)', Text, '#pop'),
01419             # leading and trailing whitespaces are ignored
01420             (r'^[ ]+|[ ]+$', Text),
01421             # line breaks are ignored
01422             (r'\n+', Text),
01423             # other whitespaces are a part of the value
01424             (r'[ ]+', Name.Variable),
01425             # regular non-whitespace characters
01426             (r'[^ \t\n\r\f\v,:?\[\]{}]+', Name.Variable),
01427         ],
01428 
01429     }
01430 
01431     def get_tokens_unprocessed(self, text=None, context=None):
01432         if context is None:
01433             context = YamlLexerContext(text, 0)
01434         return super(YamlLexer, self).get_tokens_unprocessed(text, context)
01435 
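
# Usage sketch: YamlLexer is driven like any other Pygments lexer.  The
# helper name and sample document below are purely illustrative.
def _yaml_lexer_example():
    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    sample = "server:\n  host: localhost\n  ports: [80, 443]\n"
    # When no context is passed in, get_tokens_unprocessed() above creates a
    # fresh YamlLexerContext, so an ordinary highlight() call just works.
    return highlight(sample, YamlLexer(), TerminalFormatter())
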
01436 
01437 class LighttpdConfLexer(RegexLexer):
01438     """
01439     Lexer for `Lighttpd <http://lighttpd.net/>`_ configuration files.
01440 
01441     *New in Pygments 0.11.*
01442     """
01443     name = 'Lighttpd configuration file'
01444     aliases = ['lighty', 'lighttpd']
01445     filenames = []
01446     mimetypes = ['text/x-lighttpd-conf']
01447 
01448     tokens = {
01449         'root': [
01450             (r'#.*\n', Comment.Single),
01451             (r'/\S*', Name), # pathname
01452             (r'[a-zA-Z._-]+', Keyword),
01453             (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number),
01454             (r'[0-9]+', Number),
01455             (r'=>|=~|\+=|==|=|\+', Operator),
01456             (r'\$[A-Z]+', Name.Builtin),
01457             (r'[(){}\[\],]', Punctuation),
01458             (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double),
01459             (r'\s+', Text),
01460         ],
01461 
01462     }
01463 
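
# Usage sketch: the lexer can also be looked up through its 'lighty' alias.
# The sample configuration below is purely illustrative.
def _lighttpd_conf_example():
    from pygments import lex
    from pygments.lexers import get_lexer_by_name
    sample = 'server.port = 80\nserver.modules += ("mod_rewrite")\n'
    # lex() yields (token_type, value) pairs produced by the rules above.
    return list(lex(sample, get_lexer_by_name('lighty')))
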
01464 
01465 class NginxConfLexer(RegexLexer):
01466     """
01467     Lexer for `Nginx <http://nginx.net/>`_ configuration files.
01468 
01469     *New in Pygments 0.11.*
01470     """
01471     name = 'Nginx configuration file'
01472     aliases = ['nginx']
01473     filenames = []
01474     mimetypes = ['text/x-nginx-conf']
01475 
01476     tokens = {
01477         'root': [
01478             (r'(include)(\s+)([^\s;]+)', bygroups(Keyword, Text, Name)),
01479             (r'[^\s;#]+', Keyword, 'stmt'),
01480             include('base'),
01481         ],
01482         'block': [
01483             (r'}', Punctuation, '#pop:2'),
01484             (r'[^\s;#]+', Keyword.Namespace, 'stmt'),
01485             include('base'),
01486         ],
01487         'stmt': [
01488             (r'{', Punctuation, 'block'),
01489             (r';', Punctuation, '#pop'),
01490             include('base'),
01491         ],
01492         'base': [
01493             (r'#.*\n', Comment.Single),
01494             (r'on|off', Name.Constant),
01495             (r'\$[^\s;#()]+', Name.Variable),
01496             (r'([a-z0-9.-]+)(:)([0-9]+)',
01497              bygroups(Name, Punctuation, Number.Integer)),
01498             (r'[a-z-]+/[a-z-+]+', String), # mimetype
01499             #(r'[a-zA-Z._-]+', Keyword),
01500             (r'[0-9]+[km]?\b', Number.Integer),
01501             (r'(~)(\s*)([^\s{]+)', bygroups(Punctuation, Text, String.Regex)),
01502             (r'[:=~]', Punctuation),
01503             (r'[^\s;#{}$]+', String), # catch all
01504             (r'/[^\s;#]*', Name), # pathname
01505             (r'\s+', Text),
01506         ],
01507     }
01508 
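
# Usage sketch: the 'root', 'stmt' and 'block' states cooperate on nested
# directives.  The sample configuration below is purely illustrative.
def _nginx_conf_example():
    from pygments.token import Keyword
    sample = 'server {\n    listen 80;\n    include mime.types;\n}\n'
    # 'server' is matched in 'root' and pushes 'stmt'; '{' then pushes
    # 'block', where 'listen' and 'include' become Keyword.Namespace and
    # each ';' pops back out of 'stmt'.
    return [value for token, value in NginxConfLexer().get_tokens(sample)
            if token in (Keyword, Keyword.Namespace)]
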
01509 
01510 class CMakeLexer(RegexLexer):
01511     """
01512     Lexer for `CMake <http://cmake.org/Wiki/CMake>`_ files.
01513 
01514     *New in Pygments 1.2.*
01515     """
01516     name = 'CMake'
01517     aliases = ['cmake']
01518     filenames = ['*.cmake']
01519     mimetypes = ['text/x-cmake']
01520 
01521     tokens = {
01522         'root': [
01523             #(r'(ADD_CUSTOM_COMMAND|ADD_CUSTOM_TARGET|ADD_DEFINITIONS|'
01524             # r'ADD_DEPENDENCIES|ADD_EXECUTABLE|ADD_LIBRARY|ADD_SUBDIRECTORY|'
01525             # r'ADD_TEST|AUX_SOURCE_DIRECTORY|BUILD_COMMAND|BUILD_NAME|'
01526             # r'CMAKE_MINIMUM_REQUIRED|CONFIGURE_FILE|CREATE_TEST_SOURCELIST|'
01527             # r'ELSE|ELSEIF|ENABLE_LANGUAGE|ENABLE_TESTING|ENDFOREACH|'
01528             # r'ENDFUNCTION|ENDIF|ENDMACRO|ENDWHILE|EXEC_PROGRAM|'
01529             # r'EXECUTE_PROCESS|EXPORT_LIBRARY_DEPENDENCIES|FILE|FIND_FILE|'
01530             # r'FIND_LIBRARY|FIND_PACKAGE|FIND_PATH|FIND_PROGRAM|FLTK_WRAP_UI|'
01531             # r'FOREACH|FUNCTION|GET_CMAKE_PROPERTY|GET_DIRECTORY_PROPERTY|'
01532             # r'GET_FILENAME_COMPONENT|GET_SOURCE_FILE_PROPERTY|'
01533             # r'GET_TARGET_PROPERTY|GET_TEST_PROPERTY|IF|INCLUDE|'
01534             # r'INCLUDE_DIRECTORIES|INCLUDE_EXTERNAL_MSPROJECT|'
01535             # r'INCLUDE_REGULAR_EXPRESSION|INSTALL|INSTALL_FILES|'
01536             # r'INSTALL_PROGRAMS|INSTALL_TARGETS|LINK_DIRECTORIES|'
01537             # r'LINK_LIBRARIES|LIST|LOAD_CACHE|LOAD_COMMAND|MACRO|'
01538             # r'MAKE_DIRECTORY|MARK_AS_ADVANCED|MATH|MESSAGE|OPTION|'
01539             # r'OUTPUT_REQUIRED_FILES|PROJECT|QT_WRAP_CPP|QT_WRAP_UI|REMOVE|'
01540             # r'REMOVE_DEFINITIONS|SEPARATE_ARGUMENTS|SET|'
01541             # r'SET_DIRECTORY_PROPERTIES|SET_SOURCE_FILES_PROPERTIES|'
01542             # r'SET_TARGET_PROPERTIES|SET_TESTS_PROPERTIES|SITE_NAME|'
01543             # r'SOURCE_GROUP|STRING|SUBDIR_DEPENDS|SUBDIRS|'
01544             # r'TARGET_LINK_LIBRARIES|TRY_COMPILE|TRY_RUN|UNSET|'
01545             # r'USE_MANGLED_MESA|UTILITY_SOURCE|VARIABLE_REQUIRES|'
01546             # r'VTK_MAKE_INSTANTIATOR|VTK_WRAP_JAVA|VTK_WRAP_PYTHON|'
01547             # r'VTK_WRAP_TCL|WHILE|WRITE_FILE|'
01548             # r'COUNTARGS)\b', Name.Builtin, 'args'),
01549             (r'\b([A-Za-z_]+)([ \t]*)(\()', bygroups(Name.Builtin, Text,
01550                                                      Punctuation), 'args'),
01551             include('keywords'),
01552             include('ws')
01553         ],
01554         'args': [
01555             (r'\(', Punctuation, '#push'),
01556             (r'\)', Punctuation, '#pop'),
01557             (r'(\${)(.+?)(})', bygroups(Operator, Name.Variable, Operator)),
01558             (r'(?s)".*?"', String.Double),
01559             (r'\\\S+', String),
01560             (r'[^\)$"# \t\n]+', String),
01561             (r'\n', Text), # explicitly legal
01562             include('keywords'),
01563             include('ws')
01564         ],
01565         'string': [
01566 
01567         ],
01568         'keywords': [
01569             (r'\b(WIN32|UNIX|APPLE|CYGWIN|BORLAND|MINGW|MSVC|MSVC_IDE|MSVC60|'
01570              r'MSVC70|MSVC71|MSVC80|MSVC90)\b', Keyword),
01571         ],
01572         'ws': [
01573             (r'[ \t]+', Text),
01574             (r'#.+\n', Comment),
01575         ]
01576     }
01577
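
# Usage sketch: a '<name>(' opener is tokenized as Name.Builtin, with the
# argument list handled by the recursive 'args' state.  The sample below is
# purely illustrative.
def _cmake_example():
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    sample = ('cmake_minimum_required(VERSION 2.6)\n'
              'set(SRC_DIR "${CMAKE_SOURCE_DIR}/src")\n'
              'add_subdirectory(${SRC_DIR})\n')
    # The quoted "${CMAKE_SOURCE_DIR}/src" stays a double-quoted String,
    # while the bare ${SRC_DIR} reference is split into Operator /
    # Name.Variable / Operator inside 'args'.
    return highlight(sample, CMakeLexer(), HtmlFormatter())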