Back to index

moin  1.9.0~rc2
web.py
Go to the documentation of this file.
00001 # -*- coding: utf-8 -*-
00002 """
00003     pygments.lexers.web
00004     ~~~~~~~~~~~~~~~~~~~
00005 
00006     Lexers for web-related languages and markup.
00007 
00008     :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
00009     :license: BSD, see LICENSE for details.
00010 """
00011 
00012 import re
00013 try:
00014     set
00015 except NameError:
00016     from sets import Set as set
00017 
00018 from pygments.lexer import RegexLexer, bygroups, using, include, this
00019 from pygments.token import \
00020      Text, Comment, Operator, Keyword, Name, String, Number, Other, Punctuation
00021 from pygments.util import get_bool_opt, get_list_opt, looks_like_xml, \
00022                           html_doctype_matches
00023 
00024 
# Names exported by ``from pygments.lexers.web import *``: every lexer
# class defined in this module.
__all__ = ['HtmlLexer', 'XmlLexer', 'JavascriptLexer', 'CssLexer',
           'PhpLexer', 'ActionScriptLexer', 'XsltLexer', 'ActionScript3Lexer',
           'MxmlLexer']
00028 
00029 
class JavascriptLexer(RegexLexer):
    """
    For JavaScript source code.

    The ``slashstartsregex`` state is pushed after every token that can be
    followed by a regular expression literal (operators, opening brackets,
    separators and most keywords).  In that state a ``/`` is tried as the
    start of a regex literal; in ``root`` a ``/`` is lexed as an operator.
    """

    name = 'JavaScript'
    aliases = ['js', 'javascript']
    filenames = ['*.js']
    mimetypes = ['application/x-javascript', 'text/x-javascript', 'text/javascript']

    flags = re.DOTALL
    tokens = {
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'<!--', Comment),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline)
        ],
        'slashstartsregex': [
            include('commentsandwhitespace'),
            # A complete regex literal, optionally followed by flags.
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            # A slash that does not start a well-formed regex literal.
            (r'(?=/)', Text, ('#pop', 'badregex')),
            # Anything else: leave this state without consuming input.
            (r'', Text, '#pop')
        ],
        'badregex': [
            # Left again at the next newline.
            ('\n', Text, '#pop')
        ],
        'root': [
            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|[-<>+*%&\|\^/])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|'
             r'throw|try|catch|finally|new|delete|typeof|instanceof|void|'
             r'this)\b', Keyword, 'slashstartsregex'),
            (r'(var|with|function)\b', Keyword.Declaration, 'slashstartsregex'),
            (r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|'
             r'extends|final|float|goto|implements|import|int|interface|long|native|'
             r'package|private|protected|public|short|static|super|synchronized|throws|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
            # ``this`` is handled by the Keyword rule above and ``Error`` is
            # listed once; the former duplicates could never match.
            (r'(Array|Boolean|Date|Error|Function|Math|netscape|'
             r'Number|Object|Packages|RegExp|String|sun|decodeURI|'
             r'decodeURIComponent|encodeURI|encodeURIComponent|'
             r'eval|isFinite|isNaN|parseFloat|parseInt|document|'
             r'window)\b', Name.Builtin),
            # ``$`` is valid anywhere in an identifier, not only at the start.
            (r'[$a-zA-Z_][$a-zA-Z0-9_]*', Name.Other),
            # The exponent may carry an explicit sign (``1.5e-3``).
            (r'[0-9][0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
        ]
    }
00087 
00088 
class ActionScriptLexer(RegexLexer):
    """
    For ActionScript source code.

    *New in Pygments 0.9.*
    """

    name = 'ActionScript'
    aliases = ['as', 'actionscript']
    filenames = ['*.as']
    mimetypes = ['application/x-actionscript', 'text/x-actionscript',
                 'text/actionscript']

    flags = re.DOTALL
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'/(\\\\|\\/|[^/\n])*/[gim]*', String.Regex),
            (r'[~\^\*!%&<>\|+=:;,/?\\-]+', Operator),
            (r'[{}\[\]();.]+', Punctuation),
            (r'(case|default|for|each|in|while|do|break|return|continue|if|else|'
             r'throw|try|catch|var|with|new|typeof|arguments|instanceof|this|'
             r'switch)\b', Keyword),
            (r'(class|public|final|internal|native|override|private|protected|'
             r'static|import|extends|implements|interface|intrinsic|return|super|'
             r'dynamic|function|const|get|namespace|package|set)\b',
             Keyword.Declaration),
            (r'(true|false|null|NaN|Infinity|-Infinity|undefined|Void)\b',
             Keyword.Constant),
            # Built-in class names.  Every fragment except the last must end
            # in ``|``: adjacent string literals are concatenated, so a
            # missing separator silently fuses two names into one.
            (r'(Accessibility|AccessibilityProperties|ActionScriptVersion|'
             r'ActivityEvent|AntiAliasType|ApplicationDomain|AsBroadcaster|Array|'
             r'AsyncErrorEvent|AVM1Movie|BevelFilter|Bitmap|BitmapData|'
             r'BitmapDataChannel|BitmapFilter|BitmapFilterQuality|BitmapFilterType|'
             r'BlendMode|BlurFilter|Boolean|ByteArray|Camera|Capabilities|CapsStyle|'
             r'Class|Color|ColorMatrixFilter|ColorTransform|ContextMenu|'
             r'ContextMenuBuiltInItems|ContextMenuEvent|ContextMenuItem|'
             r'ConvolutionFilter|CSMSettings|DataEvent|Date|DefinitionError|'
             r'DeleteObjectSample|Dictionary|DisplacementMapFilter|DisplayObject|'
             r'DisplacementMapFilterMode|DisplayObjectContainer|DropShadowFilter|'
             r'Endian|EOFError|Error|ErrorEvent|EvalError|Event|EventDispatcher|'
             r'EventPhase|ExternalInterface|FileFilter|FileReference|'
             r'FileReferenceList|FocusDirection|FocusEvent|Font|FontStyle|FontType|'
             r'FrameLabel|FullScreenEvent|Function|GlowFilter|GradientBevelFilter|'
             r'GradientGlowFilter|GradientType|Graphics|GridFitType|HTTPStatusEvent|'
             r'IBitmapDrawable|ID3Info|IDataInput|IDataOutput|IDynamicPropertyOutput|'
             r'IDynamicPropertyWriter|IEventDispatcher|IExternalizable|'
             r'IllegalOperationError|IME|IMEConversionMode|IMEEvent|int|'
             r'InteractiveObject|InterpolationMethod|InvalidSWFError|InvokeEvent|'
             r'IOError|IOErrorEvent|JointStyle|Key|Keyboard|KeyboardEvent|KeyLocation|'
             r'LineScaleMode|Loader|LoaderContext|LoaderInfo|LoadVars|LocalConnection|'
             r'Locale|Math|Matrix|MemoryError|Microphone|MorphShape|Mouse|MouseEvent|'
             r'MovieClip|MovieClipLoader|Namespace|NetConnection|NetStatusEvent|'
             r'NetStream|NewObjectSample|Number|Object|ObjectEncoding|PixelSnapping|'
             r'Point|PrintJob|PrintJobOptions|PrintJobOrientation|ProgressEvent|Proxy|'
             r'QName|RangeError|Rectangle|ReferenceError|RegExp|Responder|Sample|Scene|'
             r'ScriptTimeoutError|Security|SecurityDomain|SecurityError|'
             r'SecurityErrorEvent|SecurityPanel|Selection|Shape|SharedObject|'
             r'SharedObjectFlushStatus|SimpleButton|Socket|Sound|SoundChannel|'
             r'SoundLoaderContext|SoundMixer|SoundTransform|SpreadMethod|Sprite|'
             r'StackFrame|StackOverflowError|Stage|StageAlign|StageDisplayState|'
             r'StageQuality|StageScaleMode|StaticText|StatusEvent|String|StyleSheet|'
             r'SWFVersion|SyncEvent|SyntaxError|System|TextColorType|TextField|'
             r'TextFieldAutoSize|TextFieldType|TextFormat|TextFormatAlign|'
             r'TextLineMetrics|TextRenderer|TextSnapshot|Timer|TimerEvent|Transform|'
             r'TypeError|uint|URIError|URLLoader|URLLoaderDataFormat|URLRequest|'
             r'URLRequestHeader|URLRequestMethod|URLStream|URLVariables|VerifyError|'
             r'Video|XML|XMLDocument|XMLList|XMLNode|XMLNodeType|XMLSocket|XMLUI)\b',
             Name.Builtin),
            (r'(decodeURI|decodeURIComponent|encodeURI|escape|eval|isFinite|isNaN|'
             r'isXMLName|clearInterval|fscommand|getTimer|getURL|getVersion|'
             r'isFinite|parseFloat|parseInt|setInterval|trace|updateAfterEvent|'
             r'unescape)\b', Name.Function),
            (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other),
            # The exponent may carry an explicit sign (``1.5e-3``).
            (r'[0-9][0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[fd]?', Number.Float),
            # No IGNORECASE flag is set, so upper-case hex digits must be
            # listed explicitly.
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
        ]
    }

    def analyse_text(text):
        """Return a constant, low score (0.05) for any input text."""
        # NOTE(review): takes no ``self``; presumably wrapped by the lexer
        # base class machinery -- confirm against pygments.lexer.
        return 0.05
00174 
00175 
class ActionScript3Lexer(RegexLexer):
    """
    For ActionScript 3 source code.

    Function signatures are lexed with a small state machine:
    ``funcparams`` handles the typed parameter list, ``defval`` an optional
    default value after each parameter, and ``type`` the optional return
    type after the closing parenthesis.

    *New in Pygments 0.11.*
    """

    name = 'ActionScript 3'
    aliases = ['as3', 'actionscript3']
    filenames = ['*.as']
    mimetypes = ['application/x-actionscript', 'text/x-actionscript',
                 'text/actionscript']

    # Regex fragment shared by the rules below.
    identifier = r'[$a-zA-Z_][a-zA-Z0-9_]*'

    flags = re.DOTALL | re.MULTILINE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(function\s+)(' + identifier + r')(\s*)(\()',
             bygroups(Keyword.Declaration, Name.Function, Text, Operator),
             'funcparams'),
            (r'(var|const)(\s+)(' + identifier + r')(\s*)(:)(\s*)(' + identifier + r')',
             bygroups(Keyword.Declaration, Text, Name, Text, Punctuation, Text,
                      Keyword.Type)),
            (r'(import|package)(\s+)((?:' + identifier + r'|\.)+)(\s*)',
             bygroups(Keyword, Text, Name.Namespace, Text)),
            (r'(new)(\s+)(' + identifier + r')(\s*)(\()',
             bygroups(Keyword, Text, Keyword.Type, Text, Operator)),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'/(\\\\|\\/|[^\n])*/[gisx]*', String.Regex),
            (r'(\.)(' + identifier + r')', bygroups(Operator, Name.Attribute)),
            (r'(case|default|for|each|in|while|do|break|return|continue|if|else|'
             r'throw|try|catch|with|new|typeof|arguments|instanceof|this|'
             r'switch|import|include|as|is)\b',
             Keyword),
            (r'(class|public|final|internal|native|override|private|protected|'
             r'static|import|extends|implements|interface|intrinsic|return|super|'
             r'dynamic|function|const|get|namespace|package|set)\b',
             Keyword.Declaration),
            (r'(true|false|null|NaN|Infinity|-Infinity|undefined|void)\b',
             Keyword.Constant),
            (r'(decodeURI|decodeURIComponent|encodeURI|escape|eval|isFinite|isNaN|'
             r'isXMLName|clearInterval|fscommand|getTimer|getURL|getVersion|'
             r'isFinite|parseFloat|parseInt|setInterval|trace|updateAfterEvent|'
             r'unescape)\b', Name.Function),
            (identifier, Name),
            # The exponent may carry an explicit sign (``1.5e-3``).
            (r'[0-9][0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[fd]?', Number.Float),
            # No IGNORECASE flag is set, so upper-case hex digits must be
            # listed explicitly.
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r'[~\^\*!%&<>\|+=:;,/?\\{}\[\]();.-]+', Operator),
        ],
        'funcparams': [
            (r'\s+', Text),
            # ``[...]name : Type`` -- then look for an optional default value.
            (r'(\s*)(\.\.\.)?(' + identifier + r')(\s*)(:)(\s*)(' +
             identifier + r'|\*)(\s*)',
             bygroups(Text, Punctuation, Name, Text, Operator, Text,
                      Keyword.Type, Text), 'defval'),
            (r'\)', Operator, 'type')
        ],
        'type': [
            # Optional return type; either way pop back out of both
            # ``type`` and ``funcparams``.
            (r'(\s*)(:)(\s*)(' + identifier + r'|\*)',
             bygroups(Text, Operator, Text, Keyword.Type), '#pop:2'),
            (r'\s*', Text, '#pop:2')
        ],
        'defval': [
            (r'(=)(\s*)([^(),]+)(\s*)(,?)',
             bygroups(Operator, Text, using(this), Text, Operator), '#pop'),
            (r',?', Operator, '#pop')
        ]
    }

    def analyse_text(text):
        """
        Return 0.3 if the text starts with something that looks like
        ``name : type``, otherwise 0.1.
        """
        if re.match(r'\w+\s*:\s*\w', text):
            return 0.3
        return 0.1
00254 
00255 
class CssLexer(RegexLexer):
    """
    For CSS (Cascading Style Sheets).

    Selectors and at-rules are lexed in the ``basics``/``atrule`` states;
    the inside of a ``{ ... }`` declaration block is lexed in ``content``.
    """

    name = 'CSS'
    aliases = ['css']
    filenames = ['*.css']
    mimetypes = ['text/css']

    tokens = {
        'root': [
            include('basics'),
        ],
        'basics': [
            (r'\s+', Text),
            (r'/\*(?:.|\n)*?\*/', Comment),
            (r'{', Punctuation, 'content'),
            (r'\:[a-zA-Z0-9_-]+', Name.Decorator),
            (r'\.[a-zA-Z0-9_-]+', Name.Class),
            (r'\#[a-zA-Z0-9_-]+', Name.Function),
            (r'@[a-zA-Z0-9_-]+', Keyword, 'atrule'),
            (r'[a-zA-Z0-9_-]+', Name.Tag),
            (r'[~\^\*!%&\[\]\(\)<>\|+=@:;,./?-]', Operator),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single)
        ],
        'atrule': [
            (r'{', Punctuation, 'atcontent'),
            (r';', Punctuation, '#pop'),
            include('basics'),
        ],
        'atcontent': [
            include('basics'),
            # Closing brace of the at-rule body: leave both ``atcontent``
            # and ``atrule``.
            (r'}', Punctuation, '#pop:2'),
        ],
        'content': [
            (r'\s+', Text),
            (r'}', Punctuation, '#pop'),
            (r'url\(.*?\)', String.Other),
            (r'^@.*?$', Comment.Preproc),
            # Property names and keyword values.  ``\b`` also succeeds in
            # front of a hyphen, so whenever one name is a hyphen-delimited
            # prefix of another (``border-right`` / ``border-right-color``)
            # the longer name must be listed first or it can never match.
            (r'(azimuth|background-attachment|background-color|'
             r'background-image|background-position|background-repeat|'
             r'background|border-bottom-color|border-bottom-style|'
             r'border-bottom-width|border-left-color|border-left-style|'
             r'border-left-width|border-right-color|'
             r'border-right-style|border-right-width|border-right|'
             r'border-top-color|'
             r'border-top-style|border-top-width|border-bottom|'
             r'border-collapse|border-left|border-width|border-color|'
             r'border-spacing|border-style|border-top|border|caption-side|'
             r'clear|clip|color|content|counter-increment|counter-reset|'
             r'cue-after|cue-before|cue|cursor|direction|display|'
             r'elevation|empty-cells|float|font-family|font-size-adjust|'
             r'font-size|font-stretch|font-style|font-variant|'
             r'font-weight|font|height|letter-spacing|line-height|'
             r'list-style-type|list-style-image|list-style-position|'
             r'list-style|margin-bottom|margin-left|margin-right|'
             r'margin-top|margin|marker-offset|marks|max-height|max-width|'
             r'min-height|min-width|opacity|orphans|outline-color|'
             r'outline-style|outline-width|outline|overflow|padding-bottom|'
             r'padding-left|padding-right|padding-top|padding|'
             r'page-break-after|page-break-before|page-break-inside|page|'
             r'pause-after|pause-before|pause|pitch-range|pitch|'
             r'play-during|position|quotes|richness|right-side|right|size|'
             r'speak-header|speak-numeral|speak-punctuation|speak|'
             r'speech-rate|stress|table-layout|text-align|text-decoration|'
             r'text-indent|text-shadow|text-transform|top|unicode-bidi|'
             r'vertical-align|visibility|voice-family|volume|white-space|'
             r'widows|width|word-spacing|z-index|bottom|left-side|left|'
             r'above|absolute|always|armenian|aural|auto|avoid|baseline|'
             r'behind|below|bidi-override|blink|block|bold|bolder|both|'
             r'capitalize|center-left|center-right|center|circle|'
             r'cjk-ideographic|close-quote|collapse|condensed|continuous|'
             r'crop|crosshair|cross|cursive|dashed|decimal-leading-zero|'
             r'decimal|default|digits|disc|dotted|double|e-resize|embed|'
             r'extra-condensed|extra-expanded|expanded|fantasy|far-left|'
             r'far-right|faster|fast|fixed|georgian|groove|hebrew|help|'
             r'hidden|hide|higher|high|hiragana-iroha|hiragana|icon|'
             r'inherit|inline-table|inline|inset|inside|invert|italic|'
             r'justify|katakana-iroha|katakana|landscape|larger|large|'
             r'leftwards|level|lighter|line-through|list-item|'
             r'loud|lower-alpha|lower-greek|lower-roman|lowercase|ltr|'
             r'lower|low|medium|message-box|middle|mix|monospace|'
             r'n-resize|narrower|ne-resize|no-close-quote|no-open-quote|'
             r'no-repeat|none|normal|nowrap|nw-resize|oblique|once|'
             r'open-quote|outset|outside|overline|pointer|portrait|px|'
             r'relative|repeat-x|repeat-y|repeat|rgb|ridge|'
             r'rightwards|s-resize|sans-serif|scroll|se-resize|'
             r'semi-condensed|semi-expanded|separate|serif|show|silent|'
             r'slow|slower|small-caps|small-caption|smaller|soft|solid|'
             r'spell-out|square|static|status-bar|super|sw-resize|'
             r'table-caption|table-cell|table-column-group|table-column|'
             r'table-footer-group|table-header-group|table-row-group|'
             r'table-row|text-bottom|text-top|text|thick|thin|'
             r'transparent|ultra-condensed|ultra-expanded|underline|'
             r'upper-alpha|upper-latin|upper-roman|uppercase|url|'
             r'visible|w-resize|wait|wider|x-fast|x-high|x-large|x-loud|'
             r'x-low|x-small|x-soft|xx-large|xx-small|yes)\b', Keyword),
            # Named colours.
            (r'(indigo|gold|firebrick|indianred|yellow|darkolivegreen|'
             r'darkseagreen|mediumvioletred|mediumorchid|chartreuse|'
             r'mediumslateblue|black|springgreen|crimson|lightsalmon|brown|'
             r'turquoise|olivedrab|cyan|silver|skyblue|gray|darkturquoise|'
             r'goldenrod|darkgreen|darkviolet|darkgray|lightpink|teal|'
             r'darkmagenta|lightgoldenrodyellow|lavender|yellowgreen|thistle|'
             r'violet|navy|orchid|blue|ghostwhite|honeydew|cornflowerblue|'
             r'darkblue|darkkhaki|mediumpurple|cornsilk|red|bisque|slategray|'
             r'darkcyan|khaki|wheat|deepskyblue|darkred|steelblue|aliceblue|'
             r'gainsboro|mediumturquoise|floralwhite|coral|purple|lightgrey|'
             r'lightcyan|darksalmon|beige|azure|lightsteelblue|oldlace|'
             r'greenyellow|royalblue|lightseagreen|mistyrose|sienna|'
             r'lightcoral|orangered|navajowhite|lime|palegreen|burlywood|'
             r'seashell|mediumspringgreen|fuchsia|papayawhip|blanchedalmond|'
             r'peru|aquamarine|white|darkslategray|ivory|dodgerblue|'
             r'lemonchiffon|chocolate|orange|forestgreen|slateblue|olive|'
             r'mintcream|antiquewhite|darkorange|cadetblue|moccasin|'
             r'limegreen|saddlebrown|darkslateblue|lightskyblue|deeppink|'
             r'plum|aqua|darkgoldenrod|maroon|sandybrown|magenta|tan|'
             r'rosybrown|pink|lightblue|palevioletred|mediumseagreen|'
             r'dimgray|powderblue|seagreen|snow|mediumblue|midnightblue|'
             r'paleturquoise|palegoldenrod|whitesmoke|darkorchid|salmon|'
             r'lightslategray|lawngreen|lightgreen|tomato|hotpink|'
             r'lightyellow|lavenderblush|linen|mediumaquamarine|green|'
             r'blueviolet|peachpuff)\b', Name.Builtin),
            (r'\!important', Comment.Preproc),
            (r'/\*(?:.|\n)*?\*/', Comment),
            (r'\#[a-zA-Z0-9]{1,6}', Number),
            (r'[\.-]?[0-9]*[\.]?[0-9]+(em|px|\%|pt|pc|in|mm|cm|ex)', Number),
            (r'-?[0-9]+', Number),
            (r'[~\^\*!%&<>\|+=@:,./?-]+', Operator),
            (r'[\[\]();]+', Punctuation),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            # Fallback for any other identifier; ``*`` (not ``+``) so that a
            # single-letter name is matched as well.
            (r'[a-zA-Z][a-zA-Z0-9]*', Name)
        ]
    }
00391 
00392 
class HtmlLexer(RegexLexer):
    """
    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted
    by the appropriate lexer.
    """

    name = 'HTML'
    aliases = ['html']
    filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt']
    mimetypes = ['text/html', 'application/xhtml+xml']

    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'<\!\[CDATA\[.*?\]\]>', Comment.Preproc),
            ('<!--', Comment, 'comment'),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            # For <script>/<style> two states are pushed: ``tag`` (on top)
            # lexes the attributes, and once it pops at ``>`` the content
            # state below it takes over.
            (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')),
            (r'<\s*style\s*', Name.Tag, ('style-content', 'tag')),
            (r'<\s*[a-zA-Z0-9:]+', Name.Tag, 'tag'),
            (r'<\s*/\s*[a-zA-Z0-9:]+\s*>', Name.Tag),
        ],
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        'tag': [
            (r'\s+', Text),
            (r'[a-zA-Z0-9_:-]+\s*=', Name.Attribute, 'attr'),
            (r'[a-zA-Z0-9_:-]+', Name.Attribute),
            (r'/?\s*>', Name.Tag, '#pop'),
        ],
        'script-content': [
            (r'<\s*/\s*script\s*>', Name.Tag, '#pop'),
            (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
        ],
        'style-content': [
            (r'<\s*/\s*style\s*>', Name.Tag, '#pop'),
            (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
        ],
        'attr': [
            # Whitespace between ``=`` and the value (``href= "x"``), as in
            # the XML lexer's ``attr`` state.
            (r'\s+', Text),
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        """
        Return 0.5 if ``html_doctype_matches`` accepts the text; otherwise
        fall through and return ``None``.
        """
        if html_doctype_matches(text):
            return 0.5
00447 
00448 
class PhpLexer(RegexLexer):
    """
    For `PHP <http://www.php.net/>`_ source code.
    For PHP embedded in HTML, use the `HtmlPhpLexer`.

    Additional options accepted:

    `startinline`
        If given and ``True`` the lexer starts highlighting with
        php code (i.e.: no starting ``<?php`` required).  The default
        is ``False``.
    `funcnamehighlighting`
        If given and ``True``, highlight builtin function names
        (default: ``True``).
    `disabledmodules`
        If given, must be a list of module names whose function names
        should not be highlighted. By default all modules are highlighted
        except the special ``'unknown'`` module that includes functions
        that are known to php but are undocumented.

        To get a list of allowed modules have a look into the
        `_phpbuiltins` module:

        .. sourcecode:: pycon

            >>> from pygments.lexers._phpbuiltins import MODULES
            >>> MODULES.keys()
            ['PHP Options/Info', 'Zip', 'dba', ...]

        In fact the names of those modules match the module names from
        the php documentation.
    """

    name = 'PHP'
    aliases = ['php', 'php3', 'php4', 'php5']
    filenames = ['*.php', '*.php[345]']
    mimetypes = ['text/x-php']

    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
    tokens = {
        'root': [
            (r'<\?(php)?', Comment.Preproc, 'php'),
            (r'[^<]+', Other),
            (r'<', Other)
        ],
        'php': [
            (r'\?>', Comment.Preproc, '#pop'),
            # heredoc: <<<LABEL ... LABEL;
            (r'<<<([a-zA-Z_][a-zA-Z0-9_]*)\n.*?\n\1\;?\n', String),
            (r'\s+', Text),
            (r'#.*?\n', Comment.Single),
            (r'//.*?\n', Comment.Single),
            # put the empty comment here, it is otherwise seen as
            # the start of a docstring
            (r'/\*\*/', Comment.Multiline),
            (r'/\*\*.*?\*/', String.Doc),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'(->|::)(\s*)([a-zA-Z_][a-zA-Z0-9_]*)',
             bygroups(Operator, Text, Name.Attribute)),
            (r'[~!%^&*+=|:.<>/?@-]+', Operator),
            (r'[\[\]{}();,]+', Punctuation),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(function)(\s+)(&?)(\s*)',
              bygroups(Keyword, Text, Operator, Text), 'functionname'),
            (r'(const)(\s+)([a-zA-Z_][a-zA-Z0-9_]*)',
              bygroups(Keyword, Text, Name.Constant)),
            (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|'
             r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|'
             r'FALSE|print|for|require|continue|foreach|require_once|'
             r'declare|return|default|static|do|switch|die|stdClass|'
             r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|'
             r'virtual|endfor|include_once|while|endforeach|global|__FILE__|'
             r'endif|list|__LINE__|endswitch|new|__sleep|endwhile|not|'
             r'array|__wakeup|E_ALL|NULL|final|php_user_filter|interface|'
             r'implements|public|private|protected|abstract|clone|try|'
             r'catch|throw|this)\b', Keyword),
            # Raw string: in a plain string literal ``\b`` is a backspace
            # character, not a word boundary.
            # NOTE(review): TRUE/FALSE/NULL are also in the Keyword rule
            # above, which is tried first and is case-insensitive, so this
            # rule is currently shadowed.
            (r'(true|false|null)\b', Keyword.Constant),
            (r'\$\{\$+[a-zA-Z_][a-zA-Z0-9_]*\}', Name.Variable),
            (r'\$+[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Other),
            # Hex must be tried first, otherwise its leading ``0`` is taken
            # as a decimal number.  Decimal numbers may have several digits
            # and an exponent with an optional sign.
            (r"0[xX][0-9a-fA-F]+[Ll]?|"
             r"[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?[flFLdD]?", Number),
            (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single),
            (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick),
            (r'"', String.Double, 'string'),
        ],
        'classname': [
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
        ],
        'functionname': [
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')
        ],
        'string': [
            (r'"', String.Double, '#pop'),
            (r'[^{$"\\]+', String.Double),
            (r'\\([nrt\"$]|[0-7]{1,3}|x[0-9A-Fa-f]{1,2})', String.Escape),
            (r'\$[a-zA-Z_][a-zA-Z0-9_]*(\[\S+\]|->[a-zA-Z_][a-zA-Z0-9_]*)?',
             String.Interpol),
            # Interpolated expressions are lexed by this lexer again,
            # starting directly in PHP mode.
            (r'(\{\$\{)(.*?)(\}\})',
             bygroups(String.Interpol, using(this, _startinline=True),
                      String.Interpol)),
            (r'(\{)(\$.*?)(\})',
             bygroups(String.Interpol, using(this, _startinline=True),
                      String.Interpol)),
            (r'(\$\{)(\S+)(\})',
             bygroups(String.Interpol, Name.Variable, String.Interpol)),
            (r'[${\\]+', String.Double)
        ],
    }

    def __init__(self, **options):
        """
        Read the ``funcnamehighlighting``, ``disabledmodules`` and
        ``startinline`` options and collect the names of the builtin
        functions that should be highlighted.
        """
        self.funcnamehighlighting = get_bool_opt(
            options, 'funcnamehighlighting', True)
        self.disabledmodules = get_list_opt(
            options, 'disabledmodules', ['unknown'])
        self.startinline = get_bool_opt(options, 'startinline', False)

        # private option argument for the lexer itself
        if '_startinline' in options:
            self.startinline = options.pop('_startinline')

        # collect activated functions in a set
        self._functions = set()
        if self.funcnamehighlighting:
            from pygments.lexers._phpbuiltins import MODULES
            # ``items()`` rather than ``iteritems()``: the latter exists
            # only on Python 2 dictionaries.
            for key, value in MODULES.items():
                if key not in self.disabledmodules:
                    self._functions.update(value)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, token, value)`` tuples for ``text``.

        Lexing starts in the ``php`` state when ``startinline`` is set, and
        ``Name.Other`` tokens whose value is an activated builtin function
        name are re-tagged as ``Name.Builtin``.
        """
        stack = ['root']
        if self.startinline:
            stack.append('php')
        for index, token, value in \
            RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name.Other:
                if value in self._functions:
                    yield index, Name.Builtin, value
                    continue
            yield index, token, value

    def analyse_text(text):
        """
        Score the text: +0.3 if it contains ``<?`` not followed by ``xml``,
        +0.1 if it contains ``?>``.
        """
        rv = 0.0
        if re.search(r'<\?(?!xml)', text):
            rv += 0.3
        if '?>' in text:
            rv += 0.1
        return rv
00597 
00598 
class XmlLexer(RegexLexer):
    """
    Generic lexer for XML (eXtensible Markup Language).
    """

    flags = re.MULTILINE | re.DOTALL

    name = 'XML'
    aliases = ['xml']
    filenames = ['*.xml', '*.xsl', '*.rss', '*.xslt', '*.xsd', '*.wsdl']
    mimetypes = ['text/xml', 'application/xml', 'image/svg+xml',
                 'application/rss+xml', 'application/atom+xml',
                 'application/xsl+xml', 'application/xslt+xml']

    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'<\!\[CDATA\[.*?\]\]>', Comment.Preproc),
            ('<!--', Comment, 'comment'),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            (r'<\s*[a-zA-Z0-9:._-]+', Name.Tag, 'tag'),
            (r'<\s*/\s*[a-zA-Z0-9:._-]+\s*>', Name.Tag),
        ],
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        'tag': [
            (r'\s+', Text),
            (r'[a-zA-Z0-9_.:-]+\s*=', Name.Attribute, 'attr'),
            (r'/?\s*>', Name.Tag, '#pop'),
        ],
        'attr': [
            # Raw string: ``\s`` in a plain string literal is an invalid
            # escape sequence.
            (r'\s+', Text),
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        """
        Return 0.5 if ``looks_like_xml`` accepts the text; otherwise fall
        through and return ``None``.
        """
        if looks_like_xml(text):
            return 0.5
00645 
00646 
class XsltLexer(XmlLexer):
    '''
    A lexer for XSLT.

    Behaves like `XmlLexer`, except that tags in the ``xsl:`` namespace
    whose local name is listed in ``EXTRA_KEYWORDS`` are emitted as
    ``Keyword`` instead of ``Name.Tag``.

    *New in Pygments 0.10.*
    '''

    name = 'XSLT'
    aliases = ['xslt']
    filenames = ['*.xsl', '*.xslt']

    EXTRA_KEYWORDS = set([
        'apply-imports', 'apply-templates', 'attribute',
        'attribute-set', 'call-template', 'choose', 'comment',
        'copy', 'copy-of', 'decimal-format', 'element', 'fallback',
        'for-each', 'if', 'import', 'include', 'key', 'message',
        'namespace-alias', 'number', 'otherwise', 'output', 'param',
        'preserve-space', 'processing-instruction', 'sort',
        'strip-space', 'stylesheet', 'template', 'text', 'transform',
        'value-of', 'variable', 'when', 'with-param'
    ])

    # Matches the start of an opening or closing ``xsl:`` tag token and
    # captures the local element name.  Optional whitespace is accepted in
    # the same places as in XmlLexer's tag rules, and the name stops at
    # whitespace, ``/`` or ``>`` so ``</xsl:if >`` is recognised too.
    _xsl_tag_re = re.compile(r'<\s*/?\s*xsl:([^\s/>]*)')

    def get_tokens_unprocessed(self, text):
        """
        Yield the tokens produced by `XmlLexer`, re-tagging known ``xsl:``
        element tags as ``Keyword``.
        """
        for index, token, value in XmlLexer.get_tokens_unprocessed(self, text):
            # Only tag tokens can qualify, so skip the regex for the rest.
            if token is Name.Tag:
                m = self._xsl_tag_re.match(value)
                if m and m.group(1) in self.EXTRA_KEYWORDS:
                    yield index, Keyword, value
                    continue
            yield index, token, value

    def analyse_text(text):
        """
        Return 0.8 if the text looks like XML and contains ``<xsl``;
        otherwise fall through and return ``None``.
        """
        if looks_like_xml(text) and '<xsl' in text:
            return 0.8
00681 
00682 
00683 
class MxmlLexer(RegexLexer):
    """
    For MXML markup.
    Nested AS3 in <script> tags is highlighted by the appropriate lexer.
    """
    flags = re.MULTILINE | re.DOTALL
    name = 'MXML'
    aliases = ['mxml']
    filenames = ['*.mxml']
    # Was misspelt ``mimetimes``, which left the lexer without any
    # ``mimetypes`` attribute of its own.
    mimetypes = ['text/xml', 'application/xml']

    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            # The content of a CDATA section is lexed as ActionScript 3.
            (r'(<\!\[CDATA\[)(.*?)(\]\]>)',
             bygroups(String, using(ActionScript3Lexer), String)),
            ('<!--', Comment, 'comment'),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            (r'<\s*[a-zA-Z0-9:._-]+', Name.Tag, 'tag'),
            (r'<\s*/\s*[a-zA-Z0-9:._-]+\s*>', Name.Tag),
        ],
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        'tag': [
            (r'\s+', Text),
            (r'[a-zA-Z0-9_.:-]+\s*=', Name.Attribute, 'attr'),
            (r'/?\s*>', Name.Tag, '#pop'),
        ],
        'attr': [
            # Raw string: ``\s`` in a plain string literal is an invalid
            # escape sequence.
            (r'\s+', Text),
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }
00723         }