Back to index

moin  1.9.0~rc2
asm.py
Go to the documentation of this file.
00001 # -*- coding: utf-8 -*-
00002 """
00003     pygments.lexers.asm
00004     ~~~~~~~~~~~~~~~~~~~
00005 
00006     Lexers for assembly languages.
00007 
00008     :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
00009     :license: BSD, see LICENSE for details.
00010 """
00011 
00012 import re
00013 try:
00014     set
00015 except NameError:
00016     from sets import Set as set
00017 
00018 from pygments.lexer import RegexLexer, include, bygroups, using, DelegatingLexer
00019 from pygments.lexers.compiled import DLexer, CppLexer, CLexer
00020 from pygments.token import *
00021 
00022 __all__ = ['GasLexer', 'ObjdumpLexer','DObjdumpLexer', 'CppObjdumpLexer',
00023            'CObjdumpLexer', 'LlvmLexer', 'NasmLexer']
00024 
00025 
class GasLexer(RegexLexer):
    """
    For Gas (AT&T) assembly code.
    """
    name = 'GAS'
    aliases = ['gas']
    filenames = ['*.s', '*.S']
    mimetypes = ['text/x-gas']

    # Regex fragments shared by the token rules below.
    #: double-quoted string literal; ``\"`` does not terminate it
    string = r'"(\\"|[^"])*"'
    #: one character allowed inside an identifier
    char = r'[a-zA-Z$._0-9@]'
    #: a symbol name, or a dot followed by at least one identifier character
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    #: ``0x``-prefixed or plain decimal integer
    number = r'(?:0[xX][a-zA-Z0-9]+|\d+)'

    tokens = {
        'root': [
            include('whitespace'),
            (identifier + ':', Name.Label),
            (r'\.' + identifier, Name.Attribute, 'directive-args'),
            (r'lock|rep(n?z)?|data\d+', Name.Attribute),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'directive-args': [
            (identifier, Name.Constant),
            (string, String),
            ('@' + identifier, Name.Attribute),
            (number, Number.Integer),
            (r'[\r\n]+', Text, '#pop'),

            (r'#.*?$', Comment, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'instruction-args': [
            # For objdump-disassembled code, shouldn't occur in
            # actual assembler input
            ('([a-z0-9]+)( )(<)('+identifier+')(>)',
                bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                         Punctuation)),
            ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
                bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                         Punctuation, Number.Integer, Punctuation)),

            # Numeric constants (immediates such as ``$42`` or ``$0x10``).
            # The ``$`` must be escaped, otherwise it is the end-of-line
            # anchor and the rule can never match.  It also has to come
            # before the identifier rule, because ``identifier`` itself
            # accepts a leading ``$`` and would swallow the immediate.
            (r'\$' + number, Number.Integer),
            # Address constants
            (identifier, Name.Constant),
            (number, Number.Integer),
            # Registers
            ('%' + identifier, Name.Variable),
            (r'[\r\n]+', Text, '#pop'),
            (r'#.*?$', Comment, '#pop'),
            include('punctuation'),
            include('whitespace')
        ],
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'#.*?\n', Comment)
        ],
        'punctuation': [
            (r'[-*,.():]+', Punctuation)
        ]
    }

    def analyse_text(text):
        """
        Score how likely *text* is to be Gas input, from 0.0 to 1.0.

        A line starting with ``.text``, ``.data`` or ``.section`` is taken
        as a strong indication; a line starting with any other ``.word``
        directive is only a weak hint.
        """
        # re.search (not re.match) is required for re.M to have any effect:
        # re.match only ever tries the very beginning of the text.
        if re.search(r'^\.(text|data|section)', text, re.M):
            return 1.0
        if re.search(r'^\.\w+', text, re.M):
            return 0.1
        return 0.0
00096 
class ObjdumpLexer(RegexLexer):
    """
    For the output of 'objdump -dr'
    """
    name = 'objdump'
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']

    # Character class used for addresses and opcode bytes.
    # NOTE(review): this accepts every ASCII letter and digit, not only
    # hexadecimal digits; kept as is because the rules below rely on it.
    hex = r'[0-9A-Za-z]'

    tokens = {
        'root': [
            # File name & format:
            ('(.*?)(:)( +file format )(.*?)$',
                bygroups(Name.Label, Punctuation, Text, String)),
            # Section header
            ('(Disassembly of section )(.*?)(:)$',
                bygroups(Text, Name.Label, Punctuation)),
            # Function labels
            # (With offset)
            ('('+hex+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
                bygroups(Number.Hex, Text, Punctuation, Name.Function,
                         Punctuation, Number.Hex, Punctuation)),
            # (Without offset)
            ('('+hex+'+)( )(<)(.*?)(>:)$',
                bygroups(Number.Hex, Text, Punctuation, Name.Function,
                         Punctuation)),
            # Code line with disassembled instructions; the instruction text
            # itself is handed to GasLexer.
            ('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)( *\t)([a-zA-Z].*?)$',
                bygroups(Text, Name.Label, Text, Number.Hex, Text,
                         using(GasLexer))),
            # Code line with ascii
            ('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)( *)(.*?)$',
                bygroups(Text, Name.Label, Text, Number.Hex, Text, String)),
            # Continued code line, only raw opcodes without disassembled
            # instruction
            ('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)$',
                bygroups(Text, Name.Label, Text, Number.Hex)),
            # Skipped a few bytes.  Raw string: ``\.`` is not a valid escape
            # in a normal string literal; the regex engine reads ``\t`` as a
            # tab, so the pattern matches exactly what it did before.
            (r'\t\.\.\.$', Text),
            # Relocation line
            # (With offset)
            ('(\t\t\t)('+hex+'+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex + '+)$',
                bygroups(Text, Name.Label, Text, Name.Property, Text,
                         Name.Constant, Punctuation, Number.Hex)),
            # (Without offset)
            ('(\t\t\t)('+hex+'+:)( )([^\t]+)(\t)(.*?)$',
                bygroups(Text, Name.Label, Text, Name.Property, Text,
                         Name.Constant)),
            # Anything else is emitted as Other, one line at a time.
            ('[^\n]+\n', Other)
        ]
    }
00150 
00151 
class DObjdumpLexer(DelegatingLexer):
    """
    For the output of 'objdump -Sr' when run on compiled D files.
    """
    name = 'd-objdump'
    aliases = ['d-objdump']
    filenames = ['*.d-objdump']
    mimetypes = ['text/x-d-objdump']

    def __init__(self, **options):
        # The two lexer classes are passed positionally to DelegatingLexer,
        # in this order; all keyword options are forwarded untouched.
        first_lexer, second_lexer = DLexer, ObjdumpLexer
        super(DObjdumpLexer, self).__init__(first_lexer, second_lexer,
                                            **options)
00163 
00164 
class CppObjdumpLexer(DelegatingLexer):
    """
    For the output of 'objdump -Sr' when run on compiled C++ files.
    """
    name = 'cpp-objdump'
    # 'c++-objdumb' is a historical misspelling; it is kept so existing
    # users of that alias keep working, next to the intended 'c++-objdump'.
    aliases = ['cpp-objdump', 'c++-objdumb', 'c++-objdump', 'cxx-objdump']
    filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
    mimetypes = ['text/x-cpp-objdump']

    def __init__(self, **options):
        # CppLexer and ObjdumpLexer are handed to DelegatingLexer in this
        # order; all keyword options are forwarded untouched.
        super(CppObjdumpLexer, self).__init__(CppLexer, ObjdumpLexer, **options)
00176 
00177 
class CObjdumpLexer(DelegatingLexer):
    """
    For the output of 'objdump -Sr' when run on compiled C files.
    """
    name = 'c-objdump'
    aliases = ['c-objdump']
    filenames = ['*.c-objdump']
    mimetypes = ['text/x-c-objdump']

    def __init__(self, **options):
        # The two lexer classes are passed positionally to DelegatingLexer,
        # in this order; all keyword options are forwarded untouched.
        first_lexer, second_lexer = CLexer, ObjdumpLexer
        super(CObjdumpLexer, self).__init__(first_lexer, second_lexer,
                                            **options)
00189 
00190 
class LlvmLexer(RegexLexer):
    """
    For LLVM assembly code.
    """
    name = 'LLVM'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']

    # Regex fragments shared by the token rules below.
    #: double-quoted string without embedded quotes
    string = r'"[^"]*?"'
    #: bare name or quoted name
    identifier = r'([a-zA-Z$._][a-zA-Z$._0-9]*|' + string + ')'

    tokens = {
        'root': [
            include('whitespace'),

            # Before keywords, because keywords are valid label names :(...
            # (raw string: ``\s`` is not a valid escape in a normal literal)
            (r'^\s*' + identifier + r'\s*:', Name.Label),

            include('keyword'),

            (r'%' + identifier, Name.Variable),#Name.Identifier.Local),
            (r'@' + identifier, Name.Constant),#Name.Identifier.Global),
            (r'%\d+', Name.Variable.Anonymous),#Name.Identifier.Anonymous),
            (r'c?' + string, String),

            (r'0[xX][a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            (r'[=<>{}\[\]()*.,]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s)+', Text),
            (r';.*?\n', Comment)
        ],
        'keyword': [
            # Regular keywords
            (r'(void|label|float|double|opaque'
             r'|to'
             r'|alias|type'
             r'|zeroext|signext|inreg|sret|noalias|noreturn|nounwind|nest'
             r'|module|asm|target|datalayout|triple'
             r'|true|false|null|zeroinitializer|undef'
             r'|global|internal|external|linkonce|weak|appending|extern_weak'
             r'|dllimport|dllexport'
             r'|ccc|fastcc|coldcc|cc|tail'
             r'|default|hidden|protected'
             r'|thread_local|constant|align|section'
             r'|define|declare'

             # Statements & expressions
             r'|trunc|zext|sext|fptrunc|fpext|fptoui|fptosi|uitofp|sitofp'
             r'|ptrtoint|inttoptr|bitcast|getelementptr|select|icmp|fcmp'
             r'|extractelement|insertelement|shufflevector'
             r'|sideeffect|volatile'
             r'|ret|br|switch|invoke|unwind|unreachable'
             r'|add|sub|mul|udiv|sdiv|fdiv|urem|srem|frem'
             r'|shl|lshr|ashr|and|or|xor'
             r'|malloc|free|alloca|load|store'
             r'|phi|call|va_arg|va_list'

             # Comparison condition codes for icmp
             r'|eq|ne|ugt|uge|ult|ule|sgt|sge|slt|sle'
             # Ditto for fcmp: (minus keywords mentioned in other contexts;
             # ugt/uge are already covered by the icmp line above)
             r'|oeq|ogt|oge|olt|ole|one|ord|ueq|une|uno'

             r')\b', Keyword),
            # Integer types
            (r'i[1-9]\d*', Keyword)
        ]
    }
00263 
00264 
class NasmLexer(RegexLexer):
    """
    For Nasm (Intel) assembly code.
    """
    name = 'NASM'
    aliases = ['nasm']
    filenames = ['*.asm', '*.ASM']
    mimetypes = ['text/x-nasm']

    # Regex fragments shared by the token rules below.  All rules are
    # compiled with re.IGNORECASE (see ``flags``).
    identifier = r'[a-zA-Z$._?][a-zA-Z0-9$._?#@~]*'
    # Hex literals: ``0x..``, ``$0..`` or ``..h``.
    #  * the ``$`` is escaped; unescaped it is the end-of-line anchor and the
    #    ``$0..`` form could never match;
    #  * the ``..h`` form must start with a decimal digit, otherwise the
    #    registers ah/bh/ch/dh would be lexed as hex numbers.
    hexn = r'(?:0[xX][0-9a-fA-F]+|\$0[0-9a-fA-F]*|[0-9][0-9a-fA-F]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    string = r'"(\\"|[^"])*"|' + r"'(\\'|[^'])*'"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'[a-d][lh]|e?[a-d]x|e?[sb]p|e?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]')
    wordop = r'seg|wrt|strict'
    type = r'byte|[dq]?word'
    directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'COMMON|CPU|GROUP|UPPERCASE|IMPORT|EXPORT|LIBRARY|MODULE')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        'root': [
            include('whitespace'),
            (r'^\s*%', Comment.Preproc, 'preproc'),
            (identifier + ':', Name.Label),
            # Keyword alternations are wrapped in ``(?:...)\b`` so that they
            # only match whole words; without the boundary ``cpuid`` would
            # be split into the directive ``cpu`` followed by ``id``.
            (r'(?:%s)\b' % directives, Keyword, 'instruction-args'),
            (r'(%s)\s+(equ)\b' % identifier,
                bygroups(Name.Constant, Keyword.Declaration),
                'instruction-args'),
            (r'(?:%s)\b' % declkw, Keyword.Declaration, 'instruction-args'),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'instruction-args': [
            (string, String),
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            # Whole-word match only, so names that merely start with a
            # register name (e.g. ``axis``) fall through to the identifier
            # rule.
            (r'(?:%s)\b' % register, Name.Builtin),
            (identifier, Name.Variable),
            (r'[\r\n]+', Text, '#pop'),
            include('whitespace')
        ],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'\n', Text),
            (r'[ \t]+', Text),
            (r';.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,():\[\]]+', Punctuation),
            (r'[&|^<>+*/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            # Whole-word match only (see the note in 'root').
            (r'(?:%s)\b' % wordop, Operator.Word),
            (r'(?:%s)\b' % type, Keyword.Type)
        ],
    }
00333     }