Back to index

moin  1.9.0~rc2
pep8.py
Go to the documentation of this file.
00001 #!/usr/bin/python
00002 # pep8.py - Check Python source code formatting, according to PEP 8
00003 # Copyright (C) 2006 Johann C. Rocholl <johann@browsershots.org>
00004 #
00005 # Permission is hereby granted, free of charge, to any person
00006 # obtaining a copy of this software and associated documentation files
00007 # (the "Software"), to deal in the Software without restriction,
00008 # including without limitation the rights to use, copy, modify, merge,
00009 # publish, distribute, sublicense, and/or sell copies of the Software,
00010 # and to permit persons to whom the Software is furnished to do so,
00011 # subject to the following conditions:
00012 #
00013 # The above copyright notice and this permission notice shall be
00014 # included in all copies or substantial portions of the Software.
00015 #
00016 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00017 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00018 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00019 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
00020 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
00021 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
00022 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
00023 # SOFTWARE.
00024 
00025 """
00026 Check Python source code formatting, according to PEP 8:
00027 http://www.python.org/dev/peps/pep-0008/
00028 
00029 For usage and a list of options, try this:
00030 $ python pep8.py -h
00031 
00032 This program and its regression test suite live here:
00033 http://svn.browsershots.org/trunk/devtools/pep8/
00034 http://trac.browsershots.org/browser/trunk/devtools/pep8/
00035 
00036 Groups of errors and warnings:
00037 E errors
00038 W warnings
00039 100 indentation
00040 200 whitespace
00041 300 blank lines
00042 400 imports
00043 500 line length
00044 600 deprecation
00045 700 statements
00046 
00047 You can add checks to this program by writing plugins. Each plugin is
00048 a simple function that is called for each line of source code, either
00049 physical or logical.
00050 
00051 Physical line:
00052 - Raw line of text from the input file.
00053 
00054 Logical line:
00055 - Multi-line statements converted to a single line.
00056 - Stripped left and right.
00057 - Contents of strings replaced with 'xxx' of same length.
00058 - Comments removed.
00059 
00060 The check function requests physical or logical lines by the name of
00061 the first argument:
00062 
00063 def maximum_line_length(physical_line)
00064 def extraneous_whitespace(logical_line)
00065 def blank_lines(logical_line, blank_lines, indent_level, line_number)
00066 
00067 The last example above demonstrates how check plugins can request
00068 additional information with extra arguments. All attributes of the
00069 Checker object are available. Some examples:
00070 
00071 lines: a list of the raw lines from the input file
00072 tokens: the tokens that contribute to this logical line
00073 line_number: line number in the input file
00074 blank_lines: blank lines before this one
00075 indent_char: first indentation character in this file (' ' or '\t')
00076 indent_level: indentation (with tabs expanded to multiples of 8)
00077 previous_indent_level: indentation on previous line
00078 previous_logical: previous logical line
00079 
00080 The docstring of each check function shall be the relevant part of
00081 text from PEP 8. It is printed if the user enables --show-pep8.
00082 
00083 """
00084 
00085 import os
00086 import sys
00087 import re
00088 import time
00089 import inspect
00090 import tokenize
00091 from optparse import OptionParser
00092 from keyword import iskeyword
00093 from fnmatch import fnmatch
00094 
# Version of this checker.  NOTE(review): '$Rev$' looks like a version
# control keyword placeholder -- confirm it is expanded on checkout.
__version__ = '0.2.0'
__revision__ = '$Rev$'

# Comma-separated fnmatch-style patterns.  NOTE(review): presumably the
# default value of the exclude option -- confirm in process_options().
default_exclude = '.svn,CVS,*.pyc,*.pyo'

# Pre-compiled patterns shared by the check functions below.
# Leading run of spaces and tabs of a line (group 1, may be empty).
indent_match = re.compile(r'([ \t]*)').match
# "raise Name," at the start of a line; group 1 is the comma.
raise_comma_match = re.compile(r'raise\s+\w+\s*(,)').match
# "== True" / "!= False" (group 1) or the mirrored "True ==" form (group 3).
equals_boolean_search = re.compile(r'([!=]=\s*(True|False))|((True|False)\s*[!=]=)').search
# "== None" / "!= None" (group 1) or the mirrored "None ==" form (group 2).
equals_None_search = re.compile(r'([!=]=\s*None)|(None\s*[!=]=)').search

# Whole line in which an operator (group 1) has a non-whitespace character
# directly on both sides; the text before it must not contain '(' or '[' and
# the text after it must not contain ')' or ']'.
not_one_ws_around_operators_match = re.compile(r'^[^\(\[]+[^\s](\+|\-|\*|/|%|\^|&|\||=|<|>|>>|<<|\+=|\-=|\*=|/=|%=|\^=|&=|\|=|==|<=|>=|>>=|<<=|!=|<>)[^\s][^\)\]]+$').match

# Operator strings (symbols and keyword operators) that
# extra_whitespace_around_operator() searches for, as a list of words.
operators = """
+  -  *  /  %  ^  &  |  =  <  >  >>  <<
+= -= *= /= %= ^= &= |= == <= >= >>= <<=
!= <> :
in is or not and
""".split()

# Module-wide state; process_options() declares both names global.
options = None
args = None
00116 
00117 
00118 ##############################################################################
00119 # Plugins (check functions) for physical lines
00120 ##############################################################################
00121 
00122 
def tabs_or_spaces(physical_line, indent_char):
    """
    Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only.  The
    second-most popular way is with tabs only.  Code indented with a mixture
    of tabs and spaces should be converted to using spaces exclusively.  When
    invoking the Python command line interpreter with the -t option, it issues
    warnings about code that illegally mixes tabs and spaces.  When using -tt
    these warnings become errors.  These options are highly recommended!
    """
    # Walk the leading whitespace and report the first character that
    # differs from the indent character established for this file.
    leading = indent_match(physical_line).group(1)
    position = 0
    while position < len(leading):
        if leading[position] != indent_char:
            return position, "E101 indentation contains mixed spaces and tabs"
        position += 1
00138 
00139 
def tabs_obsolete(physical_line):
    """
    For new projects, spaces-only are strongly recommended over tabs.  Most
    editors have features that make this easy to do.
    """
    # Report the column of the first tab inside the leading whitespace.
    leading = indent_match(physical_line).group(1)
    tab_at = leading.find('\t')
    if tab_at != -1:
        return tab_at, "W191 indentation contains tabs"
00148 
00149 
def trailing_whitespace(physical_line):
    """
    JCR: Trailing whitespace is superfluous.
    """
    # Peel off the line terminators one kind at a time, in this order:
    # newline chr(10), carriage return chr(13), form feed chr(12).
    body = physical_line
    for terminator in ('\n', '\r', '\x0c'):
        body = body.rstrip(terminator)
    bare = body.rstrip()
    if bare == body:
        return None
    # The offset is the column where the superfluous whitespace starts.
    return len(bare), "W291 trailing whitespace"
00160 
00161 
def trailing_blank_lines(physical_line, lines, line_number):
    """
    JCR: Trailing blank lines are superfluous.
    """
    # Only the very last line of the file can trigger this warning.
    is_last = line_number == len(lines)
    if is_last and not physical_line.strip():
        return 0, "W391 blank line at end of file"
00168 
00169 
def missing_newline(physical_line):
    """
    JCR: The last line should have a newline.
    """
    # rstrip() only removes characters from the end, so an unchanged length
    # means the line has no trailing whitespace at all, newline included.
    unterminated = len(physical_line.rstrip()) == len(physical_line)
    if not unterminated:
        return None
    return len(physical_line), "W292 no newline at end of file"
00176 
00177 
def maximum_line_length(physical_line):
    """
    Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to have
    several windows side-by-side.  The default wrapping on such devices looks
    ugly.  Therefore, please limit all lines to a maximum of 79 characters.
    For flowing long blocks of text (docstrings or comments), limiting the
    length to 72 characters is recommended.
    """
    # Trailing whitespace (including the newline) does not count.
    visible = physical_line.rstrip()
    if len(visible) <= 79:
        return None
    return 79, "E501 line too long (%d characters)" % len(visible)
00192 
00193 
def crlf_lines(physical_line):
    """
    Line contains CR (e.g. as a CRLF line ending).

    Many free software projects have a strong focus on POSIX platforms (like
    Linux, *BSD, Unix, Mac OS X, etc.) and they all use LF-only line endings.
    Only Win32 platform uses CRLF line endings.
    So if you have a Win32-only source code using CRLF line endings, you might
    want to exclude this test.
    """
    # Report the column of the first carriage return, wherever it is.
    if '\r' in physical_line:
        return physical_line.index('\r'), "W293 line contains CR char(s)"
00207 
00208 
00209 ##############################################################################
00210 # Plugins (check functions) for logical lines
00211 ##############################################################################
00212 
00213 
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                previous_logical):
    """
    Separate top-level function and class definitions with two blank lines.

    Method definitions inside a class are separated by a single blank line.

    Extra blank lines may be used (sparingly) to separate groups of related
    functions.  Blank lines may be omitted between a bunch of related
    one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical sections.
    """
    # Nothing is expected before the first line of the file or directly
    # after a function decorator.
    if line_number == 1 or previous_logical.startswith('@'):
        return None
    starts_definition = False
    for opener in ('def ', 'class ', '@'):
        if logical_line.startswith(opener):
            starts_definition = True
            break
    if starts_definition:
        if indent_level > 0:
            # Nested definition (e.g. a method): exactly one blank line.
            if blank_lines != 1:
                return 0, "E301 expected 1 blank line, found %d" % blank_lines
        elif indent_level == 0 and blank_lines != 2:
            # Top-level definition: exactly two blank lines.
            return 0, "E302 expected 2 blank lines, found %d" % blank_lines
    if blank_lines > 2:
        return 0, "E303 too many blank lines (%d)" % blank_lines
00240 
00241 
def extraneous_whitespace(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately inside parentheses, brackets or braces.

    - Immediately before a comma, semicolon, or colon.
    """
    # Space right after an opening bracket.
    for bracket in '([{':
        where = logical_line.find(bracket + ' ')
        if where != -1:
            return where + 1, "E201 whitespace after '%s'" % bracket
    # Space right before a closing bracket; tolerated after a comma.
    for bracket in '}])':
        where = logical_line.find(' ' + bracket)
        if where != -1 and logical_line[where - 1] != ',':
            return where, "E202 whitespace before '%s'" % bracket
    # Space right before a separator.
    for separator in ',;:':
        where = logical_line.find(' ' + separator)
        if where != -1:
            return where, "E203 whitespace before '%s'" % separator
00263 
00264 
def missing_whitespace(logical_line):
    """
    JCR: Each comma, semicolon or colon should be followed by whitespace.
    """
    # The last character is never inspected: nothing can follow it.
    for position, char in enumerate(logical_line[:-1]):
        if char not in ',;:' or logical_line[position + 1] == ' ':
            continue
        if char == ':':
            prefix = logical_line[:position]
            # More '[' than ']' so far: slice syntax, no space required.
            if prefix.count('[') > prefix.count(']'):
                continue
        return position, "E231 missing whitespace after '%s'" % char
00277 
00278 
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    """
    Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.
    """
    # The multiple-of-four rule only applies to space-indented files.
    if indent_char == ' ' and indent_level % 4:
        return 0, "E111 indentation is not a multiple of four"
    opens_block = previous_logical.endswith(':')
    deeper = indent_level > previous_indent_level
    if opens_block and not deeper:
        return 0, "E112 expected an indented block"
    if deeper and not opens_block:
        return 0, "E113 unexpected indentation"
00294 
00295 
def whitespace_before_parameters(logical_line, tokens):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately before the open parenthesis that starts the argument
      list of a function call.

    - Immediately before the open parenthesis that starts an indexing or
      slicing.
    """
    previous = tokens[0]
    for position in range(1, len(tokens)):
        current = tokens[position]
        token_type, text, start, end, source_line = current
        # An opening bracket that does not start where the preceding name
        # token ended has whitespace in front of it.
        suspicious = (token_type == tokenize.OP and
                      text in '([' and
                      start != previous[3] and
                      previous[0] == tokenize.NAME)
        if suspicious:
            # "class Name (Base)" and "keyword (" are not calls.
            after_class = (position >= 2 and
                           tokens[position - 2][1] == 'class')
            if not after_class and not iskeyword(previous[1]):
                return previous[3], "E211 whitespace before '%s'" % text
        previous = current
00321 
00322 
def extra_whitespace_around_operator(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.
    """
    for operator in operators:
        # For each operator the four probes are tried in this fixed order.
        probes = (
            ('  ' + operator, "E221 multiple spaces before operator"),
            (operator + '  ', "E222 multiple spaces after operator"),
            ('\t' + operator, "E223 tab before operator"),
            (operator + '\t', "E224 tab after operator"),
        )
        for needle, complaint in probes:
            where = logical_line.find(needle)
            if where > -1:
                return where, complaint
00344 
00345 
def whitespace_around_operator(logical_line):
    """
    Have exactly 1 space left and right of the operator.
    """
    found = not_one_ws_around_operators_match(logical_line)
    # Any line mentioning 'lambda' is exempt from this check.
    if not found or 'lambda' in logical_line:
        return None
    # The offending logical line is appended to the message text.
    return found.start(1), ("E225 operators shall be surrounded by a "
                            "single space on each side %s" % logical_line)
00353 
00354 
def whitespace_around_comma(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.

    JCR: This should also be applied around comma etc.
    """
    # Per separator: first look for a double space, then for a tab.
    fillers = (('  ', "E241 multiple spaces after '%s'"),
               ('\t', "E242 tab after '%s'"))
    for separator in ',;:':
        for filler, complaint in fillers:
            where = logical_line.find(separator + filler)
            if where > -1:
                # Point at the whitespace, not at the separator itself.
                return where + 1, complaint % separator
00372 
00373 
def imports_on_separate_lines(logical_line):
    """
    Imports should usually be on separate lines.
    """
    # Only plain "import a, b" is checked; "from x import a, b" is not.
    if not logical_line.startswith('import '):
        return None
    comma_at = logical_line.find(',')
    if comma_at > -1:
        return comma_at, "E401 multiple imports on one line"
00383 
00384 
def compound_statements(logical_line):
    """
    Compound statements (multiple statements on the same line) are
    generally discouraged.
    """
    colon_at = logical_line.find(':')
    # A colon that ends the line just opens a block; only look at colons
    # that are followed by more text.
    if 0 <= colon_at < len(logical_line) - 1:
        head = logical_line[:colon_at]
        inside_dict = head.count('{') > head.count('}')    # {'a': 1}
        inside_slice = head.count('[') > head.count(']')   # [1:2]
        is_lambda = re.search(r'\blambda\b', head)         # lambda x: x
        if not (inside_dict or inside_slice or is_lambda):
            return colon_at, "E701 multiple statements on one line (colon)"
    semicolon_at = logical_line.find(';')
    if semicolon_at > -1:
        return semicolon_at, "E702 multiple statements on one line (semicolon)"
00401 
00402 
def python_3000_has_key(logical_line):
    """
    The {}.has_key() method will be removed in the future version of
    Python. Use the 'in' operation instead, like:
    d = {"a": 1, "b": 2}
    if "b" in d:
        print d["b"]
    """
    needle = '.has_key('
    if needle in logical_line:
        # Report the column of the dot of the first occurrence.
        return logical_line.index(needle), "W601 .has_key() is deprecated, use 'in'"
00414 
00415 
def python_3000_raise_comma(logical_line):
    """
    When raising an exception, use "raise ValueError('message')"
    instead of the older form "raise ValueError, 'message'".

    The paren-using form is preferred because when the exception arguments
    are long or include string formatting, you don't need to use line
    continuation characters thanks to the containing parentheses.  The older
    form will be removed in Python 3000.
    """
    hit = raise_comma_match(logical_line)
    if hit is None:
        return None
    # Group 1 of the pattern is the comma after the exception name.
    return hit.start(1), "W602 deprecated form of raising exception"
00429 
00430 
def dumb_equals_boolean(logical_line):
    """
    Using "if x == True:" or "if x == False:" is wrong in any case:

    First if you already have a boolean, you don't need to compare it to
    another boolean. Just use "if x:" or "if not x:".

    Second, even if you have some sort of "tristate logic", not only using
    True/False, but other values, then you want to use "if x is True:" or
    "if x is False:" because there is exactly one True and one False object.
    """
    match = equals_boolean_search(logical_line)
    if match:
        # Use the start of the whole match.  The pattern has two
        # alternatives ("== True" and the mirrored "True =="); group 1 only
        # takes part in the first one, so match.start(1) would be -1 for the
        # mirrored form and yield a bogus offset.
        return match.start(), "E798 don't use 'x == <boolean>', but just 'x' or 'not x' or 'x is <boolean>'"
00445 
00446 
def dumb_equals_None(logical_line):
    """
    Using "if x == None:" is wrong in any case:

    You either want to use "if x is None:" (there is only 1 None object) or -
    in some simple cases - just "if not x:".
    """
    match = equals_None_search(logical_line)
    if match:
        # Use the start of the whole match.  Group 1 only takes part in the
        # "== None" alternative; for the mirrored "None ==" form
        # match.start(1) would be -1 and yield a bogus offset.
        return match.start(), "E799 don't use 'x == None', but just 'x is None' or 'not x'"
00457 
00458 
00459 ##############################################################################
00460 # Helper functions
00461 ##############################################################################
00462 
00463 
def expand_indent(line):
    """
    Return the amount of indentation.
    Tabs are expanded to the next multiple of 8.

    Only leading spaces and tabs are counted; scanning stops at the first
    other character.  The result is always an integer.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\\t')
    8
    >>> expand_indent('    \\t')
    8
    >>> expand_indent('       \\t')
    8
    >>> expand_indent('        \\t')
    16
    """
    result = 0
    for char in line:
        if char == '\t':
            # Floor division keeps the result an integer even where '/'
            # means true division.
            result = result // 8 * 8 + 8
        elif char == ' ':
            result += 1
        else:
            break
    return result
00489 
00490 
00491 ##############################################################################
00492 # Framework to run all checks
00493 ##############################################################################
00494 
00495 
def message(text):
    """Print a message on standard output."""
    # Alternative variants that write to stderr instead, optionally prefixed
    # with the program name; currently disabled:
    # print >> sys.stderr, options.prog + ': ' + text
    # print >> sys.stderr, text
    print text
00501 
00502 
def find_checks(argument_name):
    """
    Find all globally visible functions where the first argument name
    starts with argument_name.

    Returns a list of (name, function, argument_names) tuples, sorted by
    function name.
    """
    # inspect.getargspec() does not exist on newer interpreters; use its
    # replacement when available.  Index 0 of either result is the list of
    # positional argument names.
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    function_type = type(find_checks)
    checks = []
    # items() is available everywhere iteritems() is, and the snapshot list
    # guards against the namespace changing while it is scanned.
    for name, candidate in list(globals().items()):
        if type(candidate) is not function_type:
            continue
        argument_names = getargspec(candidate)[0]
        if argument_names and argument_names[0].startswith(argument_name):
            checks.append((name, candidate, argument_names))
    checks.sort()
    return checks
00517 
00518 
def mute_string(text):
    """
    Blank out the body of a string literal with 'x' characters of the same
    length, so that later checks do not match syntax inside strings.
    Prefix letters and the quotes themselves are kept.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    first = 1
    last = len(text) - 1
    # Skip string modifiers (e.g. u or r) in front of the opening quote.
    for quote in ('"', "'"):
        if text.endswith(quote):
            first += text.index(quote)
            break
    # Triple-quoted strings have two more quote characters on each side.
    if text[-3:] in ('"""', "'''"):
        first += 2
        last -= 2
    return ''.join([text[:first], 'x' * (last - first), text[last:]])
00542 
00543 
class Checker:
    """
    Load a Python source file, tokenize it, check coding style.

    The per-run state (line_number, indent_char, indent_level, blank_lines,
    tokens, previous_logical, file_errors) is initialised by check_all().
    Check plugins receive their arguments by name from the attributes of
    this object, see run_check().
    """

    def __init__(self, filename):
        """
        Read all lines of filename and collect the check plugins.
        """
        self.filename = filename
        # Close the file explicitly instead of leaving the handle open
        # until it happens to be garbage collected.
        source = open(filename, 'rb')
        try:
            self.lines = source.readlines()
        finally:
            source.close()
        self.physical_checks = find_checks('physical_line')
        self.logical_checks = find_checks('logical_line')
        options.counters['physical lines'] = \
            options.counters.get('physical lines', 0) + len(self.lines)

    def readline(self):
        """
        Get the next line from the input buffer.

        Returns '' once all lines have been consumed.
        """
        self.line_number += 1
        if self.line_number > len(self.lines):
            return ''
        return self.lines[self.line_number - 1]

    def readline_check_physical(self):
        """
        Check and return the next physical line. This method can be
        used to feed tokenize.generate_tokens.
        """
        line = self.readline()
        if line:
            self.check_physical(line)
        return line

    def run_check(self, check, argument_names):
        """
        Run a check plugin.

        Every name in argument_names is looked up as an attribute of this
        object and passed to check positionally.
        """
        arguments = [getattr(self, name) for name in argument_names]
        return check(*arguments)

    def check_physical(self, line):
        """
        Run all physical checks on a raw input line.
        """
        self.physical_line = line
        # The first indented line decides the indent character of the file.
        if self.indent_char is None and len(line) and line[0] in ' \t':
            self.indent_char = line[0]
        for name, check, argument_names in self.physical_checks:
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                self.report_error(self.line_number, offset, text, check)

    def build_tokens_line(self):
        """
        Build a logical line from tokens.

        Sets self.logical_line and self.mapping, a list of
        (offset in logical line, token) pairs used to translate offsets
        back to positions in the input file.
        """
        self.mapping = []
        logical = []
        length = 0
        previous = None
        for token in self.tokens:
            token_type, text = token[0:2]
            if token_type in (tokenize.COMMENT, tokenize.NL,
                              tokenize.INDENT, tokenize.DEDENT,
                              tokenize.NEWLINE):
                continue
            if token_type == tokenize.STRING:
                text = mute_string(text)
            if previous:
                end_line, end = previous[3]
                start_line, start = token[2]
                if end_line != start_line: # different row
                    # Join continuation lines with one space, except right
                    # after an opening bracket.
                    if self.lines[end_line - 1][end - 1] not in '{[(':
                        logical.append(' ')
                        length += 1
                elif end != start: # different column
                    # Keep the original whitespace between the two tokens.
                    fill = self.lines[end_line - 1][end:start]
                    logical.append(fill)
                    length += len(fill)
            self.mapping.append((length, token))
            logical.append(text)
            length += len(text)
            previous = token
        self.logical_line = ''.join(logical)
        assert self.logical_line.lstrip() == self.logical_line
        assert self.logical_line.rstrip() == self.logical_line

    def check_logical(self):
        """
        Build a line from tokens and run all logical checks on it.
        """
        options.counters['logical lines'] = \
            options.counters.get('logical lines', 0) + 1
        self.build_tokens_line()
        # Start position (row, column) of the first token of this line.
        first_row, first_column = self.mapping[0][1][2]
        indent = self.lines[first_row - 1][:first_column]
        self.previous_indent_level = self.indent_level
        self.indent_level = expand_indent(indent)
        if options.verbose >= 2:
            message(self.logical_line[:80].rstrip())
        for name, check, argument_names in self.logical_checks:
            if options.verbose >= 3:
                message('    ' + name)
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                if isinstance(offset, tuple):
                    # The check already returned a (row, column) position.
                    original_number, original_offset = offset
                else:
                    # Fall back to the first token, so that an offset that
                    # lies before every mapping entry cannot leave the
                    # position unbound (or stale from an earlier check).
                    original_number, original_offset = first_row, first_column
                    # The last token starting at or before offset wins.
                    for token_offset, token in self.mapping:
                        if offset >= token_offset:
                            original_number = token[2][0]
                            original_offset = (token[2][1]
                                               + offset - token_offset)
                self.report_error(original_number, original_offset,
                                  text, check)
        self.previous_logical = self.logical_line

    def check_all(self):
        """
        Run all checks on the input file.

        Returns the number of reported (not ignored) errors and warnings.
        """
        self.file_errors = 0
        self.line_number = 0
        self.indent_char = None
        self.indent_level = 0
        self.previous_logical = ''
        self.blank_lines = 0
        self.tokens = []
        parens = 0
        for token in tokenize.generate_tokens(self.readline_check_physical):
            # print tokenize.tok_name[token[0]], repr(token)
            self.tokens.append(token)
            token_type, text = token[0:2]
            if token_type == tokenize.OP and text in '([{':
                parens += 1
            if token_type == tokenize.OP and text in '}])':
                parens -= 1
            # A NEWLINE outside of brackets ends a logical line.
            if token_type == tokenize.NEWLINE and not parens:
                self.check_logical()
                self.blank_lines = 0
                self.tokens = []
            # An NL outside of brackets counts as a blank line.
            if token_type == tokenize.NL and not parens:
                self.blank_lines += 1
                self.tokens = []
            if token_type == tokenize.COMMENT:
                self.blank_lines = 0
        return self.file_errors

    def report_error(self, line_number, offset, text, check):
        """
        Report an error, according to options.

        text starts with the four character message code; check is the
        plugin function that produced it (its docstring is shown with
        options.show_pep8).
        """
        code = text[:4]
        # Decide about ignored codes first, so that with a single -q the
        # file name is printed only for files with errors that count, and
        # only once.
        if ignore_code(code):
            return
        if options.quiet == 1 and not self.file_errors:
            message(self.filename)
        self.file_errors += 1
        options.counters[code] = options.counters.get(code, 0) + 1
        options.messages[code] = text[5:]
        if options.quiet:
            return
        if options.testsuite:
            # In test suite mode the first four characters of the file name
            # are the code the file is expected to trigger.
            base = os.path.basename(self.filename)[:4]
            if base == code:
                return
            if base[0] == 'E' and code[0] == 'W':
                return
        # Without options.repeat only the first occurrence of each code is
        # printed.
        if options.counters[code] == 1 or options.repeat:
            message("%s:%s:%d: %s" %
                    (self.filename, line_number, offset + 1, text))
            if options.show_source:
                line = self.lines[line_number - 1]
                message(line.rstrip())
                message(' ' * offset + '^')
            if options.show_pep8:
                message(check.__doc__.lstrip('\n').rstrip())
00725 
def input_file(filename):
    """
    Check a single Python source file, unless it is excluded or does not
    match the requested file name patterns.
    """
    if excluded(filename):
        return {}
    if not filename_match(filename):
        return {}
    if options.verbose:
        message('checking ' + filename)
    options.counters['files'] = options.counters.get('files', 0) + 1
    error_count = Checker(filename).check_all()
    # In test suite mode a file without any finding is reported as well.
    if options.testsuite and not error_count:
        message("%s: %s" % (filename, "no errors found"))
00738 
00739 
def input_dir(dirname):
    """
    Check all Python source files in this directory and all subdirectories.

    Directories matching an exclude pattern are neither checked nor
    descended into.
    """
    dirname = dirname.rstrip('/')
    if excluded(dirname):
        return
    for root, dirs, files in os.walk(dirname):
        if options.verbose:
            message('directory ' + root)
        options.counters['directories'] = \
            options.counters.get('directories', 0) + 1
        # Prune excluded subdirectories with a slice assignment: os.walk
        # only honours in-place changes of dirs, and removing entries while
        # iterating over the same list would skip the entry that follows
        # each removed one.
        dirs[:] = sorted([subdir for subdir in dirs if not excluded(subdir)])
        files.sort()
        for filename in files:
            input_file(os.path.join(root, filename))
00759 
00760 
def excluded(filename):
    """
    Tell whether the base name of filename matches one of the patterns in
    options.exclude.  Returns True on a match, None otherwise.
    """
    leaf = os.path.basename(filename)
    for glob in options.exclude:
        if not fnmatch(leaf, glob):
            continue
        return True
    return None
00770 
00771 
def filename_match(filename):
    """
    Tell whether filename matches one of the patterns in options.filename.
    Without any patterns every file name matches.  Returns True on a match,
    None otherwise.
    """
    patterns = options.filename
    if patterns:
        for glob in patterns:
            if fnmatch(filename, glob):
                return True
        return None
    return True
00782 
00783 
def ignore_code(code):
    """
    Tell whether one of the entries in options.ignore is a prefix of the
    message code.  Returns True if so, None otherwise.
    """
    for prefix in options.ignore:
        if code[:len(prefix)] == prefix:
            return True
    return None
00791 
00792 
def get_error_statistics():
    """Return the statistics lines for all error codes (prefix 'E')."""
    return get_statistics(prefix="E")
00796 
00797 
def get_warning_statistics():
    """Return the statistics lines for all codes starting with "W"."""
    return get_statistics(prefix="W")
00801 
00802 
def get_statistics(prefix=''):
    """
    Get statistics for message codes that start with the prefix.

    prefix='' matches all errors and warnings
    prefix='E' matches all errors
    prefix='W' matches all warnings
    prefix='E4' matches all errors that have to do with imports

    Return a list of formatted lines, one per matching code in sorted
    order, each holding the count from options.counters, the code and
    the text from options.messages.
    """
    # sorted() accepts both the list returned by dict.keys() on Python 2
    # and the view returned on Python 3, which has no .sort() method.
    return ['%-7s %s %s' %
            (options.counters[key], key, options.messages[key])
            for key in sorted(options.messages)
            if key.startswith(prefix)]
00820 
00821 
def print_statistics(prefix=''):
    """
    Print overall statistics (number of errors and warnings).

    Writes every line returned by get_statistics(prefix) to standard
    output.
    """
    for line in get_statistics(prefix):
        # Parenthesized single-argument form prints the same text under
        # both the Python 2 print statement and the Python 3 function.
        print(line)
00826 
00827 
def print_benchmark(elapsed):
    """
    Print benchmark numbers.

    Prints the elapsed time in seconds, followed by one line for each of
    the 'directories', 'files', 'logical lines' and 'physical lines'
    counters present in options.counters, showing the rate per second
    and the total.  If elapsed is not positive, the rate is shown as
    'n/a' instead of dividing by zero.
    """
    # Parenthesized single-argument print works on Python 2 and 3 alike.
    print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
    for key in ('directories', 'files', 'logical lines', 'physical lines'):
        if key not in options.counters:
            continue
        total = options.counters[key]
        if elapsed > 0:
            rate = '%-7d' % (total / elapsed)
        else:
            # A very fast run with a coarse clock can measure 0 seconds.
            rate = '%-7s' % 'n/a'
        print('%s %s per second (%d total)' % (rate, key, total))
00840 
00841 
def process_options(arglist=None):
    """
    Process options passed either via arglist or via command line args.

    The parsed result is stored in the module-level names options and
    args and is also returned as an (options, args) tuple.  After
    parsing:

    - the --testsuite directory, if given, is appended to args;
    - parser.error() is called when args is empty;
    - options.exclude is a list of patterns without trailing slashes;
    - options.filename is a list of patterns if it was specified;
    - options.ignore is a list of code prefixes (empty if unspecified);
    - options.counters and options.messages are reset to empty dicts.
    """
    global options, args
    usage = "%prog [options] input ..."
    parser = OptionParser(usage)
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help="print status messages, or debug with -vv")
    parser.add_option('-q', '--quiet', default=0, action='count',
                      help="report only file names, or nothing with -qq")
    parser.add_option('--exclude', metavar='patterns', default=default_exclude,
                      help="skip matches (default %s)" % default_exclude)
    parser.add_option('--filename', metavar='patterns',
                      help="only check matching files (e.g. *.py)")
    parser.add_option('--ignore', metavar='errors', default='',
                      help="skip errors and warnings (e.g. E4,W)")
    parser.add_option('--repeat', action='store_true',
                      help="show all occurrences of the same error")
    parser.add_option('--show-source', action='store_true',
                      help="show source code for each error")
    parser.add_option('--show-pep8', action='store_true',
                      help="show text of PEP 8 for each error")
    parser.add_option('--statistics', action='store_true',
                      help="count errors and warnings")
    parser.add_option('--benchmark', action='store_true',
                      help="measure processing speed")
    parser.add_option('--testsuite', metavar='dir',
                      help="run regression tests from dir")
    parser.add_option('--doctest', action='store_true',
                      help="run doctest on myself")
    options, args = parser.parse_args(arglist)
    if options.testsuite:
        # The test suite directory is checked like any other input.
        args.append(options.testsuite)
    if not args:
        parser.error('input not specified')
    options.prog = os.path.basename(sys.argv[0])
    # Trailing slashes are dropped so that a pattern such as "build/"
    # can match a directory's base name.
    options.exclude = [pattern.rstrip('/')
                       for pattern in options.exclude.split(',')]
    if options.filename:
        options.filename = options.filename.split(',')
    if options.ignore:
        options.ignore = options.ignore.split(',')
    else:
        options.ignore = []
    options.counters = {}
    options.messages = {}

    return options, args
00892 
00893 
def _main():
    """
    Command line entry point: parse options, then check each input path.

    With --doctest the module's doctests are run and their result is
    returned instead.  Otherwise directories go to input_dir() and all
    other paths to input_file(); statistics and benchmark numbers are
    printed afterwards when requested.
    """
    options, args = process_options()
    if options.doctest:
        import doctest
        return doctest.testmod()
    started = time.time()
    for target in args:
        if not os.path.isdir(target):
            input_file(target)
        else:
            input_dir(target)
    elapsed = time.time() - started
    if options.statistics:
        print_statistics()
    if options.benchmark:
        print_benchmark(elapsed)
00913 
00914 
# Run the checker only when executed as a script, not when imported.
if __name__ == "__main__":
    _main()