Back to index

moin  1.9.0~rc2
lint.py
Go to the documentation of this file.
00001 # -*- coding: utf-8 -*-
00002 """
00003     werkzeug.contrib.lint
00004     ~~~~~~~~~~~~~~~~~~~~~
00005 
00006     .. versionadded:: 0.5
00007 
00008     This module provides a middleware that performs sanity checks of the WSGI
00009     application.  It checks that :pep:`333` is properly implemented and warns
00010     on some common HTTP errors such as non-empty responses for 304 status
00011     codes.
00012 
00013     This module provides a middleware, the :class:`LintMiddleware`.  Wrap your
00014     application with it and it will warn about common problems with WSGI and
00015     HTTP while your application is running.
00016 
00017     It's strongly recommended to use it during development.
00018 
00019     :copyright: (c) 2009 by the Werkzeug Team, see AUTHORS for more details.
00020     :license: BSD, see LICENSE for more details.
00021 """
00022 from urlparse import urlparse
00023 from warnings import warn
00024 from werkzeug.datastructures import Headers
00025 from werkzeug.utils import FileWrapper
00026 from werkzeug.http import is_entity_header
00027 
00028 
class WSGIWarning(Warning):
    """Warning category used by this module for WSGI related problems."""
00031 
00032 
class HTTPWarning(Warning):
    """Warning category used by this module for HTTP related problems."""
00035 
00036 
def check_string(context, obj, stacklevel=3):
    """Emit a :class:`WSGIWarning` if `obj` is not exactly a :class:`str`.

    :param context: name of the place the value came from; it is used as
                    the prefix of the warning message.
    :param obj: the object to check.
    :param stacklevel: forwarded to :func:`warnings.warn`.  With the default
                       of ``3`` the warning is attributed to the caller of
                       the function that called this helper.
    """
    # Exact type comparison on purpose: subclasses of str are reported too.
    if type(obj) is not str:
        warn(WSGIWarning('%s requires bytestrings, got %s' %
                         (context, obj.__class__.__name__)),
             stacklevel=stacklevel)
00041 
00042 
class InputStream(object):
    """Proxy for a ``wsgi.input`` stream that warns about risky calls.

    All calls are forwarded to the wrapped stream; the proxy only adds
    :class:`WSGIWarning` warnings in front of them.
    """

    def __init__(self, stream):
        self._stream = stream

    def read(self, *args):
        """Forward to ``read()`` of the wrapped stream.

        Warns unless exactly one positional argument was passed.
        """
        if not args:
            message = ('wsgi does not guarantee an EOF marker on the '
                       'input stream, thus making calls to '
                       'wsgi.input.read() unsafe.  Conforming servers '
                       'may never return from this call.')
            warn(WSGIWarning(message), stacklevel=2)
        elif len(args) > 1:
            message = 'too many parameters passed to wsgi.input.read()'
            warn(WSGIWarning(message), stacklevel=2)
        return self._stream.read(*args)

    def readline(self, *args):
        """Forward to ``readline()`` of the wrapped stream.

        Always warns (with or without a size hint) and raises
        :exc:`TypeError` if more than one argument was passed.
        """
        argc = len(args)
        if argc > 1:
            raise TypeError('too many arguments passed to wsgi.input.readline()')
        if argc:
            message = ('wsgi.input.readline() was called with a size hint. '
                       'WSGI does not support this, although it\'s available '
                       'on all major servers.')
        else:
            message = ('Calls to wsgi.input.readline() without arguments'
                       ' are unsafe.  Use wsgi.input.read() instead.')
        warn(WSGIWarning(message), stacklevel=2)
        return self._stream.readline(*args)

    def __iter__(self):
        """Iterate over the wrapped stream.

        Falls back to an empty iterator (plus a warning) if the wrapped
        stream is not iterable.
        """
        try:
            iterator = iter(self._stream)
        except TypeError:
            warn(WSGIWarning('wsgi.input is not iterable.'), stacklevel=2)
            iterator = iter(())
        return iterator

    def close(self):
        """Warn that the application closed the stream, then close it."""
        message = 'application closed the input stream!'
        warn(WSGIWarning(message), stacklevel=2)
        self._stream.close()
00085 
00086 
class ErrorStream(object):
    """Proxy for a ``wsgi.errors`` stream that checks what is written.

    Every call is forwarded to the wrapped stream.  Written values are
    validated with :func:`check_string` and closing the stream triggers a
    :class:`WSGIWarning`.
    """

    def __init__(self, stream):
        self._stream = stream

    def write(self, s):
        """Check that `s` is a bytestring and write it to the stream."""
        check_string('wsgi.error.write()', s)
        self._stream.write(s)

    def flush(self):
        """Flush the wrapped stream."""
        self._stream.flush()

    def writelines(self, seq):
        """Write every item of `seq` through :meth:`write`."""
        for line in seq:
            # Write the individual line; passing `seq` itself here would
            # write the whole sequence once per element.
            self.write(line)

    def close(self):
        """Warn that the application closed the stream, then close it."""
        warn(WSGIWarning('application closed the error stream!'),
             stacklevel=2)
        self._stream.close()
00107 
00108 
class GuardedWrite(object):
    """Wrapper for the ``write()`` callable returned by ``start_response``.

    :param write: the callable to forward the data to.
    :param chunks: a list to which the length of every written string is
                   appended.
    """

    def __init__(self, write, chunks):
        self._write = write
        self._chunks = chunks

    def __call__(self, s):
        """Check that `s` is a bytestring, write it and record its length."""
        check_string('write()', s)
        # `_write` is itself the callable; it is not a file-like object
        # and therefore has no `.write` attribute to go through.
        self._write(s)
        self._chunks.append(len(s))
00119 
00120 
00121 class GuardedIterator(object):
00122 
00123     def __init__(self, iterator, headers_set, chunks):
00124         self._iterator = iterator
00125         self._next = iter(iterator).next
00126         self.closed = False
00127         self.headers_set = headers_set
00128         self.chunks = chunks
00129 
00130     def __iter__(self):
00131         return self
00132 
00133     def next(self):
00134         if self.closed:
00135             warn(WSGIWarning('iterated over closed app_iter'),
00136                  stacklevel=2)
00137         rv = self._next()
00138         if not self.headers_set:
00139             warn(WSGIWarning('Application returned before it '
00140                              'started the response'), stacklevel=2)
00141         check_string('application iterator items', rv)
00142         self.chunks.append(len(rv))
00143         return rv
00144 
00145     def close(self):
00146         self.closed = True
00147         if hasattr(self._iterator, 'close'):
00148             self._iterator.close()
00149 
00150         if self.headers_set:
00151             status_code, headers = self.headers_set
00152             bytes_sent = sum(self.chunks)
00153             content_length = headers.get('content-length', type=int)
00154 
00155             if status_code == 304:
00156                 for key, value in headers:
00157                     key = key.lower()
00158                     if key not in ('expires', 'content-location') and \
00159                        is_entity_header(key):
00160                         warn(HTTPWarning('entity header %r found in 304 '
00161                             'response' % key))
00162                 if bytes_sent:
00163                     warn(HTTPWarning('304 responses must not have a body'))
00164             elif 100 <= status_code < 200 or status_code == 204:
00165                 if content_length != 0:
00166                     warn(HTTPWarning('%r responses must have an empty '
00167                                      'content length') % status_code)
00168                 if bytes_sent:
00169                     warn(HTTPWarning('%r responses must not have a body' %
00170                                      status_code))
00171             elif content_length is not None and content_length != bytes_sent:
00172                 warn(WSGIWarning('Content-Length and the number of bytes '
00173                                  'sent to the client do not match.'))
00174 
00175     def __del__(self):
00176         if not self.closed:
00177             try:
00178                 warn(WSGIWarning('Iterator was garbage collected before '
00179                                  'it was closed.'))
00180             except:
00181                 pass
00182 
00183 
class LintMiddleware(object):
    """This middleware wraps an application and warns on common errors.
    Among other things it currently checks for the following problems:

    -   invalid status codes
    -   non-bytestrings sent to the WSGI server
    -   strings returned from the WSGI application
    -   non-empty conditional responses
    -   unquoted etags
    -   relative URLs in the Location header
    -   unsafe calls to wsgi.input
    -   unclosed iterators

    Detected errors are emitted using the standard Python :mod:`warnings`
    system and usually end up on :data:`stderr`.

    ::

        from werkzeug.contrib.lint import LintMiddleware
        app = LintMiddleware(app)

    :param app: the application to wrap
    """

    def __init__(self, app):
        self.app = app

    def check_environ(self, environ):
        """Warn about problems with the WSGI environment.

        Checks the type of `environ`, the presence of the required keys,
        the WSGI version and the leading slash of ``SCRIPT_NAME`` and
        ``PATH_INFO``.
        """
        if type(environ) is not dict:
            warn(WSGIWarning('WSGI environment is not a standard python dict.'),
                 stacklevel=4)
        for key in ('REQUEST_METHOD', 'SERVER_NAME', 'SERVER_PORT',
                    'wsgi.version', 'wsgi.input', 'wsgi.errors',
                    'wsgi.multithread', 'wsgi.multiprocess',
                    'wsgi.run_once'):
            if key not in environ:
                warn(WSGIWarning('required environment key %r not found'
                     % key), stacklevel=3)
        # A missing version was already reported by the loop above; only
        # compare the value if there is one instead of raising a KeyError.
        if 'wsgi.version' in environ and environ['wsgi.version'] != (1, 0):
            warn(WSGIWarning('environ is not a WSGI 1.0 environ'),
                 stacklevel=3)

        script_name = environ.get('SCRIPT_NAME', '')
        if script_name and script_name[:1] != '/':
            warn(WSGIWarning('SCRIPT_NAME does not start with a slash: %r'
                             % script_name), stacklevel=3)
        path_info = environ.get('PATH_INFO', '')
        if path_info[:1] != '/':
            warn(WSGIWarning('PATH_INFO does not start with a slash: %r'
                             % path_info), stacklevel=3)

    def check_start_response(self, status, headers, exc_info):
        """Warn about invalid arguments passed to ``start_response``.

        :param status: the status string passed by the application.
        :param headers: the header list passed by the application.
        :param exc_info: the `exc_info` passed by the application or `None`.
        :return: a tuple ``(status_code, headers)`` with the status code as
                 integer and the headers wrapped in a :class:`Headers`
                 object.
        """
        check_string('status', status)
        status_code = status.split(None, 1)[0]
        if len(status_code) != 3 or not status_code.isdigit():
            warn(WSGIWarning('Status code must be three digits'), stacklevel=3)
        if len(status) < 4 or status[3] != ' ':
            # Wrapped in a tuple so that the value is formatted as a
            # single argument even if it is a tuple itself.
            warn(WSGIWarning('Invalid value for status %r.  Valid '
                             'status strings are three digits, a space '
                             'and a status explanation' % (status,)),
                 stacklevel=3)
        status_code = int(status_code)
        if status_code < 100:
            warn(WSGIWarning('status code < 100 detected'), stacklevel=3)

        if type(headers) is not list:
            warn(WSGIWarning('header list is not a list'), stacklevel=3)
        for item in headers:
            if type(item) is not tuple or len(item) != 2:
                warn(WSGIWarning('Headers must tuple 2-item tuples'),
                     stacklevel=3)
            name, value = item
            if type(name) is not str or type(value) is not str:
                warn(WSGIWarning('header items must be strings'),
                     stacklevel=3)
            if name.lower() == 'status':
                warn(WSGIWarning('The status header is not supported due to '
                                 'conflicts with the CGI spec.'),
                     stacklevel=3)

        if exc_info is not None and not isinstance(exc_info, tuple):
            warn(WSGIWarning('invalid value for exc_info'), stacklevel=3)

        headers = Headers(headers)
        self.check_headers(headers)

        return status_code, headers

    def check_headers(self, headers):
        """Warn about unquoted etags and relative ``Location`` headers.

        :param headers: an object with a dict-like ``get()`` method that
                        returns the header value or `None`.
        """
        etag = headers.get('etag')
        if etag is not None:
            # Weak etags carry an uppercase "W/" prefix in front of the
            # quoted value; the lowercase spelling is still tolerated as
            # it was before.
            if etag[:2] in ('W/', 'w/'):
                etag = etag[2:]
            if not (etag[:1] == etag[-1:] == '"'):
                warn(HTTPWarning('unquoted etag emitted.'), stacklevel=4)

        location = headers.get('location')
        if location is not None:
            if not urlparse(location).netloc:
                warn(HTTPWarning('absolute URLs required for location header'),
                     stacklevel=4)

    def check_iterator(self, app_iter):
        """Warn if the application returned a string as its iterable."""
        if isinstance(app_iter, basestring):
            warn(WSGIWarning('application returned string.  Response will '
                             'send character for character to the client '
                             'which will kill the performance.  Return a '
                             'list or iterable instead.'), stacklevel=3)

    def __call__(self, *args, **kwargs):
        """Call the wrapped application with checks hooked in.

        The environment is checked, ``wsgi.input`` and ``wsgi.errors`` are
        replaced with checking proxies, ``start_response`` is wrapped and
        the returned iterable is wrapped in a :class:`GuardedIterator`.
        """
        if len(args) != 2:
            warn(WSGIWarning('Two arguments to WSGI app required'), stacklevel=2)
        if kwargs:
            warn(WSGIWarning('No keyword arguments to WSGI app allowed'),
                 stacklevel=2)
        environ, start_response = args

        self.check_environ(environ)
        environ['wsgi.input'] = InputStream(environ['wsgi.input'])
        environ['wsgi.errors'] = ErrorStream(environ['wsgi.errors'])

        # hook our own file wrapper in so that applications will always
        # iterate to the end and we can check the content length
        environ['wsgi.file_wrapper'] = FileWrapper

        # both lists are shared with the wrappers created below:
        # `headers_set` is filled once the response was started, `chunks`
        # collects the length of every piece of data sent.
        headers_set = []
        chunks = []

        def checking_start_response(*args, **kwargs):
            if len(args) not in (2, 3):
                # `stacklevel` belongs to warn(); passing it to the
                # warning class raises a TypeError.
                warn(WSGIWarning('Invalid number of arguments: %s, expected '
                                 '2 or 3' % len(args)), stacklevel=2)
            if kwargs:
                warn(WSGIWarning('no keyword arguments allowed.'))

            status, headers = args[:2]
            if len(args) == 3:
                exc_info = args[2]
            else:
                exc_info = None

            headers_set[:] = self.check_start_response(status, headers,
                                                       exc_info)
            return GuardedWrite(start_response(status, headers, exc_info),
                                chunks)

        app_iter = self.app(environ, checking_start_response)
        self.check_iterator(app_iter)
        return GuardedIterator(app_iter, headers_set, chunks)
00328                                 chunks)
00329 
00330         app_iter = self.app(environ, checking_start_response)
00331         self.check_iterator(app_iter)
00332         return GuardedIterator(app_iter, headers_set, chunks)