Back to index

moin  1.9.0~rc2
scgi_base.py
Go to the documentation of this file.
00001 # Copyright (c) 2005, 2006 Allan Saddi <allan@saddi.com>
00002 # All rights reserved.
00003 #
00004 # Redistribution and use in source and binary forms, with or without
00005 # modification, are permitted provided that the following conditions
00006 # are met:
00007 # 1. Redistributions of source code must retain the above copyright
00008 #    notice, this list of conditions and the following disclaimer.
00009 # 2. Redistributions in binary form must reproduce the above copyright
00010 #    notice, this list of conditions and the following disclaimer in the
00011 #    documentation and/or other materials provided with the distribution.
00012 #
00013 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00014 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00015 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00016 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00017 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00018 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00019 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00020 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00021 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00022 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00023 # SUCH DAMAGE.
00024 #
00025 # $Id$
00026 
# Module metadata.
__author__ = 'Allan Saddi <allan@saddi.com>'
__version__ = '$Revision$'
00029 
00030 import sys
00031 import logging
00032 import socket
00033 import select
00034 import errno
00035 import cStringIO as StringIO
00036 import signal
00037 import datetime
00038 import os
00039 import warnings
00040 
00041 # Threads are required. If you want a non-threaded (forking) version, look at
00042 # SWAP <http://www.idyll.org/~t/www-tools/wsgi/>.
00043 import thread
00044 import threading
00045 
# Only the server base class is exported by "from ... import *".
__all__ = ['BaseSCGIServer']
00047 
00048 from flup.server import NoDefault
00049 
# Name of the logger shared by the classes in this module.
LoggerName = 'scgi-wsgi'

# Give that logger a stream handler of its own: it lets every level from
# DEBUG upwards through and prefixes each record with a timestamp. The
# temporary name is removed again so it does not linger in the module.
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)s : %(message)s',
                                       '%Y-%m-%d %H:%M:%S'))
handler.setLevel(logging.DEBUG)
logging.getLogger(LoggerName).addHandler(handler)
del handler
00060 
class ProtocolError(Exception):
    """
    Signals malformed or unexpected data received from the web server.

    Raised by the netstring and header parsing code in this module.
    Connection.run() logs it and then closes the connection.
    """
00067 
def recvall(sock, length):
    """
    Receive up to length bytes from sock, waiting for data as needed.

    Works with blocking and non-blocking sockets alike: when a
    non-blocking socket has nothing to deliver yet, select() is used to
    wait until it becomes readable and the read is retried.

    Returns a (data, count) tuple. count is smaller than length only if
    the peer closed the connection (EOF) before everything arrived.
    Socket errors other than "would block" are re-raised.
    """
    chunks = []
    received = 0
    while length:
        try:
            data = sock.recv(length)
        except socket.error:
            # Fetch the exception via sys.exc_info(): the "except X, e"
            # spelling is a syntax error on Python 3, and indexing the
            # exception object (e[0]) is deprecated in favour of e.args.
            err = sys.exc_info()[1]
            # EWOULDBLOCK is a distinct code from EAGAIN on some platforms.
            if err.args and err.args[0] in (errno.EAGAIN, errno.EWOULDBLOCK):
                select.select([sock], [], [])
                continue
            raise
        if not data: # EOF
            break
        chunks.append(data)
        received += len(data)
        length -= len(data)
    return ''.join(chunks), received
00091 
def readNetstring(sock):
    """
    Read one netstring ("<length>:<payload>,") from sock.

    Returns the payload. Raises EOFError if the connection ends before
    the netstring is complete, and ProtocolError if the length prefix is
    not a non-negative integer or the trailing comma is missing.
    """
    # The length prefix runs up to the first ':'. It is read one byte at a
    # time so that nothing beyond the separator is consumed. recvall()
    # takes care of waiting on non-blocking sockets.
    digits = []
    while True:
        c, count = recvall(sock, 1)
        if count < 1:
            raise EOFError
        if c == ':':
            break
        digits.append(c)

    # Try to decode the length.
    try:
        size = int(''.join(digits))
        if size < 0:
            raise ValueError
    except ValueError:
        raise ProtocolError('invalid netstring length')

    # Now read the string.
    s, length = recvall(sock, size)

    if length < size:
        raise EOFError

    # Lastly, the trailer.
    trailer, length = recvall(sock, 1)

    if length < 1:
        raise EOFError

    if trailer != ',':
        raise ProtocolError('invalid netstring trailer')

    return s
00137 
class StdoutWrapper(object):
    """
    Proxy around an output stream that records whether any non-empty
    data has been written through it.

    The dataWritten attribute starts out False and turns True on the first
    write of non-empty data. All other attribute access is forwarded to
    the wrapped stream.
    """
    def __init__(self, stdout):
        self.dataWritten = False
        self._file = stdout

    def write(self, data):
        # Empty writes are passed on as well, but do not count as output.
        if data:
            self.dataWritten = True
        self._file.write(data)

    def writelines(self, lines):
        # Routed through write() so that dataWritten is kept up to date.
        for chunk in lines:
            self.write(chunk)

    def __getattr__(self, name):
        # Only reached for names not found on the wrapper itself.
        return getattr(self._file, name)
00157 
class Request(object):
    """
    State belonging to one request.

    Public attributes:
      environ - Environment variables from web server.
      stdin - File-like object representing the request body.
      stdout - File-like object for writing the response (wrapped in a
               StdoutWrapper so it is known whether output was produced).
    """
    def __init__(self, conn, environ, input, output):
        self.logger = logging.getLogger(LoggerName)

        self._conn = conn
        self.environ = environ
        self.stdin = input
        self.stdout = StdoutWrapper(output)

    def run(self):
        """
        Pass this request to the server's handler() and log its duration.

        Any exception escaping the handler is logged. If nothing has been
        written to stdout by then, the server's error() is asked to
        produce a response instead.
        """
        env = self.environ
        self.logger.info('%s %s%s',
                         env['REQUEST_METHOD'],
                         env.get('SCRIPT_NAME', ''),
                         env.get('PATH_INFO', ''))

        started = datetime.datetime.now()

        try:
            self._conn.server.handler(self)
        except:
            self.logger.exception('Exception caught from handler')
            if not self.stdout.dataWritten:
                self._conn.server.error(self)

        elapsed = datetime.datetime.now() - started
        seconds = elapsed.seconds + elapsed.microseconds / 1000000.0

        # self.environ is looked up again rather than reusing env, since the
        # handler has had its hands on the request in the meantime.
        self.logger.debug('%s %s%s done (%.3f secs)',
                          self.environ['REQUEST_METHOD'],
                          self.environ.get('SCRIPT_NAME', ''),
                          self.environ.get('PATH_INFO', ''),
                          seconds)
00199 
class Connection(object):
    """
    Represents a single client (web server) connection. A single request
    is handled, after which the socket is closed.
    """
    def __init__(self, sock, addr, server):
        self._sock = sock
        self._addr = addr
        self.server = server

        self.logger = logging.getLogger(LoggerName)

    def _logPeer(self, message):
        """Log message at debug level, followed by the peer's host:port."""
        # Only INET peers are (host, port) tuples. UNIX socket peers are
        # plain strings and must not be mistaken for one just because the
        # string happens to be two characters long.
        addr = self._addr
        if isinstance(addr, tuple) and len(addr) == 2:
            self.logger.debug(message + ' (%s:%d)', addr[0], addr[1])

    def run(self):
        """
        Process the connection's request, then close the socket.

        EOFError and KeyboardInterrupt end the connection silently, a
        ProtocolError is logged as an error, and anything else is logged
        with its traceback. No exception from request processing escapes.
        """
        try:
            self._logPeer('Connection starting up')

            try:
                self.processInput()
            except (EOFError, KeyboardInterrupt):
                pass
            except ProtocolError:
                self.logger.error("Protocol error '%s'",
                                  str(sys.exc_info()[1]))
            except:
                self.logger.exception('Exception caught in Connection')

            self._logPeer('Connection shutting down')
        finally:
            # All done! Close the socket whatever happened above.
            self._sock.close()

    def processInput(self):
        """
        Read the SCGI headers, build the request and run it.

        Raises ProtocolError for malformed headers or a missing/invalid
        CONTENT_LENGTH; EOFError from readNetstring() propagates.
        """
        # Read headers: NUL-terminated names and values, alternating.
        headers = readNetstring(self._sock)
        headers = headers.split('\x00')[:-1]
        if len(headers) % 2 != 0:
            raise ProtocolError('invalid headers')
        environ = dict(zip(headers[::2], headers[1::2]))

        clen = environ.get('CONTENT_LENGTH')
        if clen is None:
            raise ProtocolError('missing CONTENT_LENGTH')
        try:
            clen = int(clen)
            if clen < 0:
                raise ValueError
        except ValueError:
            raise ProtocolError('invalid CONTENT_LENGTH')

        self._sock.setblocking(1)
        if clen:
            stdin = self._sock.makefile('r')
        else:
            # Empty input.
            stdin = StringIO.StringIO()

        try:
            stdout = self._sock.makefile('w')
            try:
                # Honour the server's requestClass; fall back to Request
                # for server objects that do not define one.
                requestClass = getattr(self.server, 'requestClass', Request)
                req = requestClass(self, environ, stdin, stdout)

                # Run it.
                req.run()
            finally:
                stdout.close()
        finally:
            stdin.close()
00271 
00272 class BaseSCGIServer(object):
    # What Request class to use.
    # NOTE(review): presumably an override point for subclasses -- confirm
    # against the code that instantiates requests.
    requestClass = Request
00275 
00276     def __init__(self, application, scriptName=NoDefault, environ=None,
00277                  multithreaded=True, multiprocess=False,
00278                  bindAddress=('localhost', 4000), umask=None,
00279                  allowedServers=NoDefault,
00280                  loggingLevel=logging.INFO, debug=False):
00281         """
00282         scriptName is the initial portion of the URL path that "belongs"
00283         to your application. It is used to determine PATH_INFO (which doesn't
00284         seem to be passed in). An empty scriptName means your application
00285         is mounted at the root of your virtual host.
00286 
00287         environ, which must be a dictionary, can contain any additional
00288         environment variables you want to pass to your application.
00289 
00290         Set multithreaded to False if your application is not thread-safe.
00291 
00292         Set multiprocess to True to explicitly set wsgi.multiprocess to
00293         True. (Only makes sense with threaded servers.)
00294 
00295         bindAddress is the address to bind to, which must be a string or
00296         a tuple of length 2. If a tuple, the first element must be a string,
00297         which is the host name or IPv4 address of a local interface. The
00298         2nd element of the tuple is the port number. If a string, it will
00299         be interpreted as a filename and a UNIX socket will be opened.
00300 
00301         If binding to a UNIX socket, umask may be set to specify what
00302         the umask is to be changed to before the socket is created in the
00303         filesystem. After the socket is created, the previous umask is
00304         restored.
00305         
00306         allowedServers must be None or a list of strings representing the
00307         IPv4 addresses of servers allowed to connect. None means accept
00308         connections from anywhere. By default, it is a list containing
00309         the single item '127.0.0.1'.
00310 
00311         loggingLevel sets the logging level of the module-level logger.
00312         """
00313         if environ is None:
00314             environ = {}
00315 
00316         self.application = application
00317         self.scriptName = scriptName
00318         self.environ = environ
00319         self.multithreaded = multithreaded
00320         self.multiprocess = multiprocess
00321         self.debug = debug
00322         self._bindAddress = bindAddress
00323         self._umask = umask
00324         if allowedServers is NoDefault:
00325             allowedServers = ['127.0.0.1']
00326         self._allowedServers = allowedServers
00327 
00328         # Used to force single-threadedness.
00329         self._appLock = thread.allocate_lock()
00330 
00331         self.logger = logging.getLogger(LoggerName)
00332         self.logger.setLevel(loggingLevel)
00333 
00334     def _setupSocket(self):
00335         """Creates and binds the socket for communication with the server."""
00336         oldUmask = None
00337         if type(self._bindAddress) is str:
00338             # Unix socket
00339             sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
00340             try:
00341                 os.unlink(self._bindAddress)
00342             except OSError:
00343                 pass
00344             if self._umask is not None:
00345                 oldUmask = os.umask(self._umask)
00346         else:
00347             # INET socket
00348             assert type(self._bindAddress) is tuple
00349             assert len(self._bindAddress) == 2
00350             sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
00351             sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
00352 
00353         sock.bind(self._bindAddress)
00354         sock.listen(socket.SOMAXCONN)
00355 
00356         if oldUmask is not None:
00357             os.umask(oldUmask)
00358 
00359         return sock
00360 
00361     def _cleanupSocket(self, sock):
00362         """Closes the main socket."""
00363         sock.close()
00364 
00365     def _isClientAllowed(self, addr):
00366         ret = self._allowedServers is None or \
00367               len(addr) != 2 or \
00368               (len(addr) == 2 and addr[0] in self._allowedServers)
00369         if not ret:
00370             self.logger.warning('Server connection from %s disallowed',
00371                                 addr[0])
00372         return ret
00373 
    def handler(self, request):
        """
        WSGI handler. Sets up WSGI environment, calls the application,
        and sends the application's response.

        request supplies environ (updated in place), stdin (becomes
        wsgi.input) and stdout (receives the status line, headers and
        body). When the server is not multithreaded, the application call
        is wrapped in the server-wide lock. A socket error with errno
        EPIPE is swallowed; every other exception propagates to the
        caller.
        """
        environ = request.environ
        # Server-configured variables take precedence over those that
        # arrived with the request.
        environ.update(self.environ)

        environ['wsgi.version'] = (1,0)
        environ['wsgi.input'] = request.stdin
        environ['wsgi.errors'] = sys.stderr
        environ['wsgi.multithread'] = self.multithreaded
        environ['wsgi.multiprocess'] = self.multiprocess
        environ['wsgi.run_once'] = False

        if environ.get('HTTPS', 'off') in ('on', '1'):
            environ['wsgi.url_scheme'] = 'https'
        else:
            environ['wsgi.url_scheme'] = 'http'

        self._sanitizeEnv(environ)

        # State shared with the two closures below. They only ever mutate
        # these lists in place (slice assignment), never rebind the names.
        # headers_set holds [status, headers] once start_response() ran;
        # headers_sent holds the same once they went out on the wire.
        headers_set = []
        headers_sent = []
        # Stays None until the application has returned its iterable, i.e.
        # while the application itself is calling write().
        result = None

        def write(data):
            # Sends the headers ahead of the first piece of data, then the
            # data itself, and flushes.
            assert type(data) is str, 'write() argument must be string'
            assert headers_set, 'write() before start_response()'

            if not headers_sent:
                # Marks the headers as sent (copy into headers_sent) and
                # unpacks them in one statement.
                status, responseHeaders = headers_sent[:] = headers_set
                found = False
                for header,value in responseHeaders:
                    if header.lower() == 'content-length':
                        found = True
                        break
                # Without an explicit Content-Length, one is added when
                # the application returned a result of length 1: this
                # first chunk is then taken to be the whole body.
                if not found and result is not None:
                    try:
                        if len(result) == 1:
                            responseHeaders.append(('Content-Length',
                                                    str(len(data))))
                    except:
                        # result does not support len(); no header added.
                        pass
                # The status goes out as a "Status:" header line.
                s = 'Status: %s\r\n' % status
                for header in responseHeaders:
                    s += '%s: %s\r\n' % header
                s += '\r\n'
                request.stdout.write(s)

            request.stdout.write(data)
            request.stdout.flush()

        def start_response(status, response_headers, exc_info=None):
            # Records status and headers for write() and returns write.
            # With exc_info, previously set headers may be replaced, unless
            # they were already sent, in which case the exception is
            # re-raised.
            if exc_info:
                try:
                    if headers_sent:
                        # Re-raise if too late
                        raise exc_info[0], exc_info[1], exc_info[2]
                finally:
                    exc_info = None # avoid dangling circular ref
            else:
                assert not headers_set, 'Headers already set!'

            assert type(status) is str, 'Status must be a string'
            assert len(status) >= 4, 'Status must be at least 4 characters'
            assert int(status[:3]), 'Status must begin with 3-digit code'
            assert status[3] == ' ', 'Status must have a space after code'
            assert type(response_headers) is list, 'Headers must be a list'
            if __debug__:
                for name,val in response_headers:
                    assert type(name) is str, 'Header name "%s" must be a string' % name
                    assert type(val) is str, 'Value of header "%s" must be a string' % name

            headers_set[:] = [status, response_headers]
            return write

        # For applications that are not thread-safe, only one thread at a
        # time may be inside the application.
        if not self.multithreaded:
            self._appLock.acquire()
        try:
            try:
                result = self.application(environ, start_response)
                try:
                    for data in result:
                        # Empty chunks are skipped, so the headers are not
                        # sent before the first real piece of data.
                        if data:
                            write(data)
                    if not headers_sent:
                        write('') # in case body was empty
                finally:
                    # Give the result a chance to clean up, if it has a
                    # close() method.
                    if hasattr(result, 'close'):
                        result.close()
            except socket.error, e:
                if e[0] != errno.EPIPE:
                    raise # Don't let EPIPE propagate beyond server
        finally:
            if not self.multithreaded:
                self._appLock.release()
00471 
00472     def _sanitizeEnv(self, environ):
00473         """Fill-in/deduce missing values in environ."""
00474         reqUri = None
00475         if environ.has_key('REQUEST_URI'):
00476             reqUri = environ['REQUEST_URI'].split('?', 1)
00477 
00478         # Ensure QUERY_STRING exists
00479         if not environ.has_key('QUERY_STRING') or not environ['QUERY_STRING']:
00480             if reqUri is not None and len(reqUri) > 1:
00481                 environ['QUERY_STRING'] = reqUri[1]
00482             else:
00483                 environ['QUERY_STRING'] = ''
00484 
00485         # Check WSGI_SCRIPT_NAME
00486         scriptName = environ.get('WSGI_SCRIPT_NAME')
00487         if scriptName is None:
00488             scriptName = self.scriptName
00489         else:
00490             warnings.warn('WSGI_SCRIPT_NAME environment variable for scgi '
00491                           'servers is deprecated',
00492                           DeprecationWarning)
00493             if scriptName.lower() == 'none':
00494                 scriptName = None
00495 
00496         if scriptName is None:
00497             # Do nothing (most likely coming from cgi2scgi)
00498             return
00499 
00500         if scriptName is NoDefault:
00501             # Pull SCRIPT_NAME/PATH_INFO from environment, with empty defaults
00502             if not environ.has_key('SCRIPT_NAME'):
00503                 environ['SCRIPT_NAME'] = ''
00504             if not environ.has_key('PATH_INFO') or not environ['PATH_INFO']:
00505                 if reqUri is not None:
00506                     environ['PATH_INFO'] = reqUri[0]
00507                 else:
00508                     environ['PATH_INFO'] = ''
00509         else:
00510             # Configured scriptName
00511             warnings.warn('Configured SCRIPT_NAME is deprecated\n'
00512                           'Do not use WSGI_SCRIPT_NAME or the scriptName\n'
00513                           'keyword parameter -- they will be going away',
00514                           DeprecationWarning)
00515 
00516             value = environ['SCRIPT_NAME']
00517             value += environ.get('PATH_INFO', '')
00518             if not value.startswith(scriptName):
00519                 self.logger.warning('scriptName does not match request URI')
00520 
00521             environ['PATH_INFO'] = value[len(scriptName):]
00522             environ['SCRIPT_NAME'] = scriptName
00523 
00524     def error(self, request):
00525         """
00526         Override to provide custom error handling. Ideally, however,
00527         all errors should be caught at the application level.
00528         """
00529         if self.debug:
00530             import cgitb
00531             request.stdout.write('Content-Type: text/html\r\n\r\n' +
00532                                  cgitb.html(sys.exc_info()))
00533         else:
00534             errorpage = """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
00535 <html><head>
00536 <title>Unhandled Exception</title>
00537 </head><body>
00538 <h1>Unhandled Exception</h1>
00539 <p>An unhandled exception was thrown by the application.</p>
00540 </body></html>
00541 """
00542             request.stdout.write('Content-Type: text/html\r\n\r\n' +
00543                                  errorpage)