Back to index

python-weblib  1.3.9
helper.py
Go to the documentation of this file.
00001 """
00002 pyweblib.helper - Misc. stuff useful in CGI-BINs
00003 (c) by Michael Stroeder <michael@stroeder.com>
00004 
00005 This module is distributed under the terms of the
00006 GPL (GNU GENERAL PUBLIC LICENSE) Version 2
00007 (see http://www.gnu.org/copyleft/gpl.html)
00008 
00009 $Id: helper.py,v 1.15 2011/01/16 09:04:10 michael Exp $
00010 """
00011 
00012 __version__ = '0.3.1'
00013 
00014 import os,re,UserDict
00015 
# Maps the product token found in a User-Agent string to a human-readable
# browser name.
#
# The 'Mozilla' token is shared by several browsers. A dict can hold only one
# value per key: this literal used to list 'Mozilla' three times, which
# silently discarded all but the last value. The value which was effective at
# run-time ('Seamonkey') is kept for backward compatibility; the other display
# names are registered in known_browsers_rev below.
known_browsers = {
  'MSIE':'Microsoft Internet Explorer',
  'Lynx':'Lynx',
  'Opera':'Opera',
  'StarOffice':'StarOffice',
  'NCSA_Mosaic':'NCSA Mosaic',
  'NetPositive':'Net Positive',
  'Mozilla':'Seamonkey',
}

# Reverse mapping: human-readable browser name -> product token
known_browsers_rev = dict(
  (display_name,token)
  for token,display_name in known_browsers.items()
)
# Display names sharing the 'Mozilla' token which cannot be expressed
# in the forward mapping above
known_browsers_rev.update({
  'Netscape Navigator':'Mozilla',
  'Firefox':'Mozilla',
})

# All product tokens except the generic 'Mozilla', which is only used as
# fall-back because many browsers announce themselves as Mozilla-compatible.
# list() makes sure a real list is built no matter what keys() returns.
compatible_browsers = list(known_browsers)
compatible_browsers.remove('Mozilla')

# Group 1 is the product token, group 2 the (possibly empty) version string.
# The tokens are escaped so that they are always matched literally.
compatible_browsers_re = re.compile(
  '(%s)[/ ]+([0-9.]*)' % '|'.join(map(re.escape,compatible_browsers))
)
mozilla_re             = re.compile('(Mozilla)[/ ]+([0-9.]*)')
00036 
00037 
def BrowserType(http_user_agent):
  """
  Determine browser name and version from the value of the
  HTTP_USER_AGENT environment variable.

  http_user_agent
        string with the User-Agent value (may be empty or None)

  Returns the tuple (Browser,Version) taken from the first pattern
  which matches. The pattern compatible_browsers_re is tried first and
  mozilla_re is only used as fall-back. ('','') is returned if
  http_user_agent is empty or if none of the patterns matches.

  The result is only a guess since most browsers send rather obscure
  User-Agent values for compatibility reasons.
  """
  nothing_found = ('','')
  if not http_user_agent:
    return nothing_found
  # The order matters: the specific browsers have to be tried before
  # the generic Mozilla pattern
  for browser_re in (compatible_browsers_re,mozilla_re):
    found = browser_re.search(http_user_agent)
    if found:
      return found.groups()
  return nothing_found
00066 
00067 
def guessClientAddr(env=None):
  """
  Guesses the host name or IP address of the HTTP client by looking
  at various HTTP headers mapped to CGI-BIN environment.

  env
        dictionary containing environment vars (default os.environ)

  The keys FORWARDED_FOR, HTTP_X_FORWARDED_FOR, REMOTE_HOST and
  REMOTE_ADDR are looked up in this order and the value of the first
  key which exists is returned unchanged. None is returned if none of
  the keys exists.

  NOTE(review): the forwarding keys are derived from request headers and
  take precedence over REMOTE_ADDR here, so the result is presumably under
  control of the client. Confirm before using it for access control.
  """
  # Only fall back to the process environment if no mapping was passed at
  # all. An explicitly passed empty mapping must not be replaced.
  if env is None:
    env = os.environ
  for env_key in (
    'FORWARDED_FOR',
    'HTTP_X_FORWARDED_FOR',
    'REMOTE_HOST',
    'REMOTE_ADDR',
  ):
    if env_key in env:
      return env[env_key]
  return None
00081 
00082 
class AcceptHeaderDict(UserDict.UserDict):
  """
  This dictionary class is used to parse
  Accept-header lines with quality weights.

  It's a base class for all Accept-* headers described
  in sections 14.1 to 14.5 of RFC2616.

  The lower-cased accepted values are the keys, the quality
  weights (floating point numbers) are the values.
  """

  def __init__(self,envKey,env=None,defaultValue=None):
    """
    Parse the Accept-* header line.

    envKey
        name of the environment variable holding the value
        of the Accept-* header line
    env
        dictionary containing environment vars (default os.environ)
    defaultValue
        stored in attribute defaultValue, not used by this class itself

    If the header is missing or does not contain any element
    everything is accepted with weight 1.0 (key '*').
    """
    # Only fall back to the process environment if no mapping was passed
    # at all. An explicitly passed empty mapping must not be replaced.
    if env is None:
      env = os.environ
    UserDict.UserDict.__init__(self)
    self.defaultValue = defaultValue
    self.preferred_value = []
    try:
      header_value = env[envKey]
    except KeyError:
      header_value = ''
    http_accept_value = [
      s
      for s in header_value.strip().split(',')
      if s
    ]
    if not http_accept_value:
      self.data = {'*':1.0}
      return
    self.data = {}
    for element in http_accept_value:
      # An element looks like "value;param=x;q=0.5" with any number
      # of parameters, so do not assume exactly one semicolon
      params = element.split(';')
      # Normalize the accepted value
      c = params[0].strip().lower()
      if not c:
        continue
      # Weight is 1.0 if there is no (valid) q parameter
      qvalue = 1.0
      for param in params[1:]:
        param_name,sep,param_value = param.partition('=')
        # Only the parameter named q carries the quality weight,
        # other parameters must not be mistaken for it
        if sep and param_name.strip().lower()=='q':
          try:
            qvalue = float(param_value)
          except ValueError:
            # Malformed quality value is treated like a missing one
            pass
          break
      # Add to capability dictionary
      self.data[c] = qvalue
    return # AcceptHeaderDict.__init__()

  def __getitem__(self,value):
    """
    value
        String representing the value for which to return
        the floating point capability weight.

    The lookup is case-insensitive. If value was not explicitly
    mentioned the weight of '*' is returned, or 0 if there is no '*'.
    """
    return self.data.get(
      value.lower(),
      self.data.get('*',0)
    )

  def items(self):
    """
    Return the accepted values as list of tuples (value,weight)
    in descending order of capability weight
    """
    # sorted() is stable also with reverse=True, so values with
    # equal weight keep their relative order
    return sorted(
      self.data.items(),
      key=lambda item:item[1],
      reverse=True
    )

  def keys(self):
    """
    Return list of the accepted values in descending order
    of capability weight
    """
    return [ k for k,v in self.items() ]
00159 
00160 
class AcceptCharsetDict(AcceptHeaderDict):
  """
  Accept-Charset flavour of AcceptHeaderDict
  """

  def __init__(self,envKey='HTTP_ACCEPT_CHARSET',env=None,defaultValue='utf-8'):
    """
    Parse the Accept-Charset header found in env[envKey].

    defaultValue
        charset name returned by preferred() as fall-back
    """
    AcceptHeaderDict.__init__(self,envKey,env,defaultValue)
    # To be compliant to RFC2616 ISO-8859-1 always gets a weight:
    # if not explicitly mentioned it gets the weight of '*'
    # or 1.0 if there is no '*' either
    if 'iso-8859-1' not in self.data:
      self.data['iso-8859-1'] = self.data.get('*',1.0)

  def preferred(self):
    """
    Return the name of the charset with the highest capability
    weight for which a codec can be looked up.

    self.defaultValue is returned if no such charset exists, or if
    the wildcard '*' is reached first and self.defaultValue is set.
    """
    for charset,weight in self.items():
      if charset!='*':
        try:
          # Probe whether a codec is known for this charset name
          u''.encode(charset)
        except LookupError:
          # Unknown charset => try the next best one
          continue
      elif self.defaultValue:
        return self.defaultValue
      return charset
    # No usable charset at all
    return self.defaultValue
00191