Back to index

python3.2  3.2.2
Classes | Functions | Variables
http.cookiejar Namespace Reference

Classes

class  Cookie
class  CookiePolicy
class  DefaultCookiePolicy
class  Absent
class  CookieJar
class  LoadError
class  FileCookieJar
class  LWPCookieJar
class  MozillaCookieJar

Functions

def _debug
def _warn_unhandled_exception
def _timegm
def time2isoz
def time2netscape
def offset_from_tz_string
def _str2time
def http2time
def iso2time
def unmatched
def split_header_words
def join_header_words
def strip_quotes
def parse_ns_headers
def is_HDN
def domain_match
def liberal_is_HDN
def user_domain_match
def request_host
def eff_request_host
def request_path
def request_port
def uppercase_escaped_char
def escape_path
def reach
def is_third_party
def vals_sorted_by_key
def deepvalues
def lwp_cookie_str

Variables

list __all__
 debug = False
 logger = None
string DEFAULT_HTTP_PORT = str(http.client.HTTP_PORT)
tuple MISSING_FILENAME_TEXT
int EPOCH_YEAR = 1970
list DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
list MONTHS
list MONTHS_LOWER = []
dictionary UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
tuple TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII)
tuple STRICT_DATE_RE
tuple WEEKDAY_RE
tuple LOOSE_HTTP_DATE_RE
tuple ISO_DATE_RE
tuple HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)")
tuple HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
tuple HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)")
tuple HEADER_ESCAPE_RE = re.compile(r"\\(.)")
tuple HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
tuple IPV4_RE = re.compile(r"\.\d+$", re.ASCII)
tuple cut_port_re = re.compile(r":\d+$", re.ASCII)
string HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
tuple ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")

Detailed Description

HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
distributed with the Python standard library, but are available from
http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

Class Documentation

class http::cookiejar::Absent

Definition at line 1204 of file cookiejar.py.

class http::cookiejar::LoadError

Definition at line 1735 of file cookiejar.py.


Function Documentation

def http.cookiejar._debug (   args) [private]

Definition at line 46 of file cookiejar.py.

00046 
00047 def _debug(*args):
00048     if not debug:
00049         return
00050     global logger
00051     if not logger:
00052         import logging
00053         logger = logging.getLogger("http.cookiejar")
00054     return logger.debug(*args)
00055 

Here is the caller graph for this function:

def http.cookiejar._str2time (   day,
  mon,
  yr,
  hr,
  min,
  sec,
  tz 
) [private]

Definition at line 145 of file cookiejar.py.

00145 
00146 def _str2time(day, mon, yr, hr, min, sec, tz):
00147     # translate month name to number
00148     # month numbers start with 1 (January)
00149     try:
00150         mon = MONTHS_LOWER.index(mon.lower())+1
00151     except ValueError:
00152         # maybe it's already a number
00153         try:
00154             imon = int(mon)
00155         except ValueError:
00156             return None
00157         if 1 <= imon <= 12:
00158             mon = imon
00159         else:
00160             return None
00161 
00162     # make sure clock elements are defined
00163     if hr is None: hr = 0
00164     if min is None: min = 0
00165     if sec is None: sec = 0
00166 
00167     yr = int(yr)
00168     day = int(day)
00169     hr = int(hr)
00170     min = int(min)
00171     sec = int(sec)
00172 
00173     if yr < 1000:
00174         # find "obvious" year
00175         cur_yr = time.localtime(time.time())[0]
00176         m = cur_yr % 100
00177         tmp = yr
00178         yr = yr + cur_yr - m
00179         m = m - tmp
00180         if abs(m) > 50:
00181             if m > 0: yr = yr + 100
00182             else: yr = yr - 100
00183 
00184     # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
00185     t = _timegm((yr, mon, day, hr, min, sec, tz))
00186 
00187     if t is not None:
00188         # adjust time using timezone string, to get absolute time since epoch
00189         if tz is None:
00190             tz = "UTC"
00191         tz = tz.upper()
00192         offset = offset_from_tz_string(tz)
00193         if offset is None:
00194             return None
00195         t = t - offset
00196 
00197     return t

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar._timegm (   tt) [private]

Definition at line 75 of file cookiejar.py.

00075 
00076 def _timegm(tt):
00077     year, month, mday, hour, min, sec = tt[:6]
00078     if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and
00079         (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
00080         return timegm(tt)
00081     else:
00082         return None

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar._warn_unhandled_exception (   ) [private]

Definition at line 60 of file cookiejar.py.

00060 
00061 def _warn_unhandled_exception():
00062     # There are a few catch-all except: statements in this module, for
00063     # catching input that's bad in unexpected ways.  Warn if any
00064     # exceptions are caught there.
00065     import io, warnings, traceback
00066     f = io.StringIO()
00067     traceback.print_exc(None, f)
00068     msg = f.getvalue()
00069     warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2)
00070 
00071 
00072 # Date/time conversion
00073 # -----------------------------------------------------------------------------

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.deepvalues (   mapping)
Iterates over nested mapping, depth-first, in sorted order by key.

Definition at line 1185 of file cookiejar.py.

01185 
01186 def deepvalues(mapping):
01187     """Iterates over nested mapping, depth-first, in sorted order by key."""
01188     values = vals_sorted_by_key(mapping)
01189     for obj in values:
01190         mapping = False
01191         try:
01192             obj.items
01193         except AttributeError:
01194             pass
01195         else:
01196             mapping = True
01197             for subobj in deepvalues(obj):
01198                 yield subobj
01199         if not mapping:
01200             yield obj
01201 
01202 
01203 # Used as second parameter to dict.get() method, to distinguish absent
# dict key from one with a None value.

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.domain_match (   A,
  B 
)
Return True if domain A domain-matches domain B, according to RFC 2965.

A and B may be host domain names or IP addresses.

RFC 2965, section 1:

Host names can be specified either as an IP address or a HDN string.
Sometimes we compare one host name with another.  (Such comparisons SHALL
be case-insensitive.)  Host A's name domain-matches host B's if

     *  their host name strings string-compare equal; or

     * A is a HDN string and has the form NB, where N is a non-empty
        name string, B has the form .B', and B' is a HDN string.  (So,
        x.y.com domain-matches .Y.com but not Y.com.)

Note that domain-match is not a commutative operation: a.b.c.com
domain-matches .c.com, but not the reverse.

Definition at line 520 of file cookiejar.py.

00520 
00521 def domain_match(A, B):
00522     """Return True if domain A domain-matches domain B, according to RFC 2965.
00523 
00524     A and B may be host domain names or IP addresses.
00525 
00526     RFC 2965, section 1:
00527 
00528     Host names can be specified either as an IP address or a HDN string.
00529     Sometimes we compare one host name with another.  (Such comparisons SHALL
00530     be case-insensitive.)  Host A's name domain-matches host B's if
00531 
00532          *  their host name strings string-compare equal; or
00533 
00534          * A is a HDN string and has the form NB, where N is a non-empty
00535             name string, B has the form .B', and B' is a HDN string.  (So,
00536             x.y.com domain-matches .Y.com but not Y.com.)
00537 
00538     Note that domain-match is not a commutative operation: a.b.c.com
00539     domain-matches .c.com, but not the reverse.
00540 
00541     """
00542     # Note that, if A or B are IP addresses, the only relevant part of the
00543     # definition of the domain-match algorithm is the direct string-compare.
00544     A = A.lower()
00545     B = B.lower()
00546     if A == B:
00547         return True
00548     if not is_HDN(A):
00549         return False
00550     i = A.rfind(B)
00551     if i == -1 or i == 0:
00552         # A does not have form NB, or N is the empty string
00553         return False
00554     if not B.startswith("."):
00555         return False
00556     if not is_HDN(B[1:]):
00557         return False
00558     return True

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.eff_request_host (   request)
Return a tuple (request-host, effective request-host name).

As defined by RFC 2965, except both are lowercased.

Definition at line 606 of file cookiejar.py.

00606 
00607 def eff_request_host(request):
00608     """Return a tuple (request-host, effective request-host name).
00609 
00610     As defined by RFC 2965, except both are lowercased.
00611 
00612     """
00613     erhn = req_host = request_host(request)
00614     if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
00615         erhn = req_host + ".local"
00616     return req_host, erhn

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.escape_path (   path)
Escape any invalid characters in HTTP URL, and uppercase all escapes.

Definition at line 647 of file cookiejar.py.

00647 
00648 def escape_path(path):
00649     """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
00650     # There's no knowing what character encoding was used to create URLs
00651     # containing %-escapes, but since we have to pick one to escape invalid
00652     # path characters, we pick UTF-8, as recommended in the HTML 4.0
00653     # specification:
00654     # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
00655     # And here, kind of: draft-fielding-uri-rfc2396bis-03
00656     # (And in draft IRI specification: draft-duerst-iri-05)
00657     # (And here, for new URI schemes: RFC 2718)
00658     path = urllib.parse.quote(path, HTTP_PATH_SAFE)
00659     path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
00660     return path

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.http2time (   text)
Returns time in seconds since epoch of time represented by a string.

Return value is an integer.

None is returned if the format of text is unrecognized, the time is outside
the representable range, or the timezone string is not recognized.  If the
string contains no timezone, UTC is assumed.

The timezone in the string may be numerical (like "-0800" or "+0100") or a
string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
timezone strings equivalent to UTC (zero offset) are known to the function.

The function loosely parses the following formats:

Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

The parser ignores leading and trailing whitespace.  The time may be
absent.

If the year is given with only 2 digits, the function will select the
century that makes the year closest to the current date.

Definition at line 220 of file cookiejar.py.

00220 
00221 def http2time(text):
00222     """Returns time in seconds since epoch of time represented by a string.
00223 
00224     Return value is an integer.
00225 
00226     None is returned if the format of str is unrecognized, the time is outside
00227     the representable range, or the timezone string is not recognized.  If the
00228     string contains no timezone, UTC is assumed.
00229 
00230     The timezone in the string may be numerical (like "-0800" or "+0100") or a
00231     string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
00232     timezone strings equivalent to UTC (zero offset) are known to the function.
00233 
00234     The function loosely parses the following formats:
00235 
00236     Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
00237     Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
00238     Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
00239     09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
00240     08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
00241     08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)
00242 
00243     The parser ignores leading and trailing whitespace.  The time may be
00244     absent.
00245 
00246     If the year is given with only 2 digits, the function will select the
00247     century that makes the year closest to the current date.
00248 
00249     """
00250     # fast exit for strictly conforming string
00251     m = STRICT_DATE_RE.search(text)
00252     if m:
00253         g = m.groups()
00254         mon = MONTHS_LOWER.index(g[1].lower()) + 1
00255         tt = (int(g[2]), mon, int(g[0]),
00256               int(g[3]), int(g[4]), float(g[5]))
00257         return _timegm(tt)
00258 
00259     # No, we need some messy parsing...
00260 
00261     # clean up
00262     text = text.lstrip()
00263     text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday
00264 
00265     # tz is time zone specifier string
00266     day, mon, yr, hr, min, sec, tz = [None]*7
00267 
00268     # loose regexp parse
00269     m = LOOSE_HTTP_DATE_RE.search(text)
00270     if m is not None:
00271         day, mon, yr, hr, min, sec, tz = m.groups()
00272     else:
00273         return None  # bad format
00274 
00275     return _str2time(day, mon, yr, hr, min, sec, tz)

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.is_HDN (   text)
Return True if text is a host domain name.

Definition at line 505 of file cookiejar.py.

00505 
00506 def is_HDN(text):
00507     """Return True if text is a host domain name."""
00508     # XXX
00509     # This may well be wrong.  Which RFC is HDN defined in, if any (for
00510     #  the purposes of RFC 2965)?
00511     # For the current implementation, what about IPv6?  Remember to look
00512     #  at other uses of IPV4_RE also, if change this.
00513     if IPV4_RE.search(text):
00514         return False
00515     if text == "":
00516         return False
00517     if text[0] == "." or text[-1] == ".":
00518         return False
00519     return True

Here is the caller graph for this function:

def http.cookiejar.is_third_party (   request)
RFC 2965, section 3.3.6:

    An unverifiable transaction is to a third-party host if its request-
    host U does not domain-match the reach R of the request-host O in the
    origin transaction.

Definition at line 696 of file cookiejar.py.

00696 
00697 def is_third_party(request):
00698     """
00699 
00700     RFC 2965, section 3.3.6:
00701 
00702         An unverifiable transaction is to a third-party host if its request-
00703         host U does not domain-match the reach R of the request-host O in the
00704         origin transaction.
00705 
00706     """
00707     req_host = request_host(request)
00708     if not domain_match(req_host, reach(request.get_origin_req_host())):
00709         return True
00710     else:
00711         return False
00712 

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.iso2time (   text)
As for http2time, but parses the ISO 8601 formats:

1994-02-03 14:15:29 -0100    -- ISO 8601 format
1994-02-03 14:15:29          -- zone is optional
1994-02-03                   -- only date
1994-02-03T14:15:29          -- Use T as separator
19940203T141529Z             -- ISO 8601 compact format
19940203                     -- only date

Definition at line 292 of file cookiejar.py.

00292 
00293 def iso2time(text):
00294     """
00295     As for http2time, but parses the ISO 8601 formats:
00296 
00297     1994-02-03 14:15:29 -0100    -- ISO 8601 format
00298     1994-02-03 14:15:29          -- zone is optional
00299     1994-02-03                   -- only date
00300     1994-02-03T14:15:29          -- Use T as separator
00301     19940203T141529Z             -- ISO 8601 compact format
00302     19940203                     -- only date
00303 
00304     """
00305     # clean up
00306     text = text.lstrip()
00307 
00308     # tz is time zone specifier string
00309     day, mon, yr, hr, min, sec, tz = [None]*7
00310 
00311     # loose regexp parse
00312     m = ISO_DATE_RE.search(text)
00313     if m is not None:
00314         # XXX there's an extra bit of the timezone I'm ignoring here: is
00315         #   this the right thing to do?
00316         yr, mon, day, hr, min, sec, tz, _ = m.groups()
00317     else:
00318         return None  # bad format
00319 
00320     return _str2time(day, mon, yr, hr, min, sec, tz)
00321 
00322 
00323 # Header parsing
00324 # -----------------------------------------------------------------------------

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.join_header_words (   lists)
Do the inverse (almost) of the conversion done by split_header_words.

Takes a list of lists of (key, value) pairs and produces a single header
value.  Attribute values are quoted if needed.

>>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
'text/plain; charset="iso-8859/1"'
>>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
'text/plain, charset="iso-8859/1"'

Definition at line 420 of file cookiejar.py.

00420 
00421 def join_header_words(lists):
00422     """Do the inverse (almost) of the conversion done by split_header_words.
00423 
00424     Takes a list of lists of (key, value) pairs and produces a single header
00425     value.  Attribute values are quoted if needed.
00426 
00427     >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
00428     'text/plain; charset="iso-8859/1"'
00429     >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
00430     'text/plain, charset="iso-8859/1"'
00431 
00432     """
00433     headers = []
00434     for pairs in lists:
00435         attr = []
00436         for k, v in pairs:
00437             if v is not None:
00438                 if not re.search(r"^\w+$", v):
00439                     v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
00440                     v = '"%s"' % v
00441                 k = "%s=%s" % (k, v)
00442             attr.append(k)
00443         if attr: headers.append("; ".join(attr))
00444     return ", ".join(headers)

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.liberal_is_HDN (   text)
Return True if text is sort-of-like a host domain name.

For accepting/blocking domains.

Definition at line 559 of file cookiejar.py.

00559 
00560 def liberal_is_HDN(text):
00561     """Return True if text is a sort-of-like a host domain name.
00562 
00563     For accepting/blocking domains.
00564 
00565     """
00566     if IPV4_RE.search(text):
00567         return False
00568     return True

Here is the caller graph for this function:

def http.cookiejar.lwp_cookie_str (   cookie)
Return string representation of Cookie in the LWP cookie file format.

Actually, the format is extended a bit -- see module docstring.

Definition at line 1798 of file cookiejar.py.

01798 
01799 def lwp_cookie_str(cookie):
01800     """Return string representation of Cookie in an the LWP cookie file format.
01801 
01802     Actually, the format is extended a bit -- see module docstring.
01803 
01804     """
01805     h = [(cookie.name, cookie.value),
01806          ("path", cookie.path),
01807          ("domain", cookie.domain)]
01808     if cookie.port is not None: h.append(("port", cookie.port))
01809     if cookie.path_specified: h.append(("path_spec", None))
01810     if cookie.port_specified: h.append(("port_spec", None))
01811     if cookie.domain_initial_dot: h.append(("domain_dot", None))
01812     if cookie.secure: h.append(("secure", None))
01813     if cookie.expires: h.append(("expires",
01814                                time2isoz(float(cookie.expires))))
01815     if cookie.discard: h.append(("discard", None))
01816     if cookie.comment: h.append(("comment", cookie.comment))
01817     if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
01818 
01819     keys = sorted(cookie._rest.keys())
01820     for k in keys:
01821         h.append((k, str(cookie._rest[k])))
01822 
01823     h.append(("version", str(cookie.version)))
01824 
01825     return join_header_words([h])

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.offset_from_tz_string (   tz)

Definition at line 131 of file cookiejar.py.

00131 
00132 def offset_from_tz_string(tz):
00133     offset = None
00134     if tz in UTC_ZONES:
00135         offset = 0
00136     else:
00137         m = TIMEZONE_RE.search(tz)
00138         if m:
00139             offset = 3600 * int(m.group(2))
00140             if m.group(3):
00141                 offset = offset + 60 * int(m.group(3))
00142             if m.group(1) == '-':
00143                 offset = -offset
00144     return offset

Here is the caller graph for this function:

def http.cookiejar.parse_ns_headers (   ns_headers)
Ad-hoc parser for Netscape protocol cookie-attributes.

The old Netscape cookie format for Set-Cookie can for instance contain
an unquoted "," in the expires field, so we have to use this ad-hoc
parser instead of split_header_words.

XXX This may not make the best possible effort to parse all the crap
that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
parser is probably better, so could do worse than following that if
this ever gives any trouble.

Currently, this is also used for parsing RFC 2109 cookies.

Definition at line 452 of file cookiejar.py.

00452 
00453 def parse_ns_headers(ns_headers):
00454     """Ad-hoc parser for Netscape protocol cookie-attributes.
00455 
00456     The old Netscape cookie format for Set-Cookie can for instance contain
00457     an unquoted "," in the expires field, so we have to use this ad-hoc
00458     parser instead of split_header_words.
00459 
00460     XXX This may not make the best possible effort to parse all the crap
00461     that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
00462     parser is probably better, so could do worse than following that if
00463     this ever gives any trouble.
00464 
00465     Currently, this is also used for parsing RFC 2109 cookies.
00466 
00467     """
00468     known_attrs = ("expires", "domain", "path", "secure",
00469                    # RFC 2109 attrs (may turn up in Netscape cookies, too)
00470                    "version", "port", "max-age")
00471 
00472     result = []
00473     for ns_header in ns_headers:
00474         pairs = []
00475         version_set = False
00476         for ii, param in enumerate(re.split(r";\s*", ns_header)):
00477             param = param.rstrip()
00478             if param == "": continue
00479             if "=" not in param:
00480                 k, v = param, None
00481             else:
00482                 k, v = re.split(r"\s*=\s*", param, 1)
00483                 k = k.lstrip()
00484             if ii != 0:
00485                 lc = k.lower()
00486                 if lc in known_attrs:
00487                     k = lc
00488                 if k == "version":
00489                     # This is an RFC 2109 cookie.
00490                     v = strip_quotes(v)
00491                     version_set = True
00492                 if k == "expires":
00493                     # convert expires date to seconds since epoch
00494                     v = http2time(strip_quotes(v))  # None if invalid
00495             pairs.append((k, v))
00496 
00497         if pairs:
00498             if not version_set:
00499                 pairs.append(("version", "0"))
00500             result.append(pairs)
00501 
00502     return result
00503 

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.reach (   h)
Return reach of host h, as defined by RFC 2965, section 1.

The reach R of a host name H is defined as follows:

   *  If

      -  H is the host domain name of a host; and,

      -  H has the form A.B; and

      -  A has no embedded (that is, interior) dots; and

      -  B has at least one embedded dot, or B is the string "local".
         then the reach of H is .B.

   *  Otherwise, the reach of H is H.

>>> reach("www.acme.com")
'.acme.com'
>>> reach("acme.com")
'acme.com'
>>> reach("acme.local")
'.local'

Definition at line 661 of file cookiejar.py.

00661 
00662 def reach(h):
00663     """Return reach of host h, as defined by RFC 2965, section 1.
00664 
00665     The reach R of a host name H is defined as follows:
00666 
00667        *  If
00668 
00669           -  H is the host domain name of a host; and,
00670 
00671           -  H has the form A.B; and
00672 
00673           -  A has no embedded (that is, interior) dots; and
00674 
00675           -  B has at least one embedded dot, or B is the string "local".
00676              then the reach of H is .B.
00677 
00678        *  Otherwise, the reach of H is H.
00679 
00680     >>> reach("www.acme.com")
00681     '.acme.com'
00682     >>> reach("acme.com")
00683     'acme.com'
00684     >>> reach("acme.local")
00685     '.local'
00686 
00687     """
00688     i = h.find(".")
00689     if i >= 0:
00690         #a = h[:i]  # this line is only here to show what a is
00691         b = h[i+1:]
00692         i = b.find(".")
00693         if is_HDN(h) and (i >= 0 or b == "local"):
00694             return "."+b
00695     return h

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.request_host (   request)
Return request-host, as defined by RFC 2965.

Variation from RFC: returned value is lowercased, for convenient
comparison.

Definition at line 590 of file cookiejar.py.

00590 
00591 def request_host(request):
00592     """Return request-host, as defined by RFC 2965.
00593 
00594     Variation from RFC: returned value is lowercased, for convenient
00595     comparison.
00596 
00597     """
00598     url = request.get_full_url()
00599     host = urllib.parse.urlparse(url)[1]
00600     if host == "":
00601         host = request.get_header("Host", "")
00602 
00603     # remove port, if present
00604     host = cut_port_re.sub("", host, 1)
00605     return host.lower()

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.request_path (   request)
Path component of request-URI, as defined by RFC 2965.

Definition at line 617 of file cookiejar.py.

00617 
00618 def request_path(request):
00619     """Path component of request-URI, as defined by RFC 2965."""
00620     url = request.get_full_url()
00621     parts = urllib.parse.urlsplit(url)
00622     path = escape_path(parts.path)
00623     if not path.startswith("/"):
00624         # fix bad RFC 2396 absoluteURI
00625         path = "/" + path
00626     return path

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.request_port (   request)

Definition at line 627 of file cookiejar.py.

00627 
00628 def request_port(request):
00629     host = request.get_host()
00630     i = host.find(':')
00631     if i >= 0:
00632         port = host[i+1:]
00633         try:
00634             int(port)
00635         except ValueError:
00636             _debug("nonnumeric port: '%s'", port)
00637             return None
00638     else:
00639         port = DEFAULT_HTTP_PORT
00640     return port
00641 
00642 # Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.split_header_words (   header_values)
Parse header values into a list of lists containing key,value pairs.

Definition at line 334 of file cookiejar.py.

00334 
00335 def split_header_words(header_values):
00336     r"""Parse header values into a list of lists containing key,value pairs.
00337 
00338     The function knows how to deal with ",", ";" and "=" as well as quoted
00339     values after "=".  A list of space separated tokens are parsed as if they
00340     were separated by ";".
00341 
00342     If the header_values passed as argument contains multiple values, then they
00343     are treated as if they were a single value separated by comma ",".
00344 
00345     This means that this function is useful for parsing header fields that
00346     follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
00347     the requirement for tokens).
00348 
00349       headers           = #header
00350       header            = (token | parameter) *( [";"] (token | parameter))
00351 
00352       token             = 1*<any CHAR except CTLs or separators>
00353       separators        = "(" | ")" | "<" | ">" | "@"
00354                         | "," | ";" | ":" | "\" | <">
00355                         | "/" | "[" | "]" | "?" | "="
00356                         | "{" | "}" | SP | HT
00357 
00358       quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
00359       qdtext            = <any TEXT except <">>
00360       quoted-pair       = "\" CHAR
00361 
00362       parameter         = attribute "=" value
00363       attribute         = token
00364       value             = token | quoted-string
00365 
00366     Each header is represented by a list of key/value pairs.  The value for a
00367     simple token (not part of a parameter) is None.  Syntactically incorrect
00368     headers will not necessarily be parsed as you would want.
00369 
00370     This is easier to describe with some examples:
00371 
00372     >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
00373     [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
00374     >>> split_header_words(['text/html; charset="iso-8859-1"'])
00375     [[('text/html', None), ('charset', 'iso-8859-1')]]
00376     >>> split_header_words([r'Basic realm="\"foo\bar\""'])
00377     [[('Basic', None), ('realm', '"foobar"')]]
00378 
00379     """
00380     assert not isinstance(header_values, str)
00381     result = []
00382     for text in header_values:
00383         orig_text = text
00384         pairs = []
00385         while text:
00386             m = HEADER_TOKEN_RE.search(text)
00387             if m:
00388                 text = unmatched(m)
00389                 name = m.group(1)
00390                 m = HEADER_QUOTED_VALUE_RE.search(text)
00391                 if m:  # quoted value
00392                     text = unmatched(m)
00393                     value = m.group(1)
00394                     value = HEADER_ESCAPE_RE.sub(r"\1", value)
00395                 else:
00396                     m = HEADER_VALUE_RE.search(text)
00397                     if m:  # unquoted value
00398                         text = unmatched(m)
00399                         value = m.group(1)
00400                         value = value.rstrip()
00401                     else:
00402                         # no value, a lone token
00403                         value = None
00404                 pairs.append((name, value))
00405             elif text.lstrip().startswith(","):
00406                 # concatenated headers, as per RFC 2616 section 4.2
00407                 text = text.lstrip()[1:]
00408                 if pairs: result.append(pairs)
00409                 pairs = []
00410             else:
00411                 # skip junk
00412                 non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
00413                 assert nr_junk_chars > 0, (
00414                     "split_header_words bug: '%s', '%s', %s" %
00415                     (orig_text, text, pairs))
00416                 text = non_junk
00417         if pairs: result.append(pairs)
00418     return result

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.strip_quotes (   text)

Definition at line 445 of file cookiejar.py.

00445 
00446 def strip_quotes(text):
00447     if text.startswith('"'):
00448         text = text[1:]
00449     if text.endswith('"'):
00450         text = text[:-1]
00451     return text

Here is the caller graph for this function:

def http.cookiejar.time2isoz (   t = None)
Return a string representing time in seconds since epoch, t.

If the function is called without an argument, it will use the current
time.

The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
representing Universal Time (UTC, aka GMT).  An example of this format is:

1994-11-24 08:49:37Z

Definition at line 89 of file cookiejar.py.

00089 
def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    # Use timezone-aware constructors: datetime.utcnow() and
    # datetime.utcfromtimestamp() return naive objects and are deprecated
    # in recent Python versions.  The broken-down fields are the same.
    utc = datetime.timezone.utc
    if t is None:
        dt = datetime.datetime.now(tz=utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, tz=utc)
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.time2netscape (   t = None)
Return a string representing time in seconds since epoch, t.

If the function is called without an argument, it will use the current
time.

The format of the returned string is like this:

Wed, DD-Mon-YYYY HH:MM:SS GMT

Definition at line 108 of file cookiejar.py.

00108 
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    # Use timezone-aware constructors: datetime.utcnow() and
    # datetime.utcfromtimestamp() return naive objects and are deprecated
    # in recent Python versions.  The broken-down fields are the same.
    utc = datetime.timezone.utc
    if t is None:
        dt = datetime.datetime.now(tz=utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, tz=utc)
    # The weekday abbreviation is followed by a comma, as shown in the
    # documented format above.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1],
        dt.year, dt.hour, dt.minute, dt.second)
00127 

Here is the call graph for this function:

Here is the caller graph for this function:

def http.cookiejar.unmatched (   match)
Return unmatched part of re.Match object.

Definition at line 325 of file cookiejar.py.

00325 
def unmatched(match):
    """Return the subject string of a match object with the match cut out.

    The text before the start of the whole match (group 0) and the text
    after its end are concatenated and returned.
    """
    subject = match.string
    begin = match.start(0)
    stop = match.end(0)
    return subject[:begin] + subject[stop:]

Here is the caller graph for this function:

Definition at line 645 of file cookiejar.py.

00645 
def uppercase_escaped_char(match):
    """Return '%' followed by the upper-cased text of group 1 of match.

    NOTE(review): presumably used as a substitution callback with
    ESCAPED_CHAR_RE, in which case group 1 is the two hex digits of a
    %XX escape -- confirm against the caller.
    """
    upper_hex = match.group(1).upper()
    return "%%%s" % upper_hex
def http.cookiejar.user_domain_match (   A,
  B 
)
For blocking/accepting domains.

A and B may be host domain names or IP addresses.

Definition at line 569 of file cookiejar.py.

00569 
def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.  The comparison is
    case-insensitive.  Returns True when A matches B, False otherwise:

    - if either value is not accepted by liberal_is_HDN(), only exact
      equality matches (e.g. equal IP addresses);
    - if B starts with a dot, A matches when it ends with B;
    - otherwise A must equal B.

    """
    host = A.lower()
    pattern = B.lower()
    both_hdn = liberal_is_HDN(host) and liberal_is_HDN(pattern)
    if not both_hdn:
        # equal IP addresses
        return host == pattern
    if pattern.startswith("."):
        # a leading dot means suffix matching
        return host.endswith(pattern)
    return host == pattern

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 1181 of file cookiejar.py.

01181 
def vals_sorted_by_key(adict):
    """Return an iterator over the values of adict, ordered by sorted key.

    The keys are sorted immediately; the values are looked up lazily as
    the returned map object is consumed.
    """
    return map(adict.get, sorted(adict.keys()))

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

Initial value:
00001 ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
00002            'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']

Definition at line 28 of file cookiejar.py.

tuple http.cookiejar.cut_port_re = re.compile(r":\d+$", re.ASCII)

Definition at line 589 of file cookiejar.py.

list http.cookiejar.DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

Definition at line 83 of file cookiejar.py.

Definition at line 43 of file cookiejar.py.

Definition at line 56 of file cookiejar.py.

Definition at line 74 of file cookiejar.py.

tuple http.cookiejar.ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")

Definition at line 644 of file cookiejar.py.

Definition at line 333 of file cookiejar.py.

Definition at line 419 of file cookiejar.py.

tuple http.cookiejar.HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")

Definition at line 331 of file cookiejar.py.

tuple http.cookiejar.HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)")

Definition at line 330 of file cookiejar.py.

tuple http.cookiejar.HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)")

Definition at line 332 of file cookiejar.py.

string http.cookiejar.HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"

Definition at line 643 of file cookiejar.py.

tuple http.cookiejar.IPV4_RE = re.compile(r"\.\d+$", re.ASCII)

Definition at line 504 of file cookiejar.py.

Initial value:
00001 re.compile(
00002     """^
00003     (\d{4})              # year
00004        [-\/]?
00005     (\d\d?)              # numerical month
00006        [-\/]?
00007     (\d\d?)              # day
00008    (?:
00009          (?:\s+|[-:Tt])  # separator before clock
00010       (\d\d?):?(\d\d)    # hour:min
00011       (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
00012    )?                    # optional clock
00013       \s*
00014    ([-+]?\d\d?:?(:?\d\d)?
00015     |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
00016       \s*$""", re.X | re. ASCII)

Definition at line 276 of file cookiejar.py.

Definition at line 44 of file cookiejar.py.

Initial value:
00001 re.compile(
00002     r"""^
00003     (\d\d?)            # day
00004        (?:\s+|[-\/])
00005     (\w+)              # month
00006         (?:\s+|[-\/])
00007     (\d+)              # year
00008     (?:
00009           (?:\s+|:)    # separator before clock
00010        (\d\d?):(\d\d)  # hour:min
00011        (?::(\d\d))?    # optional seconds
00012     )?                 # optional clock
00013        \s*
00014     ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
00015        \s*
00016     (?:\(\w+\))?       # ASCII representation of timezone in parens.
00017        \s*$""", re.X | re.ASCII)

Definition at line 203 of file cookiejar.py.

Initial value:
00001 ("a filename was not supplied (nor was the CookieJar "
00002                          "instance initialised with one)")

Definition at line 57 of file cookiejar.py.

Initial value:
00001 ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
00002           "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

Definition at line 84 of file cookiejar.py.

Definition at line 86 of file cookiejar.py.

Initial value:
00001 re.compile(
00002     r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
00003     "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)

Definition at line 198 of file cookiejar.py.

tuple http.cookiejar.TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII)

Definition at line 130 of file cookiejar.py.

dictionary http.cookiejar.UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

Definition at line 128 of file cookiejar.py.

Initial value:
00001 re.compile(
00002     r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII)

Definition at line 201 of file cookiejar.py.