Back to index

python3.2  3.2.2
Classes | Functions | Variables
encodings.punycode Namespace Reference

Classes

class  Codec
 Codec APIs. More...
class  IncrementalEncoder
class  IncrementalDecoder
class  StreamWriter
class  StreamReader

Functions

def segregate
 Encoding #####################################.
def selective_len
def selective_find
def insertion_unsort
def T
def generate_generalized_integer
def adapt
def generate_integers
def punycode_encode
def decode_generalized_number
 Decoding #####################################.
def insertion_sort
def punycode_decode
def getregentry

Variables

string digits = "abcdefghijklmnopqrstuvwxyz0123456789"

Detailed Description

Codec for the Punycode encoding, as specified in RFC 3492

Written by Martin v. Löwis.

Function Documentation

def encodings.punycode.adapt (   delta,
  first,
  numchars 
)

Definition at line 91 of file punycode.py.

00091 
def adapt(delta, first, numchars):
    """Return the bias to use after a delta has been coded.

    delta is the delta that was just processed.  first is true for the
    first delta of a string; that delta is divided by 700 (damp) instead
    of being halved.  numchars is the divisor used to scale delta up
    before the bias is derived.
    """
    # Punycode parameters: damp = 700, base = 36, tmin = 1, tmax = 26,
    # skew = 38
    delta //= 700 if first else 2
    delta += delta // numchars
    divisions = 0
    # ((base - tmin) * tmax) // 2 == 455
    while delta > 455:
        delta //= 35  # base - tmin
        divisions += 36  # base
    return divisions + (36 * delta // (delta + 38))
00105 

Here is the caller graph for this function:

def encodings.punycode.decode_generalized_number (   extended,
  extpos,
  bias,
  errors 
)

Decoding #####################################.

3.3 Generalized variable-length integers

Definition at line 127 of file punycode.py.

00127 
def decode_generalized_number(extended, extpos, bias, errors):
    """3.3 Generalized variable-length integers

    Decode one integer from extended, starting at index extpos.

    Returns a pair (newpos, value).  newpos is the index just past the
    characters that were consumed.  value is the decoded integer, or None
    if the input ended early or contained an invalid digit and errors is
    not "strict".

    Raises UnicodeError when errors is "strict" and the input is
    truncated or contains a character outside A-Z / 0-9.
    """
    result = 0
    w = 1
    j = 0
    while 1:
        try:
            char = ord(extended[extpos])
        except IndexError:
            if errors == "strict":
                raise UnicodeError("incomplete punycode string")
            return extpos + 1, None
        extpos += 1
        if 0x41 <= char <= 0x5A:  # A-Z
            digit = char - 0x41
        elif 0x30 <= char <= 0x39:  # 0-9
            digit = char - 22  # 0x30-26
        elif errors == "strict":
            # extpos was already advanced, so the offending character is
            # the one just before it.
            raise UnicodeError("Invalid extended code point '%s'"
                               % extended[extpos - 1])
        else:
            return extpos, None
        t = T(j, bias)
        result += digit * w
        if digit < t:
            return extpos, result
        w = w * (36 - t)
        j += 1
00156 

Here is the call graph for this function:

Here is the caller graph for this function:

3.3 Generalized variable-length integers

Definition at line 78 of file punycode.py.

00078 
def generate_generalized_integer(N, bias):
    """3.3 Generalized variable-length integers

    Encode the non-negative integer N as a sequence of base-36 digits
    whose per-position thresholds come from T(j, bias), and return the
    digits as a bytes object.
    """
    # NOTE(review): bytearray.append needs an int, so this assumes the
    # module-level digits is a bytes object -- confirm against the module.
    result = bytearray()
    j = 0
    while 1:
        t = T(j, bias)
        if N < t:
            # Final digit: a value below the threshold ends the number.
            result.append(digits[N])
            return bytes(result)
        N, remainder = divmod(N - t, 36 - t)
        result.append(digits[t + remainder])
        j += 1

Here is the call graph for this function:

Here is the caller graph for this function:

def encodings.punycode.generate_integers (   baselen,
  deltas 
)
3.4 Bias adaptation

Definition at line 106 of file punycode.py.

00106 
def generate_integers(baselen, deltas):
    """3.4 Bias adaptation

    Encode every delta in deltas as a generalized variable-length
    integer, re-adapting the bias after each one, and return the
    concatenated digits as bytes.  baselen is the number of basic
    characters; it feeds the character count passed to adapt().
    """
    # Punycode parameters: initial bias = 72, damp = 700, skew = 38
    result = bytearray()
    bias = 72
    for points, delta in enumerate(deltas):
        result.extend(generate_generalized_integer(delta, bias))
        # points == 0 marks the first delta, which adapt() damps harder.
        bias = adapt(delta, points == 0, baselen + points + 1)
    return bytes(result)

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 228 of file punycode.py.

00228 
def getregentry():
    """Return the codecs.CodecInfo record describing the punycode codec."""
    return codecs.CodecInfo(
        name='punycode',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
def encodings.punycode.insertion_sort (   base,
  extended,
  errors 
)
3.2 Insertion unsort coding

Definition at line 157 of file punycode.py.

00157 
def insertion_sort(base, extended, errors):
    """3.2 Insertion unsort coding

    Decode the deltas in extended and insert the characters they
    describe into base, returning the resulting string.

    If a delta cannot be decoded and errors is not "strict", the string
    built so far is returned.  A code point above U+10FFFF raises
    UnicodeError when errors is "strict" and is replaced by '?' otherwise.
    """
    char = 0x80
    pos = -1
    bias = 72
    extpos = 0
    while extpos < len(extended):
        newpos, delta = decode_generalized_number(extended, extpos,
                                                  bias, errors)
        if delta is None:
            # There was an error in decoding. We can't continue because
            # synchronization is lost.
            return base
        pos += delta + 1
        # Each full pass over the len(base) + 1 insertion slots moves on
        # to the next code point; the remainder is the insertion index.
        wraps, pos = divmod(pos, len(base) + 1)
        char += wraps
        if char > 0x10FFFF:
            if errors == "strict":
                raise UnicodeError("Invalid character U+%x" % char)
            char = ord('?')
        base = base[:pos] + chr(char) + base[pos:]
        bias = adapt(delta, (extpos == 0), len(base))
        extpos = newpos
    return base

Here is the call graph for this function:

Here is the caller graph for this function:

def encodings.punycode.insertion_unsort (   str,
  extended 
)
3.2 Insertion unsort coding

Definition at line 48 of file punycode.py.

00048 
def insertion_unsort(str, extended):
    """3.2 Insertion unsort coding

    Return the list of deltas describing where each character of
    extended occurs in str.  extended is iterated in the order given;
    one delta is produced per occurrence found by selective_find().
    """
    oldchar = 0x80
    result = []
    oldindex = -1
    for c in extended:
        index = pos = -1
        char = ord(c)
        curlen = selective_len(str, char)
        # Skipping from the previous code point to this one costs one
        # full pass over the curlen + 1 slots per code point skipped.
        delta = (curlen + 1) * (char - oldchar)
        while 1:
            index, pos = selective_find(str, c, index, pos)
            if index == -1:
                break
            delta += index - oldindex
            result.append(delta - 1)
            oldindex = index
            delta = 0
        oldchar = char

    return result

Here is the call graph for this function:

Here is the caller graph for this function:

def encodings.punycode.punycode_decode (   text,
  errors 
)

Definition at line 182 of file punycode.py.

00182 
def punycode_decode(text, errors):
    """Decode punycode text and return the resulting string.

    text may be a str, a memoryview, or an object with a bytes-style
    rfind().  Everything before the last b"-" is the basic part, decoded
    as ASCII with the given errors policy; everything after it is the
    extended part, which is upper-cased before being handed to
    insertion_sort().
    """
    if isinstance(text, str):
        text = text.encode("ascii")
    if isinstance(text, memoryview):
        text = bytes(text)
    pos = text.rfind(b"-")
    if pos == -1:
        # No delimiter: the whole input is the extended part.
        base = ""
        extended = str(text, "ascii").upper()
    else:
        base = str(text[:pos], "ascii", errors)
        extended = str(text[pos + 1:], "ascii").upper()
    return insertion_sort(base, extended, errors)

Here is the call graph for this function:

Here is the caller graph for this function:

Definition at line 117 of file punycode.py.

00117 
def punycode_encode(text):
    """Encode text as punycode and return the result as bytes.

    The basic characters come first, followed by b"-" and the encoded
    deltas.  When there are no basic characters, only the encoded deltas
    are returned, with no delimiter.
    """
    base, extended = segregate(text)
    deltas = insertion_unsort(text, extended)
    extended = generate_integers(len(base), deltas)
    if not base:
        return extended
    return base + b"-" + extended

Here is the call graph for this function:

Here is the caller graph for this function:

Encoding #####################################.

3.1 Basic code point segregation

Definition at line 10 of file punycode.py.

00010 
def segregate(str):
    """3.1 Basic code point segregation

    Split str into its basic and extended characters.  Returns a pair
    (base, extended): base is a bytes object holding every character
    whose ordinal is below 128, in original order; extended is a sorted
    list of the distinct remaining characters.
    """
    base = bytearray()
    extended = set()
    for c in str:
        code = ord(c)
        if code < 128:
            base.append(code)
        else:
            extended.add(c)
    return bytes(base), sorted(extended)

Here is the call graph for this function:

Here is the caller graph for this function:

def encodings.punycode.selective_find (   str,
  char,
  index,
  pos 
)
Return a pair (index, pos), indicating the next occurrence of
char in str. index is the position of the character considering
only ordinals up to and including char, and pos is the position in
the full string. index/pos is the starting position in the full
string.

Definition at line 30 of file punycode.py.

00030 
def selective_find(str, char, index, pos):
    """Return a pair (index, pos), indicating the next occurrence of
    char in str. index is the position of the character considering
    only ordinals up to and including char, and pos is the position in
    the full string. index/pos is the starting position in the full
    string.

    Returns (-1, -1) when char does not occur after position pos.
    """
    # The search starts just past the given pos; a start at or beyond the
    # end of str simply finds nothing.
    for pos in range(pos + 1, len(str)):
        c = str[pos]
        if c == char:
            return index + 1, pos
        if c < char:
            # Characters below char count toward the selective index.
            index += 1
    return (-1, -1)

Here is the caller graph for this function:

def encodings.punycode.selective_len (   str,
  max 
)
Return the length of str, considering only characters below max.

Definition at line 22 of file punycode.py.

00022 
def selective_len(str, max):
    """Return the length of str, considering only characters below max.

    max is an integer code point; a character counts when its ordinal is
    strictly less than max.
    """
    return sum(1 for c in str if ord(c) < max)

Here is the call graph for this function:

Here is the caller graph for this function:

def encodings.punycode.T (   j,
  bias 
)

Definition at line 70 of file punycode.py.

00070 
def T(j, bias):
    """Return the threshold for digit position j under the given bias.

    The raw value 36 * (j + 1) - bias is clamped to the range 1..26.
    """
    # Punycode parameters: tmin = 1, tmax = 26, base = 36
    return min(max(36 * (j + 1) - bias, 1), 26)

Here is the caller graph for this function:


Variable Documentation

string encodings.punycode.digits = "abcdefghijklmnopqrstuvwxyz0123456789"

Definition at line 77 of file punycode.py.