Back to index

python3.2  3.2.2
test.test_codeccallbacks.CodecCallbackTest Class Reference

List of all members.

Public Member Functions

def test_xmlcharrefreplace
def test_xmlcharnamereplace
def test_uninamereplace
def test_backslashescape
def test_decoding_callbacks
def test_charmapencode
def test_decodeunicodeinternal
def test_callbacks
def test_longstrings
def check_exceptionobjectargs
def test_unicodeencodeerror
def test_unicodedecodeerror
def test_unicodetranslateerror
def test_badandgoodstrictexceptions
def test_badandgoodignoreexceptions
def test_badandgoodreplaceexceptions
def test_badandgoodxmlcharrefreplaceexceptions
def test_badandgoodbackslashreplaceexceptions
def test_badhandlerresults
def test_lookup
def test_unencodablereplacement
def test_badregistercall
def test_badlookupcall
def test_unknownhandler
def test_xmlcharrefvalues
def test_decodehelper
def test_encodehelper
def test_translatehelper
def test_bug828737
def test_mutatingdecodehandler

Detailed Description

Definition at line 63 of file test_codeccallbacks.py.


Member Function Documentation

def test.test_codeccallbacks.CodecCallbackTest.check_exceptionobjectargs (   self,
  exctype,
  args,
  msg 
)

Definition at line 307 of file test_codeccallbacks.py.

00307 
00308     def check_exceptionobjectargs(self, exctype, args, msg):
00309         # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
00310         # check with one missing argument
00311         self.assertRaises(TypeError, exctype, *args[:-1])
00312         # check with one argument too many
00313         self.assertRaises(TypeError, exctype, *(args + ["too much"]))
00314         # check with one argument of the wrong type
00315         wrongargs = [ "spam", b"eggs", b"spam", 42, 1.0, None ]
00316         for i in range(len(args)):
00317             for wrongarg in wrongargs:
00318                 if type(wrongarg) is type(args[i]):
00319                     continue
00320                 # build argument array
00321                 callargs = []
00322                 for j in range(len(args)):
00323                     if i==j:
00324                         callargs.append(wrongarg)
00325                     else:
00326                         callargs.append(args[i])
00327                 self.assertRaises(TypeError, exctype, *callargs)
00328 
00329         # check with the correct number and type of arguments
00330         exc = exctype(*args)
00331         self.assertEqual(str(exc), msg)

Here is the call graph for this function:

Here is the caller graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_backslashescape ( self )

Definition at line 135 of file test_codeccallbacks.py.

00135 
00136     def test_backslashescape(self):
00137         # Does the same as the "unicode-escape" encoding, but with different
00138         # base encodings.
00139         sin = "a\xac\u1234\u20ac\u8000"
00140         if sys.maxunicode > 0xffff:
00141             sin += chr(sys.maxunicode)
00142         sout = b"a\\xac\\u1234\\u20ac\\u8000"
00143         if sys.maxunicode > 0xffff:
00144             sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
00145         self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
00146 
00147         sout = b"a\xac\\u1234\\u20ac\\u8000"
00148         if sys.maxunicode > 0xffff:
00149             sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
00150         self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
00151 
00152         sout = b"a\xac\\u1234\xa4\\u8000"
00153         if sys.maxunicode > 0xffff:
00154             sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
00155         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_badandgoodbackslashreplaceexceptions ( self )

Definition at line 530 of file test_codeccallbacks.py.

00530 
00531     def test_badandgoodbackslashreplaceexceptions(self):
00532         # "backslashreplace" complains about a non-exception passed in
00533         self.assertRaises(
00534            TypeError,
00535            codecs.backslashreplace_errors,
00536            42
00537         )
00538         # "backslashreplace" complains about the wrong exception types
00539         self.assertRaises(
00540            TypeError,
00541            codecs.backslashreplace_errors,
00542            UnicodeError("ouch")
00543         )
00544         # "backslashreplace" can only be used for encoding
00545         self.assertRaises(
00546             TypeError,
00547             codecs.backslashreplace_errors,
00548             UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
00549         )
00550         self.assertRaises(
00551             TypeError,
00552             codecs.backslashreplace_errors,
00553             UnicodeTranslateError("\u3042", 0, 1, "ouch")
00554         )
00555         # Use the correct exception
00556         self.assertEqual(
00557             codecs.backslashreplace_errors(
00558                 UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
00559             ("\\u3042", 1)
00560         )
00561         self.assertEqual(
00562             codecs.backslashreplace_errors(
00563                 UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")),
00564             ("\\x00", 1)
00565         )
00566         self.assertEqual(
00567             codecs.backslashreplace_errors(
00568                 UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")),
00569             ("\\xff", 1)
00570         )
00571         self.assertEqual(
00572             codecs.backslashreplace_errors(
00573                 UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")),
00574             ("\\u0100", 1)
00575         )
00576         self.assertEqual(
00577             codecs.backslashreplace_errors(
00578                 UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
00579             ("\\uffff", 1)
00580         )
00581         # 1 on UCS-4 builds, 2 on UCS-2
00582         len_wide = len("\U00010000")
00583         self.assertEqual(
00584             codecs.backslashreplace_errors(
00585                 UnicodeEncodeError("ascii", "\U00010000",
00586                                    0, len_wide, "ouch")),
00587             ("\\U00010000", len_wide)
00588         )
00589         self.assertEqual(
00590             codecs.backslashreplace_errors(
00591                 UnicodeEncodeError("ascii", "\U0010ffff",
00592                                    0, len_wide, "ouch")),
00593             ("\\U0010ffff", len_wide)
00594         )
00595         # Lone surrogates (regardless of unicode width)
00596         self.assertEqual(
00597             codecs.backslashreplace_errors(
00598                 UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")),
00599             ("\\ud800", 1)
00600         )
00601         self.assertEqual(
00602             codecs.backslashreplace_errors(
00603                 UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")),
00604             ("\\udfff", 1)
00605         )

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_badandgoodignoreexceptions ( self )

Definition at line 426 of file test_codeccallbacks.py.

00426 
00427     def test_badandgoodignoreexceptions(self):
00428         # "ignore" complains about a non-exception passed in
00429         self.assertRaises(
00430            TypeError,
00431            codecs.ignore_errors,
00432            42
00433         )
00434         # "ignore" complains about the wrong exception type
00435         self.assertRaises(
00436            TypeError,
00437            codecs.ignore_errors,
00438            UnicodeError("ouch")
00439         )
00440         # If the correct exception is passed in, "ignore" returns an empty replacement
00441         self.assertEqual(
00442             codecs.ignore_errors(
00443                 UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
00444             ("", 1)
00445         )
00446         self.assertEqual(
00447             codecs.ignore_errors(
00448                 UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")),
00449             ("", 1)
00450         )
00451         self.assertEqual(
00452             codecs.ignore_errors(
00453                 UnicodeTranslateError("\u3042", 0, 1, "ouch")),
00454             ("", 1)
00455         )

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_badandgoodreplaceexceptions ( self )

Definition at line 456 of file test_codeccallbacks.py.

00456 
00457     def test_badandgoodreplaceexceptions(self):
00458         # "replace" complains about a non-exception passed in
00459         self.assertRaises(
00460            TypeError,
00461            codecs.replace_errors,
00462            42
00463         )
00464         # "replace" complains about the wrong exception type
00465         self.assertRaises(
00466            TypeError,
00467            codecs.replace_errors,
00468            UnicodeError("ouch")
00469         )
00470         self.assertRaises(
00471             TypeError,
00472             codecs.replace_errors,
00473             BadObjectUnicodeEncodeError()
00474         )
00475         self.assertRaises(
00476             TypeError,
00477             codecs.replace_errors,
00478             BadObjectUnicodeDecodeError()
00479         )
00480         # With the correct exception, "replace" returns an "?" or "\ufffd" replacement
00481         self.assertEqual(
00482             codecs.replace_errors(
00483                 UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
00484             ("?", 1)
00485         )
00486         self.assertEqual(
00487             codecs.replace_errors(
00488                 UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")),
00489             ("\ufffd", 1)
00490         )
00491         self.assertEqual(
00492             codecs.replace_errors(
00493                 UnicodeTranslateError("\u3042", 0, 1, "ouch")),
00494             ("\ufffd", 1)
00495         )

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_badandgoodstrictexceptions ( self )

Definition at line 405 of file test_codeccallbacks.py.

00405 
00406     def test_badandgoodstrictexceptions(self):
00407         # "strict" complains about a non-exception passed in
00408         self.assertRaises(
00409             TypeError,
00410             codecs.strict_errors,
00411             42
00412         )
00413         # "strict" complains about the wrong exception type
00414         self.assertRaises(
00415             Exception,
00416             codecs.strict_errors,
00417             Exception("ouch")
00418         )
00419 
00420         # If the correct exception is passed in, "strict" raises it
00421         self.assertRaises(
00422             UnicodeEncodeError,
00423             codecs.strict_errors,
00424             UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")
00425         )

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_badandgoodxmlcharrefreplaceexceptions ( self )

Definition at line 496 of file test_codeccallbacks.py.

00496 
00497     def test_badandgoodxmlcharrefreplaceexceptions(self):
00498         # "xmlcharrefreplace" complains about a non-exception passed in
00499         self.assertRaises(
00500            TypeError,
00501            codecs.xmlcharrefreplace_errors,
00502            42
00503         )
00504         # "xmlcharrefreplace" complains about the wrong exception types
00505         self.assertRaises(
00506            TypeError,
00507            codecs.xmlcharrefreplace_errors,
00508            UnicodeError("ouch")
00509         )
00510         # "xmlcharrefreplace" can only be used for encoding
00511         self.assertRaises(
00512             TypeError,
00513             codecs.xmlcharrefreplace_errors,
00514             UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
00515         )
00516         self.assertRaises(
00517             TypeError,
00518             codecs.xmlcharrefreplace_errors,
00519             UnicodeTranslateError("\u3042", 0, 1, "ouch")
00520         )
00521         # Use the correct exception
00522         cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
00523         s = "".join(chr(c) for c in cs)
00524         self.assertEqual(
00525             codecs.xmlcharrefreplace_errors(
00526                 UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
00527             ),
00528             ("".join("&#%d;" % ord(c) for c in s), len(s))
00529         )

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_badhandlerresults ( self )

Definition at line 606 of file test_codeccallbacks.py.

00606 
00607     def test_badhandlerresults(self):
00608         results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
00609         encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
00610 
00611         for res in results:
00612             codecs.register_error("test.badhandler", lambda x: res)
00613             for enc in encs:
00614                 self.assertRaises(
00615                     TypeError,
00616                     "\u3042".encode,
00617                     enc,
00618                     "test.badhandler"
00619                 )
00620             for (enc, bytes) in (
00621                 ("ascii", b"\xff"),
00622                 ("utf-8", b"\xff"),
00623                 ("utf-7", b"+x-"),
00624                 ("unicode-internal", b"\x00"),
00625             ):
00626                 self.assertRaises(
00627                     TypeError,
00628                     bytes.decode,
00629                     enc,
00630                     "test.badhandler"
00631                 )

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_badlookupcall ( self )

Definition at line 667 of file test_codeccallbacks.py.

00667 
00668     def test_badlookupcall(self):
00669         # enhance coverage of:
00670         # Modules/_codecsmodule.c::lookup_error()
00671         self.assertRaises(TypeError, codecs.lookup_error)

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_badregistercall ( self )

Definition at line 660 of file test_codeccallbacks.py.

00660 
00661     def test_badregistercall(self):
00662         # enhance coverage of:
00663         # Modules/_codecsmodule.c::register_error()
00664         # Python/codecs.c::PyCodec_RegisterError()
00665         self.assertRaises(TypeError, codecs.register_error, 42)
00666         self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_bug828737 ( self )

Definition at line 815 of file test_codeccallbacks.py.

00815 
00816     def test_bug828737(self):
00817         charmap = {
00818             ord("&"): "&",
00819             ord("<"): "&lt;",
00820             ord(">"): "&gt;",
00821             ord('"'): "&quot;",
00822         }
00823 
00824         for n in (1, 10, 100, 1000):
00825             text = 'abc<def>ghi'*n
00826             text.translate(charmap)

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_callbacks ( self )

Definition at line 231 of file test_codeccallbacks.py.

00231 
00232     def test_callbacks(self):
00233         def handler1(exc):
00234             r = range(exc.start, exc.end)
00235             if isinstance(exc, UnicodeEncodeError):
00236                 l = ["<%d>" % ord(exc.object[pos]) for pos in r]
00237             elif isinstance(exc, UnicodeDecodeError):
00238                 l = ["<%d>" % exc.object[pos] for pos in r]
00239             else:
00240                 raise TypeError("don't know how to handle %r" % exc)
00241             return ("[%s]" % "".join(l), exc.end)
00242 
00243         codecs.register_error("test.handler1", handler1)
00244 
00245         def handler2(exc):
00246             if not isinstance(exc, UnicodeDecodeError):
00247                 raise TypeError("don't know how to handle %r" % exc)
00248             l = ["<%d>" % exc.object[pos] for pos in range(exc.start, exc.end)]
00249             return ("[%s]" % "".join(l), exc.end+1) # skip one character
00250 
00251         codecs.register_error("test.handler2", handler2)
00252 
00253         s = b"\x00\x81\x7f\x80\xff"
00254 
00255         self.assertEqual(
00256             s.decode("ascii", "test.handler1"),
00257             "\x00[<129>]\x7f[<128>][<255>]"
00258         )
00259         self.assertEqual(
00260             s.decode("ascii", "test.handler2"),
00261             "\x00[<129>][<128>]"
00262         )
00263 
00264         self.assertEqual(
00265             b"\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
00266             "\u3042[<92><117><51><120>]xx"
00267         )
00268 
00269         self.assertEqual(
00270             b"\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
00271             "\u3042[<92><117><51><120><120>]"
00272         )
00273 
00274         self.assertEqual(
00275             codecs.charmap_decode(b"abc", "test.handler1", {ord("a"): "z"})[0],
00276             "z[<98>][<99>]"
00277         )
00278 
00279         self.assertEqual(
00280             "g\xfc\xdfrk".encode("ascii", "test.handler1"),
00281             b"g[<252><223>]rk"
00282         )
00283 
00284         self.assertEqual(
00285             "g\xfc\xdf".encode("ascii", "test.handler1"),
00286             b"g[<252><223>]"
00287         )

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_charmapencode ( self )

Definition at line 181 of file test_codeccallbacks.py.

00181 
00182     def test_charmapencode(self):
00183         # For charmap encodings the replacement string will be
00184         # mapped through the encoding again. This means, that
00185         # to be able to use e.g. the "replace" handler, the
00186         # charmap has to have a mapping for "?".
00187         charmap = dict((ord(c), bytes(2*c.upper(), 'ascii')) for c in "abcdefgh")
00188         sin = "abc"
00189         sout = b"AABBCC"
00190         self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
00191 
00192         sin = "abcA"
00193         self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
00194 
00195         charmap[ord("?")] = b"XYZ"
00196         sin = "abcDEF"
00197         sout = b"AABBCCXYZXYZXYZ"
00198         self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
00199 
00200         charmap[ord("?")] = "XYZ" # wrong type in mapping
00201         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_decodehelper ( self )

Definition at line 690 of file test_codeccallbacks.py.

00690 
00691     def test_decodehelper(self):
00692         # enhance coverage of:
00693         # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
00694         # and callers
00695         self.assertRaises(LookupError, b"\xff".decode, "ascii", "test.unknown")
00696 
00697         def baddecodereturn1(exc):
00698             return 42
00699         codecs.register_error("test.baddecodereturn1", baddecodereturn1)
00700         self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn1")
00701         self.assertRaises(TypeError, b"\\".decode, "unicode-escape", "test.baddecodereturn1")
00702         self.assertRaises(TypeError, b"\\x0".decode, "unicode-escape", "test.baddecodereturn1")
00703         self.assertRaises(TypeError, b"\\x0y".decode, "unicode-escape", "test.baddecodereturn1")
00704         self.assertRaises(TypeError, b"\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1")
00705         self.assertRaises(TypeError, b"\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")
00706 
00707         def baddecodereturn2(exc):
00708             return ("?", None)
00709         codecs.register_error("test.baddecodereturn2", baddecodereturn2)
00710         self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn2")
00711 
00712         handler = PosReturn()
00713         codecs.register_error("test.posreturn", handler.handle)
00714 
00715         # Valid negative position
00716         handler.pos = -1
00717         self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0")
00718 
00719         # Valid negative position
00720         handler.pos = -2
00721         self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?><?>")
00722 
00723         # Negative position out of bounds
00724         handler.pos = -3
00725         self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn")
00726 
00727         # Valid positive position
00728         handler.pos = 1
00729         self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0")
00730 
00731         # Largest valid positive position (one beyond end of input)
00732         handler.pos = 2
00733         self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>")
00734 
00735         # Invalid positive position
00736         handler.pos = 3
00737         self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn")
00738 
00739         # Restart at the "0"
00740         handler.pos = 6
00741         self.assertEqual(b"\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), "<?>0")
00742 
00743         class D(dict):
00744             def __getitem__(self, key):
00745                 raise ValueError
00746         self.assertRaises(UnicodeError, codecs.charmap_decode, b"\xff", "strict", {0xff: None})
00747         self.assertRaises(ValueError, codecs.charmap_decode, b"\xff", "strict", D())
00748         self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict", {0xff: sys.maxunicode+1})

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_decodeunicodeinternal ( self )

Definition at line 202 of file test_codeccallbacks.py.

00202 
00203     def test_decodeunicodeinternal(self):
00204         self.assertRaises(
00205             UnicodeDecodeError,
00206             b"\x00\x00\x00\x00\x00".decode,
00207             "unicode-internal",
00208         )
00209         if sys.maxunicode > 0xffff:
00210             def handler_unicodeinternal(exc):
00211                 if not isinstance(exc, UnicodeDecodeError):
00212                     raise TypeError("don't know how to handle %r" % exc)
00213                 return ("\x01", 1)
00214 
00215             self.assertEqual(
00216                 b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
00217                 "\u0000"
00218             )
00219 
00220             self.assertEqual(
00221                 b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
00222                 "\u0000\ufffd"
00223             )
00224 
00225             codecs.register_error("test.hui", handler_unicodeinternal)
00226 
00227             self.assertEqual(
00228                 b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
00229                 "\u0000\u0001\u0000"
00230             )

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_decoding_callbacks ( self )

Definition at line 156 of file test_codeccallbacks.py.

00156 
00157     def test_decoding_callbacks(self):
00158         # This is a test for a decoding callback handler
00159         # that allows the decoding of the invalid sequence
00160         # "\xc0\x80" and returns "\x00" instead of raising an error.
00161         # All other illegal sequences will be handled strictly.
00162         def relaxedutf8(exc):
00163             if not isinstance(exc, UnicodeDecodeError):
00164                 raise TypeError("don't know how to handle %r" % exc)
00165             if exc.object[exc.start:exc.start+2] == b"\xc0\x80":
00166                 return ("\x00", exc.start+2) # retry after two bytes
00167             else:
00168                 raise exc
00169 
00170         codecs.register_error("test.relaxedutf8", relaxedutf8)
00171 
00172         # all the "\xc0\x80" will be decoded to "\x00"
00173         sin = b"a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
00174         sout = "a\x00b\x00c\xfc\x00\x00"
00175         self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
00176 
00177         # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised
00178         sin = b"\xc0\x80\xc0\x81"
00179         self.assertRaises(UnicodeDecodeError, sin.decode,
00180                           "utf-8", "test.relaxedutf8")

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_encodehelper ( self )

Definition at line 749 of file test_codeccallbacks.py.

00749 
00750     def test_encodehelper(self):
00751         # enhance coverage of:
00752         # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
00753         # and callers
00754         self.assertRaises(LookupError, "\xff".encode, "ascii", "test.unknown")
00755 
00756         def badencodereturn1(exc):
00757             return 42
00758         codecs.register_error("test.badencodereturn1", badencodereturn1)
00759         self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn1")
00760 
00761         def badencodereturn2(exc):
00762             return ("?", None)
00763         codecs.register_error("test.badencodereturn2", badencodereturn2)
00764         self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn2")
00765 
00766         handler = PosReturn()
00767         codecs.register_error("test.posreturn", handler.handle)
00768 
00769         # Valid negative position
00770         handler.pos = -1
00771         self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0")
00772 
00773         # Valid negative position
00774         handler.pos = -2
00775         self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?><?>")
00776 
00777         # Negative position out of bounds
00778         handler.pos = -3
00779         self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn")
00780 
00781         # Valid positive position
00782         handler.pos = 1
00783         self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0")
00784 
00785         # Largest valid positive position (one beyond end of input)
00786         handler.pos = 2
00787         self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>")
00788 
00789         # Invalid positive position
00790         handler.pos = 3
00791         self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn")
00792 
00793         handler.pos = 0
00794 
00795         class D(dict):
00796             def __getitem__(self, key):
00797                 raise ValueError
00798         for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
00799             self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None})
00800             self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D())
00801             self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300})

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_longstrings ( self )

Definition at line 288 of file test_codeccallbacks.py.

00288 
00289     def test_longstrings(self):
00290         # test long strings to check for memory overflow problems
00291         errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
00292                    "backslashreplace"]
00293         # register the handlers under different names,
00294         # to prevent the codec from recognizing the name
00295         for err in errors:
00296             codecs.register_error("test." + err, codecs.lookup_error(err))
00297         l = 1000
00298         errors += [ "test." + err for err in errors ]
00299         for uni in [ s*l for s in ("x", "\u3042", "a\xe4") ]:
00300             for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
00301                         "utf-8", "utf-7", "utf-16", "utf-32"):
00302                 for err in errors:
00303                     try:
00304                         uni.encode(enc, err)
00305                     except UnicodeError:
00306                         pass

def test.test_codeccallbacks.CodecCallbackTest.test_lookup ( self )

Definition at line 632 of file test_codeccallbacks.py.

00632 
00633     def test_lookup(self):
00634         self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
00635         self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore"))
00636         self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
00637         self.assertEqual(
00638             codecs.xmlcharrefreplace_errors,
00639             codecs.lookup_error("xmlcharrefreplace")
00640         )
00641         self.assertEqual(
00642             codecs.backslashreplace_errors,
00643             codecs.lookup_error("backslashreplace")
00644         )

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_mutatingdecodehandler ( self )

Definition at line 827 of file test_codeccallbacks.py.

00827 
00828     def test_mutatingdecodehandler(self):
00829         baddata = [
00830             ("ascii", b"\xff"),
00831             ("utf-7", b"++"),
00832             ("utf-8",  b"\xff"),
00833             ("utf-16", b"\xff"),
00834             ("utf-32", b"\xff"),
00835             ("unicode-escape", b"\\u123g"),
00836             ("raw-unicode-escape", b"\\u123g"),
00837             ("unicode-internal", b"\xff"),
00838         ]
00839 
00840         def replacing(exc):
00841             if isinstance(exc, UnicodeDecodeError):
00842                 exc.object = 42
00843                 return ("\u4242", 0)
00844             else:
00845                 raise TypeError("don't know how to handle %r" % exc)
00846         codecs.register_error("test.replacing", replacing)
00847         for (encoding, data) in baddata:
00848             self.assertRaises(TypeError, data.decode, encoding, "test.replacing")
00849 
00850         def mutating(exc):
00851             if isinstance(exc, UnicodeDecodeError):
00852                 exc.object[:] = b""
00853                 return ("\u4242", 0)
00854             else:
00855                 raise TypeError("don't know how to handle %r" % exc)
00856         codecs.register_error("test.mutating", mutating)
00857         # If the decoder doesn't pick up the modified input the following
00858         # will lead to an endless loop
00859         for (encoding, data) in baddata:
00860             self.assertRaises(TypeError, data.decode, encoding, "test.replacing")

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_translatehelper ( self )

Definition at line 802 of file test_codeccallbacks.py.

00802 
00803     def test_translatehelper(self):
00804         # enhance coverage of:
00805         # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
00806         # and callers
00807         # (Unfortunately the errors argument is not directly accessible
00808         # from Python, so we can't test that much)
00809         class D(dict):
00810             def __getitem__(self, key):
00811                 raise ValueError
00812         #self.assertRaises(ValueError, "\xff".translate, D())
00813         self.assertRaises(TypeError, "\xff".translate, {0xff: sys.maxunicode+1})
00814         self.assertRaises(TypeError, "\xff".translate, {0xff: ()})

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_unencodablereplacement ( self )

Definition at line 645 of file test_codeccallbacks.py.

00645 
00646     def test_unencodablereplacement(self):
00647         def unencrepl(exc):
00648             if isinstance(exc, UnicodeEncodeError):
00649                 return ("\u4242", exc.end)
00650             else:
00651                 raise TypeError("don't know how to handle %r" % exc)
00652         codecs.register_error("test.unencreplhandler", unencrepl)
00653         for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
00654             self.assertRaises(
00655                 UnicodeEncodeError,
00656                 "\u4242".encode,
00657                 enc,
00658                 "test.unencreplhandler"
00659             )

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_unicodedecodeerror ( self )

Definition at line 365 of file test_codeccallbacks.py.

00365 
00366     def test_unicodedecodeerror(self):
00367         self.check_exceptionobjectargs(
00368             UnicodeDecodeError,
00369             ["ascii", bytearray(b"g\xfcrk"), 1, 2, "ouch"],
00370             "'ascii' codec can't decode byte 0xfc in position 1: ouch"
00371         )
00372         self.check_exceptionobjectargs(
00373             UnicodeDecodeError,
00374             ["ascii", bytearray(b"g\xfcrk"), 1, 3, "ouch"],
00375             "'ascii' codec can't decode bytes in position 1-2: ouch"
00376         )

Here is the call graph for this function:

def test.test_codeccallbacks.CodecCallbackTest.test_unicodeencodeerror ( self )

Definition at line 332 of file test_codeccallbacks.py.

00332 
00333     def test_unicodeencodeerror(self):
00334         self.check_exceptionobjectargs(
00335             UnicodeEncodeError,
00336             ["ascii", "g\xfcrk", 1, 2, "ouch"],
00337             "'ascii' codec can't encode character '\\xfc' in position 1: ouch"
00338         )
00339         self.check_exceptionobjectargs(
00340             UnicodeEncodeError,
00341             ["ascii", "g\xfcrk", 1, 4, "ouch"],
00342             "'ascii' codec can't encode characters in position 1-3: ouch"
00343         )
00344         self.check_exceptionobjectargs(
00345             UnicodeEncodeError,
00346             ["ascii", "\xfcx", 0, 1, "ouch"],
00347             "'ascii' codec can't encode character '\\xfc' in position 0: ouch"
00348         )
00349         self.check_exceptionobjectargs(
00350             UnicodeEncodeError,
00351             ["ascii", "\u0100x", 0, 1, "ouch"],
00352             "'ascii' codec can't encode character '\\u0100' in position 0: ouch"
00353         )
00354         self.check_exceptionobjectargs(
00355             UnicodeEncodeError,
00356             ["ascii", "\uffffx", 0, 1, "ouch"],
00357             "'ascii' codec can't encode character '\\uffff' in position 0: ouch"
00358         )
00359         if sys.maxunicode > 0xffff:
00360             self.check_exceptionobjectargs(
00361                 UnicodeEncodeError,
00362                 ["ascii", "\U00010000x", 0, 1, "ouch"],
00363                 "'ascii' codec can't encode character '\\U00010000' in position 0: ouch"
00364             )

Here is the call graph for this function:

Definition at line 377 of file test_codeccallbacks.py.

00377 
00378     def test_unicodetranslateerror(self):
00379         self.check_exceptionobjectargs(
00380             UnicodeTranslateError,
00381             ["g\xfcrk", 1, 2, "ouch"],
00382             "can't translate character '\\xfc' in position 1: ouch"
00383         )
00384         self.check_exceptionobjectargs(
00385             UnicodeTranslateError,
00386             ["g\u0100rk", 1, 2, "ouch"],
00387             "can't translate character '\\u0100' in position 1: ouch"
00388         )
00389         self.check_exceptionobjectargs(
00390             UnicodeTranslateError,
00391             ["g\uffffrk", 1, 2, "ouch"],
00392             "can't translate character '\\uffff' in position 1: ouch"
00393         )
00394         if sys.maxunicode > 0xffff:
00395             self.check_exceptionobjectargs(
00396                 UnicodeTranslateError,
00397                 ["g\U00010000rk", 1, 2, "ouch"],
00398                 "can't translate character '\\U00010000' in position 1: ouch"
00399             )
00400         self.check_exceptionobjectargs(
00401             UnicodeTranslateError,
00402             ["g\xfcrk", 1, 3, "ouch"],
00403             "can't translate characters in position 1-2: ouch"
00404         )

Here is the call graph for this function:

Definition at line 105 of file test_codeccallbacks.py.

00105 
00106     def test_uninamereplace(self):
00107         # We're using the names from the unicode database this time,
00108         # and we're doing "syntax highlighting" here, i.e. we include
00109         # the replaced text in ANSI escape sequences. For this it is
00110         # useful that the error handler is not called for every single
00111         # unencodable character, but for a complete sequence of
00112         # unencodable characters, otherwise we would output many
00113         # unnecessary escape sequences.
00114 
00115         def uninamereplace(exc):
00116             if not isinstance(exc, UnicodeEncodeError):
00117                 raise TypeError("don't know how to handle %r" % exc)
00118             l = []
00119             for c in exc.object[exc.start:exc.end]:
00120                 l.append(unicodedata.name(c, "0x%x" % ord(c)))
00121             return ("\033[1m%s\033[0m" % ", ".join(l), exc.end)
00122 
00123         codecs.register_error(
00124             "test.uninamereplace", uninamereplace)
00125 
00126         sin = "\xac\u1234\u20ac\u8000"
00127         sout = b"\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
00128         self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
00129 
00130         sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
00131         self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
00132 
00133         sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
00134         self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)

Here is the call graph for this function:

Definition at line 672 of file test_codeccallbacks.py.

00672 
00673     def test_unknownhandler(self):
00674         # enhance coverage of:
00675         # Modules/_codecsmodule.c::lookup_error()
00676         self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")

Here is the call graph for this function:

Definition at line 79 of file test_codeccallbacks.py.

00079 
00080     def test_xmlcharnamereplace(self):
00081         # This time use a named character entity for unencodable
00082         # characters, if one is available.
00083 
00084         def xmlcharnamereplace(exc):
00085             if not isinstance(exc, UnicodeEncodeError):
00086                 raise TypeError("don't know how to handle %r" % exc)
00087             l = []
00088             for c in exc.object[exc.start:exc.end]:
00089                 try:
00090                     l.append("&%s;" % html.entities.codepoint2name[ord(c)])
00091                 except KeyError:
00092                     l.append("&#%d;" % ord(c))
00093             return ("".join(l), exc.end)
00094 
00095         codecs.register_error(
00096             "test.xmlcharnamereplace", xmlcharnamereplace)
00097 
00098         sin = "\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
00099         sout = b"&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
00100         self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
00101         sout = b"\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
00102         self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
00103         sout = b"\xab&real;\xbb = &lang;&#4660;\xa4&rang;"
00104         self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)

Here is the call graph for this function:

Definition at line 65 of file test_codeccallbacks.py.

00065 
00066     def test_xmlcharrefreplace(self):
00067         # replace unencodable characters with numeric character entities.
00068         # For ascii, latin-1 and charmaps this is completely implemented
00069         # in C and should be reasonably fast.
00070         s = "\u30b9\u30d1\u30e2 \xe4nd eggs"
00071         self.assertEqual(
00072             s.encode("ascii", "xmlcharrefreplace"),
00073             b"&#12473;&#12497;&#12514; &#228;nd eggs"
00074         )
00075         self.assertEqual(
00076             s.encode("latin-1", "xmlcharrefreplace"),
00077             b"&#12473;&#12497;&#12514; \xe4nd eggs"
00078         )

Here is the call graph for this function:

Definition at line 677 of file test_codeccallbacks.py.

00677 
00678     def test_xmlcharrefvalues(self):
00679         # enhance coverage of:
00680         # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
00681         # and inline implementations
00682         v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
00683         if sys.maxunicode>=100000:
00684             v += (100000, 500000, 1000000)
00685         s = "".join([chr(x) for x in v])
00686         codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
00687         for enc in ("ascii", "iso-8859-15"):
00688             for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
00689                 s.encode(enc, err)

Here is the call graph for this function:


The documentation for this class was generated from the following file: