
python3.2 3.2.2

test.test_tokenize.TestDetectEncoding Class Reference

Public Member Functions

def get_readline
def test_no_bom_no_encoding_cookie
def test_bom_no_cookie
def test_cookie_first_line_no_bom
def test_matched_bom_and_cookie_first_line
def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror
def test_cookie_second_line_no_bom
def test_matched_bom_and_cookie_second_line
def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror
def test_latin1_normalization
def test_utf8_normalization
def test_short_files
def test_open

Detailed Description

Definition at line 716 of file test_tokenize.py.
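
This unittest.TestCase subclass exercises tokenize.detect_encoding(), which implements PEP 263 source-encoding detection: it reads at most two lines through a readline callable and returns the normalized encoding name together with the lines it consumed. A minimal driving sketch (the sample source bytes are made up):

    import io
    from tokenize import detect_encoding

    source = b'# -*- coding: latin-1 -*-\nprint("hello")\n'
    encoding, consumed_lines = detect_encoding(io.BytesIO(source).readline)
    print(encoding)        # 'iso-8859-1'
    print(consumed_lines)  # [b'# -*- coding: latin-1 -*-\n']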


Member Function Documentation

def test.test_tokenize.TestDetectEncoding.get_readline(self, lines)

Definition at line 718 of file test_tokenize.py.

    def get_readline(self, lines):
        index = 0
        def readline():
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline
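
The closure replays the tuple of byte strings one call at a time, mimicking a file object's readline. Raising StopIteration at end of input is safe here because detect_encoding guards each read and treats the exception as end-of-file. The same pattern as a standalone sketch (make_readline is a hypothetical free-function equivalent):

    from tokenize import detect_encoding

    def make_readline(lines):
        """Replay a sequence of byte strings, one call per line."""
        it = iter(lines)
        return lambda: next(it)  # next() raises StopIteration when exhausted

    encoding, consumed = detect_encoding(
        make_readline((b'# something\n', b'print(something)\n')))
    assert encoding == 'utf-8'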

def test.test_tokenize.TestDetectEncoding.test_bom_no_cookie(self)

Definition at line 739 of file test_tokenize.py.

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])
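
Note that the UTF-8 BOM (b'\xef\xbb\xbf') is stripped from the first consumed line, and the reported encoding is 'utf-8-sig' so that a later decode discards the BOM as well. For example:

    b'\xef\xbb\xbfabc'.decode('utf-8-sig')  # 'abc' (BOM removed)
    b'\xef\xbb\xbfabc'.decode('utf-8')      # '\ufeffabc' (BOM kept)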

def test.test_tokenize.TestDetectEncoding.test_cookie_first_line_no_bom(self)

Definition at line 750 of file test_tokenize.py.

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

def test.test_tokenize.TestDetectEncoding.test_cookie_second_line_no_bom(self)

Definition at line 779 of file test_tokenize.py.

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)
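
Both emacs-style ('# -*- coding: X -*-') and vim-style ('# vim: set fileencoding=X :') declarations satisfy the PEP 263 cookie pattern, which only requires 'coding' followed by ':' or '=' and an encoding name on one of the first two lines. A simplified sketch of the match (the module's actual regex differs in detail):

    import re

    # Simplified PEP 263 cookie pattern, for illustration only.
    cookie_re = re.compile(br'coding[:=]\s*([-\w.]+)')

    m = cookie_re.search(b'# vim: set fileencoding=ascii :\n')
    print(m.group(1))  # b'ascii'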

def test.test_tokenize.TestDetectEncoding.test_latin1_normalization(self)

Definition at line 813 of file test_tokenize.py.

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")
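
The normalization being tested lives in get_normal_name() in tokenizer.c; the tokenize module applies equivalent logic in pure Python. A sketch of that logic (not the module's verbatim code):

    def normalize(orig_enc):
        # Only the first 12 characters matter; hyphens and
        # underscores are treated interchangeably.
        enc = orig_enc[:12].lower().replace("_", "-")
        if enc == "utf-8" or enc.startswith("utf-8-"):
            return "utf-8"
        if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
           enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
            return "iso-8859-1"
        return orig_enc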

def test.test_tokenize.TestDetectEncoding.test_matched_bom_and_cookie_first_line(self)

Definition at line 760 of file test_tokenize.py.

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

def test.test_tokenize.TestDetectEncoding.test_matched_bom_and_cookie_second_line(self)

Definition at line 791 of file test_tokenize.py.

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

def test.test_tokenize.TestDetectEncoding.test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self)

Definition at line 770 of file test_tokenize.py.

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)
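
A UTF-8 BOM pins the encoding to UTF-8, so a cookie naming any other encoding is rejected. The same failure can be reproduced directly with io.BytesIO (the exact error message may vary):

    import io
    from tokenize import detect_encoding

    stream = io.BytesIO(b'\xef\xbb\xbf# coding: ascii\n')
    try:
        detect_encoding(stream.readline)
    except SyntaxError as exc:
        print(exc)  # reports the BOM/cookie encoding conflict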

def test.test_tokenize.TestDetectEncoding.test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self)

Definition at line 803 of file test_tokenize.py.

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

def test.test_tokenize.TestDetectEncoding.test_no_bom_no_encoding_cookie(self)

Definition at line 729 of file test_tokenize.py.

    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

def test.test_tokenize.TestDetectEncoding.test_open(self)

Definition at line 864 of file test_tokenize.py.

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')
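
tokenize_open is tokenize.open imported under another name at the top of the test file. It opens a source file read-only in text mode, using detect_encoding() to choose the decoder, so callers never decode with the wrong charset. Typical use (the file name is hypothetical):

    import tokenize

    # tokenize.open() detects the encoding from a BOM or coding cookie
    # and returns a text-mode file object configured with it.
    with tokenize.open('some_module.py') as fp:
        source = fp.read()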

def test.test_tokenize.TestDetectEncoding.test_short_files(self)

Definition at line 841 of file test_tokenize.py.

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)
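
The last case shows that a well-formed cookie naming an unknown codec is also a SyntaxError: detect_encoding looks the name up in the codec registry and converts the failure into a SyntaxError rather than letting a LookupError escape. The underlying check, roughly:

    import codecs

    try:
        codecs.lookup('bad')
    except LookupError:
        print('unknown codec')  # detect_encoding reports this as SyntaxError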

def test.test_tokenize.TestDetectEncoding.test_utf8_normalization(self)

Definition at line 828 of file test_tokenize.py.

    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")


The documentation for this class was generated from the following file: test_tokenize.py