Back to index

python3.2  3.2.2
Public Member Functions
test.test_htmlparser.HTMLParserTestCase Class Reference
Inheritance diagram for test.test_htmlparser.HTMLParserTestCase:
Inheritance graph
[legend]
Collaboration diagram for test.test_htmlparser.HTMLParserTestCase:
Collaboration graph
[legend]

List of all members.

Public Member Functions

def test_processing_instruction_only
def test_simple_html
def test_malformatted_charref
def test_unclosed_entityref
def test_doctype_decl
def test_bad_nesting
def test_bare_ampersands
def test_bare_pointy_brackets
def test_attr_syntax
def test_attr_values
def test_attr_nonascii
def test_attr_entity_replacement
def test_attr_funky_names
def test_illegal_declarations
def test_starttag_end_boundary
def test_buffer_artefacts
def test_starttag_junk_chars
def test_declaration_junk_chars
def test_startendtag
def test_get_starttag_text
def test_cdata_content
def test_entityrefs_in_attributes

Detailed Description

Definition at line 99 of file test_htmlparser.py.


Member Function Documentation

Definition at line 237 of file test_htmlparser.py.

00237 
00238     def test_attr_entity_replacement(self):
00239         self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [
00240             ("starttag", "a", [("b", "&><\"'")]),
00241             ])

Here is the call graph for this function:

Definition at line 242 of file test_htmlparser.py.

00242 
00243     def test_attr_funky_names(self):
00244         self._run_check("""<a a.b='v' c:d=v e-f=v>""", [
00245             ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
00246             ])

Here is the call graph for this function:

Definition at line 220 of file test_htmlparser.py.

00220 
00221     def test_attr_nonascii(self):
00222         # see issue 7311
00223         self._run_check("<img src=/foo/bar.png alt=\u4e2d\u6587>", [
00224             ("starttag", "img", [("src", "/foo/bar.png"),
00225                                  ("alt", "\u4e2d\u6587")]),
00226             ])
00227         self._run_check("<a title='\u30c6\u30b9\u30c8' "
00228                         "href='\u30c6\u30b9\u30c8.html'>", [
00229             ("starttag", "a", [("title", "\u30c6\u30b9\u30c8"),
00230                                ("href", "\u30c6\u30b9\u30c8.html")]),
00231             ])
00232         self._run_check('<a title="\u30c6\u30b9\u30c8" '
00233                         'href="\u30c6\u30b9\u30c8.html">', [
00234             ("starttag", "a", [("title", "\u30c6\u30b9\u30c8"),
00235                                ("href", "\u30c6\u30b9\u30c8.html")]),
00236             ])

Here is the call graph for this function:

Definition at line 193 of file test_htmlparser.py.

00193 
00194     def test_attr_syntax(self):
00195         output = [
00196           ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])
00197           ]
00198         self._run_check("""<a b='v' c="v" d=v e>""", output)
00199         self._run_check("""<a  b = 'v' c = "v" d = v e>""", output)
00200         self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
00201         self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)

Here is the call graph for this function:

Definition at line 202 of file test_htmlparser.py.

00202 
00203     def test_attr_values(self):
00204         self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
00205                         [("starttag", "a", [("b", "xxx\n\txxx"),
00206                                             ("c", "yyy\t\nyyy"),
00207                                             ("d", "\txyz\n")])
00208                          ])
00209         self._run_check("""<a b='' c="">""", [
00210             ("starttag", "a", [("b", ""), ("c", "")]),
00211             ])
00212         # Regression test for SF patch #669683.
00213         self._run_check("<e a=rgb(1,2,3)>", [
00214             ("starttag", "e", [("a", "rgb(1,2,3)")]),
00215             ])
00216         # Regression test for SF bug #921657.
00217         self._run_check("<a href=mailto:xyz@example.com>", [
00218             ("starttag", "a", [("href", "mailto:xyz@example.com")]),
00219             ])

Here is the call graph for this function:

Definition at line 172 of file test_htmlparser.py.

00172 
00173     def test_bad_nesting(self):
00174         # Strangely, this *is* supposed to test that overlapping
00175         # elements are allowed.  HTMLParser is more geared toward
00176         # lexing the input than parsing the structure.
00177         self._run_check("<a><b></a></b>", [
00178             ("starttag", "a", []),
00179             ("starttag", "b", []),
00180             ("endtag", "a"),
00181             ("endtag", "b"),
00182             ])

Here is the call graph for this function:

Definition at line 183 of file test_htmlparser.py.

00183 
00184     def test_bare_ampersands(self):
00185         self._run_check("this text & contains & ampersands &", [
00186             ("data", "this text & contains & ampersands &"),
00187             ])

Here is the call graph for this function:

Definition at line 188 of file test_htmlparser.py.

00188 
00189     def test_bare_pointy_brackets(self):
00190         self._run_check("this < text > contains < bare>pointy< brackets", [
00191             ("data", "this < text > contains < bare>pointy< brackets"),
00192             ])

Here is the call graph for this function:

Definition at line 254 of file test_htmlparser.py.

00254 
00255     def test_buffer_artefacts(self):
00256         output = [("starttag", "a", [("b", "<")])]
00257         self._run_check(["<a b='<'>"], output)
00258         self._run_check(["<a ", "b='<'>"], output)
00259         self._run_check(["<a b", "='<'>"], output)
00260         self._run_check(["<a b=", "'<'>"], output)
00261         self._run_check(["<a b='<", "'>"], output)
00262         self._run_check(["<a b='<'", ">"], output)
00263 
00264         output = [("starttag", "a", [("b", ">")])]
00265         self._run_check(["<a b='>'>"], output)
00266         self._run_check(["<a ", "b='>'>"], output)
00267         self._run_check(["<a b", "='>'>"], output)
00268         self._run_check(["<a b=", "'>'>"], output)
00269         self._run_check(["<a b='>", "'>"], output)
00270         self._run_check(["<a b='>'", ">"], output)
00271 
00272         output = [("comment", "abc")]
00273         self._run_check(["", "<!--abc-->"], output)
00274         self._run_check(["<", "!--abc-->"], output)
00275         self._run_check(["<!", "--abc-->"], output)
00276         self._run_check(["<!-", "-abc-->"], output)
00277         self._run_check(["<!--", "abc-->"], output)
00278         self._run_check(["<!--a", "bc-->"], output)
00279         self._run_check(["<!--ab", "c-->"], output)
00280         self._run_check(["<!--abc", "-->"], output)
00281         self._run_check(["<!--abc-", "->"], output)
00282         self._run_check(["<!--abc--", ">"], output)
00283         self._run_check(["<!--abc-->", ""], output)

Here is the call graph for this function:

Definition at line 323 of file test_htmlparser.py.

00323 
00324     def test_cdata_content(self):
00325         s = """<script> <!-- not a comment --> &not-an-entity-ref; </script>"""
00326         self._run_check(s, [
00327             ("starttag", "script", []),
00328             ("data", " <!-- not a comment --> &not-an-entity-ref; "),
00329             ("endtag", "script"),
00330             ])
00331         s = """<script> <not a='start tag'> </script>"""
00332         self._run_check(s, [
00333             ("starttag", "script", []),
00334             ("data", " <not a='start tag'> "),
00335             ("endtag", "script"),
00336             ])

Here is the call graph for this function:

Definition at line 300 of file test_htmlparser.py.

00300 
00301     def test_declaration_junk_chars(self):
00302         self._parse_error("<!DOCTYPE foo $ >")

Here is the call graph for this function:

Definition at line 154 of file test_htmlparser.py.

00154 
00155     def test_doctype_decl(self):
00156         inside = """\
00157 DOCTYPE html [
00158   <!ELEMENT html - O EMPTY>
00159   <!ATTLIST html
00160       version CDATA #IMPLIED
00161       profile CDATA 'DublinCore'>
00162   <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
00163   <!ENTITY myEntity 'internal parsed entity'>
00164   <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
00165   <!ENTITY % paramEntity 'name|name|name'>
00166   %paramEntity;
00167   <!-- comment -->
00168 ]"""
00169         self._run_check("<!%s>" % inside, [
00170             ("decl", inside),
00171             ])

Here is the call graph for this function:

Definition at line 337 of file test_htmlparser.py.

00337 
00338     def test_entityrefs_in_attributes(self):
00339         self._run_check("<html foo='&euro;&amp;&#97;&#x61;&unsupported;'>", [
00340                 ("starttag", "html", [("foo", "\u20AC&aa&unsupported;")])
00341                 ])
00342 

Here is the call graph for this function:

Definition at line 317 of file test_htmlparser.py.

00317 
00318     def test_get_starttag_text(self):
00319         s = """<foo:bar   \n   one="1"\ttwo=2   >"""
00320         self._run_check_extra(s, [
00321             ("starttag", "foo:bar", [("one", "1"), ("two", "2")]),
00322             ("starttag_text", s)])

Here is the call graph for this function:

Definition at line 247 of file test_htmlparser.py.

00247 
00248     def test_illegal_declarations(self):
00249         self._parse_error('<!spacer type="block" height="25">')

Here is the call graph for this function:

Definition at line 141 of file test_htmlparser.py.

00141 
00142     def test_malformatted_charref(self):
00143         self._run_check("<p>&#bad;</p>", [
00144             ("starttag", "p", []),
00145             ("data", "&#bad;"),
00146             ("endtag", "p"),
00147         ])

Here is the call graph for this function:

Definition at line 101 of file test_htmlparser.py.

00101 
00102     def test_processing_instruction_only(self):
00103         self._run_check("<?processing instruction>", [
00104             ("pi", "processing instruction"),
00105             ])
00106         self._run_check("<?processing instruction ?>", [
00107             ("pi", "processing instruction ?"),
00108             ])

Here is the call graph for this function:

Definition at line 109 of file test_htmlparser.py.

00109 
00110     def test_simple_html(self):
00111         self._run_check("""
00112 <!DOCTYPE html PUBLIC 'foo'>
00113 <HTML>&entity;&#32;
00114 <!--comment1a
00115 -></foo><bar>&lt;<?pi?></foo<bar
00116 comment1b-->
00117 <Img sRc='Bar' isMAP>sample
00118 text
00119 &#x201C;
00120 <!--comment2a-- --comment2b--><!>
00121 </Html>
""", [

Here is the call graph for this function:

Definition at line 303 of file test_htmlparser.py.

00303 
00304     def test_startendtag(self):
00305         self._run_check("<p/>", [
00306             ("startendtag", "p", []),
00307             ])
00308         self._run_check("<p></p>", [
00309             ("starttag", "p", []),
00310             ("endtag", "p"),
00311             ])
00312         self._run_check("<p><img src='foo' /></p>", [
00313             ("starttag", "p", []),
00314             ("startendtag", "img", [("src", "foo")]),
00315             ("endtag", "p"),
00316             ])

Here is the call graph for this function:

Definition at line 250 of file test_htmlparser.py.

00250 
00251     def test_starttag_end_boundary(self):
00252         self._run_check("""<a b='<'>""", [("starttag", "a", [("b", "<")])])
00253         self._run_check("""<a b='>'>""", [("starttag", "a", [("b", ">")])])

Here is the call graph for this function:

Definition at line 284 of file test_htmlparser.py.

00284 
00285     def test_starttag_junk_chars(self):
00286         self._parse_error("</>")
00287         self._parse_error("</$>")
00288         self._parse_error("</")
00289         self._parse_error("</a")
00290         self._parse_error("<a<a>")
00291         self._parse_error("</a<a>")
00292         self._parse_error("<!")
00293         self._parse_error("<a $>")
00294         self._parse_error("<a")
00295         self._parse_error("<a foo='bar'")
00296         self._parse_error("<a foo='bar")
00297         self._parse_error("<a foo='>'")
00298         self._parse_error("<a foo='>")
00299         self._parse_error("<a foo=>")

Here is the call graph for this function:

Definition at line 148 of file test_htmlparser.py.

00148 
00149     def test_unclosed_entityref(self):
00150         self._run_check("&entityref foo", [
00151             ("entityref", "entityref"),
00152             ("data", " foo"),
00153             ])

Here is the call graph for this function:


The documentation for this class was generated from the following file: