Skip to content

Commit f7f9f56

Browse files
gh-135661: Fix CDATA section parsing in HTMLParser
"] ]>" and "]] >" no longer end the CDATA section.
1 parent 1c7efaf commit f7f9f56

File tree

3 files changed

+28
-22
lines changed

3 files changed

+28
-22
lines changed

Lib/html/parser.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,11 @@ def parse_html_declaration(self, i):
298298
# this case is actually already handled in goahead()
299299
return self.parse_comment(i)
300300
elif rawdata[i:i+9] == '<![CDATA[':
301-
return self.parse_marked_section(i)
301+
j = rawdata.find(']]>', i+9)
302+
if j < 0:
303+
return -1
304+
self.unknown_decl(rawdata[i+3: j])
305+
return j + 3
302306
elif rawdata[i:i+9].lower() == '<!doctype':
303307
# find the closing >
304308
gtpos = rawdata.find('>', i+9)

Lib/test/test_htmlparser.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -686,27 +686,27 @@ def test_broken_condcoms(self):
686686
]
687687
self._run_check(html, expected)
688688

689-
def test_cdata_declarations(self):
690-
# More tests should be added. See also "8.2.4.42. Markup
691-
# declaration open state", "8.2.4.69. CDATA section state",
692-
# and issue 32876
693-
html = ('<![CDATA[just some plain text]]>')
694-
expected = [('unknown decl', 'CDATA[just some plain text')]
695-
self._run_check(html, expected)
696-
697-
def test_cdata_declarations_multiline(self):
698-
html = ('<code><![CDATA['
699-
' if (a < b && a > b) {'
700-
' printf("[<marquee>How?</marquee>]");'
701-
' }'
702-
']]></code>')
703-
expected = [
704-
('starttag', 'code', []),
705-
('unknown decl',
706-
'CDATA[ if (a < b && a > b) { '
707-
'printf("[<marquee>How?</marquee>]"); }'),
708-
('endtag', 'code')
709-
]
689+
@support.subTests('content', [
690+
'just some plain text',
691+
'<!-- not a comment -->',
692+
'&not-an-entity-ref;',
693+
"<not a='start tag'>",
694+
'',
695+
'[[I have many brackets]]',
696+
'I have a > in the middle',
697+
'I have a ]] in the middle',
698+
'] ]>',
699+
']] >',
700+
('\n'
701+
' if (a < b && a > b) {\n'
702+
' printf("[<marquee>How?</marquee>]");\n'
703+
' }\n'),
704+
])
705+
def test_cdata_section(self, content):
706+
# See "13.2.5.42 Markup declaration open state",
707+
# "13.2.5.69 CDATA section state", and issue bpo-32876.
708+
html = f'<![CDATA[{content}]]>'
709+
expected = [('unknown decl', 'CDATA[' + content)]
710710
self._run_check(html, expected)
711711

712712
def test_convert_charrefs_dropped_text(self):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix CDATA section parsing in :class:`html.parser.HTMLParser`: ``] ]>`` and
2+
``]] >`` no longer end the CDATA section.

0 commit comments

Comments
 (0)