Skip to content

Commit

Permalink
python#23144: merge with 3.4.
Browse files Browse the repository at this point in the history
  • Loading branch information
ezio-melotti committed Sep 6, 2015
2 parents 2ba3980 + 6f2bb98 commit 20a2c64
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 5 deletions.
10 changes: 9 additions & 1 deletion Lib/html/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,15 @@ def goahead(self, end):
if self.convert_charrefs and not self.cdata_elem:
j = rawdata.find('<', i)
if j < 0:
if not end:
# if we can't find the next <, either we are at the end
# or there's more text incoming. If the latter is True,
# we can't pass the text to handle_data in case we have
# a charref cut in half at end. Try to determine if
# this is the case before proceeding by looking for an
# & near the end and see if it's followed by a space or ;.
amppos = rawdata.rfind('&', max(i, n-34))
if (amppos >= 0 and
not re.compile(r'[\s;]').search(rawdata, amppos)):
break # wait till we get all the text
j = n
else:
Expand Down
15 changes: 12 additions & 3 deletions Lib/test/test_htmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,6 @@ def handle_starttag(self, tag, attrs):

class EventCollectorCharrefs(EventCollector):

def get_events(self):
return self.events

def handle_charref(self, data):
self.fail('This should never be called with convert_charrefs=True')

Expand Down Expand Up @@ -633,6 +630,18 @@ def test_broken_condcoms(self):
]
self._run_check(html, expected)

def test_convert_charrefs_dropped_text(self):
# Regression test for #23144: with convert_charrefs=True, feed() alone
# must trigger every event, even if .close() is never called.
parser = EventCollector(convert_charrefs=True)
# Before the fix, the trailing ' bar & baz' data event was missing
# from the collected events.
parser.feed("foo <a>link</a> bar &amp; baz")
self.assertEqual(
parser.get_events(),
[('data', 'foo '), ('starttag', 'a', []), ('data', 'link'),
('endtag', 'a'), ('data', ' bar & baz')]
)


class AttributesTestCase(TestCaseBase):

Expand Down
6 changes: 5 additions & 1 deletion Misc/NEWS
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
+++++++++++
+++++++++++
Python News
+++++++++++

Expand Down Expand Up @@ -95,9 +95,13 @@ Core and Builtins
Library
-------

- Issue #23144: Make sure that HTMLParser.feed() returns all the data, even
when convert_charrefs is True.

- Issue #24635: Fixed a bug in typing.py where isinstance([], typing.Iterable)
would return True once, then False on subsequent calls.


- Issue #24989: Fixed buffer overread in BytesIO.readline() if a position is
set beyond size. Based on patch by John Leitch.

Expand Down

0 comments on commit 20a2c64

Please sign in to comment.