Skip to content

Commit

Permalink
TINY-724: fixed issues with malformed html and regexp backtracking
Browse files Browse the repository at this point in the history
  • Loading branch information
spocke committed Apr 26, 2017
1 parent 1478c10 commit 2ac2137
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/core/src/main/js/html/SaxParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,8 @@ define(
'(?:!\\[CDATA\\[([\\w\\W]*?)\\]\\]>)|' + // CDATA
'(?:!DOCTYPE([\\w\\W]*?)>)|' + // DOCTYPE
'(?:\\?([^\\s\\/<>]+) ?([\\w\\W]*?)[?/]>)|' + // PI
'(?:\\/([^>]+)>)|' + // End element
'(?:([A-Za-z0-9\\-_\\:\\.]+)((?:\\s+[^"\'>]+(?:(?:"[^"]*")|(?:\'[^\']*\')|[^>]*))*|\\/|\\s+)>)' + // Start element
'(?:\\/([A-Za-z][A-Za-z0-9\\-_\\:\\.]*)>)|' + // End element
'(?:([A-Za-z][A-Za-z0-9\\-_\\:\\.]*)((?:\\s+[^"\'>]+(?:(?:"[^"]*")|(?:\'[^\']*\')|[^>]*))*|\\/|\\s+)>)' + // Start element
')', 'g');

attrRegExp = /([\w:\-]+)(?:\s*=\s*(?:(?:\"((?:[^\"])*)\")|(?:\'((?:[^\'])*)\')|([^>\s]+)))?/g;
Expand All @@ -261,7 +261,7 @@ define(
fixSelfClosing = settings.fix_self_closing;
specialElements = schema.getSpecialElements();

while ((matches = tokenRegExp.exec(html))) {
while ((matches = tokenRegExp.exec(html + '>'))) { // Adds an extra '>' to keep regexps from doing catastrophic backtracking on malformed html
// Text
if (index < matches.index) {
self.text(decode(html.substr(index, matches.index - index)));
Expand Down
40 changes: 40 additions & 0 deletions src/core/src/test/js/browser/html/SaxParserTest.js
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,46 @@ asynctest(
);
});

// An element whose name contains a digit after its first letter (<a5>) is expected
// to round-trip unchanged and to fire exactly one start, one text and one end event.
suite.test('Parse elements with numbers', function () {
  var counter = createCounter(writer);
  var parser = new SaxParser(counter, schema);

  writer.reset();
  parser.parse('<a5>text</a5>');

  LegacyUnit.equal(writer.getContent(), '<a5>text</a5>', 'Parse element with numbers.');
  LegacyUnit.deepEqual(counter.counts, { start: 1, text: 1, end: 1 }, 'Parse element with numbers counts.');
});

// Markup whose "tag name" begins with a digit is expected to be serialized as
// entity-encoded text rather than as elements.
suite.test('Parse malformed elements that start with numbers', function () {
  // Parses the given markup with a fresh counter/parser pair and returns
  // whatever the shared writer serialized for it.
  var serialize = function (html) {
    var counter = createCounter(writer);
    var parser = new SaxParser(counter, schema);

    writer.reset();
    parser.parse(html);

    return writer.getContent();
  };

  // Unterminated "<2 ..." followed by a long run of tokens.
  LegacyUnit.equal(
    serialize('a <2 b b b b b b b b b b b b b b b b b b b b b b'),
    'a &lt;2 b b b b b b b b b b b b b b b b b b b b b b'
  );

  // Well-terminated but invalid <2b>...</2b> pair.
  LegacyUnit.equal(
    serialize('a <2b>a</2b> b'),
    'a &lt;2b&gt;a&lt;/2b&gt; b'
  );
});

// A start tag that is never closed with '>' is still expected to come out as a
// <b> element, with every bare token serialized as an empty-valued attribute.
suite.test('Parse malformed elements without an end', function () {
  var counter = createCounter(writer);
  var parser = new SaxParser(counter, schema);
  var actual;
  var expected;

  writer.reset();
  parser.parse('<b b b b b b b b b b b b b b b b b b b b b b b');

  actual = writer.getContent();
  expected = '<b b="" b="" b="" b="" b="" b="" b="" b="" b="" b="" b="" b="" b="" b="" b="" b="" b="" b="" b="" b="" b="" b=""></b>';

  LegacyUnit.equal(actual, expected);
});

Pipeline.async({}, suite.toSteps({}), function () {
success();
}, failure);
Expand Down

0 comments on commit 2ac2137

Please sign in to comment.