Skip to content

Commit

Permalink
Don't truncate in DOMLex when seeing closing div
Browse files Browse the repository at this point in the history
Signed-off-by: Edward Z. Yang <[email protected]>
  • Loading branch information
ezyang committed Aug 31, 2014
1 parent 80ebd43 commit 15d1a30
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 6 deletions.
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change
==========================

4.7.0, unknown release date
- Don't truncate upon encountering </div> when using DOMLex. Thanks
Myrto Christina for finally convincing me to fix this.

4.6.0, released 2013-11-30
# Secure URI munge hashing algorithm has changed to hash_hmac("sha256", $url, $secret).
Please update any verification scripts you may have.
Expand Down
5 changes: 2 additions & 3 deletions library/HTMLPurifier/Lexer/DOMLex.php
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,7 @@ public function tokenizeHTML($html, $config, $context)
$tokens = array();
$this->tokenizeDOM(
$doc->getElementsByTagName('html')->item(0)-> // <html>
getElementsByTagName('body')->item(0)-> // <body>
getElementsByTagName('div')->item(0), // <div>
getElementsByTagName('body')->item(0), // <body>
$tokens
);
return $tokens;
Expand Down Expand Up @@ -272,7 +271,7 @@ protected function wrapHTML($html, $config, $context)
$ret .= '<html><head>';
$ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
// No protection if $html contains a stray </div>!
$ret .= '</head><body><div>' . $html . '</div></body></html>';
$ret .= '</head><body>' . $html . '</body></html>';
return $ret;
}
}
Expand Down
3 changes: 1 addition & 2 deletions library/HTMLPurifier/Lexer/PH5P.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ public function tokenizeHTML($html, $config, $context)
$tokens = array();
$this->tokenizeDOM(
$doc->getElementsByTagName('html')->item(0)-> // <html>
getElementsByTagName('body')->item(0)-> // <body>
getElementsByTagName('div')->item(0) // <div>
getElementsByTagName('body')->item(0) // <body>
,
$tokens
);
Expand Down
18 changes: 17 additions & 1 deletion tests/HTMLPurifier/LexerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,8 @@ public function test_tokenizeHTML_notWellFormed()
new HTMLPurifier_Token_End('poolasdf'),
new HTMLPurifier_Token_End('pooloka'),
),
'PH5P' => $alt,
// 20140831: Weird, but whatever...
'PH5P' => array(new HTMLPurifier_Token_Empty('asdf')),
)
);
}
Expand Down Expand Up @@ -800,6 +801,21 @@ public function test_tokenizeHTML_imgTag()
);
}

/**
 * Regression test: a stray closing </div> at the very start of the
 * input must not cause the text that follows it to be dropped.
 *
 * The default expectation keeps the unmatched end tag as a token,
 * followed by the text. The DOMLex and PH5P lexers are instead
 * expected to emit only the text token.
 */
public function test_tokenizeHTML_prematureDivClose()
{
    $input = '</div>dontdie';

    // Default expectation: end tag token, then the trailing text.
    $expected = array(
        new HTMLPurifier_Token_End('div'),
        new HTMLPurifier_Token_Text('dontdie')
    );

    // Both alternate lexers share the same expectation (text only),
    // so the same array is used for each key.
    $textOnly = array(new HTMLPurifier_Token_Text('dontdie'));
    $perLexer = array(
        'DOMLex' => $textOnly,
        'PH5P' => $textOnly
    );

    $this->assertTokenization($input, $expected, $perLexer);
}


/*
Expand Down

0 comments on commit 15d1a30

Please sign in to comment.