Skip to content

Commit

Permalink
MDL-60337 htmlpurifier: non-ascii domain names
Browse files Browse the repository at this point in the history
  • Loading branch information
marinaglancy committed Oct 3, 2017
1 parent d2800e5 commit a3cc626
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 1 deletion.
2 changes: 1 addition & 1 deletion lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public function validate($string, $config, $context)

// PHP 5.3 and later support this functionality natively
if (function_exists('idn_to_ascii')) {
$string = idn_to_ascii($string);
$string = idn_to_ascii($string, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46);

// If we have Net_IDNA2 support, we can support IRIs by
// punycoding them. (This is the most portable thing to do,
Expand Down
4 changes: 4 additions & 0 deletions lib/htmlpurifier/readme_moodle.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ Description of HTML Purifier v4.9.3 library import into Moodle
HTMLPurifier.path.php
* add locallib.php with Moodle specific extensions to /lib/htmlpurifier/
* add this readme_moodle.txt to /lib/htmlpurifier/

Modifications:
* MDL-60337 use the correct IDN variant (UTS #46, non-transitional) when converting domain names to ASCII
Check status of https://github.com/ezyang/htmlpurifier/pull/148
24 changes: 24 additions & 0 deletions lib/tests/htmlpurifier_test.php
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,30 @@ public function test_allowed_schemes() {
$this->assertSame('<a>link</a>', purify_html($text));
}

/**
 * Checks that links pointing at internationalised (non-ascii) domain names
 * come back from purify_html() with the host converted to its punycode
 * form, while the visible link text is left untouched.
 */
public function test_idn() {
    // Map of raw HTML => HTML expected back from purify_html().
    // Entries are checked in the order they are listed here.
    $cases = [
        // Example of a domain that gives the same result in IDNA2003 and IDNA2008.
        '<a href="http://правительство.рф">правительство.рф</a>' =>
            '<a href="http://xn--80aealotwbjpid2k.xn--p1ai">правительство.рф</a>',

        // Examples of deviations from http://www.unicode.org/reports/tr46/#Table_Deviation_Characters .
        '<a href="http://teßt.de">teßt.de</a>' =>
            '<a href="http://xn--tet-6ka.de">teßt.de</a>',
        '<a href="http://βόλος.com">http://βόλος.com</a>' =>
            '<a href="http://xn--nxasmm1c.com">http://βόλος.com</a>',
        // NOTE: this hostname contains an invisible zero-width non-joiner (U+200C); keep it when editing.
        '<a href="http://نامه‌ای.com">http://نامه‌ای.com</a>' =>
            '<a href="http://xn--mgba3gch31f060k.com">http://نامه‌ای.com</a>',
    ];

    foreach ($cases as $html => $purified) {
        $this->assertSame($purified, purify_html($html));
    }
}

/**
* Tests media tags.
*
Expand Down

0 comments on commit a3cc626

Please sign in to comment.