Skip to content

Commit

Permalink
Merge branch 'w51_MDL-36212_m25_entities' of git://github.com/skodak/…
Browse files Browse the repository at this point in the history
…moodle
  • Loading branch information
Sam Hemelryk committed Jan 7, 2013
2 parents 816635f + 7539894 commit 1bdc017
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ function &_getMatches($lang, $str) {
}

function _unhtmlentities($string) {
return textlib::entities_to_utf8($string); // Moodle hack
$string = preg_replace('~&#x([0-9a-f]+);~ei', 'chr(hexdec("\\1"))', $string);
$string = preg_replace('~&#([0-9]+);~e', 'chr(\\1)', $string);

Expand Down
4 changes: 1 addition & 3 deletions lib/editor/tinymce/plugins/spellchecker/readme_moodle.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,4 @@ List of changes:
* Modified config file to use moodle $CFG.
* Moved static files to /tinymce/ subfolder.
* MDL-25736 - French spellchecker fixes.

Commits:
https://github.com/moodle/custom-tinymce_spellchecker_php/commits/MOODLE_22_2.0.6b
* Fix htmlentities conversion in GoogleSpell.php
13 changes: 8 additions & 5 deletions lib/tests/textlib_test.php
Original file line number Diff line number Diff line change
Expand Up @@ -293,19 +293,22 @@ public function test_encode_mimeheader() {
* @return void
*/
public function test_entities_to_utf8() {
    // Plain UTF-8 text that contains no entities must come back unchanged.
    $str = "Žluťoučký koníček";
    $this->assertSame("Žluťoučký koníček", textlib::entities_to_utf8($str));

    // Hexadecimal numeric entities and named HTML entities are both converted
    // to their UTF-8 characters; raw UTF-8 characters in the input are kept.
    $str = "&#x17d;luťoučký kon&iacute;ček&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
    $this->assertSame("Žluťoučký koníček©\"&<>§«", textlib::entities_to_utf8($str));
}

/**
 * Tests the static utf8_to_entities method
 * @return void
 */
public function test_utf8_to_entities() {
    // Pure UTF-8 input: non-ASCII characters become hexadecimal entities by
    // default and decimal entities when $dec is true.
    $str = "Žluťoučký koníček";
    $this->assertSame("&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&#xed;&#x10d;ek", textlib::utf8_to_entities($str));
    $this->assertSame("&#381;lu&#357;ou&#269;k&#253; kon&#237;&#269;ek", textlib::utf8_to_entities($str, true));

    // Input that already contains entities, $nonnum left at false: named
    // entities are kept as they are, only raw UTF-8 characters are encoded.
    $str = "&#x17d;luťoučký kon&iacute;ček&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
    $this->assertSame("&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&iacute;&#x10d;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;", textlib::utf8_to_entities($str));
    $this->assertSame("&#381;lu&#357;ou&#269;k&#253; kon&iacute;&#269;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;", textlib::utf8_to_entities($str, true));

    // $nonnum set to true: existing entities are decoded first, so named
    // entities come out in numeric form and &quot; &amp; &lt; &gt; end up as
    // plain ASCII characters.
    $str = "&#381;luťoučký kon&iacute;ček&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
    $this->assertSame("&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&#xed;&#x10d;ek&#xa9;\"&<>&#xa7;&#xab;", textlib::utf8_to_entities($str, false, true));
    $this->assertSame("&#381;lu&#357;ou&#269;k&#253; kon&#237;&#269;ek&#169;\"&<>&#167;&#171;", textlib::utf8_to_entities($str, true, true));
}

/**
Expand Down
75 changes: 53 additions & 22 deletions lib/textlib.class.php
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,34 @@ public static function encode_mimeheader($text, $charset='utf-8') {
return $encoded;
}

/**
 * Builds (once per request) and returns the lookup table used to turn
 * named HTML entities into their UTF-8 characters.
 *
 * How the table is obtained depends on the running PHP version:
 *  - before 5.3.4 the default translation table is read and every character
 *    is converted from ISO-8859-1 to UTF-8 via textlib::convert();
 *  - from 5.3.4 up to (not including) 5.4.0 the table is requested directly
 *    in UTF-8 with ENT_COMPAT;
 *  - from 5.4.0 on the ENT_HTML401 flag is added to the request.
 *
 * @return array with (html entity => utf-8) elements
 */
protected static function get_entities_table() {
    static $table = null;

    // Already built during this request - reuse it.
    if (isset($table)) {
        return $table;
    }

    $running = phpversion();

    if (version_compare($running, '5.3.4') < 0) {
        // The table maps character => entity here, so swap the direction
        // by hand while converting each character to UTF-8.
        $table = array();
        foreach (get_html_translation_table(HTML_ENTITIES) as $char => $entity) {
            $table[$entity] = textlib::convert($char, 'ISO-8859-1', 'utf-8');
        }
        return $table;
    }

    // ENT_HTML401 is only referenced on PHP 5.4.0 and later; the ternary
    // keeps the constant from being evaluated on older versions.
    $flags = (version_compare($running, '5.4.0') < 0) ? ENT_COMPAT : (ENT_COMPAT | ENT_HTML401);
    $table = array_flip(get_html_translation_table(HTML_ENTITIES, $flags, 'UTF-8'));

    return $table;
}


/**
* Converts all the numeric entities &#nnnn; or &#xnnn; to UTF-8
* Original from laurynas dot butkus at gmail at:
Expand All @@ -450,28 +478,24 @@ public static function encode_mimeheader($text, $charset='utf-8') {
* @param string $str input string
* @param boolean $htmlent convert also html entities (defaults to true)
* @return string encoded UTF-8 string
*
* NOTE: we could have used typo3 entities_to_utf8() here
* but the direct alternative used runs 400% quicker
* and uses 0.5Mb less memory, so, let's use it
* (tested against 10^6 conversions)
*/
public static function entities_to_utf8($str, $htmlent=true) {
static $trans_tbl; // Going to use static transliteration table
static $callback1 = null ;
static $callback2 = null ;

if (!$callback1 or !$callback2) {
$callback1 = create_function('$matches', 'return textlib::code2utf8(hexdec($matches[1]));');
$callback2 = create_function('$matches', 'return textlib::code2utf8($matches[1]);');
}

// Replace numeric entities
$result = preg_replace('~&#x([0-9a-f]+);~ei', 'textlib::code2utf8(hexdec("\\1"))', $str);
$result = preg_replace('~&#([0-9]+);~e', 'textlib::code2utf8(\\1)', $result);
$result = (string)$str;
$result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback1, $result);
$result = preg_replace_callback('/&#([0-9]+);/', $callback2, $result);

// Replace literal entities (if desired)
if ($htmlent) {
// Generate/create $trans_tbl
if (!isset($trans_tbl)) {
$trans_tbl = array();
foreach (get_html_translation_table(HTML_ENTITIES) as $val=>$key) {
$trans_tbl[$key] = utf8_encode($val);
}
}
$trans_tbl = self::get_entities_table();
// It should be safe to search for ascii strings and replace them with utf-8 here.
$result = strtr($result, $trans_tbl);
}
// Return utf8-ised string
Expand All @@ -487,17 +511,24 @@ public static function entities_to_utf8($str, $htmlent=true) {
* @return string converted string
*/
public static function utf8_to_entities($str, $dec=false, $nonnum=false) {
    // Callback turning one hexadecimal entity match into its decimal form;
    // created once and reused on later calls.
    // NOTE(review): create_function() is kept to match entities_to_utf8();
    // it is deprecated in recent PHP versions, consider a closure.
    static $callback = null;

    if ($nonnum) {
        // Decode every existing entity (numeric and named) first, so the
        // result below contains numeric entities only.
        $str = self::entities_to_utf8($str, true);
    }

    // Avoid some notices from Typo3 code. The previous level is saved
    // exactly once and restored right after the call, so it cannot be
    // overwritten by a second error_reporting() call.
    $oldlevel = error_reporting(E_PARSE);
    $result = self::typo3()->utf8_to_entities((string)$str);
    error_reporting($oldlevel);

    if ($dec) {
        // Rewrite &#xHH; entities as &#DD; using a callback instead of the
        // eval-based /e pattern modifier.
        if (!$callback) {
            $callback = create_function('$matches', 'return \'&#\'.(hexdec($matches[1])).\';\';');
        }
        $result = preg_replace_callback('/&#x([0-9a-f]+);/i', $callback, $result);
    }

    return $result;
}

Expand Down
4 changes: 2 additions & 2 deletions lib/weblib.php
Original file line number Diff line number Diff line change
Expand Up @@ -1384,7 +1384,7 @@ function format_text_email($text, $format) {
case FORMAT_WIKI:
// there should not be any of these any more!
$text = wikify_links($text);
return strtr(strip_tags($text), array_flip(get_html_translation_table(HTML_ENTITIES)));
return textlib::entities_to_utf8(strip_tags($text), true);
break;

case FORMAT_HTML:
Expand All @@ -1395,7 +1395,7 @@ function format_text_email($text, $format) {
case FORMAT_MARKDOWN:
default:
$text = wikify_links($text);
return strtr(strip_tags($text), array_flip(get_html_translation_table(HTML_ENTITIES)));
return textlib::entities_to_utf8(strip_tags($text), true);
break;
}
}
Expand Down

0 comments on commit 1bdc017

Please sign in to comment.