diff --git a/demo/index.php b/demo/index.php index d732d8d..6cf7489 100644 --- a/demo/index.php +++ b/demo/index.php @@ -40,9 +40,16 @@ function addDebugOutput($value, $key = 'general') $diff->setMatchThreshold($data['matchThreshold']); } $diff->setUseTableDiffing($useTableDiffing); - $diff->build(); + $diffOutput = $diff->build(); + $diffOutput = mb_convert_encoding($diffOutput, 'UTF-8'); - echo json_encode(array('diff' => $diff->getDifference(), 'debug' => $debugOutput)); + $jsonOutput = json_encode(array('diff' => $diffOutput, 'debug' => $debugOutput)); + + if (false === $jsonOutput) { + throw new \Exception('Failed to encode JSON: '.json_last_error_msg()); + } + + echo $jsonOutput; } else { header('Content-Type: text/html'); echo file_get_contents('demo.html'); diff --git a/lib/Caxy/HtmlDiff/AbstractDiff.php b/lib/Caxy/HtmlDiff/AbstractDiff.php index 9bccf2b..5e8b8f5 100644 --- a/lib/Caxy/HtmlDiff/AbstractDiff.php +++ b/lib/Caxy/HtmlDiff/AbstractDiff.php @@ -81,6 +81,8 @@ abstract class AbstractDiff */ public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null) { + mb_substitute_character(0x20); + if ($specialCaseTags === null) { $specialCaseTags = static::$defaultSpecialCaseTags; } diff --git a/lib/Caxy/HtmlDiff/Table/TableDiff.php b/lib/Caxy/HtmlDiff/Table/TableDiff.php index 149c664..eea975b 100644 --- a/lib/Caxy/HtmlDiff/Table/TableDiff.php +++ b/lib/Caxy/HtmlDiff/Table/TableDiff.php @@ -590,8 +590,25 @@ protected function diffCells($oldCell, $newCell, $usingExtraRow = false) protected function buildTableDoms() { - $this->oldTable = $this->parseTableStructure(mb_convert_encoding($this->oldText, 'HTML-ENTITIES', 'UTF-8')); - $this->newTable = $this->parseTableStructure(mb_convert_encoding($this->newText, 'HTML-ENTITIES', 'UTF-8')); + $this->oldTable = $this->parseTableStructure($this->oldText); + $this->newTable = $this->parseTableStructure($this->newText); + } + + /** + * @param string $text + * + * @return \DOMDocument + */ + protected function createDocumentWithHtml($text) + { + $dom = new \DOMDocument(); + $dom->loadHTML(mb_convert_encoding( + $this->purifier->purify(mb_convert_encoding($text, $this->encoding, mb_detect_encoding($text))), + 'HTML-ENTITIES', + $this->encoding + )); + + return $dom; } /** @@ -601,8 +618,7 @@ protected function buildTableDoms() */ protected function parseTableStructure($text) { - $dom = new \DOMDocument(); - $dom->loadHTML($text); + $dom = $this->createDocumentWithHtml($text); $tableNode = $dom->getElementsByTagName('table')->item(0); @@ -692,8 +708,7 @@ protected function setInnerHtml($node, $html) $html = ''; } - $doc = new \DOMDocument(); - $doc->loadHTML(mb_convert_encoding($this->purifier->purify($html), 'HTML-ENTITIES', 'UTF-8')); + $doc = $this->createDocumentWithHtml($html); $fragment = $node->ownerDocument->createDocumentFragment(); $root = $doc->getElementsByTagName('body')->item(0); foreach ($root->childNodes as $child) {