From 7cd8c939bb2f236729dd9fbbe0c6fc99354d9d1a Mon Sep 17 00:00:00 2001 From: Bob van de Vijver Date: Tue, 28 Jan 2020 16:19:14 +0100 Subject: [PATCH 1/2] Added option to disable HTML purifier --- README.md | 5 ++++- lib/Caxy/HtmlDiff/AbstractDiff.php | 10 +++++++++- lib/Caxy/HtmlDiff/HtmlDiffConfig.php | 17 +++++++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f2f96ab..eb2c4b8 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,10 @@ $config // Pass an instance of \Doctrine\Common\Cache\Cache to cache the calculated diffs. ->setCacheProvider(null) + + // Disable the HTML purifier (only do this if you know what you're doing) + // This bundle heavily relies on the purified input from ezyang/htmlpurifier + ->setPurifierEnabled(true) // Set the cache directory that HTMLPurifier should use. ->setPurifierCacheLocation(null) @@ -192,7 +196,6 @@ php-htmldiff is available under [GNU General Public License, version 2][gnu]. Se * Maybe add abstraction layer for cache + adapter for doctrine cache * Make HTML Purifier an optional dependency - possibly use abstraction layer for purifiers so alternatives could be used (or none at all for performance) * Expose configuration for HTML Purifier (used in table diffing) - currently only cache dir is configurable through HtmlDiffConfig object -* Add option to enable using HTML Purifier to purify all input * Performance improvements (we have 1 benchmark test, we should probably get more) * Algorithm improvements - trimming alike text at start and ends, store nested diff results in memory to re-use (like we do w/ caching) * Benchmark using DOMDocument vs. alternatives vs. 
string parsing diff --git a/lib/Caxy/HtmlDiff/AbstractDiff.php b/lib/Caxy/HtmlDiff/AbstractDiff.php index 765da96..e392c34 100644 --- a/lib/Caxy/HtmlDiff/AbstractDiff.php +++ b/lib/Caxy/HtmlDiff/AbstractDiff.php @@ -66,7 +66,7 @@ abstract class AbstractDiff protected $diffCaches = array(); /** - * @var \HTMLPurifier + * @var \HTMLPurifier|null */ protected $purifier; @@ -154,6 +154,10 @@ public function initPurifier($defaultPurifierSerializerCache = null) */ protected function prepare() { + if (false === $this->config->isPurifierEnabled()) { + return; + } + $this->initPurifier($this->config->getPurifierCacheLocation()); $this->oldText = $this->purifyHtml($this->oldText); @@ -403,6 +407,10 @@ protected function getClosingTag($tag) */ protected function purifyHtml($html) { + if (null === $this->purifier) { + return $html; + } + return $this->purifier->purify($html); } diff --git a/lib/Caxy/HtmlDiff/HtmlDiffConfig.php b/lib/Caxy/HtmlDiff/HtmlDiffConfig.php index 2bfdcaa..34f7d23 100644 --- a/lib/Caxy/HtmlDiff/HtmlDiffConfig.php +++ b/lib/Caxy/HtmlDiff/HtmlDiffConfig.php @@ -80,6 +80,11 @@ class HtmlDiffConfig */ protected $cacheProvider; + /** + * @var bool + */ + protected $purifierEnabled = true; + /** * @var null|string */ @@ -468,6 +473,18 @@ public function getCacheProvider() return $this->cacheProvider; } + public function isPurifierEnabled(): bool + { + return $this->purifierEnabled; + } + + public function setPurifierEnabled(bool $purifierEnabled = true): self + { + $this->purifierEnabled = $purifierEnabled; + + return $this; + } + /** * @param null|string * From 2a84b0b1a3659a492c67c605478bb0dffc3fac9a Mon Sep 17 00:00:00 2001 From: Bob van de Vijver Date: Fri, 1 May 2020 08:50:04 +0200 Subject: [PATCH 2/2] Use correct purifier call in TableDiff class --- lib/Caxy/HtmlDiff/Table/TableDiff.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Caxy/HtmlDiff/Table/TableDiff.php b/lib/Caxy/HtmlDiff/Table/TableDiff.php index dda56c9..3f3e940 
100644 --- a/lib/Caxy/HtmlDiff/Table/TableDiff.php +++ b/lib/Caxy/HtmlDiff/Table/TableDiff.php @@ -628,7 +628,7 @@ protected function createDocumentWithHtml($text) { $dom = new \DOMDocument(); $dom->loadHTML(mb_convert_encoding( - $this->purifier->purify(mb_convert_encoding($text, $this->config->getEncoding(), mb_detect_encoding($text))), + $this->purifyHtml(mb_convert_encoding($text, $this->config->getEncoding(), mb_detect_encoding($text))), 'HTML-ENTITIES', $this->config->getEncoding() ));