diff --git a/lib/htmlpurifier/HTMLPurifier.php b/lib/htmlpurifier/HTMLPurifier.php index e3fce9c2a3462..fff4678862167 100644 --- a/lib/htmlpurifier/HTMLPurifier.php +++ b/lib/htmlpurifier/HTMLPurifier.php @@ -19,7 +19,7 @@ */ /* - HTML Purifier 4.0.0 - Standards Compliant HTML Filtering + HTML Purifier 4.1.0 - Standards Compliant HTML Filtering Copyright (C) 2006-2008 Edward Z. Yang This library is free software; you can redistribute it and/or @@ -55,10 +55,10 @@ class HTMLPurifier { /** Version of HTML Purifier */ - public $version = '4.0.0'; + public $version = '4.1.0'; /** Constant with version of HTML Purifier */ - const VERSION = '4.0.0'; + const VERSION = '4.1.0'; /** Global configuration object */ public $config; diff --git a/lib/htmlpurifier/HTMLPurifier.safe-includes.php b/lib/htmlpurifier/HTMLPurifier.safe-includes.php index cf2c1d617a4d2..6402de045845f 100644 --- a/lib/htmlpurifier/HTMLPurifier.safe-includes.php +++ b/lib/htmlpurifier/HTMLPurifier.safe-includes.php @@ -170,6 +170,7 @@ require_once $__dir . '/HTMLPurifier/Injector/Linkify.php'; require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php'; require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php'; +require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php'; require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php'; require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php'; require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php'; @@ -192,6 +193,7 @@ require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php'; require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php'; require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php'; +require_once $__dir . '/HTMLPurifier/URIScheme/data.php'; require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php'; require_once $__dir . '/HTMLPurifier/URIScheme/http.php'; require_once $__dir . '/HTMLPurifier/URIScheme/https.php'; diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php index 435d7930bb0a3..54b7d63f12b94 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php @@ -47,7 +47,7 @@ public function validate($uri_string, $config, $context) { // URI at all $result = str_replace($keys, $values, $result); - return "url($result)"; + return "url('$result')"; } diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php index a219479a029d5..7f0e4b7a59fbf 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php @@ -24,7 +24,8 @@ public function transform($attr, $config, $context) { if ($src) { $alt = $config->get('Attr.DefaultImageAlt'); if ($alt === null) { - $attr['alt'] = basename($attr['src']); + // truncate if the alt is too long + $attr['alt'] = substr(basename($attr['src']),0,40); } else { $attr['alt'] = $alt; } diff --git a/lib/htmlpurifier/HTMLPurifier/AttrTransform/SafeParam.php b/lib/htmlpurifier/HTMLPurifier/AttrTransform/SafeParam.php index 94e8052a9d09f..3f992ec31b23a 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/SafeParam.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/SafeParam.php @@ -37,8 +37,14 @@ public function transform($attr, $config, $context) { $attr['value'] = 'window'; break; case 'movie': + case 'src': + $attr['name'] = "movie"; $attr['value'] = $this->uri->validate($attr['value'], $config, $context); break; + case 'flashvars': + // we're going to allow arbitrary inputs to the SWF, on + // the reasoning that it could only hack the SWF, not us. + break; // add other cases to support other param name/value pairs default: $attr['name'] = $attr['value'] = null; diff --git a/lib/htmlpurifier/HTMLPurifier/Config.php b/lib/htmlpurifier/HTMLPurifier/Config.php index a01706043adf8..203831f9f5d4c 100644 --- a/lib/htmlpurifier/HTMLPurifier/Config.php +++ b/lib/htmlpurifier/HTMLPurifier/Config.php @@ -20,7 +20,7 @@ class HTMLPurifier_Config /** * HTML Purifier's version */ - public $version = '4.0.0'; + public $version = '4.1.0'; /** * Bool indicator whether or not to automatically finalize diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema.ser b/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema.ser index bbf12f9c3e739..22b8d54a59f17 100644 Binary files a/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema.ser and b/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema.ser differ diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt b/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt new file mode 100755 index 0000000000000..dde990ab2603f --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt @@ -0,0 +1,11 @@ +AutoFormat.RemoveSpansWithoutAttributes +TYPE: bool +VERSION: 4.0.1 +DEFAULT: false +--DESCRIPTION-- +

+ This directive causes span tags without any attributes + to be removed. It will also remove spans that had all attributes + removed during processing. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt b/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt index f635a68548bad..cdda09a4c5fbe 100644 --- a/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt +++ b/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt @@ -7,8 +7,7 @@ DEFAULT: false Whether or not to permit embed tags in documents, with a number of extra security features added to prevent script execution. This is similar to what websites like MySpace do to embed tags. Embed is a proprietary - element and will cause your website to stop validating. You probably want - to enable this with %HTML.SafeObject. - Highly experimental. -

+ element and will cause your website to stop validating; you should + see if you can use %Output.FlashCompat with %HTML.SafeObject instead + first.

--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt b/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt index 32967b88fb7d3..ceb342e22b74a 100644 --- a/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt +++ b/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt @@ -6,9 +6,8 @@ DEFAULT: false

Whether or not to permit object tags in documents, with a number of extra security features added to prevent script execution. This is similar to - what websites like MySpace do to object tags. You may also want to - enable %HTML.SafeEmbed for maximum interoperability with Internet Explorer, - although embed tags will cause your website to stop validating. - Highly experimental. + what websites like MySpace do to object tags. You should also enable + %Output.FlashCompat in order to generate Internet Explorer + compatibility code for your object tags.

--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/Output.FlashCompat.txt b/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/Output.FlashCompat.txt new file mode 100644 index 0000000000000..93398e8598009 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/Output.FlashCompat.txt @@ -0,0 +1,11 @@ +Output.FlashCompat +TYPE: bool +VERSION: 4.1.0 +DEFAULT: false +--DESCRIPTION-- +

+ If true, HTML Purifier will generate Internet Explorer compatibility + code for all object code. This is highly recommended if you enable + %HTML.SafeObject. +

+--# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt b/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt index 98fdfe922260a..ae3a913f2424c 100644 --- a/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt +++ b/lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt @@ -12,4 +12,6 @@ array ( --DESCRIPTION-- Whitelist that defines the schemes that a URI is allowed to have. This prevents XSS attacks from using pseudo-schemes like javascript or mocha. +There is also support for the data URI scheme, but it is not +enabled by default. --# vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/HTMLPurifier/ElementDef.php b/lib/htmlpurifier/HTMLPurifier/ElementDef.php index aede2c3bb49b2..5498d9567040a 100644 --- a/lib/htmlpurifier/HTMLPurifier/ElementDef.php +++ b/lib/htmlpurifier/HTMLPurifier/ElementDef.php @@ -97,6 +97,13 @@ class HTMLPurifier_ElementDef */ public $autoclose = array(); + /** + * If a foreign element is found in this element, test if it is + * allowed by this sub-element; if it is, instead of closing the + * current element, place it inside this element. + */ + public $wrap; + /** * Whether or not this is a formatting element affected by the * "Active Formatting Elements" algorithm. diff --git a/lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php b/lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php index aca972f6c56fb..23df221eaa3cb 100644 --- a/lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php +++ b/lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php @@ -7,13 +7,13 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter public function preFilter($html, $config, $context) { $pre_regex = '#]+>.+?'. - 'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?#s'; + 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s'; $pre_replace = '\1'; return preg_replace($pre_regex, $pre_replace, $html); } public function postFilter($html, $config, $context) { - $post_regex = '#([A-Za-z0-9\-_]+)#'; + $post_regex = '#((?:v|cp)/[A-Za-z0-9\-_=]+)#'; return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html); } @@ -24,10 +24,10 @@ protected function armorUrl($url) { protected function postFilterCallback($matches) { $url = $this->armorUrl($matches[1]); return ''. - ''. + 'data="http://www.youtube.com/'.$url.'">'. + ''. ''. diff --git a/lib/htmlpurifier/HTMLPurifier/Generator.php b/lib/htmlpurifier/HTMLPurifier/Generator.php index 24bd8a54eddaf..4a62417271efb 100644 --- a/lib/htmlpurifier/HTMLPurifier/Generator.php +++ b/lib/htmlpurifier/HTMLPurifier/Generator.php @@ -31,6 +31,17 @@ class HTMLPurifier_Generator */ private $_sortAttr; + /** + * Cache of %Output.FlashCompat + */ + private $_flashCompat; + + /** + * Stack for keeping track of object information when outputting IE + * compatibility code. + */ + private $_flashStack = array(); + /** * Configuration for the generator */ @@ -44,6 +55,7 @@ public function __construct($config, $context) { $this->config = $config; $this->_scriptFix = $config->get('Output.CommentScriptContents'); $this->_sortAttr = $config->get('Output.SortAttr'); + $this->_flashCompat = $config->get('Output.FlashCompat'); $this->_def = $config->getHTMLDefinition(); $this->_xhtml = $this->_def->doctype->xml; } @@ -104,12 +116,41 @@ public function generateFromToken($token) { } elseif ($token instanceof HTMLPurifier_Token_Start) { $attr = $this->generateAttributes($token->attr, $token->name); + if ($this->_flashCompat) { + if ($token->name == "object") { + $flash = new stdclass(); + $flash->attr = $token->attr; + $flash->param = array(); + $this->_flashStack[] = $flash; + } + } return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; } elseif ($token instanceof HTMLPurifier_Token_End) { - return 'name . '>'; + $_extra = ''; + if ($this->_flashCompat) { + if ($token->name == "object" && !empty($this->_flashStack)) { + $flash = array_pop($this->_flashStack); + $compat_token = new HTMLPurifier_Token_Empty("embed"); + foreach ($flash->attr as $name => $val) { + if ($name == "classid") continue; + if ($name == "type") continue; + if ($name == "data") $name = "src"; + $compat_token->attr[$name] = $val; + } + foreach ($flash->param as $name => $val) { + if ($name == "movie") $name = "src"; + $compat_token->attr[$name] = $val; + } + $_extra = ""; + } + } + return $_extra . 'name . '>'; } elseif ($token instanceof HTMLPurifier_Token_Empty) { + if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) { + $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value']; + } $attr = $this->generateAttributes($token->attr, $token->name); return '<' . $token->name . ($attr ? ' ' : '') . $attr . ( $this->_xhtml ? ' /': '' ) //
v.
diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php index 1d15f27293df8..74d4522f4e24f 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php @@ -20,8 +20,10 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule public $content_sets = array('Flow' => 'List'); public function setup($config) { - $this->addElement('ol', 'List', 'Required: li', 'Common'); - $this->addElement('ul', 'List', 'Required: li', 'Common'); + $ol = $this->addElement('ol', 'List', 'Required: li', 'Common'); + $ol->wrap = "li"; + $ul = $this->addElement('ul', 'List', 'Required: li', 'Common'); + $ul->wrap = "li"; $this->addElement('dl', 'List', 'Required: dt | dd', 'Common'); $this->addElement('li', false, 'Flow', 'Common'); diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/SafeEmbed.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/SafeEmbed.php index 8fc03cb1c7eaa..ea256716bb1c1 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/SafeEmbed.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/SafeEmbed.php @@ -20,6 +20,7 @@ public function setup($config) { 'height' => 'Pixels#' . $max, 'allowscriptaccess' => 'Enum#never', 'allownetworking' => 'Enum#internal', + 'flashvars' => 'Text', 'wmode' => 'Enum#window', 'name' => 'ID', ) diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/SafeObject.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/SafeObject.php index 33bac00cf24e2..64ab8c070318c 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/SafeObject.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/SafeObject.php @@ -28,7 +28,10 @@ public function setup($config) { 'type' => 'Enum#application/x-shockwave-flash', 'width' => 'Pixels#' . $max, 'height' => 'Pixels#' . $max, - 'data' => 'URI#embedded' + 'data' => 'URI#embedded', + 'classid' => 'Enum#clsid:d27cdb6e-ae6d-11cf-96b8-444553540000', + 'codebase' => new HTMLPurifier_AttrDef_Enum(array( + 'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0')), ) ); $object->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject(); diff --git a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/Proprietary.php b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/Proprietary.php index 85fa90a942d89..14c15c4a062ea 100644 --- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/Proprietary.php +++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tidy/Proprietary.php @@ -15,6 +15,7 @@ public function makeFixes() { $r['thead@background'] = new HTMLPurifier_AttrTransform_Background(); $r['tfoot@background'] = new HTMLPurifier_AttrTransform_Background(); $r['tbody@background'] = new HTMLPurifier_AttrTransform_Background(); + $r['table@height'] = new HTMLPurifier_AttrTransform_Length('height'); return $r; } diff --git a/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php b/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php index 8cc952549cc4c..afa7608924792 100644 --- a/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php +++ b/lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php @@ -34,16 +34,21 @@ public function handleText(&$token) { // ---- // This is a degenerate case } else { - // State 1.2: PAR1 - // ---- + if (!$token->is_whitespace || $this->_isInline($current)) { + // State 1.2: PAR1 + // ---- - // State 1.3: PAR1\n\nPAR2 - // ------------ + // State 1.3: PAR1\n\nPAR2 + // ------------ - // State 1.4:
PAR1\n\nPAR2 (see State 2) - // ------------ - $token = array($this->_pStart()); - $this->_splitText($text, $token); + // State 1.4:
PAR1\n\nPAR2 (see State 2) + // ------------ + $token = array($this->_pStart()); + $this->_splitText($text, $token); + } else { + // State 1.5: \n
+ // -- + } } } else { // State 2:
PAR1... (similar to 1.4) diff --git a/lib/htmlpurifier/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php b/lib/htmlpurifier/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php new file mode 100644 index 0000000000000..b21313470e4e5 --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php @@ -0,0 +1,60 @@ +attrValidator = new HTMLPurifier_AttrValidator(); + $this->config = $config; + $this->context = $context; + return parent::prepare($config, $context); + } + + public function handleElement(&$token) { + if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) { + return; + } + + // We need to validate the attributes now since this doesn't normally + // happen until after MakeWellFormed. If all the attributes are removed + // the span needs to be removed too. + $this->attrValidator->validateToken($token, $this->config, $this->context); + $token->armor['ValidateAttributes'] = true; + + if (!empty($token->attr)) { + return; + } + + $nesting = 0; + $spanContentTokens = array(); + while ($this->forwardUntilEndToken($i, $current, $nesting)) {} + + if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') { + // Mark closing span tag for deletion + $current->markForDeletion = true; + // Delete open span tag + $token = false; + } + } + + public function handleEnd(&$token) { + if ($token->markForDeletion) { + $token = false; + } + } +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/HTMLPurifier/Injector/SafeObject.php b/lib/htmlpurifier/HTMLPurifier/Injector/SafeObject.php index 34158286858c6..9e178ce01aade 100644 --- a/lib/htmlpurifier/HTMLPurifier/Injector/SafeObject.php +++ b/lib/htmlpurifier/HTMLPurifier/Injector/SafeObject.php @@ -20,6 +20,8 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector protected $allowedParam = array( 'wmode' => true, 'movie' => true, + 'flashvars' => true, + 'src' => true, ); public function prepare($config, $context) { @@ -47,7 +49,8 @@ public function handleElement(&$token) { // We need this fix because YouTube doesn't supply a data // attribute, which we need if a type is specified. This is // *very* Flash specific. - if (!isset($this->objectStack[$i]->attr['data']) && $token->attr['name'] == 'movie') { + if (!isset($this->objectStack[$i]->attr['data']) && + ($token->attr['name'] == 'movie' || $token->attr['name'] == 'src')) { $this->objectStack[$i]->attr['data'] = $token->attr['value']; } // Check if the parameter is the correct value but has not diff --git a/lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php b/lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php index 57cffa82ab048..1d358c7b6bea2 100644 --- a/lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php +++ b/lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php @@ -26,13 +26,20 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer * Internal accumulator array for SAX parsers. */ protected $tokens = array(); + protected $last_token_was_empty; + + private $parent_handler; + private $stack = array(); public function tokenizeHTML($string, $config, $context) { $this->tokens = array(); + $this->last_token_was_empty = false; $string = $this->normalize($string, $config, $context); + $this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler')); + $parser = new XML_HTMLSax3(); $parser->set_object($this); $parser->set_element_handler('openHandler','closeHandler'); @@ -44,6 +51,8 @@ public function tokenizeHTML($string, $config, $context) { $parser->parse($string); + restore_error_handler(); + return $this->tokens; } @@ -58,9 +67,11 @@ public function openHandler(&$parser, $name, $attrs, $closed) { } if ($closed) { $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs); + $this->last_token_was_empty = true; } else { $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs); } + $this->stack[] = $name; return true; } @@ -71,10 +82,12 @@ public function closeHandler(&$parser, $name) { // HTMLSax3 seems to always send empty tags an extra close tag // check and ignore if you see it: // [TESTME] to make sure it doesn't overreach - if ($this->tokens[count($this->tokens)-1] instanceof HTMLPurifier_Token_Empty) { + if ($this->last_token_was_empty) { + $this->last_token_was_empty = false; return true; } $this->tokens[] = new HTMLPurifier_Token_End($name); + if (!empty($this->stack)) array_pop($this->stack); return true; } @@ -82,6 +95,7 @@ public function closeHandler(&$parser, $name) { * Data event handler, interface is defined by PEAR package. */ public function dataHandler(&$parser, $data) { + $this->last_token_was_empty = false; $this->tokens[] = new HTMLPurifier_Token_Text($data); return true; } @@ -91,7 +105,18 @@ public function dataHandler(&$parser, $data) { */ public function escapeHandler(&$parser, $data) { if (strpos($data, '--') === 0) { - $this->tokens[] = new HTMLPurifier_Token_Comment($data); + // remove trailing and leading double-dashes + $data = substr($data, 2); + if (strlen($data) >= 2 && substr($data, -2) == "--") { + $data = substr($data, 0, -2); + } + if (isset($this->stack[sizeof($this->stack) - 1]) && + $this->stack[sizeof($this->stack) - 1] == "style") { + $this->tokens[] = new HTMLPurifier_Token_Text($data); + } else { + $this->tokens[] = new HTMLPurifier_Token_Comment($data); + } + $this->last_token_was_empty = false; } // CDATA is handled elsewhere, but if it was handled here: //if (strpos($data, '[CDATA[') === 0) { @@ -101,6 +126,14 @@ public function escapeHandler(&$parser, $data) { return true; } + /** + * An error handler that mutes strict errors + */ + public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) { + if ($errno == E_STRICT) return; + return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext); + } + } // vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php b/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php index feb0c32b45709..316b2386ac8cd 100644 --- a/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/lib/htmlpurifier/HTMLPurifier/Strategy/MakeWellFormed.php @@ -83,6 +83,7 @@ public function execute($tokens, $config, $context) { $this->injectors[] = $injector; } foreach ($custom_injectors as $injector) { + if (!$injector) continue; if (is_string($injector)) { $injector = "HTMLPurifier_Injector_$injector"; $injector = new $injector; @@ -219,6 +220,19 @@ public function execute($tokens, $config, $context) { $autoclose = false; } + if ($autoclose && $definition->info[$token->name]->wrap) { + // check if this is actually a wrap (mmm wraps!) + $wrapname = $definition->info[$token->name]->wrap; + $wrapdef = $definition->info[$wrapname]; + $elements = $wrapdef->child->getAllowedElements($config); + if (isset($elements[$token->name])) { + $newtoken = new HTMLPurifier_Token_Start($wrapname); + $this->insertBefore($newtoken); + $reprocess = true; + continue; + } + } + $carryover = false; if ($autoclose && $definition->info[$parent->name]->formatting) { $carryover = true; diff --git a/lib/htmlpurifier/HTMLPurifier/URIScheme/data.php b/lib/htmlpurifier/HTMLPurifier/URIScheme/data.php new file mode 100644 index 0000000000000..b7f1989cbf7fc --- /dev/null +++ b/lib/htmlpurifier/HTMLPurifier/URIScheme/data.php @@ -0,0 +1,93 @@ + true, + 'image/gif' => true, + 'image/png' => true, + ); + + public function validate(&$uri, $config, $context) { + $result = explode(',', $uri->path, 2); + $is_base64 = false; + $charset = null; + $content_type = null; + if (count($result) == 2) { + list($metadata, $data) = $result; + // do some legwork on the metadata + $metas = explode(';', $metadata); + while(!empty($metas)) { + $cur = array_shift($metas); + if ($cur == 'base64') { + $is_base64 = true; + break; + } + if (substr($cur, 0, 8) == 'charset=') { + // doesn't match if there are arbitrary spaces, but + // whatever dude + if ($charset !== null) continue; // garbage + $charset = substr($cur, 8); // not used + } else { + if ($content_type !== null) continue; // garbage + $content_type = $cur; + } + } + } else { + $data = $result[0]; + } + if ($content_type !== null && empty($this->allowed_types[$content_type])) { + return false; + } + if ($charset !== null) { + // error; we don't allow plaintext stuff + $charset = null; + } + $data = rawurldecode($data); + if ($is_base64) { + $raw_data = base64_decode($data); + } else { + $raw_data = $data; + } + // XXX probably want to refactor this into a general mechanism + // for filtering arbitrary content types + $file = tempnam("/tmp", ""); + file_put_contents($file, $raw_data); + if (function_exists('exif_imagetype')) { + $image_code = exif_imagetype($file); + } elseif (function_exists('getimagesize')) { + set_error_handler(array($this, 'muteErrorHandler')); + $info = getimagesize($file); + restore_error_handler(); + if ($info == false) return false; + $image_code = $info[2]; + } else { + trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR); + } + $real_content_type = image_type_to_mime_type($image_code); + if ($real_content_type != $content_type) { + // we're nice guys; if the content type is something else we + // support, change it over + if (empty($this->allowed_types[$real_content_type])) return false; + $content_type = $real_content_type; + } + // ok, it's kosher, rewrite what we need + $uri->userinfo = null; + $uri->host = null; + $uri->port = null; + $uri->fragment = null; + $uri->query = null; + $uri->path = "$content_type;base64," . base64_encode($raw_data); + return true; + } + + public function muteErrorHandler($errno, $errstr) {} + +} + diff --git a/lib/htmlpurifier/readme_moodle.txt b/lib/htmlpurifier/readme_moodle.txt index cdf31c4d5c0e1..1d5ab444988b5 100644 --- a/lib/htmlpurifier/readme_moodle.txt +++ b/lib/htmlpurifier/readme_moodle.txt @@ -1,4 +1,4 @@ -Description of HTML Purifier v4.0.0 library import into Moodle +Description of HTML Purifier v4.1.0 library import into Moodle Changes: * HMLTModule/Text.php - added , , and tags diff --git a/lib/thirdpartylibs.xml b/lib/thirdpartylibs.xml index 0cc749c176715..2c41c0086e8e9 100644 --- a/lib/thirdpartylibs.xml +++ b/lib/thirdpartylibs.xml @@ -60,7 +60,7 @@ htmlpurifier HTML Purifier LGPL - 4.0.0 + 4.1.0 2.1+