Skip to content

Commit

Permalink
MDL-22537 new html purifier
Browse files Browse the repository at this point in the history
  • Loading branch information
skodak committed May 21, 2010
1 parent d7be65a commit 7ff7f70
Show file tree
Hide file tree
Showing 27 changed files with 330 additions and 36 deletions.
6 changes: 3 additions & 3 deletions lib/htmlpurifier/HTMLPurifier.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
*/

/*
HTML Purifier 4.0.0 - Standards Compliant HTML Filtering
HTML Purifier 4.1.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
Expand Down Expand Up @@ -55,10 +55,10 @@ class HTMLPurifier
{

/** Version of HTML Purifier */
public $version = '4.0.0';
public $version = '4.1.0';

/** Constant with version of HTML Purifier */
const VERSION = '4.0.0';
const VERSION = '4.1.0';

/** Global configuration object */
public $config;
Expand Down
2 changes: 2 additions & 0 deletions lib/htmlpurifier/HTMLPurifier.safe-includes.php
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
Expand All @@ -192,6 +193,7 @@
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
Expand Down
2 changes: 1 addition & 1 deletion lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public function validate($uri_string, $config, $context) {
// URI at all
$result = str_replace($keys, $values, $result);

return "url($result)";
return "url('$result')";

}

Expand Down
3 changes: 2 additions & 1 deletion lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ public function transform($attr, $config, $context) {
if ($src) {
$alt = $config->get('Attr.DefaultImageAlt');
if ($alt === null) {
$attr['alt'] = basename($attr['src']);
// truncate if the alt is too long
$attr['alt'] = substr(basename($attr['src']),0,40);
} else {
$attr['alt'] = $alt;
}
Expand Down
6 changes: 6 additions & 0 deletions lib/htmlpurifier/HTMLPurifier/AttrTransform/SafeParam.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,14 @@ public function transform($attr, $config, $context) {
$attr['value'] = 'window';
break;
case 'movie':
case 'src':
$attr['name'] = "movie";
$attr['value'] = $this->uri->validate($attr['value'], $config, $context);
break;
case 'flashvars':
// we're going to allow arbitrary inputs to the SWF, on
// the reasoning that it could only hack the SWF, not us.
break;
// add other cases to support other param name/value pairs
default:
$attr['name'] = $attr['value'] = null;
Expand Down
2 changes: 1 addition & 1 deletion lib/htmlpurifier/HTMLPurifier/Config.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class HTMLPurifier_Config
/**
* HTML Purifier's version
*/
public $version = '4.0.0';
public $version = '4.1.0';

/**
* Bool indicator whether or not to automatically finalize
Expand Down
Binary file modified lib/htmlpurifier/HTMLPurifier/ConfigSchema/schema.ser
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
AutoFormat.RemoveSpansWithoutAttributes
TYPE: bool
VERSION: 4.0.1
DEFAULT: false
--DESCRIPTION--
<p>
This directive causes <code>span</code> tags without any attributes
to be removed. It will also remove spans that had all attributes
removed during processing.
</p>
--# vim: et sw=4 sts=4
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ DEFAULT: false
Whether or not to permit embed tags in documents, with a number of extra
security features added to prevent script execution. This is similar to
what websites like MySpace do to embed tags. Embed is a proprietary
element and will cause your website to stop validating. You probably want
to enable this with %HTML.SafeObject.
<strong>Highly experimental.</strong>
</p>
element and will cause your website to stop validating; you should
see if you can use %Output.FlashCompat with %HTML.SafeObject instead
first.</p>
--# vim: et sw=4 sts=4
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@ DEFAULT: false
<p>
Whether or not to permit object tags in documents, with a number of extra
security features added to prevent script execution. This is similar to
what websites like MySpace do to object tags. You may also want to
enable %HTML.SafeEmbed for maximum interoperability with Internet Explorer,
although embed tags will cause your website to stop validating.
<strong>Highly experimental.</strong>
what websites like MySpace do to object tags. You should also enable
%Output.FlashCompat in order to generate Internet Explorer
compatibility code for your object tags.
</p>
--# vim: et sw=4 sts=4
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Output.FlashCompat
TYPE: bool
VERSION: 4.1.0
DEFAULT: false
--DESCRIPTION--
<p>
If true, HTML Purifier will generate Internet Explorer compatibility
code for all object tags. This is highly recommended if you enable
%HTML.SafeObject.
</p>
--# vim: et sw=4 sts=4
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,6 @@ array (
--DESCRIPTION--
Whitelist that defines the schemes that a URI is allowed to have. This
prevents XSS attacks from using pseudo-schemes like javascript or mocha.
There is also support for the <code>data</code> URI scheme, but it is not
enabled by default.
--# vim: et sw=4 sts=4
7 changes: 7 additions & 0 deletions lib/htmlpurifier/HTMLPurifier/ElementDef.php
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,13 @@ class HTMLPurifier_ElementDef
*/
public $autoclose = array();

/**
* If a foreign element is found in this element, test if it is
* allowed by this sub-element; if it is, instead of closing the
* current element, place it inside this element.
*/
public $wrap;

/**
* Whether or not this is a formatting element affected by the
* "Active Formatting Elements" algorithm.
Expand Down
10 changes: 5 additions & 5 deletions lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter

public function preFilter($html, $config, $context) {
$pre_regex = '#<object[^>]+>.+?'.
'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?</object>#s';
$pre_replace = '<span class="youtube-embed">\1</span>';
return preg_replace($pre_regex, $pre_replace, $html);
}

public function postFilter($html, $config, $context) {
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#';
$post_regex = '#<span class="youtube-embed">((?:v|cp)/[A-Za-z0-9\-_=]+)</span>#';
return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html);
}

Expand All @@ -24,10 +24,10 @@ protected function armorUrl($url) {
protected function postFilterCallback($matches) {
$url = $this->armorUrl($matches[1]);
return '<object width="425" height="350" type="application/x-shockwave-flash" '.
'data="http://www.youtube.com/v/'.$url.'">'.
'<param name="movie" value="http://www.youtube.com/v/'.$url.'"></param>'.
'data="http://www.youtube.com/'.$url.'">'.
'<param name="movie" value="http://www.youtube.com/'.$url.'"></param>'.
'<!--[if IE]>'.
'<embed src="http://www.youtube.com/v/'.$url.'"'.
'<embed src="http://www.youtube.com/'.$url.'"'.
'type="application/x-shockwave-flash"'.
'wmode="transparent" width="425" height="350" />'.
'<![endif]-->'.
Expand Down
43 changes: 42 additions & 1 deletion lib/htmlpurifier/HTMLPurifier/Generator.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@ class HTMLPurifier_Generator
*/
private $_sortAttr;

/**
* Cache of %Output.FlashCompat
*/
private $_flashCompat;

/**
* Stack for keeping track of object information when outputting IE
* compatibility code.
*/
private $_flashStack = array();

/**
* Configuration for the generator
*/
Expand All @@ -44,6 +55,7 @@ public function __construct($config, $context) {
$this->config = $config;
$this->_scriptFix = $config->get('Output.CommentScriptContents');
$this->_sortAttr = $config->get('Output.SortAttr');
$this->_flashCompat = $config->get('Output.FlashCompat');
$this->_def = $config->getHTMLDefinition();
$this->_xhtml = $this->_def->doctype->xml;
}
Expand Down Expand Up @@ -104,12 +116,41 @@ public function generateFromToken($token) {

} elseif ($token instanceof HTMLPurifier_Token_Start) {
$attr = $this->generateAttributes($token->attr, $token->name);
if ($this->_flashCompat) {
if ($token->name == "object") {
$flash = new stdclass();
$flash->attr = $token->attr;
$flash->param = array();
$this->_flashStack[] = $flash;
}
}
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';

} elseif ($token instanceof HTMLPurifier_Token_End) {
return '</' . $token->name . '>';
$_extra = '';
if ($this->_flashCompat) {
if ($token->name == "object" && !empty($this->_flashStack)) {
$flash = array_pop($this->_flashStack);
$compat_token = new HTMLPurifier_Token_Empty("embed");
foreach ($flash->attr as $name => $val) {
if ($name == "classid") continue;
if ($name == "type") continue;
if ($name == "data") $name = "src";
$compat_token->attr[$name] = $val;
}
foreach ($flash->param as $name => $val) {
if ($name == "movie") $name = "src";
$compat_token->attr[$name] = $val;
}
$_extra = "<!--[if IE]>".$this->generateFromToken($compat_token)."<![endif]-->";
}
}
return $_extra . '</' . $token->name . '>';

} elseif ($token instanceof HTMLPurifier_Token_Empty) {
if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
$this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
}
$attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
Expand Down
6 changes: 4 additions & 2 deletions lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
public $content_sets = array('Flow' => 'List');

public function setup($config) {
$this->addElement('ol', 'List', 'Required: li', 'Common');
$this->addElement('ul', 'List', 'Required: li', 'Common');
$ol = $this->addElement('ol', 'List', 'Required: li', 'Common');
$ol->wrap = "li";
$ul = $this->addElement('ul', 'List', 'Required: li', 'Common');
$ul->wrap = "li";
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');

$this->addElement('li', false, 'Flow', 'Common');
Expand Down
1 change: 1 addition & 0 deletions lib/htmlpurifier/HTMLPurifier/HTMLModule/SafeEmbed.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ public function setup($config) {
'height' => 'Pixels#' . $max,
'allowscriptaccess' => 'Enum#never',
'allownetworking' => 'Enum#internal',
'flashvars' => 'Text',
'wmode' => 'Enum#window',
'name' => 'ID',
)
Expand Down
5 changes: 4 additions & 1 deletion lib/htmlpurifier/HTMLPurifier/HTMLModule/SafeObject.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ public function setup($config) {
'type' => 'Enum#application/x-shockwave-flash',
'width' => 'Pixels#' . $max,
'height' => 'Pixels#' . $max,
'data' => 'URI#embedded'
'data' => 'URI#embedded',
'classid' => 'Enum#clsid:d27cdb6e-ae6d-11cf-96b8-444553540000',
'codebase' => new HTMLPurifier_AttrDef_Enum(array(
'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0')),
)
);
$object->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public function makeFixes() {
$r['thead@background'] = new HTMLPurifier_AttrTransform_Background();
$r['tfoot@background'] = new HTMLPurifier_AttrTransform_Background();
$r['tbody@background'] = new HTMLPurifier_AttrTransform_Background();
$r['table@height'] = new HTMLPurifier_AttrTransform_Length('height');
return $r;
}

Expand Down
21 changes: 13 additions & 8 deletions lib/htmlpurifier/HTMLPurifier/Injector/AutoParagraph.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,21 @@ public function handleText(&$token) {
// ----
// This is a degenerate case
} else {
// State 1.2: PAR1
// ----
if (!$token->is_whitespace || $this->_isInline($current)) {
// State 1.2: PAR1
// ----

// State 1.3: PAR1\n\nPAR2
// ------------
// State 1.3: PAR1\n\nPAR2
// ------------

// State 1.4: <div>PAR1\n\nPAR2 (see State 2)
// ------------
$token = array($this->_pStart());
$this->_splitText($text, $token);
// State 1.4: <div>PAR1\n\nPAR2 (see State 2)
// ------------
$token = array($this->_pStart());
$this->_splitText($text, $token);
} else {
// State 1.5: \n<hr />
// --
}
}
} else {
// State 2: <div>PAR1... (similar to 1.4)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<?php

/**
 * Injector that removes spans with no attributes
 *
 * A start <span> whose attribute list is empty once its attributes have been
 * validated is dropped from the token stream. The end tag found for it is
 * flagged with a "markForDeletion" property so that handleEnd() drops it too.
 */
class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector
{
    public $name = 'RemoveSpansWithoutAttributes';
    public $needed = array('span');

    /**
     * Validator used to clean a span's attributes before checking whether
     * any are left.
     */
    private $attrValidator;

    /**
     * Used by AttrValidator
     */
    private $config;
    private $context;

    /**
     * Stores the config/context pair and creates the attribute validator,
     * then defers to the parent injector's prepare().
     *
     * @param mixed $config  Configuration object handed to the validator.
     * @param mixed $context Context object handed to the validator.
     * @return mixed Whatever the parent prepare() returns.
     */
    public function prepare($config, $context) {
        $this->attrValidator = new HTMLPurifier_AttrValidator();
        $this->config = $config;
        $this->context = $context;
        return parent::prepare($config, $context);
    }

    /**
     * Drops an attribute-less start <span> and flags its end tag.
     *
     * @param mixed $token Token being processed (by reference); set to false
     *                     when the span start tag is to be removed.
     */
    public function handleElement(&$token) {
        if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) {
            return;
        }

        // We need to validate the attributes now since this doesn't normally
        // happen until after MakeWellFormed. If all the attributes are removed
        // the span needs to be removed too.
        $this->attrValidator->validateToken($token, $this->config, $this->context);
        // Armor the token so the attributes are not validated a second time.
        $token->armor['ValidateAttributes'] = true;

        if (!empty($token->attr)) {
            return;
        }

        // Walk forward with the inherited helper until it reports that there
        // is nothing more to visit. $i and $current are filled in by
        // reference; they start as null, which is what PHP would create
        // implicitly for an undefined by-reference argument.
        // NOTE(review): presumably $current ends up on the end tag matching
        // this span - confirm against HTMLPurifier_Injector::forwardUntilEndToken().
        $i = null;
        $current = null;
        $nesting = 0;
        while ($this->forwardUntilEndToken($i, $current, $nesting)) {}

        if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') {
            // Mark closing span tag for deletion
            $current->markForDeletion = true;
            // Delete open span tag
            $token = false;
        }
    }

    /**
     * Removes end tags that handleElement() flagged for deletion.
     *
     * @param mixed $token End token being processed (by reference); set to
     *                     false when it carries the deletion flag.
     */
    public function handleEnd(&$token) {
        // Only the end tags flagged in handleElement() carry this property;
        // every other end token lacks it, so the read must be guarded rather
        // than relying on how the token class treats unknown properties.
        if (!empty($token->markForDeletion)) {
            $token = false;
        }
    }
}

// vim: et sw=4 sts=4
Loading

0 comments on commit 7ff7f70

Please sign in to comment.