Skip to content

Commit

Permalink
Markdown:
Browse files Browse the repository at this point in the history
* preserve code in <code> tags (optionally, defaults yes)
* use proper backreferences for simpler regex in codeblock preservation
* introduce `hash_block()` method
* rename `shortcode_restore()` to `do_restore()` since it restores anything `hash_block`'ed

Fixes Automattic#6349

Merges r93584-wpcom.
  • Loading branch information
mattwiebe authored and cathyjf committed Mar 13, 2014
1 parent ae9a764 commit 506d4bd
Showing 1 changed file with 46 additions and 13 deletions.
59 changes: 46 additions & 13 deletions _inc/lib/markdown/gfm.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ class WPCom_GHF_Markdown_Parser extends MarkdownExtra_Parser {
*/
public $preserve_latex = true;

/**
* Preserve single-line <code> blocks.
* @var boolean
*/
public $preserve_inline_code_blocks = true;

/**
* Strip paragraphs from the output. This is the right default for WordPress,
* which generally wants to create its own paragraphs with `wpautop`
Expand Down Expand Up @@ -79,6 +85,10 @@ public function transform( $text ) {
if ( $this->preserve_latex ) {
$text = $this->latex_preserve( $text );
}
// Preserve anything inside a single-line <code> element
if ( $this->preserve_inline_code_blocks ) {
$text = $this->single_line_code_preserve( $text );
}

// escape line-beginning # chars that do not have a space after them.
$text = preg_replace_callback( '|^#{1,6}( )?|um', array( $this, '_doEscapeForHashWithoutSpacing' ), $text );
Expand All @@ -89,8 +99,8 @@ public function transform( $text ) {
// put start-of-line # chars back in place
$text = preg_replace( "/^(<p>)?(&#35;|\\\\#)/um", "$1#", $text );

// Restore shortcodes/LaTeX
$text = $this->shortcode_restore( $text );
// Restore preserved things like shortcodes/LaTeX
$text = $this->do_restore( $text );

// Strip paras if set
if ( $this->strip_paras ) {
Expand All @@ -100,15 +110,31 @@ public function transform( $text ) {
return $text;
}

/**
 * Prevents blocks like <code>__this__</code> from turning into <code><strong>this</strong></code>.
 *
 * Each <code>…</code> element whose content sits on a single line is matched
 * individually and its content is swapped for a placeholder by
 * do_single_line_code_preserve().
 *
 * @param string $text Text that may need preserving
 * @return string Text with the contents of single-line <code> elements replaced by placeholders
 */
public function single_line_code_preserve( $text ) {
	// The quantifier is lazy (`.+?`) so that a line holding several <code>
	// elements yields one match per element. A greedy `.+` would run from the
	// first <code> to the last </code> on the line and swallow the ordinary
	// text in between, which would then never be Markdown-processed.
	// `.` does not match newlines, so only single-line elements are affected.
	return preg_replace_callback( '|<code>(.+?)</code>|', array( $this, 'do_single_line_code_preserve' ), $text );
}

/**
 * Regex callback for inline code preservation.
 *
 * Hands the captured content of a <code> element to hash_block() and wraps
 * the value it returns back in <code> tags.
 *
 * @param array $matches Regex matches; index 1 holds the text between the <code> tags
 * @return string The <code> element with its content replaced by the hash_block() result
 */
public function do_single_line_code_preserve( $matches ) {
	$placeholder = $this->hash_block( $matches[1] );

	return '<code>' . $placeholder . '</code>';
}

/**
* Preserve code block contents by HTML encoding them. Useful before getting to KSES stripping.
* @param string $text Markdown/HTML content
* @return string Markdown/HTML content with escaped code blocks
*/
public function codeblock_preserve( $text ) {
$text = preg_replace_callback( "/^(`{3})([^`\n]+)?\n([^`~]+)(`{3})/m", array( $this, 'do_codeblock_preserve' ), $text );
$text = preg_replace_callback( "/^(~{3})([^~\n]+)?\n([^~~]+)(~{3})/m", array( $this, 'do_codeblock_preserve' ), $text );
return $text;
return preg_replace_callback( "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m", array( $this, 'do_codeblock_preserve' ), $text );
}

/**
Expand All @@ -129,9 +155,7 @@ public function do_codeblock_preserve( $matches ) {
* @return string Markdown/HTML content
*/
public function codeblock_restore( $text ) {
$text = preg_replace_callback( "/^(`{3})([^`\n]+)?\n([^`~]+)(`{3})/m", array( $this, 'do_codeblock_restore' ), $text );
$text = preg_replace_callback( "/^(~{3})([^~\n]+)?\n([^~~]+)(~{3})/m", array( $this, 'do_codeblock_restore' ), $text );
return $text;
return preg_replace_callback( "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m", array( $this, 'do_codeblock_restore' ), $text );
}

/**
Expand Down Expand Up @@ -176,11 +200,11 @@ protected function shortcode_preserve( $text ) {
}

/**
* Restores any text preserved by $this->latex_preserve() or $this->shortcode_preserve()
* Restores any text preserved by $this->hash_block()
* @param string $text Text that may have hashed preservation placeholders
* @return string Text with hashed preservation placeholders replaced by original text
*/
protected function shortcode_restore( $text ) {
protected function do_restore( $text ) {
foreach( $this->preserve_text_hash as $hash => $value ) {
$placeholder = $this->hash_maker( $hash );
$text = str_replace( $placeholder, $value, $text );
Expand All @@ -196,8 +220,17 @@ protected function shortcode_restore( $text ) {
* @return string A placeholder that will later be replaced by the original text
*/
protected function _doRemoveText( $m ) {
$hash = md5( $m[0] );
$this->preserve_text_hash[ $hash ] = $m[0];
return $this->hash_block( $m[0] );
}

/**
 * Stores a block of text so it can be put back later.
 *
 * The text is recorded in $this->preserve_text_hash under its md5 digest,
 * and the placeholder built from that digest by hash_maker() is returned.
 *
 * @param string $text Text to preserve for later
 * @return string Placeholder that will be swapped out later for the original text
 */
protected function hash_block( $text ) {
	$key = md5( $text );

	// Record the original text first, then hand back its placeholder.
	$this->preserve_text_hash[ $key ] = $text;

	return $this->hash_maker( $key );
}
Expand All @@ -208,7 +241,7 @@ protected function _doRemoveText( $m ) {
* @return string A placeholder hash
*/
protected function hash_maker( $hash ) {
return 'MARDOWN_HASH' . $hash . 'MARKDOWN_HASH';
return 'MARKDOWN_HASH' . $hash . 'MARKDOWN_HASH';
}

/**
Expand Down

0 comments on commit 506d4bd

Please sign in to comment.