Skip to content

Commit

Permalink
MDL-56012 lib: Update spout to version 2.6.0
Browse files Browse the repository at this point in the history
  • Loading branch information
abgreeve committed Oct 4, 2016
1 parent ddd8dc0 commit cd198ea
Show file tree
Hide file tree
Showing 43 changed files with 1,579 additions and 303 deletions.
4 changes: 4 additions & 0 deletions lib/spout/src/Spout/Common/Escaper/ODS.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

namespace Box\Spout\Common\Escaper;

use Box\Spout\Common\Singleton;

/**
* Class ODS
* Provides functions to escape and unescape data for ODS files
Expand All @@ -10,6 +12,8 @@
*/
class ODS implements EscaperInterface
{
use Singleton;

/**
* Escapes the given string to make it compatible with ODS
*
Expand Down
51 changes: 43 additions & 8 deletions lib/spout/src/Spout/Common/Escaper/XLSX.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

namespace Box\Spout\Common\Escaper;

use Box\Spout\Common\Singleton;

/**
* Class XLSX
* Provides functions to escape and unescape data for XLSX files
Expand All @@ -10,15 +12,25 @@
*/
class XLSX implements EscaperInterface
{
/** @var string[] Control characters to be escaped */
use Singleton;

/** @var string Regex pattern to detect control characters that need to be escaped */
protected $escapableControlCharactersPattern;

/** @var string[] Map containing control characters to be escaped (key) and their escaped value (value) */
protected $controlCharactersEscapingMap;

/** @var string[] Map containing control characters to be escaped (value) and their escaped value (key) */
protected $controlCharactersEscapingReverseMap;

/**
*
* Initializes the singleton instance
*/
public function __construct()
protected function init()
{
    // The pattern has to be assigned before the map is built:
    // getControlCharactersEscapingMap() tests each candidate character against it.
    $this->escapableControlCharactersPattern = $this->getEscapableControlCharactersPattern();

    // Map of escaped token => raw control character
    $escapingMap = $this->getControlCharactersEscapingMap();
    $this->controlCharactersEscapingMap = $escapingMap;

    // Flipped copy (raw control character => escaped token)
    $this->controlCharactersEscapingReverseMap = array_flip($escapingMap);
}

/**
Expand Down Expand Up @@ -49,6 +61,20 @@ public function unescape($string)
return $unescapedString;
}

/**
 * Builds the regex character class listing the control characters that must be escaped.
 *
 * It covers the ASCII control range 0x00-0x1F, minus "\t" (0x09), "\n" (0x0A) and "\r" (0x0D).
 * The returned value is a bare character class: it carries no regex delimiters.
 *
 * @return string Regex pattern containing all escapable control characters
 */
protected function getEscapableControlCharactersPattern()
{
    // Single quotes on purpose: the "\xNN" sequences must stay literal text
    // so that the regex engine, not PHP, interprets them.
    $escapableRanges = [
        '\x00-\x08', // stops right before "\t" (0x09) and "\n" (0x0A)
        '\x0B-\x0C', // the two characters sitting between "\n" and "\r" (0x0D)
        '\x0E-\x1F', // everything after "\r", up to the end of the control range
    ];

    return '[' . implode('', $escapableRanges) . ']';
}

/**
* Builds the map containing control characters to be escaped
* mapped to their escaped values.
Expand All @@ -62,14 +88,14 @@ public function unescape($string)
protected function getControlCharactersEscapingMap()
{
$controlCharactersEscapingMap = [];
$whitelistedControlCharacters = ["\t", "\r", "\n"];

// control characters values are from 0 to 1F (hex values) in the ASCII table
for ($charValue = 0x0; $charValue <= 0x1F; $charValue++) {
if (!in_array(chr($charValue), $whitelistedControlCharacters)) {
for ($charValue = 0x00; $charValue <= 0x1F; $charValue++) {
$character = chr($charValue);
if (preg_match("/{$this->escapableControlCharactersPattern}/", $character)) {
$charHexValue = dechex($charValue);
$escapedChar = '_x' . sprintf('%04s' , strtoupper($charHexValue)) . '_';
$controlCharactersEscapingMap[$escapedChar] = chr($charValue);
$controlCharactersEscapingMap[$escapedChar] = $character;
}
}

Expand All @@ -92,7 +118,15 @@ protected function getControlCharactersEscapingMap()
protected function escapeControlCharacters($string)
{
// Run the string through escapeEscapeCharacter() before touching the control characters.
$escapedString = $this->escapeEscapeCharacter($string);
// NOTE(review): this listing looks like a rendered commit diff whose +/- markers were
// stripped, so the str_replace() return below is presumably the removed, pre-change line.
// Read literally, the method returns here and nothing after it ever runs - confirm
// against the real file.
return str_replace(array_values($this->controlCharactersEscapingMap), array_keys($this->controlCharactersEscapingMap), $escapedString);

// if no control characters, return the string as is and skip the replacement pass
if (!preg_match("/{$this->escapableControlCharactersPattern}/", $escapedString)) {
return $escapedString;
}

// Swap every matched control character for the value stored under it in the reverse map.
return preg_replace_callback("/({$this->escapableControlCharactersPattern})/", function($matches) {
return $this->controlCharactersEscapingReverseMap[$matches[0]];
}, $escapedString);
}

/**
Expand Down Expand Up @@ -122,6 +156,7 @@ protected function escapeEscapeCharacter($string)
protected function unescapeControlCharacters($string)
{
$unescapedString = $string;

foreach ($this->controlCharactersEscapingMap as $escapedCharValue => $charValue) {
// only unescape characters that don't contain the escaped escape character for now
$unescapedString = preg_replace("/(?<!_x005F)($escapedCharValue)/", $charValue, $unescapedString);
Expand Down
2 changes: 1 addition & 1 deletion lib/spout/src/Spout/Common/Helper/EncodingHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public function getBytesOffsetToSkipBOM($filePointer, $encoding)
{
$byteOffsetToSkipBom = 0;

if ($this->hasBom($filePointer, $encoding)) {
if ($this->hasBOM($filePointer, $encoding)) {
$bomUsed = $this->supportedEncodingsWithBom[$encoding];

// we skip the N first bytes
Expand Down
13 changes: 13 additions & 0 deletions lib/spout/src/Spout/Common/Helper/GlobalFunctionsHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,19 @@ public function header($string)
header($string);
}

/**
 * Wrapper around global function ob_end_clean().
 * The global function is only called when ob_get_length() reports more than 0 bytes.
 * @see ob_end_clean()
 *
 * @return void
 */
public function ob_end_clean()
{
    $bufferedLength = ob_get_length();

    // Anything that does not compare greater than 0 leaves the buffers untouched
    if ($bufferedLength > 0) {
        // Unqualified call inside a method: this is the global function, not a recursive call
        ob_end_clean();
    }
}

/**
* Wrapper around global function iconv()
* @see iconv()
Expand Down
41 changes: 41 additions & 0 deletions lib/spout/src/Spout/Common/Singleton.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<?php

namespace Box\Spout\Common;

/**
* Class Singleton
* Defines a class as a singleton.
*
* @package Box\Spout\Common
*/
trait Singleton
{
    /** @var static|null Instance cached by getInstance() once it has been created */
    protected static $instance;

    /**
     * Returns the cached instance, creating it on the first call.
     *
     * @return static
     */
    final public static function getInstance()
    {
        if (!isset(static::$instance)) {
            static::$instance = new static;
        }

        return static::$instance;
    }

    /**
     * Singleton constructor.
     * Private, so the only way to obtain an instance is getInstance().
     * All setup is delegated to init().
     */
    final private function __construct()
    {
        $this->init();
    }

    /**
     * Initializes the singleton.
     * Called once from the constructor; does nothing unless the using class overrides it.
     *
     * @return void
     */
    protected function init()
    {
    }

    // Declared private and empty.
    // NOTE(review): PHP 8.0 reportedly warns about "final" on private methods other than
    // the constructor - confirm against the PHP versions this library has to support.
    final private function __wakeup()
    {
    }

    final private function __clone()
    {
    }
}
17 changes: 16 additions & 1 deletion lib/spout/src/Spout/Reader/AbstractReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ abstract class AbstractReader implements ReaderInterface
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;

/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates = false;

/**
* Returns whether stream wrappers are supported
*
Expand Down Expand Up @@ -49,7 +52,7 @@ abstract public function getConcreteSheetIterator();
abstract protected function closeReader();

/**
* @param $globalFunctionsHelper
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
* @return AbstractReader
*/
public function setGlobalFunctionsHelper($globalFunctionsHelper)
Expand All @@ -58,6 +61,18 @@ public function setGlobalFunctionsHelper($globalFunctionsHelper)
return $this;
}

/**
 * Chooses how date/time values are handed back: as PHP objects or formatted as strings.
 * The flag is only stored here.
 *
 * @param bool $shouldFormatDates Whether date/time values should be formatted as strings
 * @return AbstractReader The reader itself, so that calls can be chained
 */
public function setShouldFormatDates($shouldFormatDates)
{
    $this->shouldFormatDates = $shouldFormatDates;

    // fluent setter
    return $this;
}

/**
* Prepares the reader to read the given file. It also makes sure
* that the file exists and is readable.
Expand Down
3 changes: 2 additions & 1 deletion lib/spout/src/Spout/Reader/CSV/RowIterator.php
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class RowIterator implements IteratorInterface
* @param string $fieldDelimiter Character that delimits fields
* @param string $fieldEnclosure Character that enclose fields
* @param string $encoding Encoding of the CSV file to be read
* @param string $endOfLineDelimiter End of line delimiter
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
*/
public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper)
Expand Down Expand Up @@ -152,7 +153,7 @@ public function next()
*/
protected function getNextUTF8EncodedRow()
{
$encodedRowData = fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
$encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
if (false === $encodedRowData) {
return false;
}
Expand Down
64 changes: 49 additions & 15 deletions lib/spout/src/Spout/Reader/ODS/Helper/CellValueFormatter.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ class CellValueFormatter
/** Definition of XML nodes names used to parse data */
const XML_NODE_P = 'p';
const XML_NODE_S = 'text:s';
const XML_NODE_A = 'text:a';
const XML_NODE_SPAN = 'text:span';

/** Definition of XML attribute used to parse data */
const XML_ATTRIBUTE_TYPE = 'office:value-type';
Expand All @@ -33,16 +35,21 @@ class CellValueFormatter
const XML_ATTRIBUTE_CURRENCY = 'office:currency';
const XML_ATTRIBUTE_C = 'text:c';

/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;

/** @var \Box\Spout\Common\Escaper\ODS Used to unescape XML data */
protected $escaper;

/**
*
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct()
public function __construct($shouldFormatDates)
{
$this->shouldFormatDates = $shouldFormatDates;

/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$this->escaper = new \Box\Spout\Common\Escaper\ODS();
$this->escaper = \Box\Spout\Common\Escaper\ODS::getInstance();
}

/**
Expand Down Expand Up @@ -98,6 +105,8 @@ protected function formatStringCellValue($node)
$spaceAttribute = $childNode->getAttribute(self::XML_ATTRIBUTE_C);
$numSpaces = (!empty($spaceAttribute)) ? intval($spaceAttribute) : 1;
$currentPValue .= str_repeat(' ', $numSpaces);
} else if ($childNode->nodeName === self::XML_NODE_A || $childNode->nodeName === self::XML_NODE_SPAN) {
$currentPValue .= $childNode->nodeValue;
}
}

Expand All @@ -119,6 +128,7 @@ protected function formatFloatCellValue($node)
{
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
$nodeIntValue = intval($nodeValue);
// The "==" is intentionally not a "===" because only the value matters, not the type
$cellValue = ($nodeIntValue == $nodeValue) ? $nodeIntValue : floatval($nodeValue);
return $cellValue;
}
Expand All @@ -141,31 +151,55 @@ protected function formatBooleanCellValue($node)
* Returns the cell Date value from the given node.
*
* @param \DOMNode $node
* @return \DateTime|null The value associated with the cell or NULL if invalid date value
* @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
*/
protected function formatDateCellValue($node)
{
try {
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE);
return new \DateTime($nodeValue);
} catch (\Exception $e) {
return null;
// The XML node looks like this:
// <table:table-cell calcext:value-type="date" office:date-value="2016-05-19T16:39:00" office:value-type="date">
// <text:p>05/19/16 04:39 PM</text:p>
// </table:table-cell>

if ($this->shouldFormatDates) {
// The date is already formatted in the "p" tag
$nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);
return $nodeWithValueAlreadyFormatted->nodeValue;
} else {
// otherwise, get it from the "date-value" attribute
try {
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE);
return new \DateTime($nodeValue);
} catch (\Exception $e) {
return null;
}
}
}

/**
* Returns the cell Time value from the given node.
*
* @param \DOMNode $node
* @return \DateInterval|null The value associated with the cell or NULL if invalid time value
* @return \DateInterval|string|null The value associated with the cell or NULL if invalid time value
*/
protected function formatTimeCellValue($node)
{
try {
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE);
return new \DateInterval($nodeValue);
} catch (\Exception $e) {
return null;
// The XML node looks like this:
// <table:table-cell calcext:value-type="time" office:time-value="PT13H24M00S" office:value-type="time">
// <text:p>01:24:00 PM</text:p>
// </table:table-cell>

if ($this->shouldFormatDates) {
// The time is already formatted in the "p" tag
$nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);
return $nodeWithValueAlreadyFormatted->nodeValue;
} else {
// otherwise, get it from the "time-value" attribute
try {
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE);
return new \DateInterval($nodeValue);
} catch (\Exception $e) {
return null;
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion lib/spout/src/Spout/Reader/ODS/Reader.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ protected function openReader($filePath)
$this->zip = new \ZipArchive();

if ($this->zip->open($filePath) === true) {
$this->sheetIterator = new SheetIterator($filePath);
$this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates);
} else {
throw new IOException("Could not open $filePath for reading.");
}
Expand Down
7 changes: 4 additions & 3 deletions lib/spout/src/Spout/Reader/ODS/RowIterator.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,12 @@ class RowIterator implements IteratorInterface

/**
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct($xmlReader)
public function __construct($xmlReader, $shouldFormatDates)
{
$this->xmlReader = $xmlReader;
$this->cellValueFormatter = new CellValueFormatter();
$this->cellValueFormatter = new CellValueFormatter($shouldFormatDates);
}

/**
Expand Down Expand Up @@ -186,7 +187,7 @@ protected function getCellValue($node)
/**
* empty() replacement that honours 0 as a valid value
*
* @param $value The cell value
* @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value
* @return bool
*/
protected function isEmptyCellValue($value)
Expand Down
Loading

0 comments on commit cd198ea

Please sign in to comment.