Skip to content

Commit

Permalink
MDL-52954 core: Change from pandoc to unoconv - it gives better results
Browse files Browse the repository at this point in the history
Most importantly it retains formatting better, and supports different charsets far better than pandoc.
  • Loading branch information
Damyon Wiese committed Mar 30, 2016
1 parent 128d873 commit 1356d85
Show file tree
Hide file tree
Showing 10 changed files with 75 additions and 49 deletions.
2 changes: 1 addition & 1 deletion admin/settings/server.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
$temp->add(new admin_setting_configexecutable('aspellpath', new lang_string('aspellpath', 'admin'), new lang_string('edhelpaspellpath'), ''));
$temp->add(new admin_setting_configexecutable('pathtodot', new lang_string('pathtodot', 'admin'), new lang_string('pathtodot_help', 'admin'), ''));
$temp->add(new admin_setting_configexecutable('pathtogs', new lang_string('pathtogs', 'admin'), new lang_string('pathtogs_help', 'admin'), '/usr/bin/gs'));
$temp->add(new admin_setting_configexecutable('pathtopandoc', new lang_string('pathtopandoc', 'admin'), new lang_string('pathtopandoc_help', 'admin'), '/usr/bin/pandoc'));
$temp->add(new admin_setting_configexecutable('pathtounoconv', new lang_string('pathtounoconv', 'admin'), new lang_string('pathtounoconv_help', 'admin'), '/usr/bin/unoconv'));
$ADMIN->add('server', $temp);


Expand Down
10 changes: 5 additions & 5 deletions config-dist.php
Original file line number Diff line number Diff line change
Expand Up @@ -838,11 +838,11 @@
// (Development->Profiling) built into Moodle.
// $CFG->pathtodot = '';
//
// Path to pandoc.
// Probably something like /usr/bin/pandoc. Used to convert between document formats.
// It is recommended to install the latest stable release of pandoc.
// Download packages for all platforms are available from http://pandoc.org/
// $CFG->pathtopandoc = '';
// Path to unoconv.
// Probably something like /usr/bin/unoconv. Used as a fallback to convert between document formats.
// Unoconv is used convert between file formats supported by LibreOffice.
// Use a recent version of unoconv ( >= 0.7 ), older versions have trouble running from a webserver.
// $CFG->pathtounoconv = '';

//=========================================================================
// ALL DONE! To continue installation, visit your main page with a browser
Expand Down
4 changes: 2 additions & 2 deletions lang/en/admin.php
Original file line number Diff line number Diff line change
Expand Up @@ -779,8 +779,6 @@
$string['passwordresettime'] = 'Maximum time to validate password reset request';
$string['passwordreuselimit'] = 'Password rotation limit';
$string['passwordreuselimit_desc'] = 'Number of times a user must change their password before they are allowed to reuse a password. Hashes of previously used passwords are stored in local database table. This feature might not be compatible with some external authentication plugins.';
$string['pathtopandoc'] = 'Path to pandoc document converter';
$string['pathtopandoc_help'] = 'Path to pandoc document converter. This is an executable that is capable of converting between document formats. This is optional, but if specified, Moodle will be able to perform automated conversion of documents from a wide range of file formats. This is used by the Assignment module "Annotate PDF" feature.';
$string['pathtodot'] = 'Path to dot';
$string['pathtodot_help'] = 'Path to dot. Probably something like /usr/bin/dot. To be able to generate graphics from DOT files, you must have installed the dot executable and point to it here. Note that, for now, this only used by the profiling features (Development->Profiling) built into Moodle.';
$string['pathtodu'] = 'Path to du';
Expand All @@ -792,6 +790,8 @@
$string['pathtopsql'] = 'Path to psql';
$string['pathtopsqldesc'] = 'This is only necessary to enter if you have more than one psql on your system (for example if you have more than one version of postgresql installed)';
$string['pathtopsqlinvalid'] = 'Invalid path to psql - either wrong path or not executable';
$string['pathtounoconv'] = 'Path to unoconv document converter';
$string['pathtounoconv_help'] = 'Path to unoconv document converter. This is an executable that is capable of converting between document formats supported by LibreOffice. This is optional, but if specified, Moodle will use it to automatically convert between document formats. This is used to support a wider range of input files for the assignment annotate PDF feature.';
$string['pcreunicodewarning'] = 'It is strongly recommended to use PCRE PHP extension that is compatible with Unicode characters.';
$string['perfdebug'] = 'Performance info';
$string['performance'] = 'Performance';
Expand Down
2 changes: 1 addition & 1 deletion lib/behat/lib.php
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ function behat_clean_init_config() {
'umaskpermissions', 'dbtype', 'dblibrary', 'dbhost', 'dbname', 'dbuser', 'dbpass', 'prefix',
'dboptions', 'proxyhost', 'proxyport', 'proxytype', 'proxyuser', 'proxypassword',
'proxybypass', 'theme', 'pathtogs', 'pathtodu', 'aspellpath', 'pathtodot', 'skiplangupgrade',
'altcacheconfigpath', 'pathtopandoc'
'altcacheconfigpath', 'pathtounoconv'
));

// Add extra allowed settings.
Expand Down
52 changes: 27 additions & 25 deletions lib/filestorage/file_storage.php
Original file line number Diff line number Diff line change
Expand Up @@ -187,22 +187,30 @@ public function get_converted_document(stored_file $file, $format) {
* @param string $format The desired format - e.g. 'pdf'. Formats are specified by file extension.
* @return bool - True if the format is supported for input.
*/
protected function is_input_format_supported_by_pandoc($format) {
$sanitized = trim(strtolower($format));
return in_array($sanitized, array('md', 'html', 'tex', 'docx', 'odt', 'epub', 'png', 'jpg', 'gif'));
}
protected function is_format_supported_by_unoconv($format) {
global $CFG;

if (!isset($this->unoconvformats)) {
// Ask unoconv for it's list of supported document formats.
$cmd = escapeshellcmd(trim($CFG->pathtounoconv)) . ' --show';
$pipes = array();
$pipesspec = array(2 => array('pipe', 'w'));
$proc = proc_open($cmd, $pipesspec, $pipes);
$programoutput = stream_get_contents($pipes[2]);
fclose($pipes[2]);
proc_close($proc);
$matches = array();
preg_match_all('/\[\.(.*)\]/', $programoutput, $matches);

$this->unoconvformats = $matches[1];
$this->unoconvformats = array_unique($this->unoconvformats);
}

/**
* Verify the format is supported.
*
* @param string $format The desired format - e.g. 'pdf'. Formats are specified by file extension.
* @return bool - True if the format is supported for output.
*/
protected function is_output_format_supported_by_pandoc($format) {
$sanitized = trim(strtolower($format));
return in_array($sanitized, array('md', 'pdf', 'html', 'tex', 'docx', 'odt', 'odf', 'epub'));
return in_array($sanitized, $this->unoconvformats);
}


/**
* Perform a file format conversion on the specified document.
*
Expand All @@ -213,17 +221,17 @@ protected function is_output_format_supported_by_pandoc($format) {
protected function create_converted_document(stored_file $file, $format) {
global $CFG;

if (empty($CFG->pathtopandoc) || !is_executable(trim($CFG->pathtopandoc))) {
if (empty($CFG->pathtounoconv) || !is_executable(trim($CFG->pathtounoconv))) {
// No conversions are possible, sorry.
return false;
}

$fileextension = strtolower(pathinfo($file->get_filename(), PATHINFO_EXTENSION));
if (!self::is_input_format_supported_by_pandoc($fileextension)) {
if (!self::is_format_supported_by_unoconv($fileextension)) {
return false;
}

if (!self::is_output_format_supported_by_pandoc($format)) {
if (!self::is_format_supported_by_unoconv($format)) {
return false;
}

Expand All @@ -236,21 +244,14 @@ protected function create_converted_document(stored_file $file, $format) {
$filename = $tmp . '/' . $localfilename;
$file->copy_content_to($filename);

if (in_array($fileextension, array('gif', 'jpg', 'png'))) {
// We wrap images in a tiny html file - pandoc will generate documents from them.
$htmlwrapperfile = $tmp . '/wrapper.html';

file_put_contents($htmlwrapperfile, "<html><body><img src=\"$localfilename\"></body></html>");

$filename = $htmlwrapperfile;
}

$newtmpfile = pathinfo($filename, PATHINFO_FILENAME) . '.' . $format;

// Safety.
$newtmpfile = $tmp . '/' . clean_param($newtmpfile, PARAM_FILE);

$cmd = escapeshellcmd(trim($CFG->pathtopandoc)) . ' ' .
$cmd = escapeshellcmd(trim($CFG->pathtounoconv)) . ' ' .
escapeshellarg('-f') . ' ' .
escapeshellarg($format) . ' ' .
escapeshellarg('-o') . ' ' .
escapeshellarg($newtmpfile) . ' ' .
escapeshellarg($filename);
Expand All @@ -259,6 +260,7 @@ protected function create_converted_document(stored_file $file, $format) {
$output = null;
$currentdir = getcwd();
chdir($tmp);
$result = exec('env 1>&2', $output);
$result = exec($cmd, $output);
chdir($currentdir);
if (!file_exists($newtmpfile)) {
Expand Down
2 changes: 1 addition & 1 deletion lib/phpunit/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@
'dbtype', 'dblibrary', 'dbhost', 'dbname', 'dbuser', 'dbpass', 'prefix', 'dboptions',
'proxyhost', 'proxyport', 'proxytype', 'proxyuser', 'proxypassword', 'proxybypass', // keep proxy settings from config.php
'altcacheconfigpath', 'pathtogs', 'pathtodu', 'aspellpath', 'pathtodot',
'pathtopandoc'
'pathtounoconv'
);
$productioncfg = (array)$CFG;
$CFG = new stdClass();
Expand Down
File renamed without changes.
File renamed without changes.
22 changes: 13 additions & 9 deletions lib/tests/pandoc_test.php → lib/tests/unoconv_test.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.

/**
* Test pandoc functionality.
* Test unoconv functionality.
*
* @package core
* @category phpunit
Expand All @@ -27,14 +27,14 @@


/**
* A set of tests for some of the pandoc functionality within Moodle.
* A set of tests for some of the unoconv functionality within Moodle.
*
* @package core
* @category phpunit
* @copyright 2016 Damyon Wiese
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
class core_pandoc_testcase extends advanced_testcase {
class core_unoconv_testcase extends advanced_testcase {

private $testfile1 = null;
private $testfile2 = null;
Expand All @@ -51,7 +51,7 @@ public function setUp() {
'filepath' => '/',
'filename' => 'test.html'
);
$teststring = file_get_contents($this->fixturepath . DIRECTORY_SEPARATOR . 'pandoc-source.html');
$teststring = file_get_contents($this->fixturepath . DIRECTORY_SEPARATOR . 'unoconv-source.html');
$this->testfile1 = $fs->create_file_from_string($filerecord, $teststring);

$filerecord = array(
Expand All @@ -62,7 +62,7 @@ public function setUp() {
'filepath' => '/',
'filename' => 'test.docx'
);
$teststring = file_get_contents($this->fixturepath . DIRECTORY_SEPARATOR . 'pandoc-source.docx');
$teststring = file_get_contents($this->fixturepath . DIRECTORY_SEPARATOR . 'unoconv-source.docx');
$this->testfile2 = $fs->create_file_from_string($filerecord, $teststring);

$this->resetAfterTest();
Expand All @@ -71,33 +71,37 @@ public function setUp() {
public function test_generate_pdf() {
global $CFG;

if (empty($CFG->pathtopandoc) || !is_executable(trim($CFG->pathtopandoc))) {
if (empty($CFG->pathtounoconv) || !is_executable(trim($CFG->pathtounoconv))) {
// No conversions are possible, sorry.
return $this->markTestSkipped();
}
$fs = get_file_storage();

$result = $fs->get_converted_document($this->testfile1, 'pdf');
$this->assertNotFalse($result);
$this->assertSame($result->get_mimetype(), 'application/pdf');
$this->assertGreaterThan(0, $result->get_filesize());
$result = $fs->get_converted_document($this->testfile2, 'pdf');
$this->assertNotFalse($result);
$this->assertSame($result->get_mimetype(), 'application/pdf');
$this->assertGreaterThan(0, $result->get_filesize());
}

public function test_generate_markdown() {
global $CFG;

if (empty($CFG->pathtopandoc) || !is_executable(trim($CFG->pathtopandoc))) {
if (empty($CFG->pathtounoconv) || !is_executable(trim($CFG->pathtounoconv))) {
// No conversions are possible, sorry.
return $this->markTestSkipped();
}
$fs = get_file_storage();

$result = $fs->get_converted_document($this->testfile1, 'md');
$result = $fs->get_converted_document($this->testfile1, 'txt');
$this->assertNotFalse($result);
$this->assertSame($result->get_mimetype(), 'text/plain');
$this->assertGreaterThan(0, $result->get_filesize());
$result = $fs->get_converted_document($this->testfile2, 'md');
$result = $fs->get_converted_document($this->testfile2, 'txt');
$this->assertNotFalse($result);
$this->assertSame($result->get_mimetype(), 'text/plain');
$this->assertGreaterThan(0, $result->get_filesize());
}
Expand Down
30 changes: 25 additions & 5 deletions mod/assign/feedback/editpdf/classes/document_services.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,30 @@ class document_services {
/** Filename for combined pdf */
const COMBINED_PDF_FILENAME = 'combined.pdf';

/** Base64 encoded blank pdf. This is the most reliable/fastest way to generate a blank pdf. */
const BLANK_PDF_BASE64 = <<<EOD
JVBERi0xLjQKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURl
Y29kZT4+CnN0cmVhbQp4nDPQM1Qo5ypUMFAwALJMLU31jBQsTAz1LBSKUrnCtRTyuAIVAIcdB3IK
ZW5kc3RyZWFtCmVuZG9iagoKMyAwIG9iago0MgplbmRvYmoKCjUgMCBvYmoKPDwKPj4KZW5kb2Jq
Cgo2IDAgb2JqCjw8L0ZvbnQgNSAwIFIKL1Byb2NTZXRbL1BERi9UZXh0XQo+PgplbmRvYmoKCjEg
MCBvYmoKPDwvVHlwZS9QYWdlL1BhcmVudCA0IDAgUi9SZXNvdXJjZXMgNiAwIFIvTWVkaWFCb3hb
MCAwIDU5NSA4NDJdL0dyb3VwPDwvUy9UcmFuc3BhcmVuY3kvQ1MvRGV2aWNlUkdCL0kgdHJ1ZT4+
L0NvbnRlbnRzIDIgMCBSPj4KZW5kb2JqCgo0IDAgb2JqCjw8L1R5cGUvUGFnZXMKL1Jlc291cmNl
cyA2IDAgUgovTWVkaWFCb3hbIDAgMCA1OTUgODQyIF0KL0tpZHNbIDEgMCBSIF0KL0NvdW50IDE+
PgplbmRvYmoKCjcgMCBvYmoKPDwvVHlwZS9DYXRhbG9nL1BhZ2VzIDQgMCBSCi9PcGVuQWN0aW9u
WzEgMCBSIC9YWVogbnVsbCBudWxsIDBdCi9MYW5nKGVuLUFVKQo+PgplbmRvYmoKCjggMCBvYmoK
PDwvQ3JlYXRvcjxGRUZGMDA1NzAwNzIwMDY5MDA3NDAwNjUwMDcyPgovUHJvZHVjZXI8RkVGRjAw
NEMwMDY5MDA2MjAwNzIwMDY1MDA0RjAwNjYwMDY2MDA2OTAwNjMwMDY1MDAyMDAwMzQwMDJFMDAz
ND4KL0NyZWF0aW9uRGF0ZShEOjIwMTYwMjI2MTMyMzE0KzA4JzAwJyk+PgplbmRvYmoKCnhyZWYK
MCA5CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDIyNiAwMDAwMCBuIAowMDAwMDAwMDE5IDAw
MDAwIG4gCjAwMDAwMDAxMzIgMDAwMDAgbiAKMDAwMDAwMDM2OCAwMDAwMCBuIAowMDAwMDAwMTUx
IDAwMDAwIG4gCjAwMDAwMDAxNzMgMDAwMDAgbiAKMDAwMDAwMDQ2NiAwMDAwMCBuIAowMDAwMDAw
NTYyIDAwMDAwIG4gCnRyYWlsZXIKPDwvU2l6ZSA5L1Jvb3QgNyAwIFIKL0luZm8gOCAwIFIKL0lE
IFsgPEJDN0REQUQwRDQyOTQ1OTQ2OUU4NzJCMjI1ODUyNkU4Pgo8QkM3RERBRDBENDI5NDU5NDY5
RTg3MkIyMjU4NTI2RTg+IF0KL0RvY0NoZWNrc3VtIC9BNTYwMEZCMDAzRURCRTg0MTNBNTk3RTZF
MURDQzJBRgo+PgpzdGFydHhyZWYKNzM2CiUlRU9GCg==
EOD;

/**
* This function will take an int or an assignment instance and
* return an assignment instance. It is just for convenience.
Expand Down Expand Up @@ -302,11 +326,7 @@ public static function generate_combined_pdf_for_attempt($assignment, $userid, $
}

if (!$files) {
// This was a blank pdf.
$blankpdf = new pdf();
$content = $blankpdf->Output(self::COMBINED_PDF_FILENAME, 'S');
$file = $fs->create_file_from_string($record, $content);
$blankpdf->Close(); // No real need to close this pdf, because it has been outputted, but for clarity.
$file = $fs->create_file_from_string($record, base64_decode(self::BLANK_PDF_BASE64));
} else {
// This was a combined pdf.
$file = $fs->create_file_from_pathname($record, $tmpfile);
Expand Down

0 comments on commit 1356d85

Please sign in to comment.