Skip to content

Commit

Permalink
MDL-31928: Fixing bugs in repository_url
Browse files Browse the repository at this point in the history
- if the same image occurs several times on the page, list it only once
- resolve image path correctly if it has a query string
- show images included in CSS
- non-JS file picker ignores thumbnail width and height attributes (this is a temp fix until renderers are implemented)
marinaglancy committed Apr 23, 2012
1 parent ead4f18 commit 8685679
Showing 3 changed files with 168 additions and 47 deletions.
20 changes: 18 additions & 2 deletions repository/filepicker.php
Original file line number Diff line number Diff line change
@@ -139,7 +139,15 @@
echo '<table>';
foreach ($search_result['list'] as $item) {
echo '<tr>';
echo '<td><img src="'.$item['thumbnail'].'" />';
echo '<td>';
$style = '';
if (isset($item['thumbnail_height'])) {
$style .= 'max-height:'.$item['thumbnail_height'].'px;';
}
if (isset($item['thumbnail_width'])) {
$style .= 'max-width:'.$item['thumbnail_width'].'px;';
}
echo html_writer::empty_tag('img', array('src' => $item['thumbnail'], 'style' => $style));
echo '</td><td>';
if (!empty($item['url'])) {
echo html_writer::link($item['url'], $item['title'], array('target'=>'_blank'));
@@ -227,7 +235,15 @@
echo '<table>';
foreach ($list['list'] as $item) {
echo '<tr>';
echo '<td><img src="'.$item['thumbnail'].'" />';
echo '<td>';
$style = '';
if (isset($item['thumbnail_height'])) {
$style .= 'max-height:'.$item['thumbnail_height'].'px;';
}
if (isset($item['thumbnail_width'])) {
$style .= 'max-width:'.$item['thumbnail_width'].'px;';
}
echo html_writer::empty_tag('img', array('src' => $item['thumbnail'], 'style' => $style));
echo '</td><td>';
if (!empty($item['url'])) {
echo html_writer::link($item['url'], $item['title'], array('target'=>'_blank'));
141 changes: 99 additions & 42 deletions repository/url/lib.php
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@
require_once(dirname(__FILE__).'/locallib.php');

class repository_url extends repository {
var $processedfiles = array();

/**
* @param int $repositoryid
@@ -42,16 +43,6 @@ public function __construct($repositoryid, $context = SYSCONTEXTID, $options = a
$this->file_url = optional_param('file', '', PARAM_RAW);
}

/**
 * Downloads the file at $url into the path returned by prepare_file().
 *
 * @param string $url URL of the file to download
 * @param string $file value handed to prepare_file() (presumably the wanted file name; verify against prepare_file())
 * @return array array with the keys 'path' (local file) and 'url' (the requested URL)
 */
public function get_file($url, $file = '') {
    $path = $this->prepare_file($file);
    $fp = fopen($path, 'w');
    $c = new curl;
    $c->download(array(array('url'=>$url, 'file'=>$fp)));
    // The handle was opened here, so it is released here: closing flushes any
    // buffered data and avoids leaking the descriptor.
    // NOTE(review): assumes download() has finished writing when it returns - confirm.
    if (is_resource($fp)) {
        fclose($fp);
    }
    return array('path'=>$path, 'url'=>$url);
}

public function check_login() {
if (!empty($this->file_url)) {
return true;
@@ -75,6 +66,7 @@ public function print_login() {

$ret['login'] = array($url);
$ret['login_btn_label'] = get_string('download', 'repository_url');
$ret['allowcaching'] = true; // indicates that login form can be cached in filepicker.js
return $ret;
} else {
echo <<<EOD
@@ -97,48 +89,113 @@ public function print_login() {
/**
 * Builds the listing for the URL stored in $this->file_url.
 *
 * The listing starts out empty with search, refresh and login switched off;
 * parse_file() then fills it in by reference.
 *
 * @param string $path not used by this method
 * @param string $page not used by this method
 * @return array the listing array
 */
public function get_listing($path='', $page='') {
    $listing = array(
        'list' => array(),
        'nosearch' => true,
        'norefresh' => true,
        'nologin' => true,
    );

    // The file the user pointed at is the "main" file, hence the final true.
    $this->parse_file(null, $this->file_url, $listing, true);

    return $listing;
}

/**
* Parses one file (HTML, CSS or image) and adds the images found to $list.
*
* The link is resolved against $baseurl (when given), skipped if it was
* already processed, and then handled according to the content type
* reported for it.
*
* NOTE(review): this span looks like a rendered diff - lines that use $ret,
* $OUTPUT and analyse_page() appear to belong to an earlier get_listing()
* body and are interleaved with the parse_file() lines. Confirm against the
* real file before relying on the statement order shown here.
*
* @param string $baseurl (optional) URL of the file where link to this file was found
* @param string $relativeurl relative or absolute link to the file
* @param array $list listing array, passed by reference; images are added to it and, for the main file, an 'error' entry on failure
* @param bool $mainfile true only for the main file, false for all embedded/linked files
*/
protected function parse_file($baseurl, $relativeurl, &$list, $mainfile = false) {
// if the link contains a quoted part, keep only what is between the quotes
if (preg_match('/([\'"])(.*)\1/', $relativeurl, $matches)) {
$relativeurl = $matches[2];
}
// without a base URL the link is used as it is
if (empty($baseurl)) {
$url = $relativeurl;
} else {
$url = htmlspecialchars_decode(url_to_absolute($baseurl, $relativeurl));
}
if (in_array($url, $this->processedfiles)) {
// avoid endless recursion
return;
}
$this->processedfiles[] = $url;
$curl = new curl;
$msg = $curl->head($this->file_url);
$msg = $curl->head($url);
$info = $curl->get_info();
if ($info['http_code'] != 200) {
$ret['e'] = $msg;
// only a failure of the main file is reported in the listing
if ($mainfile) {
$list['error'] = $msg;
}
} else {
$ret['list'] = array();
$ret['nosearch'] = true;
$ret['nologin'] = true;
$filename = $this->guess_filename($info['url'], $info['content_type']);
if (strstr($info['content_type'], 'text/html') || empty($info['content_type'])) {
// analyse this web page, generate file list
$ret['list'] = array();
$content = $curl->get($info['url']);
$this->analyse_page($info['url'], $content, $ret);
} else {
// collects all css text found for this file; parsed in one go further down
$csstoanalyze = '';
if ($mainfile && (strstr($info['content_type'], 'text/html') || empty($info['content_type']))) {
// parse as html
$htmlcontent = $curl->get($info['url']);
$ddoc = new DOMDocument();
@$ddoc->loadHTML($htmlcontent);
// extract <img>
$tags = $ddoc->getElementsByTagName('img');
foreach ($tags as $tag) {
$url = $tag->getAttribute('src');
$this->add_image_to_list($info['url'], $url, $list);
}
// analyse embedded css (<style>)
$tags = $ddoc->getElementsByTagName('style');
foreach ($tags as $tag) {
if ($tag->getAttribute('type') == 'text/css') {
$csstoanalyze .= $tag->textContent."\n";
}
}
// analyse links to css (<link type='text/css' href='...'>)
$tags = $ddoc->getElementsByTagName('link');
foreach ($tags as $tag) {
if ($tag->getAttribute('type') == 'text/css' && strlen($tag->getAttribute('href'))) {
$this->parse_file($info['url'], $tag->getAttribute('href'), $list);
}
}
} else if (strstr($info['content_type'], 'css')) {
// parse as css
$csscontent = $curl->get($info['url']);
$csstoanalyze .= $csscontent."\n";
} else if (strstr($info['content_type'], 'image/')) {
// download this file
$ret['list'][] = array(
'title'=>$filename,
'source'=>$this->file_url,
'thumbnail' => $OUTPUT->pix_url(file_extension_icon($filename, 32))->out(false)
);
$this->add_image_to_list($info['url'], $info['url'], $list);
}

// parse all found css styles
if (strlen($csstoanalyze)) {
$urls = extract_css_urls($csstoanalyze);
// urls found under the 'property' key are added as images
if (!empty($urls['property'])) {
foreach ($urls['property'] as $url) {
$this->add_image_to_list($info['url'], $url, $list);
}
}
// urls found under the 'import' key are parsed recursively as further files
if (!empty($urls['import'])) {
foreach ($urls['import'] as $cssurl) {
$this->parse_file($info['url'], $cssurl, $list);
}
}
}
}
return $ret;
}
public function analyse_page($baseurl, $content, &$list) {
global $CFG, $OUTPUT;
$urls = extract_html_urls($content);
$images = $urls['img']['src'];
$pattern = '#img(.+)src="?\'?([[:alnum:]:?=&@/._+-]+)"?\'?#i';
if (!empty($images)) {
foreach($images as $url) {
$list['list'][] = array(
'title'=>$this->guess_filename($url, ''),
'source'=>url_to_absolute($baseurl, $url),
'thumbnail'=>url_to_absolute($baseurl, $url),
'thumbnail_height'=>84,
'thumbnail_width'=>84
);
/**
 * Adds an image to $list['list'] unless the same source is already listed.
 *
 * @param string $baseurl URL of the file in which the image link was found
 * @param string $url relative or absolute link to the image; HTML special chars are decoded before resolving
 * @param array $list listing array, passed by reference; the image is appended to $list['list']
 */
protected function add_image_to_list($baseurl, $url, &$list) {
    if (empty($list['list'])) {
        $list['list'] = array();
    }
    $src = url_to_absolute($baseurl, htmlspecialchars_decode($url));
    if ($src === false || $src === '') {
        // url_to_absolute() returns FALSE when the base URL has no scheme or host;
        // such a link cannot be shown or downloaded, so it is not listed at all.
        return;
    }
    foreach ($list['list'] as $image) {
        // strict comparison: sources are plain strings and must match exactly
        if ($image['source'] === $src) {
            return;
        }
    }
    $list['list'][] = array(
        'title'=>$this->guess_filename($url, ''),
        'source'=>$src,
        'thumbnail'=>$src,
        'thumbnail_height'=>84,
        'thumbnail_width'=>84
    );
}
public function guess_filename($url, $type) {
$pattern = '#\/([\w_\?\-.]+)$#';
54 changes: 51 additions & 3 deletions repository/url/locallib.php
Original file line number Diff line number Diff line change
@@ -79,6 +79,9 @@ function url_to_absolute( $baseUrl, $relativeUrl )
if ( $b === FALSE || empty( $b['scheme'] ) || empty( $b['host'] ) )
return FALSE;
$r['scheme'] = $b['scheme'];
if (empty($b['path'])) {
$b['path'] = '';
}

// If relative URL has an authority, clean path and return.
if ( isset( $r['host'] ) )
@@ -248,11 +251,11 @@ function url_remove_dot_segments( $path )
* the associative array of URL parts, or FALSE if the URL is
* too malformed to recognize any parts.
*/
function split_url( $url, $decode=TRUE )
function split_url( $url, $decode=FALSE)
{
// Character sets from RFC3986.
$xunressub = 'a-zA-Z\d\-._~\!$&\'()*+,;=';
$xpchar = $xunressub . ':@%';
$xpchar = $xunressub . ':@% ';

// Scheme from RFC3986.
$xscheme = '([a-zA-Z][a-zA-Z\d+-.]*)';
@@ -382,7 +385,7 @@ function split_url( $url, $decode=TRUE )
* empty string is returned if the $parts array does not contain
* any of the needed values.
*/
function join_url( $parts, $encode=TRUE )
function join_url( $parts, $encode=FALSE)
{
if ( $encode )
{
@@ -432,6 +435,51 @@ function join_url( $parts, $encode=TRUE )
$url .= '#' . $parts['fragment'];
return $url;
}

/**
 * Percent-encodes a URL so that disallowed characters are replaced,
 * while the characters RFC3986 lists as reserved stay as they are.
 *
 * The whole string is first run through rawurlencode(); afterwards
 * the escapes of the reserved characters are turned back into the
 * literal characters. Because '%' itself is restored as well, an
 * escape that is already present in the input is not encoded again,
 * e.g. '%20' does not become '%2520'.
 *
 * Parameters:
 * 	url		the url to encode.
 *
 * Return values:
 * 	Returns the encoded URL string.
 */
function encode_url($url) {
    // Escape => literal character. '%25' has to stay the last entry:
    // restoring '%' any earlier would let the escapes that were present in
    // the input be decoded by the entries that follow it.
    $literals = array(
        '%3A' => ':',
        '%2F' => '/',
        '%3F' => '?',
        '%23' => '#',
        '%5B' => '[',
        '%5D' => ']',
        '%40' => '@',
        '%21' => '!',
        '%24' => '$',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%3B' => ';',
        '%3D' => '=',
        '%25' => '%',
    );

    $encoded = rawurlencode($url);

    // The pairs are applied one after another, in the order listed above.
    return str_ireplace(array_keys($literals), array_values($literals), $encoded);
}

/**
* Extract URLs from a web page.
*

0 comments on commit 8685679

Please sign in to comment.