forked from moodle/moodle
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxmlize.php
245 lines (235 loc) · 8.85 KB
/
xmlize.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Code for parsing xml files.
*
* Handles functionality for:
*
* Import of xml files in questionbank and course import.
* Can handle xml files larger than 10MB through chunking the input file.
* Replaces the original xmlize by Hans Anderson, {@link http://www.hansanderson.com/contact/}
* with equal interface.
*
* @package core
* @subpackage lib
* @copyright Kilian Singer
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
/**
* Exception thrown when there is an error parsing an XML file.
*
* @copyright 2010 The Open University
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
defined('MOODLE_INTERNAL') || die();
class xml_format_exception extends moodle_exception {
/** @var string */
public $errorstring;
/** @var int */
public $line;
/** @var char */
public $char;
/**
* Constructor function
*
* @param string $errorstring Errorstring
* @param int $line Linenumber
* @param char $char Errorcharacter
* @param string $link Link
*/
public function __construct($errorstring, $line, $char, $link = '') {
$this->errorstring = $errorstring;
$this->line = $line;
$this->char = $char;
$a = new stdClass();
$a->errorstring = $errorstring;
$a->errorline = $line;
$a->errorchar = $char;
parent::__construct('errorparsingxml', 'error', $link, $a);
}
}
/**
* Class for parsing xml files.
*
* Handles functionality for:
*
* Import of xml files in questionbank and course import.
* Can handle xml files larger than 10MB through chunking the input file.
* Uses a similar interface to the original version xmlize() by Hans Anderson.
*
* @package core
* @subpackage lib
* @copyright Kilian Singer
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
class core_xml_parser {
/** @var array resulting $xml tree */
private $xml;
/** @var array stores references to constructed $xml tree */
private $current;
/** @var int tores the level in the XML tree */
private $level;
/**
* Is called when tags are opened.
*
* Note: Used by xml element handler as callback.
*
* @author Kilian Singer
* @param resource $parser The XML parser resource.
* @param string $name The XML source to parse.
* @param array $attrs Stores attributes of XML tag.
*/
private function startelement($parser, $name, $attrs) {
$current = &$this->current;
$level = &$this->level;
if (!empty($name)) {
if ($level == 0) {
$current[$level][$name] = array();
$current[$level][$name]["@"] = $attrs; // Attribute.
$current[$level][$name]["#"] = array(); // Other tags.
$current[$level + 1] = & $current[$level][$name]["#"];
$level++;
} else {
if (empty($current[$level][$name])) {
$current[$level][$name] = array();
}
$siz = count($current[$level][$name]);
if (!empty($attrs)) {
$current[$level][$name][$siz]["@"] = $attrs; // Attribute.
}
$current[$level][$name][$siz]["#"] = array(); // Other tags.
$current[$level + 1] = & $current[$level][$name][$siz]["#"];
$level++;
}
}
}
/**
* Is called when tags are closed.
*
* Note: Used by xml element handler as callback.
*
* @author Kilian Singer
* @param resource $parser The XML parser resource.
* @param string $name The XML source to parse.
*/
private function endelement($parser, $name) {
$current = &$this->current;
$level = &$this->level;
if (!empty($name)) {
if (empty($current[$level])) {
$current[$level] = '';
} else if (array_key_exists(0, $current[$level])) {
if (count($current[$level]) == 1) {
$current[$level] = $current[$level][0]; // We remove array index if we only have a single entry.
}
}
$level--;
}
}
/**
* Is called for text between the start and the end of tags.
*
* Note: Used by xml element handler as callback.
*
* @author Kilian Singer
* @param resource $parser The XML parser resource.
* @param string $data The XML source to parse.
*/
private function characterdata($parser, $data) {
$current = &$this->current;
$level = &$this->level;
if (($data == "0") || (!empty($data) && trim($data) != "")) {
$siz = count($current[$level]);
if ($siz == 0) {
$current[$level][0] = $data;
} else {
$key = max(array_keys($current[$level]));
if (is_int($key)) {
end($current[$level]);
if (is_int(key($current[$level]))) { // If last index is nummeric we have CDATA and concat.
$current[$level][$key] = $current[$level][$key] . $data;
} else {
$current[$level][$key + 1] = $data; // Otherwise we make a new key.
}
} else {
$current[$level][0] = $data;
}
}
}
}
/**
* Parses XML string.
*
* Note: Interface is kept equal to previous version.
*
* @author Kilian Singer
* @param string $data the XML source to parse.
* @param int $whitespace If set to 1 allows the parser to skip "space" characters in xml document. Default is 1
* @param string $encoding Specify an OUTPUT encoding. If not specified, it defaults to UTF-8.
* @param bool $reporterrors if set to true, then a {@link xml_format_exception}
* exception will be thrown if the XML is not well-formed. Otherwise errors are ignored.
* @return array representation of the parsed XML.
*/
public function parse($data, $whitespace = 1, $encoding = 'UTF-8', $reporterrors = false) {
$data = trim($data);
$this->xml = array();
$this->current = array();
$this->level = 0;
$this->current[0] = & $this->xml;
$parser = xml_parser_create($encoding);
xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, $whitespace);
xml_set_element_handler($parser, [$this, "startelement"], [$this, "endelement"]);
xml_set_character_data_handler($parser, [$this, "characterdata"]);
// Start parsing an xml document.
for ($i = 0; $i < strlen($data); $i += 4096) {
if (!xml_parse($parser, substr($data, $i, 4096))) {
break;
}
}
if ($reporterrors) {
$errorcode = xml_get_error_code($parser);
if ($errorcode) {
$exception = new xml_format_exception(xml_error_string($errorcode),
xml_get_current_line_number($parser),
xml_get_current_column_number($parser));
xml_parser_free($parser);
throw $exception;
}
}
xml_parser_free($parser); // Deletes the parser.
if (empty($this->xml)) { // XML file is invalid or empty, return false.
return false;
}
return $this->xml;
}
}
/**
* XML parsing function calles into class.
*
* Note: Used by xml element handler as callback.
*
* @param string $data the XML source to parse.
* @param int $whitespace If set to 1 allows the parser to skip "space" characters in xml document. Default is 1
* @param string $encoding Specify an OUTPUT encoding. If not specified, it defaults to UTF-8.
* @param bool $reporterrors if set to true, then a {@link xml_format_exception}
* exception will be thrown if the XML is not well-formed. Otherwise errors are ignored.
* @return array representation of the parsed XML.
*/
function xmlize($data, $whitespace = 1, $encoding = 'UTF-8', $reporterrors = false) {
$hxml = new core_xml_parser();
return $hxml->parse($data, $whitespace, $encoding, $reporterrors);
}