Skip to content

Commit

Permalink
Fix XML output plugin generates invalid XML andresriancho#15246
Browse files Browse the repository at this point in the history
  • Loading branch information
andresriancho committed Jun 27, 2017
1 parent 2a23626 commit a3e3a9b
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 9 deletions.
35 changes: 27 additions & 8 deletions w3af/plugins/output/xml_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"""
import os
import re
import sys
import time
import base64
import xml.dom.minidom
Expand All @@ -46,11 +47,26 @@

TIME_FORMAT = '%a %b %d %H:%M:%S %Y'

# https://stackoverflow.com/questions/8733233/filtering-out-certain-bytes-in-python
INVALID_XML = re.compile(u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\u10000-\u10FFFF]+')
# https://stackoverflow.com/questions/1707890/fast-way-to-filter-illegal-xml-unicode-chars-in-python
#
# Inclusive (low, high) code point ranges which are treated as illegal in the
# XML output.
_illegal_unichrs = [
    (0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F),
    (0x7F, 0x84), (0x86, 0x9F),
    (0xFDD0, 0xFDDF), (0xFFFE, 0xFFFF),
]

# The last two code points of each plane above the BMP (U+1FFFE-U+1FFFF up to
# U+10FFFE-U+10FFFF) are only added when the interpreter can represent them.
if sys.maxunicode >= 0x10000:  # not narrow build
    _illegal_unichrs.extend(
        (plane + 0xFFFE, plane + 0xFFFF)
        for plane in range(0x10000, 0x110000, 0x10000)
    )

def xml_str(s):
# Turn every (low, high) pair into a "low-high" span and join all spans into
# one character class, so the pattern matches any single illegal character.
_illegal_ranges = []
for (low, high) in _illegal_unichrs:
    _illegal_ranges.append('%s-%s' % (unichr(low), unichr(high)))

INVALID_XML = re.compile(u'[%s]' % u''.join(_illegal_ranges))


def xml_str(s, replace_invalid=True):
"""
Avoid encoding errors while generating objects' utf8 byte-string
representations.
Expand All @@ -62,7 +78,10 @@ def xml_str(s):
:return: A string ready to be sent to the XML file
"""
encoded_str = smart_str(s, encoding='utf8', errors='xmlcharrefreplace')
encoded_str = INVALID_XML.sub('?', encoded_str)

if replace_invalid:
encoded_str = INVALID_XML.sub('?', encoded_str)

return encoded_str


Expand Down Expand Up @@ -399,11 +418,11 @@ def report_http_action(self, parent_node, action):
def handle_headers(self, parent_node, action):
if isinstance(action, HTTPRequest):
headers = action.get_headers()
body = xml_str(action.get_data() or '')
body = action.get_data() or ''
status = xml_str(action.get_request_line())
else:
headers = action.headers
body = xml_str(action.body or '')
body = action.body or ''
status = xml_str(action.get_status_line())

# Put out the status as an element
Expand Down Expand Up @@ -438,7 +457,7 @@ def handle_body(self, parent_node, headers, body):
if INVALID_XML.search(body) or ']]>' in body:
# irrespective of the mimetype; if the body contains characters which
# are invalid in XML (such as NULL) or the "]]>" sequence, then
# base64 encode it
encoded = base64.encodestring(body)
encoded = base64.encodestring(xml_str(body, replace_invalid=False))
action_body_content = self._xml.createTextNode(encoded)
action_body_node.setAttribute('content-encoding', 'base64')

Expand Down Expand Up @@ -467,7 +486,7 @@ def handle_body(self, parent_node, headers, body):
else:
# either known (image, audio, video) or unknown binary format
# Write it as base64encoded text
encoded = base64.encodestring(body)
encoded = base64.encodestring(xml_str(body, replace_invalid=False))
action_body_content = self._xml.createTextNode(encoded)
action_body_node.setAttribute('content-encoding', 'base64')

Expand Down
Binary file added w3af/plugins/tests/output/data/nsepa32.rpm
Binary file not shown.
65 changes: 64 additions & 1 deletion w3af/plugins/tests/output/test_xml_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import os
import StringIO
import unittest
import xml.etree.ElementTree as ElementTree

from lxml import etree
from nose.plugins.attrib import attr
Expand All @@ -36,7 +37,7 @@
from w3af.core.data.options.opt_factory import opt_factory
from w3af.core.data.options.option_types import OUTPUT_FILE

from w3af.plugins.tests.helper import PluginTest, PluginConfig
from w3af.plugins.tests.helper import PluginTest, PluginConfig, MockResponse
from w3af.plugins.output.xml_file import xml_file, xml_str, INVALID_XML


Expand Down Expand Up @@ -214,6 +215,9 @@ def validate_xml(content, schema_content):


class TestXMLStr(unittest.TestCase):
TEST_FILE = os.path.join(ROOT_PATH, 'plugins', 'tests', 'output',
'data', 'nsepa32.rpm')

def test_simple_xml_str(self):
    """A plain ASCII string is returned unchanged by xml_str()."""
    # assertEqual is the supported name; assertEquals is a deprecated alias
    self.assertEqual('a', xml_str('a'))

Expand All @@ -231,3 +235,62 @@ def test_re_match_false_1(self):

def test_re_match_false_2(self):
    """INVALID_XML finds nothing in a short string ending in a newline."""
    match_object = INVALID_XML.search('ab\n')
    self.assertIsNone(match_object)

def test_re_match_match_ffff(self):
    """INVALID_XML finds a match in a unicode string containing U+FFFF."""
    match_object = INVALID_XML.search(u'ab\uffffdef')
    self.assertIsNotNone(match_object)

def test_binary(self):
    """INVALID_XML finds a match in the raw contents of the RPM fixture."""
    # Read in binary mode so the bytes are not altered by newline
    # translation, and let the context manager close the file handle.
    with open(self.TEST_FILE, 'rb') as rpm_file:
        contents = rpm_file.read()

    match_object = INVALID_XML.search(contents)
    self.assertIsNotNone(match_object)


def _read_binary_file(path):
    """Return the raw bytes stored at *path*, closing the file afterwards."""
    with open(path, 'rb') as handle:
        return handle.read()


class TestXMLOutputBinary(PluginTest):
    """
    Scan a mocked target which serves an RPM file and verify that the report
    written by the xml_file output plugin can be parsed as XML.
    """

    target_url = 'http://rpm-path-binary/'

    TEST_FILE = os.path.join(ROOT_PATH, 'plugins', 'tests', 'output',
                             'data', 'nsepa32.rpm')

    # The binary fixture is served with a text/plain content type
    MOCK_RESPONSES = [
        MockResponse(url='http://rpm-path-binary/',
                     body=_read_binary_file(TEST_FILE),
                     content_type='text/plain',
                     method='GET', status=200),
    ]

    FILENAME = 'output-unittest.xml'

    _run_configs = {
        'cfg': {
            'target': target_url,
            'plugins': {
                'grep': (PluginConfig('path_disclosure'),),
                'output': (
                    PluginConfig(
                        'xml_file',
                        ('output_file', FILENAME, PluginConfig.STR)),
                )
            },
        }
    }

    def test_binary_handling_in_xml(self):
        """Run the scan and fail if the generated report is not parseable."""
        cfg = self._run_configs['cfg']
        self._scan(cfg['target'], cfg['plugins'])

        try:
            tree = ElementTree.parse(self.FILENAME)
            tree.getroot()
        except Exception as e:
            # Any parsing problem means the plugin wrote a broken report
            self.fail('Generated invalid XML: "%s"' % e)

    def tearDown(self):
        """Remove the report file (best effort) and clean up the KB."""
        super(TestXMLOutputBinary, self).tearDown()
        try:
            os.remove(self.FILENAME)
        except OSError:
            # The report may not exist if the scan failed before writing it
            pass
        finally:
            self.kb.cleanup()

0 comments on commit a3e3a9b

Please sign in to comment.