Skip to content

Commit

Permalink
Bug 1751796 - XML parsererror eats two first letters. r=bholley
Browse files Browse the repository at this point in the history
We were calling XML_GetCurrentColumnNumber after ParseBuffer caused Expat
to consume some data. XML_GetCurrentColumnNumber uses the buffer that was
last passed to Expat. Before Expat was put in an RLBox sandbox the caller
of ParseBuffer would keep the data in the scanner string until after the
call to XML_GetCurrentColumnNumber. Now that we copy the data into the
RLBox sandbox the data is freed when the TransferBuffer in ParseBuffer
goes out of scope, so in the caller of ParseBuffer the call to
XML_GetCurrentColumnNumber would cause us to read freed memory inside the
sandbox. Moving the call to XML_GetCurrentColumnNumber to inside
ParseBuffer, when TransferBuffer is still in scope, solves the issue.

Differential Revision: https://phabricator.services.mozilla.com/D141795
  • Loading branch information
petervanderbeken committed Apr 5, 2022
1 parent d89fe4f commit 4641d9b
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 95 deletions.
120 changes: 65 additions & 55 deletions parser/htmlparser/nsExpatDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1138,16 +1138,17 @@ nsresult nsExpatDriver::HandleError() {
void nsExpatDriver::ChunkAndParseBuffer(const char16_t* aBuffer,
uint32_t aLength, bool aIsFinal,
uint32_t* aPassedToExpat,
uint32_t* aConsumed) {
uint32_t* aConsumed,
XML_Size* aLastLineLength) {
*aConsumed = 0;
*aLastLineLength = 0;

uint32_t remainder = aLength;
while (remainder > sMaxChunkLength) {
uint32_t consumed = 0;
ParseBuffer(aBuffer, sMaxChunkLength, /* aIsFinal = */ false, &consumed);
ParseChunk(aBuffer, sMaxChunkLength, ChunkOrBufferIsFinal::None, aConsumed,
aLastLineLength);
aBuffer += sMaxChunkLength;
remainder -= sMaxChunkLength;
*aConsumed += consumed;
if (NS_FAILED(mInternalState)) {
// Stop parsing if there's an error (including if we're blocked or
// interrupted).
Expand All @@ -1156,16 +1157,20 @@ void nsExpatDriver::ChunkAndParseBuffer(const char16_t* aBuffer,
}
}

uint32_t consumed = 0;
ParseBuffer(aBuffer, remainder, aIsFinal, &consumed);
*aConsumed += consumed;
ParseChunk(aBuffer, remainder,
aIsFinal ? ChunkOrBufferIsFinal::FinalChunkAndBuffer
: ChunkOrBufferIsFinal::FinalChunk,
aConsumed, aLastLineLength);
*aPassedToExpat = aLength;
}

void nsExpatDriver::ParseBuffer(const char16_t* aBuffer, uint32_t aLength,
bool aIsFinal, uint32_t* aConsumed) {
void nsExpatDriver::ParseChunk(const char16_t* aBuffer, uint32_t aLength,
ChunkOrBufferIsFinal aIsFinal,
uint32_t* aConsumed, XML_Size* aLastLineLength) {
NS_ASSERTION((aBuffer && aLength != 0) || (!aBuffer && aLength == 0), "?");
NS_ASSERTION(mInternalState != NS_OK || aIsFinal || aBuffer,
NS_ASSERTION(mInternalState != NS_OK ||
(aIsFinal == ChunkOrBufferIsFinal::FinalChunkAndBuffer) ||
aBuffer,
"Useless call, we won't call Expat");
MOZ_ASSERT(!BlockedOrInterrupted() || !aBuffer,
"Non-null buffer when resuming");
Expand All @@ -1180,47 +1185,55 @@ void nsExpatDriver::ParseBuffer(const char16_t* aBuffer, uint32_t aLength,
int32_t parserBytesBefore = RLBOX_EXPAT_SAFE_MCALL(
XML_GetCurrentByteIndex, parserBytesBefore_verifier);

if (mInternalState == NS_OK || BlockedOrInterrupted()) {
XML_Status status;
bool inParser = mInParser; // Save in-parser status
mInParser = true;
if (BlockedOrInterrupted()) {
mInternalState = NS_OK; // Resume in case we're blocked.
status = RLBOX_EXPAT_SAFE_MCALL(MOZ_XML_ResumeParser, status_verifier);
} else {
auto buffer = TransferBuffer<char16_t>(Sandbox(), aBuffer, aLength);
MOZ_RELEASE_ASSERT(!aBuffer || !!*buffer,
"Chunking should avoid OOM in ParseBuffer");

status = RLBOX_EXPAT_SAFE_MCALL(
MOZ_XML_Parse, status_verifier,
rlbox::sandbox_reinterpret_cast<const char*>(*buffer),
aLength * sizeof(char16_t), aIsFinal);
}
mInParser = inParser; // Restore in-parser status

auto parserBytesConsumed_verifier = [&](auto parserBytesConsumed) {
MOZ_RELEASE_ASSERT(parserBytesConsumed >= 0, "Unexpected value");
MOZ_RELEASE_ASSERT(parserBytesConsumed >= parserBytesBefore,
"How'd this happen?");
MOZ_RELEASE_ASSERT(parserBytesConsumed % sizeof(char16_t) == 0,
"Consumed part of a char16_t?");
return parserBytesConsumed;
};
int32_t parserBytesConsumed = RLBOX_EXPAT_SAFE_MCALL(
XML_GetCurrentByteIndex, parserBytesConsumed_verifier);

// Consumed something.
*aConsumed = (parserBytesConsumed - parserBytesBefore) / sizeof(char16_t);

NS_ASSERTION(status != XML_STATUS_SUSPENDED || BlockedOrInterrupted(),
"Inconsistent expat suspension state.");

if (status == XML_STATUS_ERROR) {
mInternalState = NS_ERROR_HTMLPARSER_STOPPARSING;
}
if (mInternalState != NS_OK && !BlockedOrInterrupted()) {
return;
}

XML_Status status;
bool inParser = mInParser; // Save in-parser status
mInParser = true;
Maybe<TransferBuffer<char16_t>> buffer;
if (BlockedOrInterrupted()) {
mInternalState = NS_OK; // Resume in case we're blocked.
status = RLBOX_EXPAT_SAFE_MCALL(MOZ_XML_ResumeParser, status_verifier);
} else {
*aConsumed = 0;
buffer.emplace(Sandbox(), aBuffer, aLength);
MOZ_RELEASE_ASSERT(!aBuffer || !!*buffer.ref(),
"Chunking should avoid OOM in ParseBuffer");

status = RLBOX_EXPAT_SAFE_MCALL(
MOZ_XML_Parse, status_verifier,
rlbox::sandbox_reinterpret_cast<const char*>(*buffer.ref()),
aLength * sizeof(char16_t),
aIsFinal == ChunkOrBufferIsFinal::FinalChunkAndBuffer);
}
mInParser = inParser; // Restore in-parser status

auto parserBytesConsumed_verifier = [&](auto parserBytesConsumed) {
MOZ_RELEASE_ASSERT(parserBytesConsumed >= 0, "Unexpected value");
MOZ_RELEASE_ASSERT(parserBytesConsumed >= parserBytesBefore,
"How'd this happen?");
MOZ_RELEASE_ASSERT(parserBytesConsumed % sizeof(char16_t) == 0,
"Consumed part of a char16_t?");
return parserBytesConsumed;
};
int32_t parserBytesConsumed = RLBOX_EXPAT_SAFE_MCALL(
XML_GetCurrentByteIndex, parserBytesConsumed_verifier);

// Consumed something.
*aConsumed += (parserBytesConsumed - parserBytesBefore) / sizeof(char16_t);

NS_ASSERTION(status != XML_STATUS_SUSPENDED || BlockedOrInterrupted(),
"Inconsistent expat suspension state.");

if (status == XML_STATUS_ERROR) {
mInternalState = NS_ERROR_HTMLPARSER_STOPPARSING;
}

if (*aConsumed > 0 &&
(aIsFinal != ChunkOrBufferIsFinal::None || NS_FAILED(mInternalState))) {
*aLastLineLength = RLBOX_EXPAT_SAFE_MCALL(MOZ_XML_GetCurrentColumnNumber,
safe_unverified<XML_Size>);
}
}

Expand Down Expand Up @@ -1291,8 +1304,9 @@ nsresult nsExpatDriver::ResumeParse(nsScanner& aScanner, bool aIsFinalChunk) {

uint32_t passedToExpat;
uint32_t consumed;
XML_Size lastLineLength;
ChunkAndParseBuffer(buffer, length, noMoreBuffers, &passedToExpat,
&consumed);
&consumed, &lastLineLength);
MOZ_ASSERT_IF(passedToExpat != length, NS_FAILED(mInternalState));
MOZ_ASSERT(consumed <= passedToExpat + mExpatBuffered);
if (consumed > 0) {
Expand All @@ -1303,10 +1317,6 @@ nsresult nsExpatDriver::ResumeParse(nsScanner& aScanner, bool aIsFinalChunk) {
// was consumed in case we run into an error (to show the line in which
// the error occurred).

// The length of the last line that Expat has parsed.
XML_Size lastLineLength = RLBOX_EXPAT_SAFE_MCALL(
XML_GetCurrentColumnNumber, safe_unverified<XML_Size>);

if (lastLineLength <= consumed) {
// The length of the last line was less than what expat consumed, so
// there was at least one line break in the consumed data. Store the
Expand Down
36 changes: 30 additions & 6 deletions parser/htmlparser/nsExpatDriver.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ class nsExpatDriver : public nsIDTD {
nsIInputStream** aStream,
nsIURI** aAbsURI);

enum class ChunkOrBufferIsFinal {
None,
FinalChunk,
FinalChunkAndBuffer,
};

/**
* Pass a buffer to Expat. If Expat is blocked aBuffer should be null and
* aLength should be 0. The result of the call will be stored in
Expand All @@ -91,23 +97,41 @@ class nsExpatDriver : public nsIDTD {
* @param aLength the length of the buffer to pass to Expat (in number of
* char16_t's). Must be 0 if aBuffer is null and > 0 if
* aBuffer is not null.
* @param aIsFinal whether there will definitely not be any more new buffers
* passed in to ParseBuffer
* @param aIsFinal whether this is the last chunk in a row passed to
* ParseChunk, and if so whether it's the last chunk and
* buffer passed to ParseChunk (meaning there will be no more
* calls to ParseChunk for the document being parsed).
* @param aConsumed [out] the number of PRUnichars that Expat consumed. This
* doesn't include the PRUnichars that Expat stored in
* its buffer but didn't parse yet.
* @param aLastLineLength [out] the length of the last line that Expat has
* consumed. This will only be computed if
* aIsFinal is not None or mInternalState is set
* to a failure.
*/
void ParseBuffer(const char16_t* aBuffer, uint32_t aLength, bool aIsFinal,
uint32_t* aConsumed);

void ParseChunk(const char16_t* aBuffer, uint32_t aLength,
ChunkOrBufferIsFinal aIsFinal, uint32_t* aConsumed,
XML_Size* aLastLineLength);
/**
* Wrapper for ParseBuffer. If the buffer is too large to be copied into the
* sandbox all at once, splits it into chunks and invokes ParseBuffer in a
* loop.
*
* @param aBuffer the buffer to pass to Expat. May be null.
* @param aLength the length of the buffer to pass to Expat (in number of
* char16_t's). Must be 0 if aBuffer is null and > 0 if
* aBuffer is not null.
* @param aIsFinal whether there will definitely not be any more new buffers
* passed in to ParseBuffer
* @param aConsumed [out] the number of PRUnichars that Expat consumed. This
* doesn't include the PRUnichars that Expat stored in
* its buffer but didn't parse yet.
* @param aLastLineLength [out] the length of the last line that Expat has
* consumed.
*/
void ChunkAndParseBuffer(const char16_t* aBuffer, uint32_t aLength,
bool aIsFinal, uint32_t* aPassedToExpat,
uint32_t* aConsumed);
uint32_t* aConsumed, XML_Size* aLastLineLength);

nsresult HandleError();

Expand Down
1 change: 1 addition & 0 deletions parser/htmlparser/tests/mochitest/file_xml_parse_error.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
parent.ok(true, "Loaded script.");
3 changes: 3 additions & 0 deletions parser/htmlparser/tests/mochitest/file_xml_parse_error.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<?xml version="1.0" standalone="yes" ?>
<html xmlns="http://www.w3.org/1999/xhtml">
<p> <script src="file_xml_parse_error.js" /> Not a <<b>well-formed</b> xml string</p></html>
6 changes: 4 additions & 2 deletions parser/htmlparser/tests/mochitest/mochitest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,6 @@ skip-if = (xorigin && debug)
support-files =
file_defer_bug1104732.js
file_async_bug1104732.sjs
[test_bug1752498.html]
skip-if = true # Bug 1751796
[test_compatmode.html]
[test_html5_tree_construction.html]
[test_html5_tree_construction_part2.html]
Expand All @@ -158,3 +156,7 @@ skip-if = true # Bug 1751796
[test_bug1646140-1.html]
[test_bug1646140-2.html]
skip-if = headless # Bug 1685088
[test_xml_parse_error.html]
support-files =
file_xml_parse_error.js
file_xml_parse_error.xml
32 changes: 0 additions & 32 deletions parser/htmlparser/tests/mochitest/test_bug1752498.html

This file was deleted.

79 changes: 79 additions & 0 deletions parser/htmlparser/tests/mochitest/test_xml_parse_error.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
<!DOCTYPE HTML>
<html>
<head>
<meta charset="utf-8">
<title><!-- TODO: insert title here --></title>
<script src="/tests/SimpleTest/SimpleTest.js"></script>
<link rel="stylesheet" href="/tests/SimpleTest/test.css"/>
<script>
function getExpectedError(string, filename=location.href) {
let lines = string.split(/\r\n|\r|\n/);
let line, column;
let errorLine;
for (line = 0; line < lines.length; ++line) {
errorLine = lines[line];
// The error starts at the opening '<' of '<b>'.
column = errorLine.search("<<b>") + 1;
if (column > 0) {
// Line and column are 1-based.
line += 1;
column += 1;
break;
}
}

let expectedError = `XML Parsing Error: not well-formed
Location: ${filename}
Line Number ${line}, Column ${column}:${errorLine}
${"^".padStart(column, "-")}`;
return expectedError;
}

function getParseError(string) {
let error = new window.DOMParser()
.parseFromString(string, "text/xml")
.getElementsByTagName("parsererror")[0].textContent;
return [error, getExpectedError(string)];
}

SimpleTest.waitForExplicitFinish();

function runTest() {
let [error, expectedError] = getParseError("<p>Not a <<b>well-formed</b> xml string</p>");
is(error, expectedError, "Check that parsererror contains the right data.");

[error, expectedError] = getParseError("<p>Not \na <<b>well-formed</b> xml string</p>");
is(error, expectedError, "Check that parsererror contains the right data.");

[error, expectedError] = getParseError("<p>Not \na <<b>well-formed</b> xml string</p>");
is(error, expectedError, "Check that parsererror contains the right data.");

[error, expectedError] = getParseError("<p>Not a <<b>well-fo\nrmed</b> xml string</p>");
is(error, expectedError, "Check that parsererror contains the right data.");

[error, expectedError] = getParseError(`<p>Not ${' '.repeat(512)} a <<b>well-formed</b> xml string</p>`);
is(error, expectedError, "Check that parsererror contains the right data.");

[error, expectedError] = getParseError(`<p>${' '.repeat(505)}<br>${' '.repeat(512)}<<b>Not a well-formed</b> xml string</p>`);
is(error, expectedError, "Check that parsererror contains the right data.");

[error, expectedError] = getParseError(`<p>${' '.repeat(2048)}<br>${' '.repeat(512)}<<b>Not a well-formed</b> xml string</p>`);
is(error, expectedError, "Check that parsererror contains the right data.");

fetch("file_xml_parse_error.xml").then(response => response.text()).then(string => {
let doc = document.getElementById("frame").contentDocument;
error = doc.documentElement.textContent;
expectedError = getExpectedError(string, doc.location);
is(error, expectedError, "Check that parsererror contains the right data.");

SimpleTest.finish();
});
}
</script>
</head>
<body onload="runTest()">
<p id="display"><iframe id="frame" src="file_xml_parse_error.xml"></iframe></p>
<div id="content" style="display: none"></div>
<pre id="test"></pre>
</body>
</html>

0 comments on commit 4641d9b

Please sign in to comment.