Skip to content

Commit

Permalink
Bug 1749935 - Always create scanner upfront. r=hsivonen
Browse files Browse the repository at this point in the history
  • Loading branch information
petervanderbeken committed Feb 14, 2022
1 parent c68cf34 commit 15b9997
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 93 deletions.
28 changes: 20 additions & 8 deletions parser/htmlparser/CParserContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,29 @@
#include "nsMimeTypes.h"
#include "nsHTMLTokenizer.h"

CParserContext::CParserContext(nsScanner* aScanner, eParserCommands aCommand,
eAutoDetectResult aStatus, bool aCopyUnused)
: mScanner(mozilla::WrapUnique(aScanner)),
CParserContext::CParserContext(nsIURI* aURI, eParserCommands aCommand)
: mScanner(aURI),
mDTDMode(eDTDMode_autodetect),
mDocType(static_cast<eParserDocType>(0)),
mDocType(eUnknown),
mStreamListenerState(eNone),
mContextType(eCTNone),
mAutoDetectStatus(aStatus),
mContextType(eCTURL),
mParserCommand(aCommand),
mMultipart(true),
mCopyUnused(aCopyUnused) {
mCopyUnused(false) {
MOZ_COUNT_CTOR(CParserContext);
}

/**
 * Builds a parser context for string input (context type eCTString).
 *
 * The scanner is constructed from aBuffer and is incremental unless
 * aLastBuffer is true. The context is initialized as XML: mime type
 * "application/xml", doc type eXML and DTD mode eDTDMode_full_standards.
 *
 * @param aBuffer     passed to the scanner's constructor
 * @param aCommand    stored as mParserCommand
 * @param aLastBuffer when true, the stream listener state starts at eOnStop
 *                    (otherwise eOnDataAvail), mMultipart is false and
 *                    mCopyUnused is true
 */
CParserContext::CParserContext(const nsAString& aBuffer,
eParserCommands aCommand, bool aLastBuffer)
: mScanner(aBuffer, !aLastBuffer),  // second argument is aIncremental
mMimeType("application/xml"_ns),
mDTDMode(eDTDMode_full_standards),
mDocType(eXML),
mStreamListenerState(aLastBuffer ? eOnStop : eOnDataAvail),
mContextType(eCTString),
mParserCommand(aCommand),
mMultipart(!aLastBuffer),
mCopyUnused(aLastBuffer) {
MOZ_COUNT_CTOR(CParserContext);
}

Expand All @@ -34,7 +46,7 @@ CParserContext::~CParserContext() {
void CParserContext::SetMimeType(const nsACString& aMimeType) {
mMimeType.Assign(aMimeType);

mDocType = ePlainText;
mDocType = eUnknown;

if (mMimeType.EqualsLiteral(TEXT_HTML))
mDocType = eHTML_Strict;
Expand Down
13 changes: 6 additions & 7 deletions parser/htmlparser/CParserContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,11 @@ class nsITokenizer;

class CParserContext {
public:
enum eContextType { eCTNone, eCTURL, eCTString, eCTStream };
enum eContextType { eCTURL, eCTString };

explicit CParserContext(nsScanner* aScanner,
eParserCommands aCommand = eViewNormal,
eAutoDetectResult aStatus = eUnknownDetect,
bool aCopyUnused = false);
CParserContext(nsIURI* aURI, eParserCommands aCommand);
CParserContext(const nsAString& aBuffer, eParserCommands aCommand,
bool aLastBuffer);

~CParserContext();

Expand All @@ -46,15 +45,15 @@ class CParserContext {
mRequest; // provided by necko to differentiate different input streams
// why is mRequest strongly referenced? see bug 102376.
nsCOMPtr<nsITokenizer> mTokenizer;
mozilla::UniquePtr<nsScanner> mScanner;
nsScanner mScanner;

nsCString mMimeType;
nsDTDMode mDTDMode;

eParserDocType mDocType;
eStreamState mStreamListenerState;
eContextType mContextType;
eAutoDetectResult mAutoDetectStatus;
eAutoDetectResult mAutoDetectStatus = eUnknownDetect;
eParserCommands mParserCommand;

bool mMultipart;
Expand Down
2 changes: 1 addition & 1 deletion parser/htmlparser/nsExpatDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1576,7 +1576,7 @@ nsExpatDriver::WillBuildModel(const CParserContext& aParserContext,
XML_PARAM_ENTITY_PARSING_ALWAYS);
#endif

auto baseURI = GetExpatBaseURI(aParserContext.mScanner->GetURI());
auto baseURI = GetExpatBaseURI(aParserContext.mScanner.GetURI());
auto uri =
TransferBuffer<XML_Char>(Sandbox(), &baseURI[0], ArrayLength(baseURI));
RLBOX_EXPAT_MCALL(MOZ_XML_SetBase, *uri);
Expand Down
2 changes: 1 addition & 1 deletion parser/htmlparser/nsIParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class Encoding;

enum eParserCommands { eViewNormal, eViewSource, eViewFragment, eViewErrors };

enum eParserDocType { ePlainText = 0, eXML, eHTML_Quirks, eHTML_Strict };
enum eParserDocType { eUnknown = 0, eXML, eHTML_Quirks, eHTML_Strict };

enum eStreamState { eNone, eOnStart, eOnDataAvail, eOnStop };

Expand Down
86 changes: 24 additions & 62 deletions parser/htmlparser/nsParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,8 @@ void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset,
bool aForceAutoDetection) {
mCharset = aCharset;
mCharsetSource = aCharsetSource;
if (mParserContext && mParserContext->mScanner) {
mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
if (mParserContext) {
mParserContext->mScanner.SetDocumentCharset(aCharset, aCharsetSource);
}
}

Expand Down Expand Up @@ -547,10 +547,7 @@ nsParser::Parse(nsIURI* aURL) {

MOZ_ASSERT(!mParserContext, "We expect mParserContext to be null.");

nsScanner* theScanner = new nsScanner(aURL, false);
mParserContext = MakeUnique<CParserContext>(theScanner, mCommand);
mParserContext->mMultipart = true;
mParserContext->mContextType = CParserContext::eCTURL;
mParserContext = MakeUnique<CParserContext>(aURL, mCommand);

return NS_OK;
}
Expand All @@ -561,8 +558,6 @@ nsParser::Parse(nsIURI* aURL) {
* @param aSourceBuffer contains a string-full of real content
*/
nsresult nsParser::Parse(const nsAString& aSourceBuffer, bool aLastCall) {
nsresult result = NS_OK;

if (mInternalState == NS_ERROR_OUT_OF_MEMORY) {
// Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED
// to avoid introducing unintentional changes to behavior.
Expand All @@ -571,7 +566,7 @@ nsresult nsParser::Parse(const nsAString& aSourceBuffer, bool aLastCall) {

// Don't bother if we're never going to parse this.
if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
return result;
return NS_OK;
}

if (!aLastCall && aSourceBuffer.IsEmpty()) {
Expand All @@ -580,61 +575,28 @@ nsresult nsParser::Parse(const nsAString& aSourceBuffer, bool aLastCall) {
// some data is actually passed in.
// But if this is the last call, make sure to finish up
// stuff correctly.
return result;
return NS_OK;
}

// Maintain a reference to ourselves so we don't go away
// till we're completely done.
nsCOMPtr<nsIParser> kungFuDeathGrip(this);

if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
if (!mParserContext) {
// Only make a new context if we don't have one.
nsScanner* theScanner = new nsScanner(mUnusedInput);
NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);

mParserContext = MakeUnique<CParserContext>(theScanner, mCommand,
eUnknownDetect, aLastCall);

mParserContext->mMultipart = !aLastCall; // By default

// Start fix bug 40143
if (mParserContext->mMultipart) {
mParserContext->mStreamListenerState = eOnDataAvail;
if (mParserContext->mScanner) {
mParserContext->mScanner->SetIncremental(true);
}
} else {
mParserContext->mStreamListenerState = eOnStop;
if (mParserContext->mScanner) {
mParserContext->mScanner->SetIncremental(false);
}
}
// end fix for 40143

mParserContext->mContextType = CParserContext::eCTString;
mParserContext->SetMimeType("application/xml"_ns);
mParserContext->mDTDMode = eDTDMode_full_standards;

mUnusedInput.Truncate();

mParserContext->mScanner->Append(aSourceBuffer);
// Do not interrupt document.write() - bug 95487
result = ResumeParse(false, false, false);
} else {
mParserContext->mScanner->Append(aSourceBuffer);
// Set stream listener state to eOnStop, on the final context - Fix
// 68160, to guarantee DidBuildModel() call - Fix 36148
if (aLastCall) {
mParserContext->mStreamListenerState = eOnStop;
mParserContext->mScanner->SetIncremental(false);
}
if (!mParserContext) {
// Only make a new context if we don't have one.
mParserContext =
MakeUnique<CParserContext>(mUnusedInput, mCommand, aLastCall);

ResumeParse(false, false, false);
}
mUnusedInput.Truncate();
} else if (aLastCall) {
// Set stream listener state to eOnStop, on the final context - Fix
// 68160, to guarantee DidBuildModel() call - Fix 36148
mParserContext->mStreamListenerState = eOnStop;
mParserContext->mScanner.SetIncremental(false);
}

return result;
mParserContext->mScanner.Append(aSourceBuffer);
return ResumeParse(false, false, false);
}

NS_IMETHODIMP
Expand Down Expand Up @@ -765,12 +727,12 @@ nsresult nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
bool theIterationIsOk = true;

while (result == NS_OK && theIterationIsOk) {
if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
if (!mUnusedInput.IsEmpty()) {
// -- Ref: Bug# 22485 --
// Insert the unused input into the source buffer
// as if it was read from the input stream.
// Adding UngetReadable() per vidur!!
mParserContext->mScanner->UngetReadable(mUnusedInput);
mParserContext->mScanner.UngetReadable(mUnusedInput);
mUnusedInput.Truncate(0);
}

Expand Down Expand Up @@ -1089,7 +1051,7 @@ nsresult nsParser::OnDataAvailable(nsIRequest* request,
ParserWriteStruct pws;
pws.mNeedCharsetCheck = true;
pws.mParser = this;
pws.mScanner = mParserContext->mScanner.get();
pws.mScanner = &mParserContext->mScanner;
pws.mRequest = request;

rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
Expand Down Expand Up @@ -1129,7 +1091,7 @@ nsresult nsParser::OnStopRequest(nsIRequest* request, nsresult status) {

if (mParserContext->mRequest == request) {
mParserContext->mStreamListenerState = eOnStop;
mParserContext->mScanner->SetIncremental(false);
mParserContext->mScanner.SetIncremental(false);
}

mStreamStatus = status;
Expand Down Expand Up @@ -1193,10 +1155,10 @@ nsresult nsParser::Tokenize(bool aIsFinalChunk) {

WillTokenize(aIsFinalChunk);
while (NS_SUCCEEDED(result)) {
mParserContext->mScanner->Mark();
result = theTokenizer->ConsumeToken(*mParserContext->mScanner);
mParserContext->mScanner.Mark();
result = theTokenizer->ConsumeToken(mParserContext->mScanner);
if (NS_FAILED(result)) {
mParserContext->mScanner->RewindToMark();
mParserContext->mScanner.RewindToMark();
if (NS_ERROR_HTMLPARSER_EOF == result) {
break;
}
Expand Down
14 changes: 3 additions & 11 deletions parser/htmlparser/nsScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars)
* @param aMode represents the parser mode (nav, other)
* @return
*/
nsScanner::nsScanner(const nsAString& anHTMLString) {
nsScanner::nsScanner(const nsAString& anHTMLString, bool aIncremental)
: mIncremental(aIncremental) {
MOZ_COUNT_CTOR(nsScanner);

mSlidingBuffer = nullptr;
Expand All @@ -59,20 +60,15 @@ nsScanner::nsScanner(const nsAString& anHTMLString) {
mEndPosition = mCurrentPosition;
}
mMarkPosition = mCurrentPosition;
mIncremental = false;
mUnicodeDecoder = nullptr;
mCharsetSource = kCharsetUninitialized;
}

/**
* Use this constructor if you want i/o to be based on strings
* the scanner receives. If you pass a null filename, you
* can still provide data to the scanner via append.
*/
nsScanner::nsScanner(nsIURI* aURI, bool aCreateStream) : mURI(aURI) {
nsScanner::nsScanner(nsIURI* aURI) : mURI(aURI), mIncremental(true) {
MOZ_COUNT_CTOR(nsScanner);
NS_ASSERTION(!aCreateStream, "This is always true.");

mSlidingBuffer = nullptr;

// XXX This is a big hack. We need to initialize the iterators to something.
Expand All @@ -84,10 +80,6 @@ nsScanner::nsScanner(nsIURI* aURI, bool aCreateStream) : mURI(aURI) {
mMarkPosition = mCurrentPosition;
mEndPosition = mCurrentPosition;

mIncremental = true;

mUnicodeDecoder = nullptr;
mCharsetSource = kCharsetUninitialized;
// XML defaults to UTF-8 and about:blank is UTF-8, too.
SetDocumentCharset(UTF_8_ENCODING, kCharsetFromDocTypeDefault);
}
Expand Down
7 changes: 4 additions & 3 deletions parser/htmlparser/nsScanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#ifndef SCANNER
#define SCANNER

#include "nsCharsetSource.h"
#include "nsCOMPtr.h"
#include "nsString.h"
#include "nsIParser.h"
Expand Down Expand Up @@ -44,13 +45,13 @@ class nsScanner final {
/**
* Use this constructor for the XML fragment parsing case
*/
explicit nsScanner(const nsAString& anHTMLString);
nsScanner(const nsAString& anHTMLString, bool aIncremental);

/**
* Use this constructor if you want i/o to be based on
* a file (therefore a stream) or just data you provide via Append().
*/
nsScanner(nsIURI* aURI, bool aCreateStream);
explicit nsScanner(nsIURI* aURI);

~nsScanner();

Expand Down Expand Up @@ -176,7 +177,7 @@ class nsScanner final {
nsScannerIterator mEndPosition; // The current end of the scanner buffer
nsCOMPtr<nsIURI> mURI;
bool mIncremental;
int32_t mCharsetSource;
int32_t mCharsetSource = kCharsetUninitialized;
nsCString mCharset;
mozilla::UniquePtr<mozilla::Decoder> mUnicodeDecoder;

Expand Down

0 comments on commit 15b9997

Please sign in to comment.