Skip to content

Commit

Permalink
Revert "Refactor and cleanup Assembly Parsing / Lexing"
Browse files Browse the repository at this point in the history
Reverting for unexpected crashes on various platforms.

This reverts commit r272953.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272957 91177308-0d34-0410-b5e6-96231b3b80d8
niravhdave committed Jun 16, 2016
1 parent f3108f6 commit afacc94
Showing 9 changed files with 135 additions and 165 deletions.
11 changes: 5 additions & 6 deletions include/llvm/MC/MCParser/AsmLexer.h
Original file line number Diff line number Diff line change
@@ -29,8 +29,7 @@ class AsmLexer : public MCAsmLexer {

const char *CurPtr;
StringRef CurBuf;
bool IsAtStartOfLine;
bool IsAtStartOfStatement;
bool isAtStartOfLine;

void operator=(const AsmLexer&) = delete;
AsmLexer(const AsmLexer&) = delete;
@@ -46,15 +45,17 @@ class AsmLexer : public MCAsmLexer {
void setBuffer(StringRef Buf, const char *ptr = nullptr);

StringRef LexUntilEndOfStatement() override;
StringRef LexUntilEndOfLine();

size_t peekTokens(MutableArrayRef<AsmToken> Buf,
bool ShouldSkipSpace = true) override;

bool isAtStartOfComment(const char *Ptr);
bool isAtStatementSeparator(const char *Ptr);

const MCAsmInfo &getMAI() const { return MAI; }

private:
bool isAtStartOfComment(const char *Ptr);
bool isAtStatementSeparator(const char *Ptr);
int getNextChar();
AsmToken ReturnError(const char *Loc, const std::string &Msg);

@@ -66,8 +67,6 @@ class AsmLexer : public MCAsmLexer {
AsmToken LexQuote();
AsmToken LexFloatLiteral();
AsmToken LexHexFloatLiteral(bool NoIntDigits);

StringRef LexUntilEndOfLine();
};

} // end namespace llvm
8 changes: 2 additions & 6 deletions include/llvm/MC/MCParser/MCAsmLexer.h
Original file line number Diff line number Diff line change
@@ -39,15 +39,12 @@ class AsmToken {
// Real values.
Real,

// Comments
Comment,
HashDirective,
// No-value.
EndOfStatement,
Colon,
Space,
Plus, Minus, Tilde,
Slash, // '/'
Slash, // '/'
BackSlash, // '\'
LParen, RParen, LBrac, RBrac, LCurly, RCurly,
Star, Dot, Comma, Dollar, Equal, EqualEqual,
@@ -156,9 +153,8 @@ class MCAsmLexer {
const AsmToken &Lex() {
assert(!CurTok.empty());
CurTok.erase(CurTok.begin());
// Always place in front as LexToken may generate multiple tokens via UnLex.
if (CurTok.empty())
CurTok.insert(CurTok.begin(), LexToken());
CurTok.emplace_back(LexToken());
return CurTok.front();
}

137 changes: 60 additions & 77 deletions lib/MC/MCParser/AsmLexer.cpp
Original file line number Diff line number Diff line change
@@ -23,8 +23,7 @@ using namespace llvm;

AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
CurPtr = nullptr;
IsAtStartOfLine = true;
IsAtStartOfStatement = true;
isAtStartOfLine = true;
AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
}

@@ -51,9 +50,20 @@ AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
}

int AsmLexer::getNextChar() {
if (CurPtr == CurBuf.end())
char CurChar = *CurPtr++;
switch (CurChar) {
default:
return (unsigned char)CurChar;
case 0:
// A nul character in the stream is either the end of the current buffer or
// a random nul in the file. Disambiguate that here.
if (CurPtr - 1 != CurBuf.end())
return 0; // Just whitespace.

// Otherwise, return end of file.
--CurPtr; // Another call to lex will return EOF again.
return EOF;
return (unsigned char)*CurPtr++;
}
}

/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
@@ -159,52 +169,43 @@ AsmToken AsmLexer::LexIdentifier() {
AsmToken AsmLexer::LexSlash() {
switch (*CurPtr) {
case '*':
IsAtStartOfStatement = false;
break; // C style comment.
case '/':
++CurPtr;
return LexLineComment();
default:
IsAtStartOfStatement = false;
return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
return AsmToken(AsmToken::Slash, StringRef(CurPtr - 1, 1));
}

// C Style comment.
++CurPtr; // skip the star.
while (CurPtr != CurBuf.end()) {
switch (*CurPtr++) {
while (1) {
int CurChar = getNextChar();
switch (CurChar) {
case EOF:
return ReturnError(TokStart, "unterminated comment");
case '*':
// End of the comment?
if (*CurPtr != '/')
break;
if (CurPtr[0] != '/') break;

++CurPtr; // End the */.
return AsmToken(AsmToken::Comment,
StringRef(TokStart, CurPtr - TokStart));
return LexToken();
}
}
return ReturnError(TokStart, "unterminated comment");
}

/// LexLineComment: Comment: #[^\n]*
/// : //[^\n]*
AsmToken AsmLexer::LexLineComment() {
// Mark This as an end of statement with a body of the
// comment. While it would be nicer to leave this two tokens,
// backwards compatability with TargetParsers makes keeping this in this form
// better.
// FIXME: This is broken if we happen to a comment at the end of a file, which
// was .included, and which doesn't end with a newline.
int CurChar = getNextChar();
while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
CurChar = getNextChar();

IsAtStartOfLine = true;
// Whis is a whole line comment. leave newline
if (IsAtStartOfStatement)
return AsmToken(AsmToken::EndOfStatement,
StringRef(TokStart, CurPtr - TokStart));
IsAtStartOfStatement = true;

return AsmToken(AsmToken::EndOfStatement,
StringRef(TokStart, CurPtr - 1 - TokStart));
if (CurChar == EOF)
return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0));
}

static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
@@ -422,7 +423,8 @@ StringRef AsmLexer::LexUntilEndOfStatement() {

while (!isAtStartOfComment(CurPtr) && // Start of line comment.
!isAtStatementSeparator(CurPtr) && // End of statement marker.
*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
*CurPtr != '\n' && *CurPtr != '\r' &&
(*CurPtr != 0 || CurPtr != CurBuf.end())) {
++CurPtr;
}
return StringRef(TokStart, CurPtr-TokStart);
@@ -431,7 +433,8 @@ StringRef AsmLexer::LexUntilEndOfStatement() {
StringRef AsmLexer::LexUntilEndOfLine() {
TokStart = CurPtr;

while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
while (*CurPtr != '\n' && *CurPtr != '\r' &&
(*CurPtr != 0 || CurPtr != CurBuf.end())) {
++CurPtr;
}
return StringRef(TokStart, CurPtr-TokStart);
@@ -441,8 +444,7 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
bool ShouldSkipSpace) {
const char *SavedTokStart = TokStart;
const char *SavedCurPtr = CurPtr;
bool SavedAtStartOfLine = IsAtStartOfLine;
bool SavedAtStartOfStatement = IsAtStartOfStatement;
bool SavedAtStartOfLine = isAtStartOfLine;
bool SavedSkipSpace = SkipSpace;

std::string SavedErr = getErr();
@@ -463,8 +465,7 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
SetError(SavedErrLoc, SavedErr);

SkipSpace = SavedSkipSpace;
IsAtStartOfLine = SavedAtStartOfLine;
IsAtStartOfStatement = SavedAtStartOfStatement;
isAtStartOfLine = SavedAtStartOfLine;
CurPtr = SavedCurPtr;
TokStart = SavedTokStart;

@@ -494,45 +495,29 @@ AsmToken AsmLexer::LexToken() {
// This always consumes at least one character.
int CurChar = getNextChar();

if (CurChar == '#' && IsAtStartOfStatement) {
// If this starts with a '#', this may be a cpp
// hash directive and otherwise a line comment.
AsmToken TokenBuf[2];
MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
size_t num = peekTokens(Buf, true);
// There cannot be a space preceeding this
if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
TokenBuf[1].is(AsmToken::String)) {
CurPtr = TokStart; // reset curPtr;
StringRef s = LexUntilEndOfLine();
UnLex(TokenBuf[1]);
UnLex(TokenBuf[0]);
return AsmToken(AsmToken::HashDirective, s);
}
if (isAtStartOfComment(TokStart)) {
// If this comment starts with a '#', then return the Hash token and let
// the assembler parser see if it can be parsed as a cpp line filename
// comment. We do this only if we are at the start of a line.
if (CurChar == '#' && isAtStartOfLine)
return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
isAtStartOfLine = true;
return LexLineComment();
}

if (isAtStartOfComment(TokStart))
return LexLineComment();

if (isAtStatementSeparator(TokStart)) {
CurPtr += strlen(MAI.getSeparatorString()) - 1;
IsAtStartOfLine = true;
IsAtStartOfStatement = true;
return AsmToken(AsmToken::EndOfStatement,
StringRef(TokStart, strlen(MAI.getSeparatorString())));
}

// If we're missing a newline at EOF, make sure we still get an
// EndOfStatement token before the Eof token.
if (CurChar == EOF && !IsAtStartOfStatement) {
IsAtStartOfLine = true;
IsAtStartOfStatement = true;
if (CurChar == EOF && !isAtStartOfLine) {
isAtStartOfLine = true;
return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
}
IsAtStartOfLine = false;
bool OldIsAtStartOfStatement = IsAtStartOfStatement;
IsAtStartOfStatement = false;

isAtStartOfLine = false;
switch (CurChar) {
default:
// Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
@@ -541,24 +526,24 @@ AsmToken AsmLexer::LexToken() {

// Unknown character, emit an error.
return ReturnError(TokStart, "invalid character in input");
case EOF:
IsAtStartOfLine = true;
IsAtStartOfStatement = true;
return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
case 0:
case ' ':
case '\t':
IsAtStartOfStatement = OldIsAtStartOfStatement;
while (*CurPtr == ' ' || *CurPtr == '\t')
CurPtr++;
if (SkipSpace)
return LexToken(); // Ignore whitespace.
else
return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
case '\n':
if (SkipSpace) {
// Ignore whitespace.
return LexToken();
} else {
int len = 1;
while (*CurPtr==' ' || *CurPtr=='\t') {
CurPtr++;
len++;
}
return AsmToken(AsmToken::Space, StringRef(TokStart, len));
}
case '\n': // FALL THROUGH.
case '\r':
IsAtStartOfLine = true;
IsAtStartOfStatement = true;
isAtStartOfLine = true;
return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
@@ -601,9 +586,7 @@ AsmToken AsmLexer::LexToken() {
}
return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
case '/':
IsAtStartOfStatement = OldIsAtStartOfStatement;
return LexSlash();
case '/': return LexSlash();
case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
case '\'': return LexSingleQuote();
case '"': return LexQuote();
Loading

0 comments on commit afacc94

Please sign in to comment.