Skip to content

Commit

Permalink
Lexer: fix a bug where restoring lexer state could produce duplicate
Browse files Browse the repository at this point in the history
code_complete tokens


Swift SVN r7636
  • Loading branch information
gribozavr committed Aug 27, 2013
1 parent 8842d75 commit 8a8b979
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 1 deletion.
10 changes: 9 additions & 1 deletion lib/Parse/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,15 @@ Lexer::State Lexer::getStateForBeginningOfTokenLoc(SourceLoc Loc) const {
// correctly lex the token if it is at the beginning of the line.
while (Ptr >= BufferStart + 1) {
char C = Ptr[-1];
if (C == ' ' || C == '\t' || C == 0) {
if (C == ' ' || C == '\t') {
Ptr--;
continue;
}
if (C == 0) {
// A NUL character can be either whitespace we diagnose or a code
// completion token.
if (Ptr - 1 == CodeCompletionPtr)
break;
Ptr--;
continue;
}
Expand Down
149 changes: 149 additions & 0 deletions unittests/Parse/LexerTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,152 @@ TEST_F(LexerTest, CharacterLiterals) {
}
}

// Saves the lexer state at the start of a token, lexes to end of file, then
// restores the state and expects the same token sequence to be replayed.
// The input has an embedded NUL between "aaa" and "bbb" and no newlines.
TEST_F(LexerTest, RestoreBasic) {
  // 14 bytes of payload; the NUL in the middle is part of the buffer.
  const char *Text = "aaa \t\0 bbb ccc";

  SourceManager SM;
  MemoryBuffer *Buffer = MemoryBuffer::getMemBuffer(StringRef(Text, 14));
  unsigned ID = SM->AddNewSourceBuffer(Buffer, llvm::SMLoc());

  Token Cur;
  Lexer Lex(SM, ID, /*Diags=*/nullptr, /*InSILMode=*/false);

  // First pass: "aaa" opens the line, "bbb" follows on the same line.
  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("aaa", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("bbb", Cur.getText());
  ASSERT_FALSE(Cur.isAtStartOfLine());

  // Remember where "bbb" begins so we can come back to it later.
  Lexer::State Saved = Lex.getStateForBeginningOfToken(Cur);

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("ccc", Cur.getText());
  ASSERT_FALSE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());

  // Second pass: rewind and expect "bbb", "ccc", eof with unchanged flags.
  Lex.restoreState(Saved);

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("bbb", Cur.getText());
  ASSERT_FALSE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("ccc", Cur.getText());
  ASSERT_FALSE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());
}

// Same save/restore round trip as RestoreBasic, but every token follows a
// newline. The test expects the "at start of line" flag to be reported
// identically before and after the state is restored, even though a space,
// an embedded NUL and a tab sit between the newline and "bbb".
TEST_F(LexerTest, RestoreNewlineFlag) {
  // 16 bytes of payload, including the embedded NUL.
  const char *Text = "aaa \n \0\tbbb \nccc";

  SourceManager SM;
  MemoryBuffer *Buffer = MemoryBuffer::getMemBuffer(StringRef(Text, 16));
  unsigned ID = SM->AddNewSourceBuffer(Buffer, llvm::SMLoc());

  Token Cur;
  Lexer Lex(SM, ID, /*Diags=*/nullptr, /*InSILMode=*/false);

  // First pass: each identifier is the first token on its line.
  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("aaa", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("bbb", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  // Capture the position of "bbb" for the later rewind.
  Lexer::State Saved = Lex.getStateForBeginningOfToken(Cur);

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("ccc", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());

  // Second pass: after the rewind "bbb" must still be flagged as starting
  // a line, and "ccc" likewise.
  Lex.restoreState(Saved);

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("bbb", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("ccc", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());
}

// Regression test for duplicated code-completion tokens. The embedded NUL at
// byte offset 6 is registered as the code-completion point, so the first pass
// yields tok::code_complete between "aaa" and "bbb". The state is saved at
// "bbb", i.e. after the completion token; restoring it must resume at "bbb"
// and must not produce tok::code_complete a second time.
TEST_F(LexerTest, RestoreStopAtCodeCompletion) {
  // 16 bytes of payload; offset 6 is the NUL byte.
  const char *Text = "aaa \n \0\tbbb \nccc";

  SourceManager SM;
  MemoryBuffer *Buffer = MemoryBuffer::getMemBuffer(StringRef(Text, 16));
  unsigned ID = SM->AddNewSourceBuffer(Buffer, llvm::SMLoc());
  SM.setCodeCompletionPoint(ID, 6);

  Token Cur;
  Lexer Lex(SM, ID, /*Diags=*/nullptr, /*InSILMode=*/false);

  // First pass: identifier, completion token, then the remaining identifiers.
  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("aaa", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  // The completion token is the first token on the second line ...
  Lex.lex(Cur);
  ASSERT_EQ(tok::code_complete, Cur.getKind());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  // ... so "bbb", which follows it, is not at the start of the line.
  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("bbb", Cur.getText());
  ASSERT_FALSE(Cur.isAtStartOfLine());

  // Capture the position of "bbb", which lies past the completion token.
  Lexer::State Saved = Lex.getStateForBeginningOfToken(Cur);

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("ccc", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());

  Lex.restoreState(Saved);

  // The saved position is after the completion token, so the very next token
  // has to be "bbb" rather than another tok::code_complete.

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("bbb", Cur.getText());
  ASSERT_FALSE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::identifier, Cur.getKind());
  ASSERT_EQ("ccc", Cur.getText());
  ASSERT_TRUE(Cur.isAtStartOfLine());

  Lex.lex(Cur);
  ASSERT_EQ(tok::eof, Cur.getKind());
}

0 comments on commit 8a8b979

Please sign in to comment.