Skip to content

Commit

Permalink
[Parse] An implementation for SE-0182
Browse files Browse the repository at this point in the history
  • Loading branch information
johnno1962 committed Jul 21, 2017
1 parent 3f308b7 commit c0fcc1a
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 2 deletions.
42 changes: 40 additions & 2 deletions lib/Parse/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1189,6 +1189,27 @@ unsigned Lexer::lexUnicodeEscape(const char *&CurPtr, Lexer *Diags) {
return CharValue;
}

/// maybeConsumeNewlineEscape - Look for an escaped (elided) newline.
///
/// Scanning begins at \p CurPtr + \p Offset. Any run of spaces and tabs is
/// skipped; if the first character after that run is a line ending ("\n",
/// "\r" or "\r\n"), \p CurPtr is updated to point just past the line ending
/// and true is returned. For any other character (including NUL) the function
/// returns false and \p CurPtr is left unmodified.
static bool maybeConsumeNewlineEscape(const char *&CurPtr, ssize_t Offset) {
  const char *Scan = CurPtr + Offset;

  // Horizontal whitespace is permitted between the backslash and the newline.
  while (*Scan == ' ' || *Scan == '\t')
    ++Scan;

  if (*Scan == '\n') {
    CurPtr = Scan + 1;
    return true;
  }

  if (*Scan == '\r') {
    ++Scan;
    // Treat a CR-LF pair as a single line ending.
    if (*Scan == '\n')
      ++Scan;
    CurPtr = Scan;
    return true;
  }

  // Anything else, NUL included, means this is not a newline escape.
  return false;
}

/// lexCharacter - Read a character and return its UTF32 code. If this is the
/// end of enclosing string/character sequence (i.e. the character is equal to
Expand Down Expand Up @@ -1254,6 +1275,10 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
unsigned CharValue = 0;
// Escape processing. We already ate the "\".
switch (*CurPtr) {
case ' ': case '\t': case '\n': case '\r':
if (MultilineString && maybeConsumeNewlineEscape(CurPtr, 0))
return '\n';
LLVM_FALLTHROUGH;
default: // Invalid escape.
if (EmitDiagnostics)
diagnose(CurPtr, diag::lex_invalid_escape);
Expand Down Expand Up @@ -1380,7 +1405,11 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
// Entering a recursive interpolated expression
OpenDelimiters.push_back('(');
continue;
case '\n': case '\r': case 0:
case '\n': case '\r':
if (AllowNewline.back())
continue;
LLVM_FALLTHROUGH;
case 0:
// Don't jump over newline/EOF due to preceding backslash!
return CurPtr-1;
default:
Expand Down Expand Up @@ -1883,12 +1912,14 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
// we know that there is a terminating " character. Use BytesPtr to avoid a
// range check subscripting on the StringRef.
const char *BytesPtr = Bytes.begin();
bool IsEscapedNewline = false;
while (BytesPtr < Bytes.end()) {
char CurChar = *BytesPtr++;

// Multiline string line ending normalization and indent stripping.
if (CurChar == '\r' || CurChar == '\n') {
bool stripNewline = IsFirstSegment && BytesPtr - 1 == Bytes.begin();
bool stripNewline = IsEscapedNewline ||
(IsFirstSegment && BytesPtr - 1 == Bytes.begin());
if (CurChar == '\r' && *BytesPtr == '\n')
BytesPtr++;
if (*BytesPtr != '\r' && *BytesPtr != '\n')
Expand All @@ -1897,6 +1928,7 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
stripNewline = true;
if (!stripNewline)
TempString.push_back('\n');
IsEscapedNewline = false;
continue;
}

Expand All @@ -1921,6 +1953,12 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
case '\'': TempString.push_back('\''); continue;
case '\\': TempString.push_back('\\'); continue;

case ' ': case '\t': case '\n': case '\r':
if (maybeConsumeNewlineEscape(BytesPtr, -1)) {
IsEscapedNewline = true;
BytesPtr--;
}
continue;

// String interpolation.
case '(':
Expand Down
6 changes: 6 additions & 0 deletions test/Parse/multiline_errors.swift
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,9 @@ _ = "hello\(
""")!"
// expected-error@-4 {{unterminated string literal}}
// expected-error@-2 {{unterminated string literal}}

_ = """
line one \ non-whitepace
line two
"""
// expected-error@-3 {{invalid escape sequence in literal}}
45 changes: 45 additions & 0 deletions test/Parse/multiline_string.swift
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,51 @@ _ = """
"""
// CHECK: "Twelve\nNu"

_ = """
newline \
elided
"""
// CHECK: "newline elided"

// contains trailing whitespace
_ = """
trailing \
\("""
substring1 \
\("""
substring2 \
substring3
""")\
""") \
whitepsace
"""
// CHECK: "trailing "
// CHECK: "substring1 "
// CHECK: "substring2 substring3"
// CHECK: " whitepsace"

// contains trailing whitespace
_ = """
foo\

bar
"""
// CHECK: "foo\nbar"

// contains trailing whitespace
_ = """
foo\

bar
"""
// CHECK: "foo\nbar"

_ = """
foo \
bar
"""
// CHECK: "foo bar"

_ = """
ABC
Expand Down

0 comments on commit c0fcc1a

Please sign in to comment.