Skip to content

Commit

Permalink
Fixes tests that was broken with enhancements made to the lexer wit…
Browse files Browse the repository at this point in the history
…h commit graphql-go#137

- Increased coverage for `lexer` package to 100%
- Added more tests to cover parsing unicode strings (graphql-go#135) as well.
- Fixed invalid test for `lexer` properly escaping slashes for TokenString type
  • Loading branch information
sogko committed Jun 12, 2016
1 parent 065ab6b commit db630ca
Show file tree
Hide file tree
Showing 5 changed files with 312 additions and 79 deletions.
1 change: 0 additions & 1 deletion gqlerrors/syntax.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package gqlerrors
import (
"fmt"
"regexp"

"strings"

"github.com/graphql-go/graphql/language/ast"
Expand Down
63 changes: 37 additions & 26 deletions language/lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,6 @@ type Token struct {
Value string
}

func (t *Token) String() string {
return fmt.Sprintf("%s", tokenDescription[t.Kind])
}

type Lexer func(resetPosition int) (Token, error)

func Lex(s *source.Source) Lexer {
Expand All @@ -106,24 +102,28 @@ func Lex(s *source.Source) Lexer {

// Reads an alphanumeric + underscore name from the source.
// [_A-Za-z][_0-9A-Za-z]*
func readName(source *source.Source, position int) Token {
// position: Points to the byte position in the byte array
// runePosition: Points to the rune position in the byte array
func readName(source *source.Source, position, runePosition int) Token {
body := source.Body
bodyLength := len(body)
end := position + 1
endByte := position + 1
endRune := runePosition + 1
for {
code, n := runeAt(body, end)
if (end != bodyLength) &&
code, _ := runeAt(body, endByte)
if (endByte != bodyLength) &&
(code == '_' || // _
code >= '0' && code <= '9' || // 0-9
code >= 'A' && code <= 'Z' || // A-Z
code >= 'a' && code <= 'z') { // a-z
end += n
endByte++
endRune++
continue
} else {
break
}
}
return makeToken(TokenKind[NAME], position, end, string(body[position:end]))
return makeToken(TokenKind[NAME], runePosition, endRune, string(body[position:endByte]))
}

// Reads a number token from the source file, either a float
Expand Down Expand Up @@ -212,6 +212,7 @@ func readDigits(s *source.Source, start int, firstCode rune, codeLength int) (in
func readString(s *source.Source, start int) (Token, error) {
body := s.Body
position := start + 1
runePosition := start + 1
chunkStart := position
var code rune
var n int
Expand All @@ -226,9 +227,10 @@ func readString(s *source.Source, start int) (Token, error) {

// SourceCharacter
if code < 0x0020 && code != 0x0009 {
return Token{}, gqlerrors.NewSyntaxError(s, position, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code)))
return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code)))
}
position += n
runePosition++
if code == '\\' { // \
valueBuffer.Write(body[chunkStart : position-1])
code, n = runeAt(body, position)
Expand Down Expand Up @@ -260,9 +262,9 @@ func readString(s *source.Source, start int) (Token, error) {
case 'u':
// Check if there are at least 4 bytes available
if len(body) <= position+4 {
return Token{}, gqlerrors.NewSyntaxError(s, position,
return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
fmt.Sprintf("Invalid character escape sequence: "+
"\\u%v", body[position+1:]))
"\\u%v", string(body[position+1:])))
}
charCode := uniCharCode(
rune(body[position+1]),
Expand All @@ -271,18 +273,20 @@ func readString(s *source.Source, start int) (Token, error) {
rune(body[position+4]),
)
if charCode < 0 {
return Token{}, gqlerrors.NewSyntaxError(s, position,
return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
fmt.Sprintf("Invalid character escape sequence: "+
"\\u%v", body[position+1:position+5]))
"\\u%v", string(body[position+1:position+5])))
}
valueBuffer.WriteRune(charCode)
position += 4
runePosition += 4
break
default:
return Token{}, gqlerrors.NewSyntaxError(s, position,
return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
fmt.Sprintf(`Invalid character escape sequence: \\%c.`, code))
}
position += n
runePosition++
chunkStart = position
}
continue
Expand All @@ -291,7 +295,7 @@ func readString(s *source.Source, start int) (Token, error) {
}
}
if code != '"' { // quote (")
return Token{}, gqlerrors.NewSyntaxError(s, position, "Unterminated string.")
return Token{}, gqlerrors.NewSyntaxError(s, runePosition, "Unterminated string.")
}
stringContent := body[chunkStart:position]
valueBuffer.Write(stringContent)
Expand Down Expand Up @@ -346,15 +350,15 @@ func printCharCode(code rune) string {
func readToken(s *source.Source, fromPosition int) (Token, error) {
body := s.Body
bodyLength := len(body)
position := positionAfterWhitespace(body, fromPosition)
position, runePosition := positionAfterWhitespace(body, fromPosition)
if position >= bodyLength {
return makeToken(TokenKind[EOF], position, position, ""), nil
}
code, codeLength := runeAt(body, position)

// SourceCharacter
if code < 0x0020 && code != 0x0009 && code != 0x000A && code != 0x000D {
return Token{}, gqlerrors.NewSyntaxError(s, position, fmt.Sprintf(`Invalid character %v`, printCharCode(code)))
return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character %v`, printCharCode(code)))
}

switch code {
Expand Down Expand Up @@ -405,12 +409,12 @@ func readToken(s *source.Source, fromPosition int) (Token, error) {
// A-Z
case 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
return readName(s, position), nil
return readName(s, position, runePosition), nil
// _
// a-z
case '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z':
return readName(s, position), nil
return readName(s, position, runePosition), nil
// -
// 0-9
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
Expand All @@ -428,12 +432,14 @@ func readToken(s *source.Source, fromPosition int) (Token, error) {
return token, nil
}
description := fmt.Sprintf("Unexpected character %v.", printCharCode(code))
return Token{}, gqlerrors.NewSyntaxError(s, position, description)
return Token{}, gqlerrors.NewSyntaxError(s, runePosition, description)
}

// Gets the rune from the byte array at given byte position and it's width in bytes
func runeAt(body []byte, position int) (code rune, charWidth int) {
if len(body) <= position {
return 0, utf8.RuneError
// <EOF>
return -1, utf8.RuneError
}

c := body[position]
Expand All @@ -448,9 +454,11 @@ func runeAt(body []byte, position int) (code rune, charWidth int) {
// Reads from body starting at startPosition until it finds a non-whitespace
// or commented character, then returns the position of that character for lexing.
// lexing.
func positionAfterWhitespace(body []byte, startPosition int) int {
// Returns both byte positions and rune position
func positionAfterWhitespace(body []byte, startPosition int) (position int, runePosition int) {
bodyLength := len(body)
position := startPosition
position = startPosition
runePosition = startPosition
for {
if position < bodyLength {
code, n := runeAt(body, position)
Expand All @@ -466,15 +474,18 @@ func positionAfterWhitespace(body []byte, startPosition int) int {
// Comma
code == 0x002C {
position += n
runePosition++
} else if code == 35 { // #
position += n
runePosition++
for {
code, n := runeAt(body, position)
if position < bodyLength &&
code != 0 &&
// SourceCharacter but not LineTerminator
(code > 0x001F || code == 0x0009) && code != 0x000A && code != 0x000D {
position += n
runePosition++
continue
} else {
break
Expand All @@ -488,7 +499,7 @@ func positionAfterWhitespace(body []byte, startPosition int) int {
break
}
}
return position
return position, runePosition
}

func GetTokenDesc(token Token) string {
Expand Down
Loading

0 comments on commit db630ca

Please sign in to comment.