Skip to content

Commit

Permalink
parser: handle scan identifier meets \0 (pingcap#1994)
Browse files Browse the repository at this point in the history
This is an ambiguous corner case.
According to the MySQL documentation it should be illegal,
but the MySQL implementation seems to accept it,
so we follow the implementation:
trim the trailing \0 in the connection packet, and report an error when the parser meets \0.
  • Loading branch information
tiancaiamao authored and shenli committed Nov 12, 2016
1 parent ab8795d commit 2543f4b
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 1 deletion.
11 changes: 10 additions & 1 deletion parser/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ func (s *Scanner) Errorf(format string, a ...interface{}) {

// Lex returns a token and stores the token value in v.
// Scanner satisfies the yyLexer interface.
// 0 and invalid are special token ids this function may return:
// returning 0 tells the parser that the scanner has reached EOF,
// returning invalid tells the parser that the scanner met an illegal character.
func (s *Scanner) Lex(v *yySymType) int {
tok, pos, lit := s.scan()
v.offset = pos.Offset
Expand Down Expand Up @@ -129,6 +132,11 @@ func (s *Scanner) scan() (tok int, pos Pos, lit string) {
ch0 = s.skipWhitespace()
}
pos = s.r.pos()
if s.r.eof() {
// When the scanner reaches EOF, the returned token should be 0,
// because 0 is a special token id that tells the parser the stream has ended.
return 0, pos, ""
}

if ch0 != unicode.ReplacementChar && isIdentExtend(ch0) {
return scanIdentifier(s)
Expand Down Expand Up @@ -516,7 +524,8 @@ func (r *reader) peek() rune {
v, w := rune(r.s[r.p.Offset]), 1
switch {
case v == 0:
return unicode.ReplacementChar
r.w = w
return v // illegal UTF-8 encoding
case v >= 0x80:
v, w = utf8.DecodeRuneInString(r.s[r.p.Offset:])
if v == utf8.RuneError && w == 1 {
Expand Down
5 changes: 5 additions & 0 deletions parser/misc.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ func initTokenFunc(str string, fn func(s *Scanner) (int, Pos, string)) {
}

func init() {
// invalid is a special token defined in parser.y; when the parser meets
// this token, it will throw an error.
// Set the root trie node's token to invalid, so that when the input matches
// nothing in the trie, invalid is the default returned token.
ruleTable.token = invalid
initTokenByte('*', int('*'))
initTokenByte('/', int('/'))
initTokenByte('+', int('+'))
Expand Down
1 change: 1 addition & 0 deletions parser/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ import (
%token <ident>
/*yy:token "%c" */ identifier "identifier"
/*yy:token "\"%c\"" */ stringLit "string literal"
invalid "a special token never used by parser, used by lexer to indicate error"

with "WITH"

Expand Down
1 change: 1 addition & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,7 @@ func (s *testParserSuite) TestIdentifier(c *C) {
{"create database `123`", true},
{"create table `123` (123a1 int)", true},
{"create table 123 (123a1 int)", false},
{fmt.Sprintf("select * from t%cble", 0), false},
}
s.RunTest(c, table)
}
Expand Down
5 changes: 5 additions & 0 deletions parser/scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
package parser

import (
"fmt"

. "github.com/pingcap/check"
"github.com/pingcap/tidb/util/testleak"
)
Expand Down Expand Up @@ -97,6 +99,8 @@ func (s *testLexerSuite) TestLiteral(c *C) {
{"0x3c26", hexLit},
{"x'13181C76734725455A'", hexLit},
{"0b01", bitLit},
{fmt.Sprintf("%c", 0), invalid},
{fmt.Sprintf("t1%c", 0), identifier},
}
runTest(c, table)
}
Expand Down Expand Up @@ -181,6 +185,7 @@ func (s *testLexerSuite) TestIdentifier(c *C) {
{`哈哈`, "哈哈"},
{"`numeric`", "numeric"},
{"\r\n \r \n \tthere\t \n", "there"},
{fmt.Sprintf("t1%cxxx", 0), "t1"},
// `5number`,
}
l := &Scanner{}
Expand Down
7 changes: 7 additions & 0 deletions server/conn.go
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,13 @@ func (cc *clientConn) dispatch(data []byte) error {
case mysql.ComQuit:
return io.EOF
case mysql.ComQuery: // Most frequently used command.
// For issue 1989
// The input payload may end with the byte '\0'. We didn't find any related MySQL documentation about it, but the MySQL
// implementation accepts that case. So trim the last '\0' here, as if the payload were an EOF string.
// See http://dev.mysql.com/doc/internals/en/com-query.html
if data[len(data)-1] == 0 {
data = data[:len(data)-1]
}
return cc.handleQuery(hack.String(data))
case mysql.ComPing:
return cc.writeOK()
Expand Down

0 comments on commit 2543f4b

Please sign in to comment.