From 2543f4b17c71d7e444128ae6301a45fa143ecff4 Mon Sep 17 00:00:00 2001 From: tiancaiamao Date: Sat, 12 Nov 2016 09:30:12 +0800 Subject: [PATCH] parser: handle scan identifier meets \0 (#1994) This is a vague corner case: it should be illegal according to the MySQL documentation, but the MySQL implementation seems to accept it, so we follow the implementation. Trim the trailing \0 in the conn packet, and report an error when the parser meets \0. --- parser/lexer.go | 11 ++++++++++- parser/misc.go | 5 +++++ parser/parser.y | 1 + parser/parser_test.go | 1 + parser/scanner_test.go | 5 +++++ server/conn.go | 7 +++++++ 6 files changed, 29 insertions(+), 1 deletion(-) diff --git a/parser/lexer.go b/parser/lexer.go index 385886e8c4ca1..f28ba925f3577 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -78,6 +78,9 @@ func (s *Scanner) Errorf(format string, a ...interface{}) { // Lex returns a token and store the token value in v. // Scanner satisfies yyLexer interface. +// 0 and invalid are special token id this function would return: +// return 0 tells parser that scanner meets EOF, +// return invalid tells parser that scanner meets illegal character. func (s *Scanner) Lex(v *yySymType) int { tok, pos, lit := s.scan() v.offset = pos.Offset @@ -129,6 +132,11 @@ func (s *Scanner) scan() (tok int, pos Pos, lit string) { ch0 = s.skipWhitespace() } pos = s.r.pos() + if s.r.eof() { + // when scanner meets EOF, the returned token should be 0, + // because 0 is a special token id to remind the parser that stream is end. 
+ return 0, pos, "" + } if ch0 != unicode.ReplacementChar && isIdentExtend(ch0) { return scanIdentifier(s) @@ -516,7 +524,8 @@ func (r *reader) peek() rune { v, w := rune(r.s[r.p.Offset]), 1 switch { case v == 0: - return unicode.ReplacementChar + r.w = w + return v // illegal UTF-8 encoding case v >= 0x80: v, w = utf8.DecodeRuneInString(r.s[r.p.Offset:]) if v == utf8.RuneError && w == 1 { diff --git a/parser/misc.go b/parser/misc.go index 74f5fb54b14d3..529a084cc0432 100644 --- a/parser/misc.go +++ b/parser/misc.go @@ -83,6 +83,11 @@ func initTokenFunc(str string, fn func(s *Scanner) (int, Pos, string)) { } func init() { + // invalid is a special token defined in parser.y, when parser meet + // this token, it will throw an error. + // set root trie node's token to invalid, so when input match nothing + // in the trie, invalid will be the default return token. + ruleTable.token = invalid initTokenByte('*', int('*')) initTokenByte('/', int('/')) initTokenByte('+', int('+')) diff --git a/parser/parser.y b/parser/parser.y index 4e8ee6f2a1cbb..c174547a4c5f4 100644 --- a/parser/parser.y +++ b/parser/parser.y @@ -47,6 +47,7 @@ import ( %token /*yy:token "%c" */ identifier "identifier" /*yy:token "\"%c\"" */ stringLit "string literal" + invalid "a special token never used by parser, used by lexer to indicate error" with "WITH" diff --git a/parser/parser_test.go b/parser/parser_test.go index b4b250eee1859..64cbc4e706542 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -726,6 +726,7 @@ func (s *testParserSuite) TestIdentifier(c *C) { {"create database `123`", true}, {"create table `123` (123a1 int)", true}, {"create table 123 (123a1 int)", false}, + {fmt.Sprintf("select * from t%cble", 0), false}, } s.RunTest(c, table) } diff --git a/parser/scanner_test.go b/parser/scanner_test.go index c4a7870d7bf9b..a2dd5679df958 100644 --- a/parser/scanner_test.go +++ b/parser/scanner_test.go @@ -14,6 +14,8 @@ package parser import ( + "fmt" + . 
"github.com/pingcap/check" "github.com/pingcap/tidb/util/testleak" ) @@ -97,6 +99,8 @@ func (s *testLexerSuite) TestLiteral(c *C) { {"0x3c26", hexLit}, {"x'13181C76734725455A'", hexLit}, {"0b01", bitLit}, + {fmt.Sprintf("%c", 0), invalid}, + {fmt.Sprintf("t1%c", 0), identifier}, } runTest(c, table) } @@ -181,6 +185,7 @@ func (s *testLexerSuite) TestIdentifier(c *C) { {`哈哈`, "哈哈"}, {"`numeric`", "numeric"}, {"\r\n \r \n \tthere\t \n", "there"}, + {fmt.Sprintf("t1%cxxx", 0), "t1"}, // `5number`, } l := &Scanner{} diff --git a/server/conn.go b/server/conn.go index d8ac20747ee24..3d1a72f6e8153 100644 --- a/server/conn.go +++ b/server/conn.go @@ -369,6 +369,13 @@ func (cc *clientConn) dispatch(data []byte) error { case mysql.ComQuit: return io.EOF case mysql.ComQuery: // Most frequently used command. + // For issue 1989 + // Input payload may end with byte '\0', we didn't find related mysql document about it, but mysql + // implementation accept that case. So trim the last '\0' here as if the payload an EOF string. + // See http://dev.mysql.com/doc/internals/en/com-query.html + if data[len(data)-1] == 0 { + data = data[:len(data)-1] + } return cc.handleQuery(hack.String(data)) case mysql.ComPing: return cc.writeOK()