Skip to content

Commit

Permalink
Support unicode characters in parseWords
Browse files: browse the repository at this point in the history
Signed-off-by: Jonathan Stoppani <[email protected]>
  • Loading branch information
GaretJax committed Jun 8, 2016
1 parent a4422e6 commit 6284f04
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 9 deletions.
19 changes: 10 additions & 9 deletions builder/dockerfile/parser/line_parsers.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
 "fmt"
 "strings"
 "unicode"
+"unicode/utf8"
 )

var (
Expand Down Expand Up @@ -58,10 +59,11 @@ func parseWords(rest string) []string {
quote := '\000'
blankOK := false
var ch rune
+var chWidth int

-for pos := 0; pos <= len(rest); pos++ {
+for pos := 0; pos <= len(rest); pos += chWidth {
 if pos != len(rest) {
-ch = rune(rest[pos])
+ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
}

if phase == inSpaces { // Looking for start of word
Expand Down Expand Up @@ -95,15 +97,15 @@ func parseWords(rest string) []string {
phase = inQuote
}
if ch == tokenEscape {
-if pos+1 == len(rest) {
+if pos+chWidth == len(rest) {
continue // just skip an escape token at end of line
}
// If we're not quoted and we see an escape token, then always just
// add the escape token plus the char to the word, even if the char
// is a quote.
word += string(ch)
-pos++
-ch = rune(rest[pos])
+pos += chWidth
+ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
}
word += string(ch)
continue
Expand All @@ -114,14 +116,13 @@ func parseWords(rest string) []string {
}
// The escape token is special except for ' quotes - can't escape anything for '
if ch == tokenEscape && quote != '\'' {
-if pos+1 == len(rest) {
+if pos+chWidth == len(rest) {
phase = inWord
continue // just skip the escape token at end
}
-pos++
-nextCh := rune(rest[pos])
+pos += chWidth
 word += string(ch)
-ch = nextCh
+ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
}
word += string(ch)
}
Expand Down
12 changes: 12 additions & 0 deletions builder/dockerfile/parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ func TestParseWords(t *testing.T) {
"input": {"foo bar"},
"expect": {"foo", "bar"},
},
+{
+"input": {"foo\\ bar"},
+"expect": {"foo\\ bar"},
+},
{
"input": {"foo=bar"},
"expect": {"foo=bar"},
Expand All @@ -104,6 +108,14 @@ func TestParseWords(t *testing.T) {
"input": {`foo bar "abc xyz"`},
"expect": {"foo", "bar", `"abc xyz"`},
},
+{
+"input": {"àöû"},
+"expect": {"àöû"},
+},
+{
+"input": {`föo bàr "âbc xÿz"`},
+"expect": {"föo", "bàr", `"âbc xÿz"`},
+},
}

for _, test := range tests {
Expand Down

0 comments on commit 6284f04

Please sign in to comment.