Skip to content

Commit

Permalink
Fix corrupt JSON format with non-UTF-8 binary
Browse files Browse the repository at this point in the history
The JSON output format has been changed to support both valid UTF-8 and
non-UTF-8 binary data. Valid UTF-8 data has a '+' prefix and is stored
as a standard JSON string. Binary data has a '$' prefix and is stored
in base64.

fixes #1
  • Loading branch information
tidwall committed Jan 31, 2020
1 parent b6fafbe commit 9c44081
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 13 deletions.
37 changes: 24 additions & 13 deletions wal.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package wal

import (
"bufio"
"encoding/base64"
"encoding/binary"
"encoding/json"
"errors"
Expand All @@ -12,6 +13,8 @@ import (
"path/filepath"
"strconv"
"strings"
"unicode/utf8"
"unsafe"
)

var (
Expand Down Expand Up @@ -343,22 +346,20 @@ func appendJSONEntry(dst []byte, index uint64, data []byte) []byte {
dst = append(dst, `{"index":"`...)
dst = strconv.AppendUint(dst, index, 10)
dst = append(dst, `","data":`...)
dst = appendJSONString(dst, data)
dst = appendJSONData(dst, data)
dst = append(dst, '}', '\n')
return dst
}

func appendJSONString(dst []byte, s []byte) []byte {
for i := 0; i < len(s); i++ {
if s[i] < ' ' || s[i] == '\\' || s[i] == '"' || s[i] > 126 {
d, _ := json.Marshal(string(s))
return append(dst, d...)
}
func appendJSONData(dst []byte, s []byte) []byte {
if utf8.Valid(s) {
b, _ := json.Marshal(*(*string)(unsafe.Pointer(&s)))
dst = append(dst, '"', '+')
return append(dst, b[1:]...)
}
dst = append(dst, '"')
dst = append(dst, s...)
dst = append(dst, '"')
return dst
dst = append(dst, '"', '$')
dst = append(dst, base64.URLEncoding.EncodeToString(s)...)
return append(dst, '"')
}

func appendBinaryEntry(dst []byte, index uint64, data []byte) []byte {
Expand Down Expand Up @@ -615,11 +616,21 @@ func readEntry(rd *bufio.Reader, frmt LogFormat, discardData bool) (
return 0, nil, ErrCorrupt
}
s, ok := m["data"]
if !ok {
if !ok || len(s) == 0 {
return 0, nil, ErrCorrupt
}
if !discardData {
data = []byte(s)
switch s[0] {
case '$':
data, err = base64.URLEncoding.DecodeString(s[1:])
if err != nil {
return 0, nil, ErrCorrupt
}
case '+':
data = []byte(s[1:])
default:
return 0, nil, ErrCorrupt
}
}
return index, data, nil
}
Expand Down
25 changes: 25 additions & 0 deletions wal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -613,3 +613,28 @@ func makeOpts(segSize int, dur Durability, lf LogFormat) *Options {
opts.LogFormat = lf
return &opts
}

// TestIssue1 is a regression test for
// https://github.com/tidwall/wal/issues/1
//
// It writes a payload that is not valid UTF-8 to a JSON-format log and checks
// that reading the entry back returns exactly the bytes that were written.
func TestIssue1(t *testing.T) {
	// The payload contains stray continuation bytes (e.g. 131, 178), so it
	// is not valid UTF-8.
	in := []byte{0, 0, 0, 0, 0, 0, 0, 1, 37, 108, 131, 178, 151, 17, 77, 32,
		27, 48, 23, 159, 63, 14, 240, 202, 206, 151, 131, 98, 45, 165, 151, 67,
		38, 180, 54, 23, 138, 238, 246, 16, 0, 0, 0, 0}

	// Work on a copy so the package-level defaults are left untouched.
	opts := *DefaultOptions
	opts.LogFormat = JSON

	// Start from a clean directory, and remove it again when the test ends
	// so no files are left behind. This defer is registered before the
	// Close defer below, so the log is closed before its directory is
	// removed.
	if err := os.RemoveAll("testlog"); err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll("testlog")

	l, err := Open("testlog", &opts)
	if err != nil {
		t.Fatal(err)
	}
	defer l.Close()

	if err := l.Write(1, in); err != nil {
		t.Fatal(err)
	}
	out, err := l.Read(1)
	if err != nil {
		t.Fatal(err)
	}
	if string(in) != string(out) {
		t.Fatalf("data mismatch:\nwrote %v\nread  %v", in, out)
	}
}

0 comments on commit 9c44081

Please sign in to comment.