Skip to content

Commit 2948823

Browse files
skiporakshayjshah
authored andcommittedMar 14, 2017
Add ByteString API (uber-go#366)
This is a breaking change that adds bytestring APIs (to log UTF-8 encoded bytes) to ObjectEncoder and ArrayEncoder. To actually save allocations along this path, I also benchmarked adding a []byte to the field union; this reduces the allocation count for bytestrings, but increases the cost of adding fields by ~50%. Fixes uber-go#324.
1 parent c8c0a55 commit 2948823

11 files changed

+179
-56
lines changed
 

‎array.go

+15
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ func Bools(key string, bs []bool) zapcore.Field {
4343
return Array(key, bools(bs))
4444
}
4545

46+
// ByteStrings constructs a field that carries a slice of []byte, each of which
47+
// must be UTF-8 encoded text.
48+
func ByteStrings(key string, bss [][]byte) zapcore.Field {
49+
return Array(key, byteStringsArray(bss))
50+
}
51+
4652
// Complex128s constructs a field that carries a slice of complex numbers.
4753
func Complex128s(key string, nums []complex128) zapcore.Field {
4854
return Array(key, complex128s(nums))
@@ -147,6 +153,15 @@ func (bs bools) MarshalLogArray(arr zapcore.ArrayEncoder) error {
147153
return nil
148154
}
149155

156+
type byteStringsArray [][]byte
157+
158+
func (bss byteStringsArray) MarshalLogArray(arr zapcore.ArrayEncoder) error {
159+
for i := range bss {
160+
arr.AppendByteString(bss[i])
161+
}
162+
return nil
163+
}
164+
150165
type complex128s []complex128
151166

152167
func (nums complex128s) MarshalLogArray(arr zapcore.ArrayEncoder) error {

‎array_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ func TestArrayWrappers(t *testing.T) {
5959
expected []interface{}
6060
}{
6161
{"empty bools", Bools("", []bool{}), []interface{}(nil)},
62+
{"empty byte strings", ByteStrings("", [][]byte{}), []interface{}(nil)},
6263
{"empty complex128s", Complex128s("", []complex128{}), []interface{}(nil)},
6364
{"empty complex64s", Complex64s("", []complex64{}), []interface{}(nil)},
6465
{"empty durations", Durations("", []time.Duration{}), []interface{}(nil)},
@@ -79,6 +80,7 @@ func TestArrayWrappers(t *testing.T) {
7980
{"empty uintptrs", Uintptrs("", []uintptr{}), []interface{}(nil)},
8081
{"empty errors", Errors("", []error{}), []interface{}(nil)},
8182
{"bools", Bools("", []bool{true, false}), []interface{}{true, false}},
83+
{"byte strings", ByteStrings("", [][]byte{{1, 2}, {3, 4}}), []interface{}{[]byte{1, 2}, []byte{3, 4}}},
8284
{"complex128s", Complex128s("", []complex128{1 + 2i, 3 + 4i}), []interface{}{1 + 2i, 3 + 4i}},
8385
{"complex64s", Complex64s("", []complex64{1 + 2i, 3 + 4i}), []interface{}{complex64(1 + 2i), complex64(3 + 4i)}},
8486
{"durations", Durations("", []time.Duration{1, 2}), []interface{}{time.Nanosecond, 2 * time.Nanosecond}},

‎field.go

+9-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ func Skip() zapcore.Field {
3636
// Binary constructs a field that carries an opaque binary blob.
3737
//
3838
// Binary data is serialized in an encoding-appropriate format. For example,
39-
// zap's JSON encoder base64-encodes binary blobs.
39+
// zap's JSON encoder base64-encodes binary blobs. To log UTF-8 encoded text,
40+
// use ByteString.
4041
func Binary(key string, val []byte) zapcore.Field {
4142
return zapcore.Field{Key: key, Type: zapcore.BinaryType, Interface: val}
4243
}
@@ -50,6 +51,13 @@ func Bool(key string, val bool) zapcore.Field {
5051
return zapcore.Field{Key: key, Type: zapcore.BoolType, Integer: ival}
5152
}
5253

54+
// ByteString constructs a field that carries UTF-8 encoded text as a []byte.
55+
// To log opaque binary blobs (which aren't necessarily valid UTF-8), use
56+
// Binary.
57+
func ByteString(key string, val []byte) zapcore.Field {
58+
return zapcore.Field{Key: key, Type: zapcore.ByteStringType, Interface: val}
59+
}
60+
5361
// Complex128 constructs a field that carries a complex number. Unlike most
5462
// numeric fields, this costs an allocation (to convert the complex128 to
5563
// interface{}).

‎field_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ func TestFieldConstructors(t *testing.T) {
7474
{"Binary", zapcore.Field{Key: "k", Type: zapcore.BinaryType, Interface: []byte("ab12")}, Binary("k", []byte("ab12"))},
7575
{"Bool", zapcore.Field{Key: "k", Type: zapcore.BoolType, Integer: 1}, Bool("k", true)},
7676
{"Bool", zapcore.Field{Key: "k", Type: zapcore.BoolType, Integer: 1}, Bool("k", true)},
77+
{"ByteString", zapcore.Field{Key: "k", Type: zapcore.ByteStringType, Interface: []byte("ab12")}, ByteString("k", []byte("ab12"))},
7778
{"Complex128", zapcore.Field{Key: "k", Type: zapcore.Complex128Type, Interface: 1 + 2i}, Complex128("k", 1+2i)},
7879
{"Complex64", zapcore.Field{Key: "k", Type: zapcore.Complex64Type, Interface: complex64(1 + 2i)}, Complex64("k", 1+2i)},
7980
{"Duration", zapcore.Field{Key: "k", Type: zapcore.DurationType, Integer: 1}, Duration("k", 1)},

‎logger_bench_test.go

+7
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,13 @@ func BenchmarkBoolField(b *testing.B) {
7575
})
7676
}
7777

78+
func BenchmarkByteStringField(b *testing.B) {
79+
val := []byte("bar")
80+
withBenchedLogger(b, func(log *Logger) {
81+
log.Info("ByteString.", ByteString("foo", val))
82+
})
83+
}
84+
7885
func BenchmarkFloat64Field(b *testing.B) {
7986
withBenchedLogger(b, func(log *Logger) {
8087
log.Info("Floating point.", Float64("foo", 3.14))

‎zapcore/encoder.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,8 @@ type ObjectEncoder interface {
220220
AddObject(key string, marshaler ObjectMarshaler) error
221221

222222
// Built-in types.
223-
AddBinary(key string, value []byte)
223+
AddBinary(key string, value []byte) // for arbitrary bytes
224+
AddByteString(key string, value []byte) // for UTF-8 encoded bytes
224225
AddBool(key string, value bool)
225226
AddComplex128(key string, value complex128)
226227
AddComplex64(key string, value complex64)
@@ -277,6 +278,7 @@ type ArrayEncoder interface {
277278
type PrimitiveArrayEncoder interface {
278279
// Built-in types.
279280
AppendBool(bool)
281+
AppendByteString([]byte) // for UTF-8 encoded bytes
280282
AppendComplex128(complex128)
281283
AppendComplex64(complex64)
282284
AppendFloat64(float64)

‎zapcore/field.go

+4
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ const (
4141
BinaryType
4242
// BoolType indicates that the field carries a bool.
4343
BoolType
44+
// ByteStringType indicates that the field carries UTF-8 encoded bytes.
45+
ByteStringType
4446
// Complex128Type indicates that the field carries a complex128.
4547
Complex128Type
4648
// Complex64Type indicates that the field carries a complex128.
@@ -112,6 +114,8 @@ func (f Field) AddTo(enc ObjectEncoder) {
112114
enc.AddBinary(f.Key, f.Interface.([]byte))
113115
case BoolType:
114116
enc.AddBool(f.Key, f.Integer == 1)
117+
case ByteStringType:
118+
enc.AddByteString(f.Key, f.Interface.([]byte))
115119
case Complex128Type:
116120
enc.AddComplex128(f.Key, f.Interface.(complex128))
117121
case Complex64Type:

‎zapcore/field_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ func TestFields(t *testing.T) {
107107
{t: ObjectMarshalerType, iface: users(2), want: map[string]interface{}{"users": 2}},
108108
{t: BinaryType, iface: []byte("foo"), want: []byte("foo")},
109109
{t: BoolType, i: 0, want: false},
110+
{t: ByteStringType, iface: []byte("foo"), want: []byte("foo")},
110111
{t: Complex128Type, iface: 1 + 2i, want: 1 + 2i},
111112
{t: Complex64Type, iface: complex64(1 + 2i), want: complex64(1 + 2i)},
112113
{t: DurationType, i: 1000, want: time.Microsecond},

‎zapcore/json_encoder.go

+71-27
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,11 @@ func (enc *jsonEncoder) AddBinary(key string, val []byte) {
9494
enc.AddString(key, base64.StdEncoding.EncodeToString(val))
9595
}
9696

97+
func (enc *jsonEncoder) AddByteString(key string, val []byte) {
98+
enc.addKey(key)
99+
enc.AppendByteString(val)
100+
}
101+
97102
func (enc *jsonEncoder) AddBool(key string, val bool) {
98103
enc.addKey(key)
99104
enc.AppendBool(val)
@@ -171,6 +176,13 @@ func (enc *jsonEncoder) AppendBool(val bool) {
171176
enc.buf.AppendBool(val)
172177
}
173178

179+
func (enc *jsonEncoder) AppendByteString(val []byte) {
180+
enc.addElementSeparator()
181+
enc.buf.AppendByte('"')
182+
enc.safeAddByteString(val)
183+
enc.buf.AppendByte('"')
184+
}
185+
174186
func (enc *jsonEncoder) AppendComplex128(val complex128) {
175187
enc.addElementSeparator()
176188
// Cast to a platform-independent, fixed-size type.
@@ -379,40 +391,72 @@ func (enc *jsonEncoder) appendFloat(val float64, bitSize int) {
379391
// user from browser vulnerabilities or JSONP-related problems.
380392
func (enc *jsonEncoder) safeAddString(s string) {
381393
for i := 0; i < len(s); {
382-
if b := s[i]; b < utf8.RuneSelf {
394+
if enc.tryAddRuneSelf(s[i]) {
383395
i++
384-
if 0x20 <= b && b != '\\' && b != '"' {
385-
enc.buf.AppendByte(b)
386-
continue
387-
}
388-
switch b {
389-
case '\\', '"':
390-
enc.buf.AppendByte('\\')
391-
enc.buf.AppendByte(b)
392-
case '\n':
393-
enc.buf.AppendByte('\\')
394-
enc.buf.AppendByte('n')
395-
case '\r':
396-
enc.buf.AppendByte('\\')
397-
enc.buf.AppendByte('r')
398-
case '\t':
399-
enc.buf.AppendByte('\\')
400-
enc.buf.AppendByte('t')
401-
default:
402-
// Encode bytes < 0x20, except for the escape sequences above.
403-
enc.buf.AppendString(`\u00`)
404-
enc.buf.AppendByte(_hex[b>>4])
405-
enc.buf.AppendByte(_hex[b&0xF])
406-
}
407396
continue
408397
}
409-
c, size := utf8.DecodeRuneInString(s[i:])
410-
if c == utf8.RuneError && size == 1 {
411-
enc.buf.AppendString(`\ufffd`)
398+
r, size := utf8.DecodeRuneInString(s[i:])
399+
if enc.tryAddRuneError(r, size) {
412400
i++
413401
continue
414402
}
415403
enc.buf.AppendString(s[i : i+size])
416404
i += size
417405
}
418406
}
407+
408+
// safeAddByteString is no-alloc equivalent of safeAddString(string(s)) for s []byte.
409+
func (enc *jsonEncoder) safeAddByteString(s []byte) {
410+
for i := 0; i < len(s); {
411+
if enc.tryAddRuneSelf(s[i]) {
412+
i++
413+
continue
414+
}
415+
r, size := utf8.DecodeRune(s[i:])
416+
if enc.tryAddRuneError(r, size) {
417+
i++
418+
continue
419+
}
420+
enc.buf.Write(s[i : i+size])
421+
i += size
422+
}
423+
}
424+
425+
// tryAddRuneSelf appends b if it is valid UTF-8 character represented in a single byte.
426+
func (enc *jsonEncoder) tryAddRuneSelf(b byte) bool {
427+
if b >= utf8.RuneSelf {
428+
return false
429+
}
430+
if 0x20 <= b && b != '\\' && b != '"' {
431+
enc.buf.AppendByte(b)
432+
return true
433+
}
434+
switch b {
435+
case '\\', '"':
436+
enc.buf.AppendByte('\\')
437+
enc.buf.AppendByte(b)
438+
case '\n':
439+
enc.buf.AppendByte('\\')
440+
enc.buf.AppendByte('n')
441+
case '\r':
442+
enc.buf.AppendByte('\\')
443+
enc.buf.AppendByte('r')
444+
case '\t':
445+
enc.buf.AppendByte('\\')
446+
enc.buf.AppendByte('t')
447+
default:
448+
// Encode bytes < 0x20, except for the escape sequences above.
449+
enc.buf.AppendString(`\u00`)
450+
enc.buf.AppendByte(_hex[b>>4])
451+
enc.buf.AppendByte(_hex[b&0xF])
452+
}
453+
return true
454+
}
455+
456+
func (enc *jsonEncoder) tryAddRuneError(r rune, size int) bool {
457+
if r == utf8.RuneError && size == 1 {
458+
enc.buf.AppendString(`\ufffd`)
459+
return true
460+
}
461+
return false
462+
}

‎zapcore/json_encoder_impl_test.go

+62-27
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,22 @@ func TestJSONEscaping(t *testing.T) {
8383
"\xed\xa0\x80": `\ufffd\ufffd\ufffd`,
8484
"foo\xed\xa0\x80": `foo\ufffd\ufffd\ufffd`,
8585
}
86-
for input, output := range cases {
87-
enc.truncate()
88-
enc.safeAddString(input)
89-
assertJSON(t, output, enc)
90-
}
86+
87+
t.Run("String", func(t *testing.T) {
88+
for input, output := range cases {
89+
enc.truncate()
90+
enc.safeAddString(input)
91+
assertJSON(t, output, enc)
92+
}
93+
})
94+
95+
t.Run("ByteString", func(t *testing.T) {
96+
for input, output := range cases {
97+
enc.truncate()
98+
enc.safeAddByteString([]byte(input))
99+
assertJSON(t, output, enc)
100+
}
101+
})
91102
}
92103

93104
func TestJSONEncoderObjectFields(t *testing.T) {
@@ -100,6 +111,10 @@ func TestJSONEncoderObjectFields(t *testing.T) {
100111
{"bool", `"k\\":true`, func(e Encoder) { e.AddBool(`k\`, true) }}, // test key escaping once
101112
{"bool", `"k":true`, func(e Encoder) { e.AddBool("k", true) }},
102113
{"bool", `"k":false`, func(e Encoder) { e.AddBool("k", false) }},
114+
{"byteString", `"k":"v\\"`, func(e Encoder) { e.AddByteString(`k`, []byte(`v\`)) }},
115+
{"byteString", `"k":"v"`, func(e Encoder) { e.AddByteString("k", []byte("v")) }},
116+
{"byteString", `"k":""`, func(e Encoder) { e.AddByteString("k", []byte{}) }},
117+
{"byteString", `"k":""`, func(e Encoder) { e.AddByteString("k", nil) }},
103118
{"complex128", `"k":"1+2i"`, func(e Encoder) { e.AddComplex128("k", 1+2i) }},
104119
{"complex64", `"k":"1+2i"`, func(e Encoder) { e.AddComplex64("k", 1+2i) }},
105120
{"duration", `"k":0.000000001`, func(e Encoder) { e.AddDuration("k", 1) }},
@@ -219,6 +234,8 @@ func TestJSONEncoderArrays(t *testing.T) {
219234
f func(ArrayEncoder)
220235
}{
221236
{"bool", `[true,true]`, func(e ArrayEncoder) { e.AppendBool(true) }},
237+
{"byteString", `["k","k"]`, func(e ArrayEncoder) { e.AppendByteString([]byte("k")) }},
238+
{"byteString", `["k\\","k\\"]`, func(e ArrayEncoder) { e.AppendByteString([]byte(`k\`)) }},
222239
{"complex128", `["1+2i","1+2i"]`, func(e ArrayEncoder) { e.AppendComplex128(1 + 2i) }},
223240
{"complex64", `["1+2i","1+2i"]`, func(e ArrayEncoder) { e.AppendComplex64(1 + 2i) }},
224241
{"durations", `[0.000000002,0.000000002]`, func(e ArrayEncoder) { e.AppendDuration(2) }},
@@ -385,22 +402,24 @@ func (nj noJSON) MarshalJSON() ([]byte, error) {
385402
return nil, errors.New("no")
386403
}
387404

388-
func zapEncodeString(s string) []byte {
389-
enc := &jsonEncoder{buf: bufferpool.Get()}
390-
// Escape and quote a string using our encoder.
391-
var ret []byte
392-
enc.safeAddString(s)
393-
ret = make([]byte, 0, enc.buf.Len()+2)
394-
ret = append(ret, '"')
395-
ret = append(ret, enc.buf.Bytes()...)
396-
ret = append(ret, '"')
397-
return ret
405+
func zapEncode(encode func(*jsonEncoder, string)) func(s string) []byte {
406+
return func(s string) []byte {
407+
enc := &jsonEncoder{buf: bufferpool.Get()}
408+
// Escape and quote a string using our encoder.
409+
var ret []byte
410+
encode(enc, s)
411+
ret = make([]byte, 0, enc.buf.Len()+2)
412+
ret = append(ret, '"')
413+
ret = append(ret, enc.buf.Bytes()...)
414+
ret = append(ret, '"')
415+
return ret
416+
}
398417
}
399418

400-
func roundTripsCorrectly(original string) bool {
419+
func roundTripsCorrectly(encode func(string) []byte, original string) bool {
401420
// Encode using our encoder, decode using the standard library, and assert
402421
// that we haven't lost any information.
403-
encoded := zapEncodeString(original)
422+
encoded := encode(original)
404423

405424
var decoded string
406425
err := json.Unmarshal(encoded, &decoded)
@@ -410,6 +429,18 @@ func roundTripsCorrectly(original string) bool {
410429
return original == decoded
411430
}
412431

432+
func roundTripsCorrectlyString(original string) bool {
433+
return roundTripsCorrectly(zapEncode((*jsonEncoder).safeAddString), original)
434+
}
435+
436+
func roundTripsCorrectlyByteString(original string) bool {
437+
return roundTripsCorrectly(
438+
zapEncode(func(enc *jsonEncoder, s string) {
439+
enc.safeAddByteString([]byte(s))
440+
}),
441+
original)
442+
}
443+
413444
type ASCII string
414445

415446
func (s ASCII) Generate(r *rand.Rand, size int) reflect.Value {
@@ -421,20 +452,24 @@ func (s ASCII) Generate(r *rand.Rand, size int) reflect.Value {
421452
return reflect.ValueOf(a)
422453
}
423454

424-
func asciiRoundTripsCorrectly(s ASCII) bool {
425-
return roundTripsCorrectly(string(s))
455+
func asciiRoundTripsCorrectlyString(s ASCII) bool {
456+
return roundTripsCorrectlyString(string(s))
457+
}
458+
459+
func asciiRoundTripsCorrectlyByteString(s ASCII) bool {
460+
return roundTripsCorrectlyByteString(string(s))
426461
}
427462

428463
func TestJSONQuick(t *testing.T) {
429-
// Test the full range of UTF-8 strings.
430-
err := quick.Check(roundTripsCorrectly, &quick.Config{MaxCountScale: 100.0})
431-
if err != nil {
432-
t.Error(err.Error())
464+
check := func(f interface{}) {
465+
err := quick.Check(f, &quick.Config{MaxCountScale: 100.0})
466+
assert.NoError(t, err)
433467
}
468+
// Test the full range of UTF-8 strings.
469+
check(roundTripsCorrectlyString)
470+
check(roundTripsCorrectlyByteString)
434471

435472
// Focus on ASCII strings.
436-
err = quick.Check(asciiRoundTripsCorrectly, &quick.Config{MaxCountScale: 100.0})
437-
if err != nil {
438-
t.Error(err.Error())
439-
}
473+
check(asciiRoundTripsCorrectlyString)
474+
check(asciiRoundTripsCorrectlyByteString)
440475
}

‎zapcore/memory_encoder.go

+4
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ func (m *MapObjectEncoder) AddObject(k string, v ObjectMarshaler) error {
5959
// AddBinary implements ObjectEncoder.
6060
func (m *MapObjectEncoder) AddBinary(k string, v []byte) { m.cur[k] = v }
6161

62+
// AddByteString implements ObjectEncoder.
63+
func (m *MapObjectEncoder) AddByteString(k string, v []byte) { m.cur[k] = v }
64+
6265
// AddBool implements ObjectEncoder.
6366
func (m *MapObjectEncoder) AddBool(k string, v bool) { m.cur[k] = v }
6467

@@ -155,6 +158,7 @@ func (s *sliceArrayEncoder) AppendReflected(v interface{}) error {
155158
}
156159

157160
func (s *sliceArrayEncoder) AppendBool(v bool) { s.elems = append(s.elems, v) }
161+
func (s *sliceArrayEncoder) AppendByteString(v []byte) { s.elems = append(s.elems, v) }
158162
func (s *sliceArrayEncoder) AppendComplex128(v complex128) { s.elems = append(s.elems, v) }
159163
func (s *sliceArrayEncoder) AppendComplex64(v complex64) { s.elems = append(s.elems, v) }
160164
func (s *sliceArrayEncoder) AppendDuration(v time.Duration) { s.elems = append(s.elems, v) }

0 commit comments

Comments
 (0)
Please sign in to comment.