Skip to content

Commit

Permalink
json, executor: fix json hash value conflict (pingcap#38095)
Browse files Browse the repository at this point in the history
  • Loading branch information
YangKeao authored Sep 22, 2022
1 parent cb414e7 commit d47840f
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 1 deletion.
18 changes: 17 additions & 1 deletion types/json_binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -527,35 +527,51 @@ func (bj *BinaryJSON) UnmarshalJSON(data []byte) error {

// HashValue converts certain JSON values for aggregate comparisons.
// For example int64(3) == float64(3.0)
// Other than the numeric condition, this function has to construct a bidirectional map between hash value
// and the original representation
func (bj BinaryJSON) HashValue(buf []byte) []byte {
switch bj.TypeCode {
case JSONTypeCodeInt64:
// Convert to a FLOAT if no precision is lost.
// In the future, it will be better to convert to a DECIMAL value instead
// See: https://github.com/pingcap/tidb/issues/9988
if bj.GetInt64() == int64(float64(bj.GetInt64())) {
buf = append(buf, JSONTypeCodeFloat64)
buf = appendBinaryFloat64(buf, float64(bj.GetInt64()))
} else {
buf = append(buf, bj.TypeCode)
buf = append(buf, bj.Value...)
}
case JSONTypeCodeUint64:
if bj.GetUint64() == uint64(float64(bj.GetUint64())) {
buf = append(buf, JSONTypeCodeFloat64)
buf = appendBinaryFloat64(buf, float64(bj.GetUint64()))
} else {
buf = append(buf, bj.TypeCode)
buf = append(buf, bj.Value...)
}
case JSONTypeCodeArray:
// this hash value is bidirectional, because you can get the element one-by-one
// and you know the end of it, as the elemCount is also appended here
buf = append(buf, bj.TypeCode)
elemCount := int(jsonEndian.Uint32(bj.Value))
buf = append(buf, bj.Value[0:dataSizeOff]...)
for i := 0; i < elemCount; i++ {
buf = bj.arrayGetElem(i).HashValue(buf)
}
case JSONTypeCodeObject:
// this hash value is bidirectional, because you can get the key using the json
// string format, and get the value accordingly.
buf = append(buf, bj.TypeCode)
elemCount := int(jsonEndian.Uint32(bj.Value))
buf = append(buf, bj.Value[0:dataSizeOff]...)
for i := 0; i < elemCount; i++ {
buf = append(buf, bj.objectGetKey(i)...)
keyJSON := CreateBinaryJSON(string(bj.objectGetKey(i)))
buf = append(buf, keyJSON.Value...)
buf = bj.objectGetVal(i).HashValue(buf)
}
default:
buf = append(buf, bj.TypeCode)
buf = append(buf, bj.Value...)
}
return buf
Expand Down
22 changes: 22 additions & 0 deletions types/json_binary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -688,3 +688,25 @@ func TestBinaryJSONOpaque(t *testing.T) {
require.Equal(t, string(buf), test.expectedOutput)
}
}

// TestHashValue verifies that structurally distinct JSON values are mapped to
// distinct hash keys by BinaryJSON.HashValue.
func TestHashValue(t *testing.T) {
	// Every document listed here is expected to produce its own hash key.
	docs := []BinaryJSON{
		CreateBinaryJSON([]interface{}{}),
		CreateBinaryJSON([]interface{}{CreateBinaryJSON([]interface{}{})}),
		CreateBinaryJSON([]interface{}{CreateBinaryJSON([]interface{}{CreateBinaryJSON([]interface{}{})})}),
		CreateBinaryJSON(map[string]interface{}{}),
		CreateBinaryJSON([]interface{}{CreateBinaryJSON(false)}),
		CreateBinaryJSON([]interface{}{CreateBinaryJSON(true)}),
		CreateBinaryJSON([]interface{}{CreateBinaryJSON(nil)}),
	}

	// TODO: use a better way to count distinct JSON values.
	// Collect the hash keys in a set; colliding keys collapse into one entry.
	distinct := make(map[string]struct{})
	for idx := range docs {
		key := string(docs[idx].HashValue([]byte{}))
		distinct[key] = struct{}{}
	}

	// No collisions means the set has as many entries as there are documents.
	require.Equal(t, len(docs), len(distinct))
}

0 comments on commit d47840f

Please sign in to comment.