Skip to content

Commit

Permalink
update reverse function (matrixorigin#2933)
Browse files Browse the repository at this point in the history
* update reverse

* fix multiple reverse

* fix same input in plan1

Co-authored-by: Jin Hai <[email protected]>
  • Loading branch information
daviszhen and JinHai-CN authored Jun 13, 2022
1 parent 976d125 commit aa6a7b3
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 10 deletions.
33 changes: 29 additions & 4 deletions pkg/sql/plan2/function/builtin/unary/reverse.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,31 +15,52 @@
package unary

import (
"errors"
"github.com/matrixorigin/matrixone/pkg/container/nulls"
"github.com/matrixorigin/matrixone/pkg/container/types"
"github.com/matrixorigin/matrixone/pkg/container/vector"
"github.com/matrixorigin/matrixone/pkg/vectorize/reverse"
"github.com/matrixorigin/matrixone/pkg/vm/process"
)

// errorReverseStringFailed is returned by Reverse when reverse.ReverseChar
// yields a nil result instead of the reversed strings.
var errorReverseStringFailed = errors.New("errors happened in reversing string")

func Reverse(vectors []*vector.Vector, proc *process.Process) (*vector.Vector, error) {
if len(vectors) == 0 || proc == nil {
return nil, errorParameterIsInvalid
}
if vectors[0] == nil {
return nil, errorParameterIsInvalid
}
inputVector := vectors[0]
resultType := types.Type{Oid: types.T_varchar, Size: 24}
if inputVector.IsScalar() {
if inputVector.ConstVectorIsNull() {
return proc.AllocScalarNullVector(resultType), nil
}
inputValues := inputVector.Col.(*types.Bytes)
inputValues, ok := inputVector.Col.(*types.Bytes)
if !ok {
return nil, errorParameterIsNotString
}
resultVector := vector.NewConst(resultType)
resultValues := &types.Bytes{
Data: make([]byte, len(inputValues.Data)),
Offsets: make([]uint32, len(inputValues.Offsets)),
Lengths: make([]uint32, len(inputValues.Lengths)),
}
vector.SetCol(resultVector, reverse.ReverseChar(inputValues, resultValues))
res := reverse.ReverseChar(inputValues, resultValues)
if res == nil {
return nil, errorReverseStringFailed
}
vector.SetCol(resultVector, res)
return resultVector, nil
} else {
inputValues := inputVector.Col.(*types.Bytes)
inputValues, ok := inputVector.Col.(*types.Bytes)
if !ok {
return nil, errorParameterIsNotString
}
resultVector, err := proc.AllocVector(resultType, int64(len(inputValues.Data)))
if err != nil {
return nil, err
Expand All @@ -50,7 +71,11 @@ func Reverse(vectors []*vector.Vector, proc *process.Process) (*vector.Vector, e
Lengths: make([]uint32, len(inputValues.Lengths)),
}
nulls.Set(resultVector.Nsp, inputVector.Nsp)
vector.SetCol(resultVector, reverse.ReverseChar(inputValues, resultValues))
res := reverse.ReverseChar(inputValues, resultValues)
if res == nil {
return nil, errorReverseStringFailed
}
vector.SetCol(resultVector, res)
return resultVector, nil
}
}
92 changes: 92 additions & 0 deletions pkg/sql/plan2/function/builtin/unary/reverse_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package unary

import (
"github.com/matrixorigin/matrixone/pkg/container/vector"
"github.com/matrixorigin/matrixone/pkg/sql/testutil"
"github.com/smartystreets/goconvey/convey"
"testing"
)

// TestReverse exercises Reverse with valid string input, a scalar NULL,
// non-string input, and missing/nil arguments.
func TestReverse(t *testing.T) {
	// Each pair holds an input string and the reversal expected for it.
	pairs := []struct {
		in   string
		want string
	}{
		{"abc", "cba"},
		{"abcd", "dcba"},
		{"hello", "olleh"},
		{"アイウエオ", "オエウイア"},
		{"あいうえお", "おえういあ"},
		{"龔龖龗龞龡", "龡龞龗龖龔"},
		{"你好", "好你"},
		{"再 见", "见 再"},
		{"bcd", "dcb"},
		{"def", "fed"},
		{"xyz", "zyx"},
		{"1a1", "1a1"},
		{"2012", "2102"},
		{"@($)@($#)_@(#", "#(@_)#$(@)$(@"},
		{"2023-04-24", "42-40-3202"},
		{"10:03:23.021412", "214120.32:30:01"},
		{"sdfad ", " dafds"},
	}

	convey.Convey("right", t, func() {
		sources := make([]string, 0, len(pairs))
		expected := make([]string, 0, len(pairs))
		for _, p := range pairs {
			sources = append(sources, p.in)
			expected = append(expected, p.want)
		}

		inVec := testutil.MakeVarcharVector(sources, nil)
		expectVec := testutil.MakeVarcharVector(expected, nil)
		process := testutil.NewProc()

		got, err := Reverse([]*vector.Vector{inVec}, process)
		convey.So(err, convey.ShouldBeNil)
		convey.So(testutil.CompareVectors(expectVec, got), convey.ShouldBeTrue)
	})

	// A scalar NULL input must compare equal to a scalar NULL result.
	convey.Convey("null", t, func() {
		nullIn := testutil.MakeScalarNull(10)
		nullWant := testutil.MakeScalarNull(10)
		process := testutil.NewProc()

		got, err := Reverse([]*vector.Vector{nullIn}, process)
		convey.So(err, convey.ShouldBeNil)
		convey.So(testutil.CompareVectors(nullWant, got), convey.ShouldBeTrue)
	})

	// Non-string input (an int8 vector and an int8 scalar) must be rejected.
	convey.Convey("tinyint", t, func() {
		process := testutil.NewProc()

		int8Vec := testutil.MakeInt8Vector([]int8{1, 71, 1, 1}, nil)
		_, err := Reverse([]*vector.Vector{int8Vec}, process)
		convey.So(err, convey.ShouldNotBeNil)

		int8Scalar := testutil.MakeScalarInt8(1, 10)
		_, err = Reverse([]*vector.Vector{int8Scalar}, process)
		convey.So(err, convey.ShouldNotBeNil)
	})

	// An empty argument list, a nil process, and a nil vector must all
	// return an error.
	convey.Convey("nil", t, func() {
		process := testutil.NewProc()
		noArgs := []*vector.Vector{}

		_, err := Reverse(noArgs, process)
		convey.So(err, convey.ShouldNotBeNil)

		_, err = Reverse([]*vector.Vector{}, nil)
		convey.So(err, convey.ShouldNotBeNil)

		_, err = Reverse([]*vector.Vector{nil}, process)
		convey.So(err, convey.ShouldNotBeNil)
	})
}
49 changes: 43 additions & 6 deletions pkg/vectorize/reverse/reverse.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package reverse

import (
"github.com/matrixorigin/matrixone/pkg/container/types"
"unicode/utf8"
)

var (
Expand All @@ -30,18 +31,54 @@ func init() {
func reverse(xs *types.Bytes, rs *types.Bytes) *types.Bytes {
var retCursor uint32

//in plan1, sometime, xs and rs are same...
isSame := xs == rs
var tmp []byte
if isSame {
maxSpaceLen := uint32(0)
for _, length := range xs.Lengths {
if length > maxSpaceLen {
maxSpaceLen = length
}
}
tmp = make([]byte, maxSpaceLen)
}

for idx, offset := range xs.Offsets {
cursor := offset
curLen := xs.Lengths[idx]

// handle with unicode
unicodes := []rune(string(xs.Data[cursor : cursor+curLen]))
for i, j := 0, len(unicodes)-1; i < j; i, j = i+1, j-1 {
unicodes[i], unicodes[j] = unicodes[j], unicodes[i]
}
if curLen != 0 {
//reverse
input := xs.Data[cursor : cursor+curLen]
var output []byte
target := curLen
if isSame {
output = tmp
target = uint32(len(tmp))
} else {
output = rs.Data[cursor : cursor+curLen]
target = curLen
}
source := 0
for source < len(input) {
r, readed := utf8.DecodeRune(input[source:])
if r == utf8.RuneError {
return nil
}

for i, b := range []byte(string(unicodes)) {
rs.Data[retCursor+uint32(i)] = b
p := target - uint32(readed)
w := utf8.EncodeRune(output[p:], r)
if w == utf8.RuneError {
return nil
}
source += readed
target = p
}
if isSame {
copy(rs.Data[cursor:cursor+curLen], tmp[target:])
}
}

retCursor += curLen
Expand Down
21 changes: 21 additions & 0 deletions pkg/vectorize/reverse/reverse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,27 @@ func TestReverse(t *testing.T) {
Offsets: []uint32{0},
},
},
{
name: "three strings",
args: &types.Bytes{
Data: []byte("Hello 世界"),
Lengths: []uint32{
uint32(len("Hello")),
uint32(len(" ")),
uint32(len("世界")),
},
Offsets: []uint32{0, 5, 6},
},
want: &types.Bytes{
Data: []byte("olleH 界世"),
Lengths: []uint32{
uint32(len("olleH")),
uint32(len(" ")),
uint32(len("界世")),
},
Offsets: []uint32{0, 5, 6},
},
},
}

for _, c := range cases {
Expand Down

0 comments on commit aa6a7b3

Please sign in to comment.