Skip to content

Commit

Permalink
Refactoring rtrim (matrixorigin#2922)
Browse files Browse the repository at this point in the history
  • Loading branch information
daviszhen authored Jun 11, 2022
1 parent b54893b commit 8fbf5ac
Show file tree
Hide file tree
Showing 6 changed files with 310 additions and 19 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/BurntSushi/toml v1.0.0
github.com/FastFilter/xorfilter v0.1.1
github.com/RoaringBitmap/roaring v0.9.4
github.com/aws/aws-sdk-go-v2 v1.16.5
github.com/aws/aws-sdk-go-v2/credentials v1.12.5
github.com/aws/aws-sdk-go-v2/service/s3 v1.26.11
github.com/aws/smithy-go v1.11.3
Expand Down Expand Up @@ -38,7 +39,6 @@ require (
)

require (
github.com/aws/aws-sdk-go-v2 v1.16.5 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.2 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.12 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.6 // indirect
Expand Down
16 changes: 14 additions & 2 deletions pkg/sql/plan2/function/builtin/unary/rtrim.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,23 @@ import (
)

func Rtrim(vectors []*vector.Vector, proc *process.Process) (*vector.Vector, error) {
if len(vectors) == 0 || proc == nil {
return nil, errorParameterIsInvalid
}
if vectors[0] == nil {
return nil, errorParameterIsInvalid
}
inputVector := vectors[0]
resultType := types.Type{Oid: types.T_varchar, Size: 24}

if inputVector.IsScalar() {
if inputVector.ConstVectorIsNull() {
return proc.AllocScalarNullVector(resultType), nil
}
inputValues := inputVector.Col.(*types.Bytes)
inputValues, ok := inputVector.Col.(*types.Bytes)
if !ok {
return nil, errorParameterIsNotString
}
// totalCount - spaceCount is the total number of bytes needed for the rtrim-ed string
spaceCount := rtrim.CountSpacesFromRight(inputValues)
totalCount := int32(len(inputValues.Data))
Expand All @@ -43,7 +52,10 @@ func Rtrim(vectors []*vector.Vector, proc *process.Process) (*vector.Vector, err
vector.SetCol(resultVector, rtrim.RtrimChar(inputValues, resultValues))
return resultVector, nil
} else {
inputValues := inputVector.Col.(*types.Bytes)
inputValues, ok := inputVector.Col.(*types.Bytes)
if !ok {
return nil, errorParameterIsNotString
}
// totalCount - spaceCount is the total number of bytes needed for the rtrim-ed string
spaceCount := rtrim.CountSpacesFromRight(inputValues)
totalCount := int32(len(inputValues.Data))
Expand Down
272 changes: 272 additions & 0 deletions pkg/sql/plan2/function/builtin/unary/rtrim_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
package unary

import (
"github.com/matrixorigin/matrixone/pkg/container/vector"
"github.com/matrixorigin/matrixone/pkg/sql/testutil"
"github.com/smartystreets/goconvey/convey"
"testing"
)

// TestRtrim exercises Rtrim through five goconvey scenarios: a varchar vector
// of strings with trailing spaces, a scalar null, int8 inputs that must be
// rejected, invalid argument combinations, and a scalar varchar.
func TestRtrim(t *testing.T) {
// "right cases": every input string is paired with the result Rtrim is
// expected to produce for it.
convey.Convey("right cases", t, func() {
// kase pairs one input string (s) with its expected result (want).
type kase struct {
s string
want string
}

kases := []kase{
{
"barbar ",
"barbar",
},
{
"MySQL",
"MySQL",
},
{
"a",
"a",
},
// Leading spaces are expected to survive; only the right side is trimmed.
{
" 20.06 ",
" 20.06",
},
{
" right ",
" right",
},
{
"你好 ",
"你好",
},
{
"2017-06-15 ",
"2017-06-15",
},
{
"2017-06-15 ",
"2017-06-15",
},
}

// Flatten the table into two parallel slices: inStrs[i] is the input and
// outStrs[i] the expected result.
var inStrs []string
var outStrs []string
for _, k := range kases {
inStrs = append(inStrs, k.s)
outStrs = append(outStrs, k.want)
}

// Additional inputs (multi-byte text, dates, punctuation). This slice is
// positionally paired with extraOutStrs below, so both must keep the same
// length and order.
extraInStrs := []string{
"アイウエオ",
"アイウエオ ",
"アイウエオ ",
"アイウエオ ",
"アイウエオ ",
"アイウエオ  ",
"アイウエオ   ",
"あいうえお",
"あいうえお ",
"あいうえお ",
"あいうえお ",
"あいうえお ",
"あいうえお  ",
"あいうえお   ",
"龔龖龗龞龡",
"龔龖龗龞龡 ",
"龔龖龗龞龡 ",
"龔龖龗龞龡 ",
"龔龖龗龞龡 ",
"龔龖龗龞龡  ",
"龔龖龗龞龡   ",
"アイウエオ",
"アイウエオ ",
"アイウエオ ",
"アイウエオ ",
"アイウエオ ",
"アイウエオ  ",
"アイウエオ   ",
"あいうえお",
"あいうえお ",
"あいうえお ",
"あいうえお ",
"あいうえお ",
"あいうえお  ",
"あいうえお   ",
"龔龖龗龞龡",
"龔龖龗龞龡 ",
"龔龖龗龞龡 ",
"龔龖龗龞龡 ",
"龔龖龗龞龡 ",
"龔龖龗龞龡  ",
"龔龖龗龞龡   ",
"アイウエオ",
"アイウエオ ",
"アイウエオ ",
"アイウエオ ",
"アイウエオ ",
"アイウエオ  ",
"アイウエオ   ",
"あいうえお",
"あいうえお ",
"あいうえお ",
"あいうえお ",
"あいうえお ",
"あいうえお  ",
"あいうえお   ",
"龔龖龗龞龡",
"龔龖龗龞龡 ",
"龔龖龗龞龡 ",
"龔龖龗龞龡 ",
"龔龖龗龞龡 ",
"龔龖龗龞龡  ",
"龔龖龗龞龡   ",
"2017-06-15 ",
"2019-06-25 ",
" 2019-06-25 ",
" 2019-06-25 ",
" 2012-10-12 ",
" 2004-04-24. ",
" 2008-12-04. ",
" 2012-03-23. ",
" 2013-04-30 ",
" 1994-10-04 ",
" 2018-06-04 ",
" 2012-10-12 ",
"1241241^&@%#^*^!@#&*(!& ",
" 123 ",
}
// Expected results for extraInStrs, element by element.
extraOutStrs := []string{
"アイウエオ",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"あいうえお",
"あいうえお",
"あいうえお",
"あいうえお",
"あいうえお",
"あいうえお",
"あいうえお",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"あいうえお",
"あいうえお",
"あいうえお",
"あいうえお",
"あいうえお",
"あいうえお",
"あいうえお",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"アイウエオ",
"あいうえお",
"あいうえお",
"あいうえお",
"あいうえお",
"あいうえお",
"あいうえお",
"あいうえお",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"龔龖龗龞龡",
"2017-06-15",
"2019-06-25",
" 2019-06-25",
" 2019-06-25",
" 2012-10-12",
" 2004-04-24.",
" 2008-12-04.",
" 2012-03-23.",
" 2013-04-30",
" 1994-10-04",
" 2018-06-04",
" 2012-10-12",
"1241241^&@%#^*^!@#&*(!&",
" 123",
}

inStrs = append(inStrs, extraInStrs...)
outStrs = append(outStrs, extraOutStrs...)

// Build the input and expected vectors, run Rtrim once over the whole
// batch, and compare the result with the expectation.
// NOTE(review): the nil second argument presumably means "no NULL rows" —
// confirm against testutil.MakeVarcharVector.
ivec := testutil.MakeVarcharVector(inStrs, nil)
wantVec := testutil.MakeVarcharVector(outStrs, nil)
proc := testutil.NewProc()
retVec, err := Rtrim([]*vector.Vector{ivec}, proc)
convey.So(err, convey.ShouldBeNil)
ret := testutil.CompareVectors(wantVec, retVec)
convey.So(ret, convey.ShouldBeTrue)
})
// "null": a scalar null input must yield a scalar null result without error.
// NOTE(review): the argument 10 is presumably the vector length — confirm
// against testutil.MakeScalarNull.
convey.Convey("null", t, func() {
ivec := testutil.MakeScalarNull(10)
wantvec := testutil.MakeScalarNull(10)
proc := testutil.NewProc()
ovec, err := Rtrim([]*vector.Vector{ivec}, proc)
convey.So(err, convey.ShouldBeNil)
ret := testutil.CompareVectors(wantvec, ovec)
convey.So(ret, convey.ShouldBeTrue)

})
// "tinyint": int8 input must produce an error, both as a regular vector and
// as a scalar vector.
convey.Convey("tinyint", t, func() {
ivec := testutil.MakeInt8Vector([]int8{
1, 71, 1, 1}, nil)
proc := testutil.NewProc()
_, err := Rtrim([]*vector.Vector{ivec}, proc)
convey.So(err, convey.ShouldNotBeNil)

ivec2 := testutil.MakeScalarInt8(1, 10)
_, err = Rtrim([]*vector.Vector{ivec2}, proc)
convey.So(err, convey.ShouldNotBeNil)
})

// "nil": invalid arguments must produce an error — an empty vector list, an
// empty list together with a nil process, and a list whose only element is nil.
convey.Convey("nil", t, func() {
proc := testutil.NewProc()
_, err := Rtrim([]*vector.Vector{}, proc)
convey.So(err, convey.ShouldNotBeNil)

_, err = Rtrim([]*vector.Vector{}, nil)
convey.So(err, convey.ShouldNotBeNil)

_, err = Rtrim([]*vector.Vector{nil}, proc)
convey.So(err, convey.ShouldNotBeNil)
})

// "scalar": a scalar varchar input must come back as a scalar varchar with
// the trailing space removed.
// NOTE(review): the argument 5 is presumably the vector length — confirm
// against testutil.MakeScalarVarchar.
convey.Convey("scalar", t, func() {
ivec := testutil.MakeScalarVarchar("abc ", 5)
wantvec := testutil.MakeScalarVarchar("abc", 5)
proc := testutil.NewProc()
ovec, err := Rtrim([]*vector.Vector{ivec}, proc)
convey.So(err, convey.ShouldBeNil)
ret := testutil.CompareVectors(wantvec, ovec)
convey.So(ret, convey.ShouldBeTrue)
})
}
2 changes: 1 addition & 1 deletion pkg/sql/testutil/testutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func makeStringVector(values []string, nsp []uint64, typ types.Type) *vector.Vec
}

// NewProc builds a *process.Process by handing process.New an mheap created
// from guest.New and host.New, both given the same size argument.
// NOTE(review): this listing is a rendered diff ("1 addition & 1 deletion"),
// so two versions of the return statement appear back to back. Presumably the
// 1<<10 line is the removed one and the 1<<20 line is its replacement —
// confirm against the actual file.
func NewProc() *process.Process {
return process.New(mheap.New(guest.New(1<<10, host.New(1<<10))))
return process.New(mheap.New(guest.New(1<<20, host.New(1<<20))))
}

func MakeScalarNull(length int) *vector.Vector {
Expand Down
28 changes: 13 additions & 15 deletions pkg/vectorize/rtrim/rtrim.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ func CountSpacesFromRight(xs *types.Bytes) int32 {
}

cursor := offset + xs.Lengths[i] - 1
for ; cursor >= offset && xs.Data[cursor] == ' '; cursor-- {
//cursor >= offset >=0
for ; cursor > offset && xs.Data[cursor] == ' '; cursor-- {
spaceCount++
}

if cursor == 0 {
// cursor is uint32, it will be 2 ** 32 -1 after cursor--
break
}
if cursor == offset && xs.Data[cursor] == ' ' {
spaceCount++
}
}

Expand All @@ -64,22 +64,20 @@ func rtrim(xs *types.Bytes, rs *types.Bytes) *types.Bytes {
}

cursor := offset + xs.Lengths[i] - 1
//cursor >= offset >=0
// ignore the trailing spaces
for ; cursor >= offset && xs.Data[cursor] == ' '; cursor-- {
if cursor == 0 {
break
}

for ; cursor > offset && xs.Data[cursor] == ' '; cursor-- {
continue
}

// copy the non-space characters
length := cursor - offset + 1
if resultCursor+length > uint32(len(rs.Data)) {
copy(rs.Data[resultCursor:], xs.Data[offset:offset+length])
} else {
length := uint32(0)
//cursor == offset :all spaces
if cursor > offset {
// copy the non-space characters
length = cursor - offset + 1
copy(rs.Data[resultCursor:resultCursor+length], xs.Data[offset:offset+length])
}

rs.Lengths[i] = length
rs.Offsets[i] = resultCursor
resultCursor += length
Expand Down
Loading

0 comments on commit 8fbf5ac

Please sign in to comment.