Skip to content

Commit

Permalink
refactoring type
Browse files Browse the repository at this point in the history
  • Loading branch information
xitongsys committed Nov 30, 2020
1 parent d2ea22c commit a5e3176
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 479 deletions.
157 changes: 90 additions & 67 deletions common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ type Tag struct {
KeyType string
ValueType string

BaseType string
KeyBaseType string
ValueBaseType string
ConvertedType string
KeyConvertedType string
ValueConvertedType string

Length int32
KeyLength int32
Expand All @@ -36,6 +36,10 @@ type Tag struct {
KeyPrecision int32
ValuePrecision int32

IsAdjustedToUTC bool
KeyIsAdjustedToUTC bool
ValueIsAdjustedToUTC bool

FieldID int32
KeyFieldID int32
ValueFieldID int32
Expand All @@ -48,17 +52,13 @@ type Tag struct {
KeyOmitStats bool
ValueOmitStats bool

IsAdjustedToUTC bool
KeyIsAdjustedToUTC bool
ValueIsAdjustedToUTC bool

RepetitionType parquet.FieldRepetitionType
KeyRepetitionType parquet.FieldRepetitionType
ValueRepetitionType parquet.FieldRepetitionType

LogicalTypeFields map[string]string
KeyLogicalTypeFields map[string]string
ValueLogicalTypeFields map[string]string
LogicalTypeFields map[string]string
KeyLogicalTypeFields map[string]string
ValueLogicalTypeFields map[string]string
}

func NewTag() *Tag {
Expand Down Expand Up @@ -109,12 +109,12 @@ func StringToTag(tag string) *Tag {
mp.KeyType = val
case "valuetype":
mp.ValueType = val
case "basetype":
mp.BaseType = val
case "keybasetype":
mp.KeyBaseType = val
case "valuebasetype":
mp.ValueBaseType = val
case "convertedtype":
mp.ConvertedType = val
case "keyconvertedtype":
mp.KeyConvertedType = val
case "valueconvertedtype":
mp.ValueConvertedType = val
case "length":
mp.Length = valInt32()
case "keylength":
Expand All @@ -139,6 +139,12 @@ func StringToTag(tag string) *Tag {
mp.KeyFieldID = valInt32()
case "valuefieldid":
mp.ValueFieldID = valInt32()
case "isadjustedtoutc":
mp.IsAdjustedToUTC = valBoolean()
case "keyisadjustedtoutc":
mp.KeyIsAdjustedToUTC = valBoolean()
case "valueisadjustedtoutc":
mp.ValueIsAdjustedToUTC = valBoolean()
case "name":
if mp.InName == "" {
mp.InName = StringToVariableName(val)
Expand All @@ -152,12 +158,6 @@ func StringToTag(tag string) *Tag {
mp.KeyOmitStats = valBoolean()
case "valueomitstats":
mp.ValueOmitStats = valBoolean()
case "isadjustedtoutc":
mp.IsAdjustedToUTC = valBoolean()
case "keyisadjustedtoutc":
mp.KeyIsAdjustedToUTC = valBoolean()
case "valueisadjustedtoutc":
mp.ValueIsAdjustedToUTC = valBoolean()
case "repetitiontype":
switch strings.ToLower(val) {
case "repeated":
Expand Down Expand Up @@ -267,30 +267,15 @@ func NewSchemaElementFromTagMap(info *Tag) *parquet.SchemaElement {
schema.RepetitionType = &info.RepetitionType
schema.NumChildren = nil

typeName := info.Type
if t, err := parquet.TypeFromString(typeName); err == nil {
if t, err := parquet.TypeFromString(info.Type); err == nil {
schema.Type = &t

} else {
ct, _ := parquet.ConvertedTypeFromString(typeName)
panic(err)
}

if ct, err := parquet.ConvertedTypeFromString(info.ConvertedType); err == nil {
schema.ConvertedType = &ct
if typeName == "INT_8" || typeName == "INT_16" || typeName == "INT_32" ||
typeName == "UINT_8" || typeName == "UINT_16" || typeName == "UINT_32" ||
typeName == "DATE" || typeName == "TIME_MILLIS" {
schema.Type = parquet.TypePtr(parquet.Type_INT32)
} else if typeName == "INT_64" || typeName == "UINT_64" ||
typeName == "TIME_MICROS" || typeName == "TIMESTAMP_MICROS" || typeName == "TIMESTAMP_MILLIS" {
schema.Type = parquet.TypePtr(parquet.Type_INT64)
} else if typeName == "UTF8" || typeName == "JSON" || typeName == "BSON" {
schema.Type = parquet.TypePtr(parquet.Type_BYTE_ARRAY)
} else if typeName == "INTERVAL" {
schema.Type = parquet.TypePtr(parquet.Type_FIXED_LEN_BYTE_ARRAY)
var ln int32 = 12
schema.TypeLength = &ln
} else if typeName == "DECIMAL" {
t, _ = parquet.TypeFromString(info.BaseType)
schema.Type = &t
}
}

var logicalType *parquet.LogicalType
Expand Down Expand Up @@ -320,7 +305,7 @@ func NewLogicalTypeFromFieldsMap(mp map[string]string) *parquet.LogicalType {
logicalType.LIST = parquet.NewListType()
case "ENUM":
logicalType.ENUM = parquet.NewEnumType()

case "DECIMAL":
logicalType.DECIMAL = parquet.NewDecimalType()
logicalType.DECIMAL.Precision = Str2Int32(mp["logicaltype.precision"])
Expand Down Expand Up @@ -385,7 +370,6 @@ func NewLogicalTypeFromFieldsMap(mp map[string]string) *parquet.LogicalType {
}
}


func NewLogicalTypeFromConvertedType(schemaElement *parquet.SchemaElement, info *Tag) *parquet.LogicalType {
_, ct := schemaElement.Type, schemaElement.ConvertedType
if ct == nil {
Expand Down Expand Up @@ -497,7 +481,8 @@ func GetKeyTagMap(src *Tag) *Tag {
res.InName = "Key"
res.ExName = "key"
res.Type = src.KeyType
res.BaseType = src.KeyBaseType
res.ConvertedType = src.KeyConvertedType
res.IsAdjustedToUTC = src.KeyIsAdjustedToUTC
res.Length = src.KeyLength
res.Scale = src.KeyScale
res.Precision = src.KeyPrecision
Expand All @@ -514,7 +499,8 @@ func GetValueTagMap(src *Tag) *Tag {
res.InName = "Value"
res.ExName = "value"
res.Type = src.ValueType
res.BaseType = src.ValueBaseType
res.ConvertedType = src.ValueConvertedType
res.IsAdjustedToUTC = src.ValueIsAdjustedToUTC
res.Length = src.ValueLength
res.Scale = src.ValueScale
res.Precision = src.ValuePrecision
Expand Down Expand Up @@ -622,8 +608,8 @@ func CmpIntBinary(as string, bs string, order string, signed bool) bool {
return false
}

func FindFuncTable(pT *parquet.Type, cT *parquet.ConvertedType) FuncTable {
if cT == nil {
func FindFuncTable(pT *parquet.Type, cT *parquet.ConvertedType, logT *parquet.LogicalType) FuncTable {
if cT == nil && logT == nil {
if *pT == parquet.Type_BOOLEAN {
return boolFuncTable{}
} else if *pT == parquet.Type_INT32 {
Expand All @@ -643,29 +629,66 @@ func FindFuncTable(pT *parquet.Type, cT *parquet.ConvertedType) FuncTable {
}
}

if *cT == parquet.ConvertedType_UTF8 || *cT == parquet.ConvertedType_BSON || *cT == parquet.ConvertedType_JSON {
return stringFuncTable{}
} else if *cT == parquet.ConvertedType_INT_8 || *cT == parquet.ConvertedType_INT_16 || *cT == parquet.ConvertedType_INT_32 ||
*cT == parquet.ConvertedType_DATE || *cT == parquet.ConvertedType_TIME_MILLIS {
return int32FuncTable{}
} else if *cT == parquet.ConvertedType_UINT_8 || *cT == parquet.ConvertedType_UINT_16 || *cT == parquet.ConvertedType_UINT_32 {
return uint32FuncTable{}
} else if *cT == parquet.ConvertedType_INT_64 || *cT == parquet.ConvertedType_TIME_MICROS ||
*cT == parquet.ConvertedType_TIMESTAMP_MILLIS || *cT == parquet.ConvertedType_TIMESTAMP_MICROS {
return int64FuncTable{}
} else if *cT == parquet.ConvertedType_UINT_64 {
return uint64FuncTable{}
} else if *cT == parquet.ConvertedType_INTERVAL {
return intervalFuncTable{}
} else if *cT == parquet.ConvertedType_DECIMAL {
if *pT == parquet.Type_BYTE_ARRAY || *pT == parquet.Type_FIXED_LEN_BYTE_ARRAY {
return decimalStringFuncTable{}
} else if *pT == parquet.Type_INT32 {
if cT != nil {
if *cT == parquet.ConvertedType_UTF8 || *cT == parquet.ConvertedType_BSON || *cT == parquet.ConvertedType_JSON {
return stringFuncTable{}
} else if *cT == parquet.ConvertedType_INT_8 || *cT == parquet.ConvertedType_INT_16 || *cT == parquet.ConvertedType_INT_32 ||
*cT == parquet.ConvertedType_DATE || *cT == parquet.ConvertedType_TIME_MILLIS {
return int32FuncTable{}
} else if *pT == parquet.Type_INT64 {
} else if *cT == parquet.ConvertedType_UINT_8 || *cT == parquet.ConvertedType_UINT_16 || *cT == parquet.ConvertedType_UINT_32 {
return uint32FuncTable{}
} else if *cT == parquet.ConvertedType_INT_64 || *cT == parquet.ConvertedType_TIME_MICROS ||
*cT == parquet.ConvertedType_TIMESTAMP_MILLIS || *cT == parquet.ConvertedType_TIMESTAMP_MICROS {
return int64FuncTable{}
} else if *cT == parquet.ConvertedType_UINT_64 {
return uint64FuncTable{}
} else if *cT == parquet.ConvertedType_INTERVAL {
return intervalFuncTable{}
} else if *cT == parquet.ConvertedType_DECIMAL {
if *pT == parquet.Type_BYTE_ARRAY || *pT == parquet.Type_FIXED_LEN_BYTE_ARRAY {
return decimalStringFuncTable{}
} else if *pT == parquet.Type_INT32 {
return int32FuncTable{}
} else if *pT == parquet.Type_INT64 {
return int64FuncTable{}
}
}
}

if logT != nil {
if logT.TIME != nil {
return FindFuncTable(pT, nil, nil)

} else if logT.DATE != nil {
return int32FuncTable{}

} else if logT.INTEGER != nil {
if logT.INTEGER.IsSigned {
return FindFuncTable(pT, nil, nil)

} else {
if *pT == parquet.Type_INT32 {
return uint32FuncTable{}

} else if *pT == parquet.Type_INT64 {
return uint64FuncTable{}
}
}

} else if logT.DECIMAL != nil {
if *pT == parquet.Type_BYTE_ARRAY || *pT == parquet.Type_FIXED_LEN_BYTE_ARRAY {
return decimalStringFuncTable{}
} else if *pT == parquet.Type_INT32 {
return int32FuncTable{}
} else if *pT == parquet.Type_INT64 {
return int64FuncTable{}
}

} else if logT.BSON != nil || logT.JSON != nil || logT.STRING != nil || logT.UUID != nil {
return stringFuncTable{}
}
}

panic("No known func table in FindFuncTable")
}

Expand Down
66 changes: 33 additions & 33 deletions example/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,36 +19,36 @@ type TypeList struct {
ByteArray string `parquet:"name=bytearray, type=BYTE_ARRAY"`
FixedLenByteArray string `parquet:"name=FixedLenByteArray, type=FIXED_LEN_BYTE_ARRAY, length=10"`

Utf8 string `parquet:"name=utf8, type=UTF8, encoding=PLAIN_DICTIONARY"`
Int_8 int8 `parquet:"name=int_8, type=INT_8"`
Int_16 int16 `parquet:"name=int_16, type=INT_16"`
Int_32 int32 `parquet:"name=int_32, type=INT_32"`
Int_64 int64 `parquet:"name=int_64, type=INT_64"`
Uint_8 uint8 `parquet:"name=uint_8, type=UINT_8"`
Uint_16 uint16 `parquet:"name=uint_16, type=UINT_16"`
Uint_32 uint32 `parquet:"name=uint_32, type=UINT_32"`
Uint_64 uint64 `parquet:"name=uint_64, type=UINT_64"`
Date int32 `parquet:"name=date, type=DATE"`
Date2 int32 `parquet:"name=date2, type=DATE, logicaltype=DATE"`
TimeMillis int32 `parquet:"name=timemillis, type=TIME_MILLIS"`
TimeMillis2 int32 `parquet:"name=timemillis2, type=TIME_MILLIS, logicaltype=TIME, logicaltype.isadjustedtoutc=true, logicaltype.unit=MILLIS"`
TimeMicros int64 `parquet:"name=timemicros, type=TIME_MICROS"`
TimeMicros2 int64 `parquet:"name=timemicros2, type=TIME_MICROS, logicaltype=TIME, logicaltype.isadjustedtoutc=false, logicaltype.unit=MICROS"`
TimestampMillis int64 `parquet:"name=timestampmillis, type=TIMESTAMP_MILLIS"`
TimestampMillis2 int32 `parquet:"name=timestampmillis2, type=TIMESTAMP_MILLIS, logicaltype=TIMESTAMP, logicaltype.isadjustedtoutc=true, logicaltype.unit=MILLIS"`
TimestampMicros int64 `parquet:"name=timestampmicros, type=TIMESTAMP_MICROS"`
TimestampMicros2 int64 `parquet:"name=timestampmicros2, type=TIMESTAMP_MICROS, logicaltype=TIMESTAMP, logicaltype.isadjustedtoutc=false, logicaltype.unit=MICROS"`
Interval string `parquet:"name=interval, type=INTERVAL"`
Utf8 string `parquet:"name=utf8, type=BYTE_ARRAY, convertedtype=UTF8, encoding=PLAIN_DICTIONARY"`
Int_8 int32 `parquet:"name=int_8, type=INT32, convertedtype=INT32, convertedtype=INT_8"`
Int_16 int32 `parquet:"name=int_16, type=INT32, convertedtype=INT_16"`
Int_32 int32 `parquet:"name=int_32, type=INT32, convertedtype=INT_32"`
Int_64 int64 `parquet:"name=int_64, type=INT64, convertedtype=INT_64"`
Uint_8 int32 `parquet:"name=uint_8, type=INT32, convertedtype=UINT_8"`
Uint_16 int32 `parquet:"name=uint_16, type=INT32, convertedtype=UINT_16"`
Uint_32 int32 `parquet:"name=uint_32, type=INT32, convertedtype=UINT_32"`
Uint_64 int64 `parquet:"name=uint_64, type=INT64, convertedtype=UINT_64"`
Date int32 `parquet:"name=date, type=INT32, convertedtype=DATE"`
Date2 int32 `parquet:"name=date2, type=INT32, convertedtype=DATE, logicaltype=DATE"`
TimeMillis int32 `parquet:"name=timemillis, type=INT32, convertedtype=TIME_MILLIS"`
TimeMillis2 int32 `parquet:"name=timemillis2, type=INT32, convertedtype=TIME_MILLIS, logicaltype=TIME, logicaltype.isadjustedtoutc=true, logicaltype.unit=MILLIS"`
TimeMicros int64 `parquet:"name=timemicros, type=INT64, convertedtype=TIME_MICROS"`
TimeMicros2 int64 `parquet:"name=timemicros2, type=INT64, convertedtype=TIME_MICROS, logicaltype=TIME, logicaltype.isadjustedtoutc=false, logicaltype.unit=MICROS"`
TimestampMillis int64 `parquet:"name=timestampmillis, type=INT64, convertedtype=TIMESTAMP_MILLIS"`
TimestampMillis2 int32 `parquet:"name=timestampmillis2, type=INT32, convertedtype=TIMESTAMP_MILLIS, logicaltype=TIMESTAMP, logicaltype.isadjustedtoutc=true, logicaltype.unit=MILLIS"`
TimestampMicros int64 `parquet:"name=timestampmicros, type=INT64, convertedtype=TIMESTAMP_MICROS"`
TimestampMicros2 int64 `parquet:"name=timestampmicros2, type=INT64, convertedtype=TIMESTAMP_MICROS, logicaltype=TIMESTAMP, logicaltype.isadjustedtoutc=false, logicaltype.unit=MICROS"`
Interval string `parquet:"name=interval, type=BYTE_ARRAY, convertedtype=INTERVAL"`

Decimal1 int32 `parquet:"name=decimal1, type=DECIMAL, scale=2, precision=9, basetype=INT32"`
Decimal2 int64 `parquet:"name=decimal2, type=DECIMAL, scale=2, precision=18, basetype=INT64"`
Decimal3 string `parquet:"name=decimal3, type=DECIMAL, scale=2, precision=10, basetype=FIXED_LEN_BYTE_ARRAY, length=12"`
Decimal4 string `parquet:"name=decimal4, type=DECIMAL, scale=2, precision=20, basetype=BYTE_ARRAY"`
Decimal1 int32 `parquet:"name=decimal1, type=INT32, convertedtype=DECIMAL, scale=2, precision=9"`
Decimal2 int64 `parquet:"name=decimal2, type=INT64, convertedtype=DECIMAL, scale=2, precision=18"`
Decimal3 string `parquet:"name=decimal3, type=FIXED_LENGTH_BYTE_ARRAY, convertedtype=DECIMAL, scale=2, precision=10, length=12"`
Decimal4 string `parquet:"name=decimal4, type=BYTE_ARRAY, convertedtype=DECIMAL, scale=2, precision=20"`

Decimal5 int32 `parquet:"name=decimalt, type=DECIMAL, basetype=INT32, logicaltype=DECIMAL, logicaltype.precision=10, logicaltype.scale=2"`
Decimal5 int32 `parquet:"name=decimal5, type=INT32, convertedtype=DECIMAL, logicaltype=DECIMAL, logicaltype.precision=10, logicaltype.scale=2"`

Map map[string]int32 `parquet:"name=map, type=MAP, keytype=UTF8, valuetype=INT32"`
List []string `parquet:"name=list, type=LIST, valuetype=UTF8"`
Map map[string]int32 `parquet:"name=map, type=MAP, convertedtype=MAP, keytype=UTF8, valuetype=INT32"`
List []string `parquet:"name=list, type=MAP, convertedtype=LIST, valuetype=UTF8"`
Repeated []int32 `parquet:"name=repeated, type=INT32, repetitiontype=REPEATED"`
}

Expand Down Expand Up @@ -78,14 +78,14 @@ func main() {
FixedLenByteArray: "HelloWorld",

Utf8: "utf8",
Int_8: int8(i),
Int_16: int16(i),
Int_8: int32(i),
Int_16: int32(i),
Int_32: int32(i),
Int_64: int64(i),
Uint_8: uint8(i),
Uint_16: uint16(i),
Uint_32: uint32(i),
Uint_64: uint64(i),
Uint_8: int32(i),
Uint_16: int32(i),
Uint_32: int32(i),
Uint_64: int64(i),
Date: int32(i),
TimeMillis: int32(i),
TimeMicros: int64(i),
Expand Down
8 changes: 4 additions & 4 deletions layout/chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ func PagesToChunk(pages []*Page) *Chunk {

var maxVal interface{} = pages[0].MaxVal
var minVal interface{} = pages[0].MinVal
pT, cT, omitStats := pages[0].Schema.Type, pages[0].Schema.ConvertedType, pages[0].Info.OmitStats
funcTable := common.FindFuncTable(pT, cT)
pT, cT, logT, omitStats := pages[0].Schema.Type, pages[0].Schema.ConvertedType, pages[0].Schema.LogicalType, pages[0].Info.OmitStats
funcTable := common.FindFuncTable(pT, cT, logT)

for i := 0; i < ln; i++ {
if pages[i].Header.DataPageHeader != nil {
Expand Down Expand Up @@ -85,8 +85,8 @@ func PagesToDictChunk(pages []*Page) *Chunk {

var maxVal interface{} = pages[1].MaxVal
var minVal interface{} = pages[1].MinVal
pT, cT, omitStats := pages[1].Schema.Type, pages[1].Schema.ConvertedType, pages[0].Info.OmitStats
funcTable := common.FindFuncTable(pT, cT)
pT, cT, logT, omitStats := pages[1].Schema.Type, pages[1].Schema.ConvertedType, pages[1].Schema.LogicalType, pages[0].Info.OmitStats
funcTable := common.FindFuncTable(pT, cT, logT)

for i := 0; i < len(pages); i++ {
if pages[i].Header.DataPageHeader != nil {
Expand Down
4 changes: 2 additions & 2 deletions layout/dictpage.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func TableToDictDataPages(dictRec *DictRecType, table *Table, pageSize int32, bi
res := make([]*Page, 0)
i := 0

pT, cT, omitStats := table.Schema.Type, table.Schema.ConvertedType, table.Info.OmitStats
pT, cT, logT, omitStats := table.Schema.Type, table.Schema.ConvertedType, table.Schema.LogicalType, table.Info.OmitStats

for i < totalLn {
j := i
Expand All @@ -88,7 +88,7 @@ func TableToDictDataPages(dictRec *DictRecType, table *Table, pageSize int32, bi
var minVal interface{} = table.Values[i]
values := make([]int32, 0)

funcTable := common.FindFuncTable(pT, cT)
funcTable := common.FindFuncTable(pT, cT, logT)

for j < totalLn && size < pageSize {
if table.DefinitionLevels[j] == table.MaxDefinitionLevel {
Expand Down
Loading

0 comments on commit a5e3176

Please sign in to comment.