Skip to content

Commit

Permalink
ARROW-8028: [Go] Allow duplicate field names in schemas and nested types
Browse files Browse the repository at this point in the history
Closes apache#6580 from sbinet/issue-8028

Authored-by: Sebastien Binet <[email protected]>
Signed-off-by: Sebastien Binet <[email protected]>
  • Loading branch information
sbinet committed Mar 13, 2020
1 parent f67b210 commit 777d9fc
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 37 deletions.
37 changes: 15 additions & 22 deletions go/arrow/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,18 +108,17 @@ func (md Metadata) clone() Metadata {
// a record batch.
type Schema struct {
fields []Field
index map[string]int
index map[string][]int
meta Metadata
}

// NewSchema returns a new Schema value from the slice of fields and metadata.
//
// NewSchema panics if there are duplicated fields.
// NewSchema panics if there is a field with an invalid DataType.
func NewSchema(fields []Field, metadata *Metadata) *Schema {
sc := &Schema{
fields: make([]Field, 0, len(fields)),
index: make(map[string]int, len(fields)),
index: make(map[string][]int, len(fields)),
}
if metadata != nil {
sc.meta = metadata.clone()
Expand All @@ -129,10 +128,7 @@ func NewSchema(fields []Field, metadata *Metadata) *Schema {
panic("arrow: field with nil DataType")
}
sc.fields = append(sc.fields, field)
if _, dup := sc.index[field.Name]; dup {
panic(fmt.Errorf("arrow: duplicate field with name %q", field.Name))
}
sc.index[field.Name] = i
sc.index[field.Name] = append(sc.index[field.Name], i)
}
return sc
}
Expand All @@ -141,28 +137,25 @@ func (sc *Schema) Metadata() Metadata { return sc.meta }
func (sc *Schema) Fields() []Field { return sc.fields }
func (sc *Schema) Field(i int) Field { return sc.fields[i] }

func (sc *Schema) FieldByName(n string) (Field, bool) {
i, ok := sc.index[n]
func (sc *Schema) FieldsByName(n string) ([]Field, bool) {
indices, ok := sc.index[n]
if !ok {
return Field{}, ok
return nil, ok
}
return sc.fields[i], ok
}

// FieldIndex returns the index of the named field or -1.
func (sc *Schema) FieldIndex(n string) int {
i, ok := sc.index[n]
if !ok {
return -1
fields := make([]Field, 0, len(indices))
for _, v := range indices {
fields = append(fields, sc.fields[v])
}
return i
return fields, ok
}

func (sc *Schema) HasField(n string) bool {
return sc.FieldIndex(n) >= 0
// FieldIndices returns the indices of every field named n, or nil if the
// schema has no field with that name. Because duplicate field names are
// allowed, the result may contain more than one index.
//
// The returned slice is the one held in the schema's internal name index,
// not a copy, so callers should treat it as read-only.
func (sc *Schema) FieldIndices(n string) []int {
return sc.index[n]
}

func (sc *Schema) HasMetadata() bool { return len(sc.meta.keys) > 0 }
// HasField reports whether the schema contains at least one field named n.
func (sc *Schema) HasField(n string) bool { return len(sc.FieldIndices(n)) > 0 }
// HasMetadata reports whether the schema's metadata holds at least one key.
func (sc *Schema) HasMetadata() bool { return len(sc.meta.keys) > 0 }

// Equal returns whether two schemas are equal.
// Equal does not compare the metadata.
Expand Down
41 changes: 26 additions & 15 deletions go/arrow/schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,11 @@ func TestSchema(t *testing.T) {
{
fields: []Field{
{Name: "f1", Type: PrimitiveTypes.Int32},
{Name: "f1", Type: PrimitiveTypes.Int64},
{Name: "f2", Type: PrimitiveTypes.Int64},
{Name: "dup", Type: PrimitiveTypes.Int32}, // duplicate
{Name: "dup", Type: PrimitiveTypes.Int64}, // duplicate
},
md: nil,
err: fmt.Errorf(`arrow: duplicate field with name "f1"`),
md: nil,
},
} {
t.Run("", func(t *testing.T) {
Expand Down Expand Up @@ -204,31 +205,41 @@ func TestSchema(t *testing.T) {
}

for _, tc := range []struct {
name string
ok bool
field Field
i int
name string
ok bool
fields []Field
i []int
}{
{"f1", true, tc.fields[0], 0},
{"f2", true, tc.fields[1], 1},
{"N/A", false, Field{}, -1},
{"f1", true, []Field{tc.fields[0]}, []int{0}},
{"f2", true, []Field{tc.fields[1]}, []int{1}},
{"N/A", false, nil, nil},
} {
t.Run(tc.name, func(t *testing.T) {
got, ok := s.FieldByName(tc.name)
got, ok := s.FieldsByName(tc.name)
if ok != tc.ok {
t.Fatalf("invalid field %q: got=%v, want=%v", tc.name, ok, tc.ok)
}
if i := s.FieldIndex(tc.name); i != tc.i {
t.Fatalf("invalid FieldIndex(%s): got=%v, want=%v", tc.name, i, tc.i)
if i := s.FieldIndices(tc.name); !reflect.DeepEqual(i, tc.i) {
t.Fatalf("invalid FieldIndices(%s): got=%v, want=%v\nfields: %v", tc.name, i, tc.i, s.fields)
}
if ok := s.HasField(tc.name); ok != tc.ok {
t.Fatalf("invalid HasField(%s): got=%v, want=%v", tc.name, ok, tc.ok)
}
if !got.Equal(tc.field) {
t.Fatalf("invalid field: got=%#v, want=%#v", got, tc.field)
for i, field := range got {
if !field.Equal(tc.fields[i]) {
t.Fatalf("invalid field[%d]: got=%#v, want=%#v", i, field, tc.fields[i])
}
}
})
}

if s.HasField("dup") {
got := s.FieldIndices("dup")
want := []int{2, 3}
if !reflect.DeepEqual(got, want) {
t.Fatalf("invalid duplicate fields: got=%v, want=%v", got, want)
}
}
})
}
}
Expand Down

0 comments on commit 777d9fc

Please sign in to comment.