forked from apache/arrow
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ARROW-3951: [Go] implement a CSV writer
@sbinet Author: Anson Qian <[email protected]> Closes apache#3755 from anson627/arrow-3951 and squashes the following commits: df1735a <Anson Qian> Fix reader test 9bc8dc0 <Anson Qian> Fix unit test 6e63617 <Anson Qian> Fix typo 7624a97 <Anson Qian> Add example and bump up test coverage f460e19 <Anson Qian> Add newline at end of file 947235c <Anson Qian> Consoliate option for reader and writer 2a57a67 <Anson Qian> Add memory size check e00638e <Anson Qian> Address code reviews 92cbcea <Anson Qian> ARROW-3951 implement a CSV writer
- Loading branch information
Showing
5 changed files
with
448 additions
and
96 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
// Package csv reads CSV files and presents the extracted data as records. It | ||
// also writes records out as CSV files. | ||
package csv | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
|
||
"github.com/apache/arrow/go/arrow" | ||
"github.com/apache/arrow/go/arrow/memory" | ||
) | ||
|
||
// ErrMismatchFields is a sentinel error value of the arrow/csv package.
//
// NOTE(review): the identifier speaks of fields while the message text speaks
// of records; the places that return this error are not visible here, so
// confirm which wording is intended before changing either.
var ErrMismatchFields = errors.New("arrow/csv: number of records mismatch")
|
||
type (
	// config is the value an Option is applied to. It is deliberately an
	// empty interface: each option inspects the dynamic type it receives and
	// decides whether it knows how to configure it.
	config interface{}

	// Option configures a CSV reader/writer.
	Option func(config)
)
|
||
// WithComma specifies the fields separation character used while parsing CSV files. | ||
func WithComma(c rune) Option { | ||
return func(cfg config) { | ||
switch cfg := cfg.(type) { | ||
case *Reader: | ||
cfg.r.Comma = c | ||
case *Writer: | ||
cfg.w.Comma = c | ||
default: | ||
panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) | ||
} | ||
} | ||
} | ||
|
||
// WithComment specifies the comment character used while parsing CSV files. | ||
func WithComment(c rune) Option { | ||
return func(cfg config) { | ||
switch cfg := cfg.(type) { | ||
case *Reader: | ||
cfg.r.Comment = c | ||
default: | ||
panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) | ||
} | ||
} | ||
} | ||
|
||
// WithAllocator specifies the Arrow memory allocator used while building records. | ||
func WithAllocator(mem memory.Allocator) Option { | ||
return func(cfg config) { | ||
switch cfg := cfg.(type) { | ||
case *Reader: | ||
cfg.mem = mem | ||
default: | ||
panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) | ||
} | ||
} | ||
} | ||
|
||
// WithChunk specifies the chunk size used while parsing CSV files. | ||
// | ||
// If n is zero or 1, no chunking will take place and the reader will create | ||
// one record per row. | ||
// If n is greater than 1, chunks of n rows will be read. | ||
// If n is negative, the reader will load the whole CSV file into memory and | ||
// create one big record with all the rows. | ||
func WithChunk(n int) Option { | ||
return func(cfg config) { | ||
switch cfg := cfg.(type) { | ||
case *Reader: | ||
cfg.chunk = n | ||
default: | ||
panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) | ||
} | ||
} | ||
} | ||
|
||
// WithCRLF specifies the line terminator used while writing CSV files. | ||
// If useCRLF is true, \r\n is used as the line terminator, otherwise \n is used. | ||
// The default value is false. | ||
func WithCRLF(useCRLF bool) Option { | ||
return func(cfg config) { | ||
switch cfg := cfg.(type) { | ||
case *Writer: | ||
cfg.w.UseCRLF = useCRLF | ||
default: | ||
panic(fmt.Errorf("arrow/csv: unknown config type %T", cfg)) | ||
} | ||
} | ||
} | ||
|
||
func validate(schema *arrow.Schema) { | ||
for i, f := range schema.Fields() { | ||
switch ft := f.Type.(type) { | ||
case *arrow.BooleanType: | ||
case *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type: | ||
case *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type: | ||
case *arrow.Float32Type, *arrow.Float64Type: | ||
case *arrow.StringType: | ||
default: | ||
panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, ft)) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.