Skip to content

Commit

Permalink
ARROW-3838: [Rust] CSV Writer
Browse files Browse the repository at this point in the history
This supersedes apache#3111.

Supports:
* writing using the `csv` crate
* customising delimiter, and whether or not to write headers

Limitations:
* writes only to `std::fs::File`; I struggled to make it write to an arbitrary `std::io::Write` implementation. @paddyhoran, any ideas, since you did it for `Reader`?
* values are first converted to `String` before being written. It could be more performant to convert directly to byte slices, as the `csv` crate supports that. I also struggled with this.

Potential Further Work:
* writing temporal arrays (after apache#3726 [ARROW-4386])

Author: Neville Dipale <[email protected]>

Closes apache#3790 from nevi-me/ARROW-3838 and squashes the following commits:

7839949 <Neville Dipale> try fix tmp file issue
d60d0ce <Neville Dipale> change writer tests to write to target folder
74db488 <Neville Dipale> cargo fmt
1693c9b <Neville Dipale> ARROW-3838:  CSV Writer
  • Loading branch information
nevi-me authored and andygrove committed Mar 4, 2019
1 parent 72f5774 commit 1099305
Show file tree
Hide file tree
Showing 6 changed files with 471 additions and 0 deletions.
4 changes: 4 additions & 0 deletions rust/arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,7 @@ harness = false
[[bench]]
name = "comparison_kernels"
harness = false

[[bench]]
name = "csv_writer"
harness = false
74 changes: 74 additions & 0 deletions rust/arrow/benches/csv_writer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

extern crate arrow;
extern crate criterion;

use criterion::*;

use arrow::array::*;
use arrow::csv;
use arrow::datatypes::*;
use arrow::record_batch::RecordBatch;
use std::fs::File;
use std::sync::Arc;

/// Builds a small four-column record batch and writes it eleven times to a CSV file.
///
/// The batch has three rows: a non-nullable string column (`c1`), a nullable
/// float column (`c2`), a non-nullable unsigned integer column (`c3`) and a
/// nullable boolean column (`c4`). Output goes to `target/bench_write_csv.csv`,
/// relative to the current working directory.
///
/// # Panics
///
/// Panics if the output file cannot be created or if the CSV writer reports an error.
fn record_batches_to_csv() {
    // Number of times the same batch is handed to the writer in one call.
    const BATCH_REPEATS: usize = 11;

    // Every field needs its own name; the boolean column is the fourth one, `c4`
    // (it was previously declared as a second `c3`).
    let schema = Schema::new(vec![
        Field::new("c1", DataType::Utf8, false),
        Field::new("c2", DataType::Float64, true),
        Field::new("c3", DataType::UInt32, false),
        Field::new("c4", DataType::Boolean, true),
    ]);

    let c1 = BinaryArray::from(vec![
        "Lorem ipsum dolor sit amet",
        "consectetur adipiscing elit",
        "sed do eiusmod tempor",
    ]);
    let c2 = PrimitiveArray::<Float64Type>::from(vec![
        Some(123.564532),
        None,
        Some(-556132.25),
    ]);
    let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
    let c4 = PrimitiveArray::<BooleanType>::from(vec![Some(true), Some(false), None]);

    let batch = RecordBatch::new(
        Arc::new(schema),
        vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
    );

    let file = File::create("target/bench_write_csv.csv")
        .expect("failed to create target/bench_write_csv.csv");
    let writer = csv::Writer::new(file);

    // `black_box` keeps the optimizer from discarding the write call's result.
    criterion::black_box(
        writer
            .write(vec![&batch; BATCH_REPEATS])
            .expect("failed to write record batches as CSV"),
    );
}

fn criterion_benchmark(c: &mut Criterion) {
c.bench(
"record_batches_to_csv",
Benchmark::new("record_batches_to_csv", move |b| {
b.iter(|| record_batches_to_csv())
}),
);
}

// Collect the benchmark functions of this file into a group named `benches`.
criterion_group!(benches, criterion_benchmark);
// Generate the entry point that runs the `benches` group; the bench target is
// declared with `harness = false`, so Criterion provides `main` itself.
criterion_main!(benches);
3 changes: 3 additions & 0 deletions rust/arrow/src/csv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
//! Transfer data between the Arrow memory format and CSV (comma-separated values).
pub mod reader;
pub mod writer;

pub use self::reader::Reader;
pub use self::reader::ReaderBuilder;
pub use self::writer::Writer;
pub use self::writer::WriterBuilder;
Loading

0 comments on commit 1099305

Please sign in to comment.