From b1600c1d1d549bb6db24340275aa54b59ee6ef0b Mon Sep 17 00:00:00 2001 From: Neville Dipale Date: Mon, 9 Dec 2019 19:10:32 +0200 Subject: [PATCH] ARROW-5366: [Rust] Duration and Interval Arrays This implements duration and interval arrays, and adds them to the IPC file reader Closes #5989 from nevi-me/ARROW-5366-2 and squashes the following commits: 7f945a7c8 ARROW-5633: Duration and Interval Arrays Authored-by: Neville Dipale Signed-off-by: Neville Dipale --- rust/arrow/src/array/array.rs | 105 ++++++++++++++++++++ rust/arrow/src/array/builder.rs | 18 ++++ rust/arrow/src/array/mod.rs | 19 +++- rust/arrow/src/compute/kernels/cast.rs | 1 + rust/arrow/src/compute/kernels/take.rs | 67 ++++++++++++- rust/arrow/src/datatypes.rs | 126 ++++++++++++++++++++++-- rust/arrow/src/ipc/convert.rs | 57 ++++++++++- rust/arrow/src/ipc/file/reader.rs | 23 ++++- rust/arrow/src/util/integration_util.rs | 42 +++++++- 9 files changed, 443 insertions(+), 15 deletions(-) diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs index 548e4c0137c22..fdf30c9547256 100644 --- a/rust/arrow/src/array/array.rs +++ b/rust/arrow/src/array/array.rs @@ -105,6 +105,7 @@ pub fn make_array(data: ArrayDataRef) -> ArrayRef { DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef, DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef, DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef, + DataType::Float16 => panic!("Float16 datatype not supported"), DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef, DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef, DataType::Date32(DateUnit::Day) => Arc::new(Date32Array::from(data)) as ArrayRef, @@ -135,6 +136,24 @@ pub fn make_array(data: ArrayDataRef) -> ArrayRef { DataType::Timestamp(TimeUnit::Nanosecond, _) => { Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef } + DataType::Interval(IntervalUnit::YearMonth) => { + Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef + } + DataType::Interval(IntervalUnit::DayTime) => { + Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef + } + DataType::Duration(TimeUnit::Second) => { + Arc::new(DurationSecondArray::from(data)) as ArrayRef + } + DataType::Duration(TimeUnit::Millisecond) => { + Arc::new(DurationMillisecondArray::from(data)) as ArrayRef + } + DataType::Duration(TimeUnit::Microsecond) => { + Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef + } + DataType::Duration(TimeUnit::Nanosecond) => { + Arc::new(DurationNanosecondArray::from(data)) as ArrayRef + } DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef, DataType::FixedSizeBinary(_) => { Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef @@ -600,6 +619,36 @@ def_numeric_from_vec!( i64, DataType::Time64(TimeUnit::Nanosecond) ); +def_numeric_from_vec!( + IntervalYearMonthType, + i32, + DataType::Interval(IntervalUnit::YearMonth) +); +def_numeric_from_vec!( + IntervalDayTimeType, + i64, + DataType::Interval(IntervalUnit::DayTime) +); +def_numeric_from_vec!( + DurationSecondType, + i64, + DataType::Duration(TimeUnit::Second) +); +def_numeric_from_vec!( + DurationMillisecondType, + i64, + DataType::Duration(TimeUnit::Millisecond) +); +def_numeric_from_vec!( + DurationMicrosecondType, + i64, + DataType::Duration(TimeUnit::Microsecond) +); +def_numeric_from_vec!( + DurationNanosecondType, + i64, + DataType::Duration(TimeUnit::Nanosecond) +); impl PrimitiveArray { /// Construct a timestamp array from a vec of i64 values and an optional timezone @@ -1717,6 +1766,62 @@ mod tests { } } + #[test] + fn test_interval_array_from_vec() { + // intervals are currently not treated specially, but are Int32 and Int64 arrays + let arr = IntervalYearMonthArray::from(vec![Some(1), None, Some(-5)]); + assert_eq!(3, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(1, arr.null_count()); + assert_eq!(1, arr.value(0)); + assert!(arr.is_null(1)); + assert_eq!(-5, arr.value(2)); + + // a day_time interval contains days and milliseconds, but we do not yet have accessors for the values + let arr = IntervalDayTimeArray::from(vec![Some(1), None, Some(-5)]); + assert_eq!(3, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(1, arr.null_count()); + assert_eq!(1, arr.value(0)); + assert!(arr.is_null(1)); + assert_eq!(-5, arr.value(2)); + } + + #[test] + fn test_duration_array_from_vec() { + let arr = DurationSecondArray::from(vec![Some(1), None, Some(-5)]); + assert_eq!(3, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(1, arr.null_count()); + assert_eq!(1, arr.value(0)); + assert!(arr.is_null(1)); + assert_eq!(-5, arr.value(2)); + + let arr = DurationMillisecondArray::from(vec![Some(1), None, Some(-5)]); + assert_eq!(3, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(1, arr.null_count()); + assert_eq!(1, arr.value(0)); + assert!(arr.is_null(1)); + assert_eq!(-5, arr.value(2)); + + let arr = DurationMicrosecondArray::from(vec![Some(1), None, Some(-5)]); + assert_eq!(3, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(1, arr.null_count()); + assert_eq!(1, arr.value(0)); + assert!(arr.is_null(1)); + assert_eq!(-5, arr.value(2)); + + let arr = DurationNanosecondArray::from(vec![Some(1), None, Some(-5)]); + assert_eq!(3, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(1, arr.null_count()); + assert_eq!(1, arr.value(0)); + assert!(arr.is_null(1)); + assert_eq!(-5, arr.value(2)); + } + #[test] fn test_primitive_array_slice() { let arr = Int32Array::from(vec![ diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs index 61ed7254bd6e6..868ab44cabd3c 100644 --- a/rust/arrow/src/array/builder.rs +++ b/rust/arrow/src/array/builder.rs @@ -889,6 +889,24 @@ impl StructBuilder { DataType::Timestamp(TimeUnit::Nanosecond, _) => { Box::new(TimestampNanosecondBuilder::new(capacity)) } + DataType::Interval(IntervalUnit::YearMonth) => { + Box::new(IntervalYearMonthBuilder::new(capacity)) + } + DataType::Interval(IntervalUnit::DayTime) => { + Box::new(IntervalDayTimeBuilder::new(capacity)) + } + DataType::Duration(TimeUnit::Second) => { + Box::new(DurationSecondBuilder::new(capacity)) + } + DataType::Duration(TimeUnit::Millisecond) => { + Box::new(DurationMillisecondBuilder::new(capacity)) + } + DataType::Duration(TimeUnit::Microsecond) => { + Box::new(DurationMicrosecondBuilder::new(capacity)) + } + DataType::Duration(TimeUnit::Nanosecond) => { + Box::new(DurationNanosecondBuilder::new(capacity)) + } DataType::Struct(fields) => { let schema = Schema::new(fields.clone()); Box::new(Self::from_schema(schema, capacity)) diff --git a/rust/arrow/src/array/mod.rs b/rust/arrow/src/array/mod.rs index 31718274d8cc2..b6386773c5e64 100644 --- a/rust/arrow/src/array/mod.rs +++ b/rust/arrow/src/array/mod.rs @@ -101,7 +101,12 @@ pub type Time32SecondArray = PrimitiveArray; pub type Time32MillisecondArray = PrimitiveArray; pub type Time64MicrosecondArray = PrimitiveArray; pub type Time64NanosecondArray = PrimitiveArray; -// TODO add interval +pub type IntervalYearMonthArray = PrimitiveArray; +pub type IntervalDayTimeArray = PrimitiveArray; +pub type DurationSecondArray = PrimitiveArray; +pub type DurationMillisecondArray = PrimitiveArray; +pub type DurationMicrosecondArray = PrimitiveArray; +pub type DurationNanosecondArray = PrimitiveArray; pub use self::array::ListArrayOps; pub use self::array::PrimitiveArrayOps; @@ -133,6 +138,12 @@ pub type Time32SecondBufferBuilder = BufferBuilder; pub type Time32MillisecondBufferBuilder = BufferBuilder; pub type Time64MicrosecondBufferBuilder = BufferBuilder; pub type Time64NanosecondBufferBuilder = BufferBuilder; +pub type IntervalYearMonthBufferBuilder = BufferBuilder; +pub type IntervalDayTimeBufferBuilder = BufferBuilder; +pub type DurationSecondBufferBuilder = BufferBuilder; +pub type DurationMillisecondBufferBuilder = BufferBuilder; +pub type DurationMicrosecondBufferBuilder = BufferBuilder; +pub type DurationNanosecondBufferBuilder = BufferBuilder; pub use self::builder::ArrayBuilder; pub use self::builder::BinaryBuilder; @@ -165,6 +176,12 @@ pub type Time32SecondBuilder = PrimitiveBuilder; pub type Time32MillisecondBuilder = PrimitiveBuilder; pub type Time64MicrosecondBuilder = PrimitiveBuilder; pub type Time64NanosecondBuilder = PrimitiveBuilder; +pub type IntervalYearMonthBuilder = PrimitiveBuilder; +pub type IntervalDayTimeBuilder = PrimitiveBuilder; +pub type DurationSecondBuilder = PrimitiveBuilder; +pub type DurationMillisecondBuilder = PrimitiveBuilder; +pub type DurationMicrosecondBuilder = PrimitiveBuilder; +pub type DurationNanosecondBuilder = PrimitiveBuilder; // --------------------- Array Equality --------------------- diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs index 2ceefe7c1f45e..2a00be59cd642 100644 --- a/rust/arrow/src/compute/kernels/cast.rs +++ b/rust/arrow/src/compute/kernels/cast.rs @@ -61,6 +61,7 @@ use crate::error::{ArrowError, Result}; /// * To or from `StructArray` /// * List to primitive /// * Utf8 to boolean +/// * Interval and duration pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { use DataType::*; let from_type = array.data_type(); diff --git a/rust/arrow/src/compute/kernels/take.rs b/rust/arrow/src/compute/kernels/take.rs index f56f6408de60d..cb294dd121094 100644 --- a/rust/arrow/src/compute/kernels/take.rs +++ b/rust/arrow/src/compute/kernels/take.rs @@ -88,6 +88,24 @@ pub fn take( DataType::Timestamp(Nanosecond, _) => { take_primitive::(values, indices) } + DataType::Interval(IntervalUnit::YearMonth) => { + take_primitive::(values, indices) + } + DataType::Interval(IntervalUnit::DayTime) => { + take_primitive::(values, indices) + } + DataType::Duration(TimeUnit::Second) => { + take_primitive::(values, indices) + } + DataType::Duration(TimeUnit::Millisecond) => { + take_primitive::(values, indices) + } + DataType::Duration(TimeUnit::Microsecond) => { + take_primitive::(values, indices) + } + DataType::Duration(TimeUnit::Nanosecond) => { + take_primitive::(values, indices) + } DataType::Utf8 => take_string(values, indices), DataType::List(_) => take_list(values, indices), DataType::Struct(fields) => { @@ -291,6 +309,54 @@ mod tests { vec![Some(-15), None, None, Some(-15), Some(2)], ); + // interval_year_month + test_take_primitive_arrays::( + vec![Some(0), None, Some(2), Some(-15), None], + &index, + None, + vec![Some(-15), None, None, Some(-15), Some(2)], + ); + + // interval_day_time + test_take_primitive_arrays::( + vec![Some(0), None, Some(2), Some(-15), None], + &index, + None, + vec![Some(-15), None, None, Some(-15), Some(2)], + ); + + // duration_second + test_take_primitive_arrays::( + vec![Some(0), None, Some(2), Some(-15), None], + &index, + None, + vec![Some(-15), None, None, Some(-15), Some(2)], + ); + + // duration_millisecond + test_take_primitive_arrays::( + vec![Some(0), None, Some(2), Some(-15), None], + &index, + None, + vec![Some(-15), None, None, Some(-15), Some(2)], + ); + + // duration_microsecond + test_take_primitive_arrays::( + vec![Some(0), None, Some(2), Some(-15), None], + &index, + None, + vec![Some(-15), None, None, Some(-15), Some(2)], + ); + + // duration_nanosecond + test_take_primitive_arrays::( + vec![Some(0), None, Some(2), Some(-15), None], + &index, + None, + vec![Some(-15), None, None, Some(-15), Some(2)], + ); + // float32 test_take_primitive_arrays::( vec![Some(0.0), None, Some(2.21), Some(-3.1), None], @@ -308,7 +374,6 @@ mod tests { ); // boolean - // float32 test_take_primitive_arrays::( vec![Some(false), None, Some(true), Some(false), None], &index, diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index 4e00a62fa3dea..639973581a14a 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -70,6 +70,7 @@ pub enum DataType { Date64(DateUnit), Time32(TimeUnit), Time64(TimeUnit), + Duration(TimeUnit), Interval(IntervalUnit), Binary, FixedSizeBinary(i32), @@ -298,10 +299,10 @@ make_type!( ); make_type!( IntervalYearMonthType, - i64, + i32, DataType::Interval(IntervalUnit::YearMonth), - 64, - 0i64 + 32, + 0i32 ); make_type!( IntervalDayTimeType, @@ -310,6 +311,34 @@ make_type!( 64, 0i64 ); +make_type!( + DurationSecondType, + i64, + DataType::Duration(TimeUnit::Second), + 64, + 0i64 +); +make_type!( + DurationMillisecondType, + i64, + DataType::Duration(TimeUnit::Millisecond), + 64, + 0i64 +); +make_type!( + DurationMicrosecondType, + i64, + DataType::Duration(TimeUnit::Microsecond), + 64, + 0i64 +); +make_type!( + DurationNanosecondType, + i64, + DataType::Duration(TimeUnit::Nanosecond), + 64, + 0i64 +); /// A subtype of primitive type that represents numeric values. /// @@ -497,8 +526,12 @@ make_numeric_type!(Time32SecondType, i32, i32x16, m32x16); make_numeric_type!(Time32MillisecondType, i32, i32x16, m32x16); make_numeric_type!(Time64MicrosecondType, i64, i64x8, m64x8); make_numeric_type!(Time64NanosecondType, i64, i64x8, m64x8); -make_numeric_type!(IntervalYearMonthType, i64, i64x8, m64x8); +make_numeric_type!(IntervalYearMonthType, i32, i32x16, m32x16); make_numeric_type!(IntervalDayTimeType, i64, i64x8, m64x8); +make_numeric_type!(DurationSecondType, i64, i64x8, m64x8); +make_numeric_type!(DurationMillisecondType, i64, i64x8, m64x8); +make_numeric_type!(DurationMicrosecondType, i64, i64x8, m64x8); +make_numeric_type!(DurationNanosecondType, i64, i64x8, m64x8); /// A subtype of primitive type that represents temporal values. pub trait ArrowTemporalType: ArrowPrimitiveType {} @@ -513,8 +546,8 @@ impl ArrowTemporalType for Time32SecondType {} impl ArrowTemporalType for Time32MillisecondType {} impl ArrowTemporalType for Time64MicrosecondType {} impl ArrowTemporalType for Time64NanosecondType {} -impl ArrowTemporalType for IntervalYearMonthType {} -impl ArrowTemporalType for IntervalDayTimeType {} +// impl ArrowTemporalType for IntervalYearMonthType {} +// impl ArrowTemporalType for IntervalDayTimeType {} /// A timestamp type allows us to create array builders that take a timestamp pub trait ArrowTimestampType: ArrowTemporalType { @@ -634,6 +667,21 @@ impl DataType { )), } } + Some(s) if s == "duration" => match map.get("unit") { + Some(p) if p == "SECOND" => Ok(DataType::Duration(TimeUnit::Second)), + Some(p) if p == "MILLISECOND" => { + Ok(DataType::Duration(TimeUnit::Millisecond)) + } + Some(p) if p == "MICROSECOND" => { + Ok(DataType::Duration(TimeUnit::Microsecond)) + } + Some(p) if p == "NANOSECOND" => { + Ok(DataType::Duration(TimeUnit::Nanosecond)) + } + _ => Err(ArrowError::ParseError( + "time unit missing or invalid".to_string(), + )), + }, Some(s) if s == "interval" => match map.get("unit") { Some(p) if p == "DAY_TIME" => { Ok(DataType::Interval(IntervalUnit::DayTime)) @@ -778,6 +826,12 @@ impl DataType { IntervalUnit::YearMonth => "YEAR_MONTH", IntervalUnit::DayTime => "DAY_TIME", }}), + DataType::Duration(unit) => json!({"name": "duration", "unit": match unit { + TimeUnit::Second => "SECOND", + TimeUnit::Millisecond => "MILLISECOND", + TimeUnit::Microsecond => "MICROSECOND", + TimeUnit::Nanosecond => "NANOSECOND", + }}), } } } @@ -1348,6 +1402,12 @@ mod tests { ]), false, ), + Field::new("c25", DataType::Interval(IntervalUnit::YearMonth), true), + Field::new("c26", DataType::Interval(IntervalUnit::DayTime), true), + Field::new("c27", DataType::Duration(TimeUnit::Second), false), + Field::new("c28", DataType::Duration(TimeUnit::Millisecond), false), + Field::new("c29", DataType::Duration(TimeUnit::Microsecond), false), + Field::new("c30", DataType::Duration(TimeUnit::Nanosecond), false), ], metadata, ); @@ -1629,6 +1689,60 @@ mod tests { "children": [] } ] + }, + { + "name": "c25", + "nullable": true, + "type": { + "name": "interval", + "unit": "YEAR_MONTH" + }, + "children": [] + }, + { + "name": "c26", + "nullable": true, + "type": { + "name": "interval", + "unit": "DAY_TIME" + }, + "children": [] + }, + { + "name": "c27", + "nullable": false, + "type": { + "name": "duration", + "unit": "SECOND" + }, + "children": [] + }, + { + "name": "c28", + "nullable": false, + "type": { + "name": "duration", + "unit": "MILLISECOND" + }, + "children": [] + }, + { + "name": "c29", + "nullable": false, + "type": { + "name": "duration", + "unit": "MICROSECOND" + }, + "children": [] + }, + { + "name": "c30", + "nullable": false, + "type": { + "name": "duration", + "unit": "NANOSECOND" + }, + "children": [] } ], "metadata" : { diff --git a/rust/arrow/src/ipc/convert.rs b/rust/arrow/src/ipc/convert.rs index 2e2dda9453ad9..e2416cec1d836 100644 --- a/rust/arrow/src/ipc/convert.rs +++ b/rust/arrow/src/ipc/convert.rs @@ -17,7 +17,7 @@ //! Utilities for converting between IPC types and native Arrow types -use crate::datatypes::{DataType, DateUnit, Field, Schema, TimeUnit}; +use crate::datatypes::{DataType, DateUnit, Field, IntervalUnit, Schema, TimeUnit}; use crate::ipc; use flatbuffers::{ @@ -182,6 +182,24 @@ fn get_data_type(field: ipc::Field) -> DataType { } } } + ipc::Type::Interval => { + let interval = field.type_as_interval().unwrap(); + match interval.unit() { + ipc::IntervalUnit::YEAR_MONTH => { + DataType::Interval(IntervalUnit::YearMonth) + } + ipc::IntervalUnit::DAY_TIME => DataType::Interval(IntervalUnit::DayTime), + } + } + ipc::Type::Duration => { + let duration = field.type_as_duration().unwrap(); + match duration.unit() { + ipc::TimeUnit::SECOND => DataType::Duration(TimeUnit::Second), + ipc::TimeUnit::MILLISECOND => DataType::Duration(TimeUnit::Millisecond), + ipc::TimeUnit::MICROSECOND => DataType::Duration(TimeUnit::Microsecond), + ipc::TimeUnit::NANOSECOND => DataType::Duration(TimeUnit::Nanosecond), + } + } ipc::Type::List => { let children = field.children().unwrap(); if children.len() != 1 { @@ -213,7 +231,6 @@ fn get_data_type(field: ipc::Field) -> DataType { DataType::Struct(fields) } - // TODO add interval support t @ _ => unimplemented!("Type {:?} not supported", t), } } @@ -272,6 +289,11 @@ fn get_fb_field_type<'a: 'b, 'b>( None, ) } + Binary => ( + ipc::Type::Binary, + ipc::BinaryBuilder::new(&mut fbb).finish().as_union_value(), + None, + ), Utf8 => ( ipc::Type::Utf8, ipc::Utf8Builder::new(&mut fbb).finish().as_union_value(), @@ -329,6 +351,26 @@ fn get_fb_field_type<'a: 'b, 'b>( None, ) } + Interval(unit) => { + let mut builder = ipc::IntervalBuilder::new(&mut fbb); + let interval_unit = match unit { + IntervalUnit::YearMonth => ipc::IntervalUnit::YEAR_MONTH, + IntervalUnit::DayTime => ipc::IntervalUnit::DAY_TIME, + }; + builder.add_unit(interval_unit); + (ipc::Type::Interval, builder.finish().as_union_value(), None) + } + Duration(unit) => { + let mut builder = ipc::DurationBuilder::new(&mut fbb); + let time_unit = match unit { + TimeUnit::Second => ipc::TimeUnit::SECOND, + TimeUnit::Millisecond => ipc::TimeUnit::MILLISECOND, + TimeUnit::Microsecond => ipc::TimeUnit::MICROSECOND, + TimeUnit::Nanosecond => ipc::TimeUnit::NANOSECOND, + }; + builder.add_unit(time_unit); + (ipc::Type::Duration, builder.finish().as_union_value(), None) + } List(ref list_type) => { let inner_types = get_fb_field_type(list_type, &mut fbb); let child = ipc::Field::create( @@ -434,7 +476,18 @@ mod tests { DataType::Timestamp(TimeUnit::Nanosecond, None), true, ), + Field::new( + "interval[ym]", + DataType::Interval(IntervalUnit::YearMonth), + true, + ), + Field::new( + "interval[dt]", + DataType::Interval(IntervalUnit::DayTime), + true, + ), Field::new("utf8", DataType::Utf8, false), + Field::new("binary", DataType::Binary, false), Field::new("list[u8]", DataType::List(Box::new(DataType::UInt8)), true), Field::new( "list[struct]", diff --git a/rust/arrow/src/ipc/file/reader.rs b/rust/arrow/src/ipc/file/reader.rs index 3f29ec77e9706..e1929649903d6 100644 --- a/rust/arrow/src/ipc/file/reader.rs +++ b/rust/arrow/src/ipc/file/reader.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use crate::array::*; use crate::buffer::Buffer; use crate::compute::cast; -use crate::datatypes::{DataType, Schema, SchemaRef}; +use crate::datatypes::{DataType, IntervalUnit, Schema, SchemaRef}; use crate::error::{ArrowError, Result}; use crate::ipc; use crate::record_batch::{RecordBatch, RecordBatchReader}; @@ -215,7 +215,15 @@ fn create_primitive_array( } builder.build() } - Int8 | Int16 | Int32 | UInt8 | UInt16 | UInt32 | Time32(_) | Date32(_) => { + Int8 + | Int16 + | Int32 + | UInt8 + | UInt16 + | UInt32 + | Time32(_) + | Date32(_) + | Interval(IntervalUnit::YearMonth) => { if buffers[1].len() / 8 == length { // interpret as a signed i64, and cast appropriately let mut builder = ArrayData::builder(DataType::Int64) @@ -271,7 +279,15 @@ fn create_primitive_array( builder.build() } } - Boolean | Int64 | UInt64 | Float64 | Time64(_) | Timestamp(_, _) | Date64(_) => { + Boolean + | Int64 + | UInt64 + | Float64 + | Time64(_) + | Timestamp(_, _) + | Date64(_) + | Duration(_) + | Interval(IntervalUnit::DayTime) => { let mut builder = ArrayData::builder(data_type.clone()) .len(length) .buffers(buffers[1..].to_vec()) @@ -530,6 +546,7 @@ mod tests { let testdata = env::var("ARROW_TEST_DATA").expect("ARROW_TEST_DATA not defined"); // the test is repetitive, thus we can read all supported files at once let paths = vec![ + "generated_interval", "generated_datetime", "generated_nested", "generated_primitive_no_batches", diff --git a/rust/arrow/src/util/integration_util.rs b/rust/arrow/src/util/integration_util.rs index ab89e60683cca..ad232d0e0eac7 100644 --- a/rust/arrow/src/util/integration_util.rs +++ b/rust/arrow/src/util/integration_util.rs @@ -20,7 +20,7 @@ //! These utilities define structs that read the integration JSON format for integration testing purposes. use serde_derive::Deserialize; -use serde_json::Value; +use serde_json::{Number as VNumber, Value}; use crate::array::*; use crate::datatypes::*; @@ -135,10 +135,48 @@ impl ArrowJsonBatch { DataType::Int64 | DataType::Date64(_) | DataType::Time64(_) - | DataType::Timestamp(_, _) => { + | DataType::Timestamp(_, _) + | DataType::Duration(_) => { let arr = Int64Array::from(arr.data()); arr.equals_json(&json_array.iter().collect::>()[..]) } + DataType::Interval(IntervalUnit::YearMonth) => { + let arr = IntervalYearMonthArray::from(arr.data()); + arr.equals_json(&json_array.iter().collect::>()[..]) + } + DataType::Interval(IntervalUnit::DayTime) => { + let arr = IntervalDayTimeArray::from(arr.data()); + let x = json_array + .iter() + .map(|v| { + match v { + Value::Null => Value::Null, + Value::Object(v) => { + // interval has days and milliseconds + let days: i32 = + v.get("days").unwrap().as_i64().unwrap() + as i32; + let milliseconds: i32 = v + .get("milliseconds") + .unwrap() + .as_i64() + .unwrap() + as i32; + let value: i64 = unsafe { + std::mem::transmute::<[i32; 2], i64>([ + days, + milliseconds, + ]) + }; + Value::Number(VNumber::from(value)) + } + // return null if Value is not an object + _ => Value::Null, + } + }) + .collect::>(); + arr.equals_json(&x.iter().collect::>()[..]) + } DataType::UInt8 => { let arr = arr.as_any().downcast_ref::().unwrap(); arr.equals_json(&json_array.iter().collect::>()[..])