Skip to content

Commit

Permalink
ARROW-5366: [Rust] Duration and Interval Arrays
Browse files Browse the repository at this point in the history
This implements duration and interval arrays, and adds them to the IPC file reader

Closes apache#5989 from nevi-me/ARROW-5366-2 and squashes the following commits:

7f945a7 <Neville Dipale> ARROW-5366: Duration and Interval Arrays

Authored-by: Neville Dipale <[email protected]>
Signed-off-by: Neville Dipale <[email protected]>
  • Loading branch information
nevi-me committed Dec 9, 2019
1 parent 7126fdb commit b1600c1
Show file tree
Hide file tree
Showing 9 changed files with 443 additions and 15 deletions.
105 changes: 105 additions & 0 deletions rust/arrow/src/array/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ pub fn make_array(data: ArrayDataRef) -> ArrayRef {
DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef,
DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef,
DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef,
DataType::Float16 => panic!("Float16 datatype not supported"),
DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef,
DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
DataType::Date32(DateUnit::Day) => Arc::new(Date32Array::from(data)) as ArrayRef,
Expand Down Expand Up @@ -135,6 +136,24 @@ pub fn make_array(data: ArrayDataRef) -> ArrayRef {
DataType::Timestamp(TimeUnit::Nanosecond, _) => {
Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
}
DataType::Interval(IntervalUnit::YearMonth) => {
Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef
}
DataType::Interval(IntervalUnit::DayTime) => {
Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
}
DataType::Duration(TimeUnit::Second) => {
Arc::new(DurationSecondArray::from(data)) as ArrayRef
}
DataType::Duration(TimeUnit::Millisecond) => {
Arc::new(DurationMillisecondArray::from(data)) as ArrayRef
}
DataType::Duration(TimeUnit::Microsecond) => {
Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef
}
DataType::Duration(TimeUnit::Nanosecond) => {
Arc::new(DurationNanosecondArray::from(data)) as ArrayRef
}
DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
DataType::FixedSizeBinary(_) => {
Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef
Expand Down Expand Up @@ -600,6 +619,36 @@ def_numeric_from_vec!(
i64,
DataType::Time64(TimeUnit::Nanosecond)
);
// Interval types. Each invocation pairs an Arrow type with the native Rust
// integer it is stored as and the `DataType` it reports.
// NOTE(review): the macro body is not visible here; presumably it generates the
// `From<Vec<..>>` / `From<Vec<Option<..>>>` conversions used by the array tests.

// Year-month intervals are stored as `i32`.
def_numeric_from_vec!(
IntervalYearMonthType,
i32,
DataType::Interval(IntervalUnit::YearMonth)
);
// Day-time intervals are stored as a single `i64`.
def_numeric_from_vec!(
IntervalDayTimeType,
i64,
DataType::Interval(IntervalUnit::DayTime)
);
// Duration types: one per `TimeUnit`, all stored as `i64`.
def_numeric_from_vec!(
DurationSecondType,
i64,
DataType::Duration(TimeUnit::Second)
);
def_numeric_from_vec!(
DurationMillisecondType,
i64,
DataType::Duration(TimeUnit::Millisecond)
);
def_numeric_from_vec!(
DurationMicrosecondType,
i64,
DataType::Duration(TimeUnit::Microsecond)
);
def_numeric_from_vec!(
DurationNanosecondType,
i64,
DataType::Duration(TimeUnit::Nanosecond)
);

impl<T: ArrowTimestampType> PrimitiveArray<T> {
/// Construct a timestamp array from a vec of i64 values and an optional timezone
Expand Down Expand Up @@ -1717,6 +1766,62 @@ mod tests {
}
}

#[test]
fn test_interval_array_from_vec() {
    // Interval arrays receive no special treatment yet: they behave like plain
    // Int32 (year-month) and Int64 (day-time) primitive arrays.
    let year_month = IntervalYearMonthArray::from(vec![Some(1), None, Some(-5)]);
    assert_eq!(3, year_month.len());
    assert_eq!(0, year_month.offset());
    assert_eq!(1, year_month.null_count());
    assert_eq!(1, year_month.value(0));
    assert!(year_month.is_null(1));
    assert_eq!(-5, year_month.value(2));

    // A day-time interval holds both days and milliseconds, but no accessors
    // exist for the two parts yet, so only the raw stored value is checked.
    let day_time = IntervalDayTimeArray::from(vec![Some(1), None, Some(-5)]);
    assert_eq!(3, day_time.len());
    assert_eq!(0, day_time.offset());
    assert_eq!(1, day_time.null_count());
    assert_eq!(1, day_time.value(0));
    assert!(day_time.is_null(1));
    assert_eq!(-5, day_time.value(2));
}

#[test]
fn test_duration_array_from_vec() {
    // Every duration unit is built from the same input and must satisfy the
    // same checks, so the assertions are written once and expanded per type.
    macro_rules! assert_duration_from_vec {
        ($array_type:ident) => {{
            let durations = $array_type::from(vec![Some(1), None, Some(-5)]);
            assert_eq!(3, durations.len());
            assert_eq!(0, durations.offset());
            assert_eq!(1, durations.null_count());
            assert_eq!(1, durations.value(0));
            assert!(durations.is_null(1));
            assert_eq!(-5, durations.value(2));
        }};
    }

    // Same order as before: coarsest unit first.
    assert_duration_from_vec!(DurationSecondArray);
    assert_duration_from_vec!(DurationMillisecondArray);
    assert_duration_from_vec!(DurationMicrosecondArray);
    assert_duration_from_vec!(DurationNanosecondArray);
}

#[test]
fn test_primitive_array_slice() {
let arr = Int32Array::from(vec![
Expand Down
18 changes: 18 additions & 0 deletions rust/arrow/src/array/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,24 @@ impl StructBuilder {
DataType::Timestamp(TimeUnit::Nanosecond, _) => {
Box::new(TimestampNanosecondBuilder::new(capacity))
}
DataType::Interval(IntervalUnit::YearMonth) => {
Box::new(IntervalYearMonthBuilder::new(capacity))
}
DataType::Interval(IntervalUnit::DayTime) => {
Box::new(IntervalDayTimeBuilder::new(capacity))
}
DataType::Duration(TimeUnit::Second) => {
Box::new(DurationSecondBuilder::new(capacity))
}
DataType::Duration(TimeUnit::Millisecond) => {
Box::new(DurationMillisecondBuilder::new(capacity))
}
DataType::Duration(TimeUnit::Microsecond) => {
Box::new(DurationMicrosecondBuilder::new(capacity))
}
DataType::Duration(TimeUnit::Nanosecond) => {
Box::new(DurationNanosecondBuilder::new(capacity))
}
DataType::Struct(fields) => {
let schema = Schema::new(fields.clone());
Box::new(Self::from_schema(schema, capacity))
Expand Down
19 changes: 18 additions & 1 deletion rust/arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,12 @@ pub type Time32SecondArray = PrimitiveArray<Time32SecondType>;
pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>;
pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>;
pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>;
// Interval and duration arrays: primitive arrays over the corresponding Arrow
// interval/duration types, one alias per interval unit and per duration time unit.
pub type IntervalYearMonthArray = PrimitiveArray<IntervalYearMonthType>;
pub type IntervalDayTimeArray = PrimitiveArray<IntervalDayTimeType>;
pub type DurationSecondArray = PrimitiveArray<DurationSecondType>;
pub type DurationMillisecondArray = PrimitiveArray<DurationMillisecondType>;
pub type DurationMicrosecondArray = PrimitiveArray<DurationMicrosecondType>;
pub type DurationNanosecondArray = PrimitiveArray<DurationNanosecondType>;

pub use self::array::ListArrayOps;
pub use self::array::PrimitiveArrayOps;
Expand Down Expand Up @@ -133,6 +138,12 @@ pub type Time32SecondBufferBuilder = BufferBuilder<Time32SecondType>;
pub type Time32MillisecondBufferBuilder = BufferBuilder<Time32MillisecondType>;
pub type Time64MicrosecondBufferBuilder = BufferBuilder<Time64MicrosecondType>;
pub type Time64NanosecondBufferBuilder = BufferBuilder<Time64NanosecondType>;
// Buffer builders for the interval and duration types, mirroring the array aliases.
pub type IntervalYearMonthBufferBuilder = BufferBuilder<IntervalYearMonthType>;
pub type IntervalDayTimeBufferBuilder = BufferBuilder<IntervalDayTimeType>;
pub type DurationSecondBufferBuilder = BufferBuilder<DurationSecondType>;
pub type DurationMillisecondBufferBuilder = BufferBuilder<DurationMillisecondType>;
pub type DurationMicrosecondBufferBuilder = BufferBuilder<DurationMicrosecondType>;
pub type DurationNanosecondBufferBuilder = BufferBuilder<DurationNanosecondType>;

pub use self::builder::ArrayBuilder;
pub use self::builder::BinaryBuilder;
Expand Down Expand Up @@ -165,6 +176,12 @@ pub type Time32SecondBuilder = PrimitiveBuilder<Time32SecondType>;
pub type Time32MillisecondBuilder = PrimitiveBuilder<Time32MillisecondType>;
pub type Time64MicrosecondBuilder = PrimitiveBuilder<Time64MicrosecondType>;
pub type Time64NanosecondBuilder = PrimitiveBuilder<Time64NanosecondType>;
// Primitive array builders for the interval and duration types; these are the
// builders `StructBuilder` instantiates for `Interval`/`Duration` fields.
pub type IntervalYearMonthBuilder = PrimitiveBuilder<IntervalYearMonthType>;
pub type IntervalDayTimeBuilder = PrimitiveBuilder<IntervalDayTimeType>;
pub type DurationSecondBuilder = PrimitiveBuilder<DurationSecondType>;
pub type DurationMillisecondBuilder = PrimitiveBuilder<DurationMillisecondType>;
pub type DurationMicrosecondBuilder = PrimitiveBuilder<DurationMicrosecondType>;
pub type DurationNanosecondBuilder = PrimitiveBuilder<DurationNanosecondType>;

// --------------------- Array Equality ---------------------

Expand Down
1 change: 1 addition & 0 deletions rust/arrow/src/compute/kernels/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ use crate::error::{ArrowError, Result};
/// * To or from `StructArray`
/// * List to primitive
/// * Utf8 to boolean
/// * Interval and duration
pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
use DataType::*;
let from_type = array.data_type();
Expand Down
67 changes: 66 additions & 1 deletion rust/arrow/src/compute/kernels/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,24 @@ pub fn take(
DataType::Timestamp(Nanosecond, _) => {
take_primitive::<TimestampNanosecondType>(values, indices)
}
DataType::Interval(IntervalUnit::YearMonth) => {
take_primitive::<IntervalYearMonthType>(values, indices)
}
DataType::Interval(IntervalUnit::DayTime) => {
take_primitive::<IntervalDayTimeType>(values, indices)
}
DataType::Duration(TimeUnit::Second) => {
take_primitive::<DurationSecondType>(values, indices)
}
DataType::Duration(TimeUnit::Millisecond) => {
take_primitive::<DurationMillisecondType>(values, indices)
}
DataType::Duration(TimeUnit::Microsecond) => {
take_primitive::<DurationMicrosecondType>(values, indices)
}
DataType::Duration(TimeUnit::Nanosecond) => {
take_primitive::<DurationNanosecondType>(values, indices)
}
DataType::Utf8 => take_string(values, indices),
DataType::List(_) => take_list(values, indices),
DataType::Struct(fields) => {
Expand Down Expand Up @@ -291,6 +309,54 @@ mod tests {
vec![Some(-15), None, None, Some(-15), Some(2)],
);

// interval_year_month
test_take_primitive_arrays::<IntervalYearMonthType>(
vec![Some(0), None, Some(2), Some(-15), None],
&index,
None,
vec![Some(-15), None, None, Some(-15), Some(2)],
);

// interval_day_time
test_take_primitive_arrays::<IntervalDayTimeType>(
vec![Some(0), None, Some(2), Some(-15), None],
&index,
None,
vec![Some(-15), None, None, Some(-15), Some(2)],
);

// duration_second
test_take_primitive_arrays::<DurationSecondType>(
vec![Some(0), None, Some(2), Some(-15), None],
&index,
None,
vec![Some(-15), None, None, Some(-15), Some(2)],
);

// duration_millisecond
test_take_primitive_arrays::<DurationMillisecondType>(
vec![Some(0), None, Some(2), Some(-15), None],
&index,
None,
vec![Some(-15), None, None, Some(-15), Some(2)],
);

// duration_microsecond
test_take_primitive_arrays::<DurationMicrosecondType>(
vec![Some(0), None, Some(2), Some(-15), None],
&index,
None,
vec![Some(-15), None, None, Some(-15), Some(2)],
);

// duration_nanosecond
test_take_primitive_arrays::<DurationNanosecondType>(
vec![Some(0), None, Some(2), Some(-15), None],
&index,
None,
vec![Some(-15), None, None, Some(-15), Some(2)],
);

// float32
test_take_primitive_arrays::<Float32Type>(
vec![Some(0.0), None, Some(2.21), Some(-3.1), None],
Expand All @@ -308,7 +374,6 @@ mod tests {
);

// boolean
test_take_primitive_arrays::<BooleanType>(
vec![Some(false), None, Some(true), Some(false), None],
&index,
Expand Down
Loading

0 comments on commit b1600c1

Please sign in to comment.