Skip to content

Commit

Permalink
ARROW-8360: [C++][Gandiva] Fixes date32 support for date/time functions
Browse files Browse the repository at this point in the history
Gandiva date/time functions such as extractYear only operate on
millisecond values, so passing a date32 (a day count) to them
produces wrong results.

This patch adds a new function, castDATE_date32, that converts date32
to date64. Callers should apply castDATE to date32 input before any
date/time function, e.g.: extractYear(castDATE(date32_days)).

Signed-off-by: Yuan Zhou <[email protected]>

Closes apache#6861 from zhouyuan/wip_gandiva_func_date32 and squashes the following commits:

a3019a3 <Yuan Zhou>  Fixes date32 support for date/time functions

Authored-by: Yuan Zhou <[email protected]>
Signed-off-by: Praveen <[email protected]>
  • Loading branch information
zhouyuan authored and kszucs committed Apr 20, 2020
1 parent 8b5b8e0 commit cc2b8cc
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 13 deletions.
1 change: 1 addition & 0 deletions cpp/src/gandiva/function_registry_arithmetic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ std::vector<NativeFunction> GetArithmeticFunctionRegistry() {

UNARY_SAFE_NULL_IF_NULL(castDATE, {}, int64, date64),
UNARY_SAFE_NULL_IF_NULL(castDATE, {}, int32, date32),
UNARY_SAFE_NULL_IF_NULL(castDATE, {}, date32, date64),

// add/sub/multiply/divide/mod
BINARY_SYMMETRIC_FN(add, {}), BINARY_SYMMETRIC_FN(subtract, {}),
Expand Down
5 changes: 4 additions & 1 deletion cpp/src/gandiva/precompiled/time.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ extern "C" {

// Expand inner macro for all date types.
#define DATE_TYPES(INNER) \
INNER(date32) \
INNER(date64) \
INNER(timestamp)

Expand Down Expand Up @@ -454,9 +453,13 @@ DATE_TRUNC_FUNCTIONS(timestamp)

FORCE_INLINE
gdv_date64 castDATE_int64(gdv_int64 in) { return in; }

// Identity cast: hands back the integer `in` unchanged, typed as gdv_date32.
FORCE_INLINE
gdv_date32 castDATE_int32(gdv_int32 in) {
  const gdv_date32 as_date32 = in;
  return as_date32;
}

// Converts a date32 value (a day count) into a date64 value by scaling it by
// MILLIS_IN_DAY.
//
// `days` is widened to gdv_date64 *before* the multiplication so the product is
// evaluated in the result type. Multiplying first and converting afterwards
// would do the arithmetic in the narrower operand type (presumably 32-bit when
// MILLIS_IN_DAY is an int-typed constant), which overflows for day counts of
// only a few dozen.
FORCE_INLINE
gdv_date64 castDATE_date32(gdv_date32 days) {
  return static_cast<gdv_date64>(days) * MILLIS_IN_DAY;
}

static int days_in_month[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};

bool IsLastDayOfMonth(const EpochTimePoint& tp) {
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/gandiva/precompiled/time_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ TEST(TestTime, TestCastDate) {
EXPECT_EQ(castDATE_utf8(context_ptr, "71-1-1", 6), 31536000000);
EXPECT_EQ(castDATE_utf8(context_ptr, "71-45-1", 7), 0);
EXPECT_EQ(castDATE_utf8(context_ptr, "71-12-XX", 8), 0);

EXPECT_EQ(castDATE_date32(1), 86400000);
}

TEST(TestTime, TestCastTimestamp) {
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/gandiva/precompiled/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ gdv_date64 castDATE_utf8(int64_t execution_context, const char* input, gdv_int32

gdv_date64 castDATE_int64(gdv_int64 date);

gdv_date64 castDATE_date32(gdv_date32 date);

gdv_date32 castDATE_int32(gdv_int32 date);

gdv_timestamp castTIMESTAMP_utf8(int64_t execution_context, const char* input,
Expand Down
75 changes: 63 additions & 12 deletions cpp/src/gandiva/tests/date_time_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <math.h>
#include <time.h>
#include "arrow/memory_pool.h"
#include "gandiva/precompiled/time_constants.h"
#include "gandiva/projector.h"
#include "gandiva/tests/test_util.h"
#include "gandiva/tree_expr_builder.h"
Expand Down Expand Up @@ -88,6 +89,26 @@ int64_t MillisSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_
return static_cast<int64_t>(ts - base_line) * 1000 + millis;
}

// Returns the number of whole days between base_line and the given calendar
// time, where the calendar time is resolved to a time_t by mktime().
//
// yy is the full year (e.g. 2000) and mm is 1-based; both are translated to
// the struct tm conventions below. millis is added to the elapsed time before
// it is converted to days. The result is rounded toward negative infinity, so
// an instant earlier than base_line falls on the preceding day rather than
// being truncated toward day 0. Logs a fatal error if mktime() fails.
int32_t DaysSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_t hr,
                  int32_t min, int32_t sec, int32_t millis) {
  // Value-initialisation zeroes every member, including the ones not set here.
  struct tm given_ts = {};
  given_ts.tm_year = (yy - 1900);
  given_ts.tm_mon = (mm - 1);
  given_ts.tm_mday = dd;
  given_ts.tm_hour = hr;
  given_ts.tm_min = min;
  given_ts.tm_sec = sec;

  time_t ts = mktime(&given_ts);
  if (ts == static_cast<time_t>(-1)) {
    ARROW_LOG(FATAL) << "mktime() failed";
  }

  // time_t is an arithmetic type on both POSIX and Windows, we can simply
  // subtract to get a duration in seconds. Widen to int64_t before scaling to
  // milliseconds (as MillisSince does) so the product cannot overflow a
  // narrower time_t.
  const int64_t elapsed_millis = static_cast<int64_t>(ts - base_line) * 1000 + millis;
  const int64_t millis_per_day = MILLIS_IN_DAY;

  // Integer division truncates toward zero; step back one day when a negative
  // duration leaves a remainder so the result is a floor.
  int64_t whole_days = elapsed_millis / millis_per_day;
  if (elapsed_millis % millis_per_day < 0) {
    --whole_days;
  }
  return static_cast<int32_t>(whole_days);
}

TEST_F(TestProjector, TestIsNull) {
auto d0 = field("d0", date64());
auto t0 = field("t0", time32(arrow::TimeUnit::MILLI));
Expand Down Expand Up @@ -170,30 +191,47 @@ TEST_F(TestProjector, TestDate32IsNull) {

TEST_F(TestProjector, TestDateTime) {
auto field0 = field("f0", date64());
auto field1 = field("f1", date32());
auto field2 = field("f2", timestamp(arrow::TimeUnit::MILLI));
auto schema = arrow::schema({field0, field2});
auto schema = arrow::schema({field0, field1, field2});

// output fields
auto field_year = field("yy", int64());
auto field_month = field("mm", int64());
auto field_day = field("dd", int64());
auto field_hour = field("hh", int64());
auto field_date64 = field("date64", date64());

// extract year and month from date
auto date2year_expr =
TreeExprBuilder::MakeExpression("extractYear", {field0}, field_year);
auto date2month_expr =
TreeExprBuilder::MakeExpression("extractMonth", {field0}, field_month);

// extract year and month from date32, cast to date64 first
auto node_f1 = TreeExprBuilder::MakeField(field1);
auto date32_to_date64_func =
TreeExprBuilder::MakeFunction("castDATE", {node_f1}, date64());

auto date64_2year_func =
TreeExprBuilder::MakeFunction("extractYear", {date32_to_date64_func}, int64());
auto date64_2year_expr = TreeExprBuilder::MakeExpression(date64_2year_func, field_year);

auto date64_2month_func =
TreeExprBuilder::MakeFunction("extractMonth", {date32_to_date64_func}, int64());
auto date64_2month_expr =
TreeExprBuilder::MakeExpression(date64_2month_func, field_month);

// extract month and day from timestamp
auto ts2month_expr =
TreeExprBuilder::MakeExpression("extractMonth", {field2}, field_month);
auto ts2day_expr = TreeExprBuilder::MakeExpression("extractDay", {field2}, field_day);

std::shared_ptr<Projector> projector;
auto status = Projector::Make(
schema, {date2year_expr, date2month_expr, ts2month_expr, ts2day_expr},
TestConfiguration(), &projector);
auto status = Projector::Make(schema,
{date2year_expr, date2month_expr, date64_2year_expr,
date64_2month_expr, ts2month_expr, ts2day_expr},
TestConfiguration(), &projector);
ASSERT_TRUE(status.ok());

// Create a row-batch with some sample data
Expand All @@ -207,6 +245,13 @@ TEST_F(TestProjector, TestDateTime) {
auto array0 =
MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), field0_data, validity);

std::vector<int32_t> field1_data = {DaysSince(epoch, 2000, 1, 1, 5, 0, 0, 0),
DaysSince(epoch, 1999, 12, 31, 5, 0, 0, 0),
DaysSince(epoch, 2015, 6, 30, 20, 0, 0, 0),
DaysSince(epoch, 2015, 7, 1, 20, 0, 0, 0)};
auto array1 =
MakeArrowTypeArray<arrow::Date32Type, int32_t>(date32(), field1_data, validity);

std::vector<int64_t> field2_data = {MillisSince(epoch, 1999, 12, 31, 5, 0, 0, 0),
MillisSince(epoch, 2000, 1, 2, 5, 0, 0, 0),
MillisSince(epoch, 2015, 7, 1, 1, 0, 0, 0),
Expand All @@ -216,27 +261,33 @@ TEST_F(TestProjector, TestDateTime) {
arrow::timestamp(arrow::TimeUnit::MILLI), field2_data, validity);

// expected output
// date 2 year and date 2 month
auto exp_yy_from_date = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity);
auto exp_mm_from_date = MakeArrowArrayInt64({1, 12, 6, 7}, validity);
// date 2 year and date 2 month for date64
auto exp_yy_from_date64 = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity);
auto exp_mm_from_date64 = MakeArrowArrayInt64({1, 12, 6, 7}, validity);

// date 2 year and date 2 month for date32
auto exp_yy_from_date32 = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity);
auto exp_mm_from_date32 = MakeArrowArrayInt64({1, 12, 6, 7}, validity);

// ts 2 month and ts 2 day
auto exp_mm_from_ts = MakeArrowArrayInt64({12, 1, 7, 6}, validity);
auto exp_dd_from_ts = MakeArrowArrayInt64({31, 2, 1, 29}, validity);

// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array2});
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});

// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok());

// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date, outputs.at(0));
EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date, outputs.at(1));
EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_ts, outputs.at(2));
EXPECT_ARROW_ARRAY_EQUALS(exp_dd_from_ts, outputs.at(3));
EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date64, outputs.at(0));
EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date64, outputs.at(1));
EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date32, outputs.at(2));
EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date32, outputs.at(3));
EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_ts, outputs.at(4));
EXPECT_ARROW_ARRAY_EQUALS(exp_dd_from_ts, outputs.at(5));
}

TEST_F(TestProjector, TestTime) {
Expand Down

0 comments on commit cc2b8cc

Please sign in to comment.