Skip to content

Commit

Permalink
[SPARK-35925][SQL] Support DayTimeIntervalType in width-bucket function
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Add support for `DayTimeIntervalType` in the `width_bucket` function.

### Why are the changes needed?
[SPARK-35925](https://issues.apache.org/jira/browse/SPARK-35925)
1. The `WIDTH_BUCKET` function assigns values to buckets (individual segments) in an equiwidth histogram.
2. DayTimeIntervalType is necessary as an input data type for `WIDTH_BUCKET`

### Does this PR introduce _any_ user-facing change?
Yes. The user can use `width_bucket` with DayTimeIntervalType.

### How was this patch tested?
Added a unit test to `MathExpressionsSuite` and new `width_bucket` queries to the `interval.sql` SQL tests.

Closes apache#34309 from Peng-Lei/SPARK-35925.

Authored-by: PengLei <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
  • Loading branch information
Peng-Lei authored and MaxGekk committed Oct 18, 2021
1 parent c29bb02 commit 21fa3ce
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1617,6 +1617,10 @@ object WidthBucket {
1
> SELECT _FUNC_(INTERVAL '1' YEAR, INTERVAL '0' YEAR, INTERVAL '10' YEAR, 10);
2
> SELECT _FUNC_(INTERVAL '0' DAY, INTERVAL '0' DAY, INTERVAL '10' DAY, 10);
1
> SELECT _FUNC_(INTERVAL '1' DAY, INTERVAL '0' DAY, INTERVAL '10' DAY, 10);
2
""",
since = "3.1.0",
group = "math_funcs")
Expand All @@ -1628,9 +1632,9 @@ case class WidthBucket(
extends QuaternaryExpression with ImplicitCastInputTypes with NullIntolerant {

override def inputTypes: Seq[AbstractDataType] = Seq(
TypeCollection(DoubleType, YearMonthIntervalType),
TypeCollection(DoubleType, YearMonthIntervalType),
TypeCollection(DoubleType, YearMonthIntervalType),
TypeCollection(DoubleType, YearMonthIntervalType, DayTimeIntervalType),
TypeCollection(DoubleType, YearMonthIntervalType, DayTimeIntervalType),
TypeCollection(DoubleType, YearMonthIntervalType, DayTimeIntervalType),
LongType)

override def checkInputDataTypes(): TypeCheckResult = {
Expand All @@ -1639,6 +1643,8 @@ case class WidthBucket(
(value.dataType, minValue.dataType, maxValue.dataType) match {
case (_: YearMonthIntervalType, _: YearMonthIntervalType, _: YearMonthIntervalType) =>
TypeCheckSuccess
case (_: DayTimeIntervalType, _: DayTimeIntervalType, _: DayTimeIntervalType) =>
TypeCheckSuccess
case _ =>
val types = Seq(value.dataType, minValue.dataType, maxValue.dataType)
TypeUtils.checkForSameTypeInputExpr(types, s"function $prettyName")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -740,4 +740,24 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(WidthBucket(Literal(v), Literal(s), Literal(e), Literal(n)), expected)
}
}

test("SPARK-35925: Support DayTimeIntervalType in width-bucket function") {
  // Durations of Long.MinValue / Long.MaxValue microseconds, used as extreme bounds.
  val minMicros = Duration.of(Long.MinValue, ChronoUnit.MICROS)
  val maxMicros = Duration.of(Long.MaxValue, ChronoUnit.MICROS)
  val zeroDays = Duration.ofDays(0)
  // Every case below asks for the same number of buckets.
  val numBuckets = 10L

  // Each entry is (value, lower bound, upper bound, expected bucket).
  val cases = Seq(
    // Value below the lower bound is expected in bucket 0.
    (Duration.ofDays(-1), zeroDays, Duration.ofDays(10), 0L),
    // Value equal to the lower bound is expected in bucket 1.
    (Duration.ofHours(0), zeroDays, Duration.ofDays(10), 1L),
    // Value above the upper bound is expected in bucket 11 (one past the 10 requested).
    (Duration.ofHours(11), Duration.ofHours(0), Duration.ofHours(10), 11L),
    (Duration.ofMinutes(1), Duration.ofMinutes(0), Duration.ofMinutes(60), 1L),
    (Duration.ofSeconds(-30), Duration.ofSeconds(-59), Duration.ofSeconds(60), 3L),
    // Bounds at the extremes of the Long microsecond range.
    (zeroDays, minMicros, maxMicros, 6L),
    (zeroDays, minMicros, zeroDays, 11L),
    (minMicros, minMicros, zeroDays, 1L),
    (Duration.ofDays(-1), zeroDays, maxMicros, 0L)
  )

  for ((value, lower, upper, expectedBucket) <- cases) {
    val expr = WidthBucket(Literal(value), Literal(lower), Literal(upper), Literal(numBuckets))
    checkEvaluation(expr, expectedBucket)
  }
}
}
2 changes: 2 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/interval.sql
Original file line number Diff line number Diff line change
Expand Up @@ -384,3 +384,5 @@ SELECT signum(INTERVAL '10' HOUR);
SELECT signum(INTERVAL '0 0:0:0' DAY TO SECOND);
SELECT width_bucket(INTERVAL '0' YEAR, INTERVAL '0' YEAR, INTERVAL '10' YEAR, 10);
SELECT width_bucket(INTERVAL '-1' YEAR, INTERVAL -'1-2' YEAR TO MONTH, INTERVAL '1-2' YEAR TO MONTH, 10);
SELECT width_bucket(INTERVAL '0' DAY, INTERVAL '0' DAY, INTERVAL '10' DAY, 10);
SELECT width_bucket(INTERVAL '-59' MINUTE, INTERVAL -'1 01' DAY TO HOUR, INTERVAL '1 2:3:4.001' DAY TO SECOND, 10);
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 284
-- Number of queries: 286


-- !query
Expand Down Expand Up @@ -2673,3 +2673,19 @@ SELECT width_bucket(INTERVAL '-1' YEAR, INTERVAL -'1-2' YEAR TO MONTH, INTERVAL
struct<width_bucket(INTERVAL '-1' YEAR, INTERVAL '-1-2' YEAR TO MONTH, INTERVAL '1-2' YEAR TO MONTH, 10):bigint>
-- !query output
1


-- !query
SELECT width_bucket(INTERVAL '0' DAY, INTERVAL '0' DAY, INTERVAL '10' DAY, 10)
-- !query schema
struct<width_bucket(INTERVAL '0' DAY, INTERVAL '0' DAY, INTERVAL '10' DAY, 10):bigint>
-- !query output
1


-- !query
SELECT width_bucket(INTERVAL '-59' MINUTE, INTERVAL -'1 01' DAY TO HOUR, INTERVAL '1 2:3:4.001' DAY TO SECOND, 10)
-- !query schema
struct<width_bucket(INTERVAL '-59' MINUTE, INTERVAL '-1 01' DAY TO HOUR, INTERVAL '1 02:03:04.001' DAY TO SECOND, 10):bigint>
-- !query output
5
18 changes: 17 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/interval.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 284
-- Number of queries: 286


-- !query
Expand Down Expand Up @@ -2662,3 +2662,19 @@ SELECT width_bucket(INTERVAL '-1' YEAR, INTERVAL -'1-2' YEAR TO MONTH, INTERVAL
struct<width_bucket(INTERVAL '-1' YEAR, INTERVAL '-1-2' YEAR TO MONTH, INTERVAL '1-2' YEAR TO MONTH, 10):bigint>
-- !query output
1


-- !query
SELECT width_bucket(INTERVAL '0' DAY, INTERVAL '0' DAY, INTERVAL '10' DAY, 10)
-- !query schema
struct<width_bucket(INTERVAL '0' DAY, INTERVAL '0' DAY, INTERVAL '10' DAY, 10):bigint>
-- !query output
1


-- !query
SELECT width_bucket(INTERVAL '-59' MINUTE, INTERVAL -'1 01' DAY TO HOUR, INTERVAL '1 2:3:4.001' DAY TO SECOND, 10)
-- !query schema
struct<width_bucket(INTERVAL '-59' MINUTE, INTERVAL '-1 01' DAY TO HOUR, INTERVAL '1 02:03:04.001' DAY TO SECOND, 10):bigint>
-- !query output
5

0 comments on commit 21fa3ce

Please sign in to comment.