Skip to content

Commit

Permalink
[SPARK-49632][SQL][FOLLOW-UP] Fix suggestion for to_date function
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Changes the function suggested on failure from `try_to_timestamp` to `try_to_date` for the `to_date` function.

### Why are the changes needed?
In the original PR apache#49227 we removed the ANSI suggestion and left only a suggestion of which `try` function to use. In the case of `to_date`, suggesting `try_to_date` is more appropriate.

### Does this PR introduce _any_ user-facing change?
Yes

### How was this patch tested?
Existing tests cover the error message change.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#49290 from mihailom-db/cannot-parse-timestamp-follow-up.

Authored-by: Mihailo Milosevic <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
  • Loading branch information
mihailom-db authored and HyukjinKwon committed Dec 26, 2024
1 parent 4ad7f3d commit be2da52
Show file tree
Hide file tree
Showing 19 changed files with 88 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@
},
"CANNOT_PARSE_TIMESTAMP" : {
"message" : [
"<message>. Use `try_to_timestamp` to tolerate invalid input string and return NULL instead."
"<message>. Use <func> to tolerate invalid input string and return NULL instead."
],
"sqlState" : "22007"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1129,6 +1129,7 @@ case class GetTimestamp(
left: Expression,
right: Expression,
override val dataType: DataType,
override val suggestedFuncOnFail: String = "try_to_timestamp",
timeZoneId: Option[String] = None,
failOnError: Boolean = SQLConf.get.ansiEnabled) extends ToTimestamp {

Expand Down Expand Up @@ -1267,6 +1268,7 @@ object TryToTimestampExpressionBuilder extends ExpressionBuilder {
abstract class ToTimestamp
extends BinaryExpression with TimestampFormatterHelper with ExpectsInputTypes {

val suggestedFuncOnFail: String = "try_to_timestamp"
def failOnError: Boolean

// The result of the conversion to timestamp is microseconds divided by this factor.
Expand Down Expand Up @@ -1321,9 +1323,9 @@ abstract class ToTimestamp
}
} catch {
case e: DateTimeException if failOnError =>
throw QueryExecutionErrors.ansiDateTimeParseError(e)
throw QueryExecutionErrors.ansiDateTimeParseError(e, suggestedFuncOnFail)
case e: ParseException if failOnError =>
throw QueryExecutionErrors.ansiDateTimeParseError(e)
throw QueryExecutionErrors.ansiDateTimeParseError(e, suggestedFuncOnFail)
case e if isParseError(e) => null
}
}
Expand All @@ -1334,7 +1336,7 @@ abstract class ToTimestamp
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val javaType = CodeGenerator.javaType(dataType)
val parseErrorBranch: String = if (failOnError) {
"throw QueryExecutionErrors.ansiDateTimeParseError(e);"
s"throw QueryExecutionErrors.ansiDateTimeParseError(e, \"${suggestedFuncOnFail}\");"
} else {
s"${ev.isNull} = true;"
}
Expand Down Expand Up @@ -2100,8 +2102,8 @@ case class ParseToDate(
extends RuntimeReplaceable with ImplicitCastInputTypes with TimeZoneAwareExpression {

override lazy val replacement: Expression = format.map { f =>
Cast(GetTimestamp(left, f, TimestampType, timeZoneId, ansiEnabled), DateType, timeZoneId,
EvalMode.fromBoolean(ansiEnabled))
Cast(GetTimestamp(left, f, TimestampType, "try_to_date", timeZoneId, ansiEnabled), DateType,
timeZoneId, EvalMode.fromBoolean(ansiEnabled))
}.getOrElse(Cast(left, DateType, timeZoneId,
EvalMode.fromBoolean(ansiEnabled))) // backwards compatibility

Expand Down Expand Up @@ -2179,7 +2181,7 @@ case class ParseToTimestamp(
extends RuntimeReplaceable with ImplicitCastInputTypes with TimeZoneAwareExpression {

override lazy val replacement: Expression = format.map { f =>
GetTimestamp(left, f, dataType, timeZoneId, failOnError = failOnError)
GetTimestamp(left, f, dataType, "try_to_timestamp", timeZoneId, failOnError = failOnError)
}.getOrElse(Cast(left, dataType, timeZoneId, ansiEnabled = failOnError))

def this(left: Expression, format: Expression) = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,11 +265,13 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
summary = "")
}

def ansiDateTimeParseError(e: Exception): SparkDateTimeException = {
def ansiDateTimeParseError(e: Exception, suggestedFunc: String): SparkDateTimeException = {
new SparkDateTimeException(
errorClass = "CANNOT_PARSE_TIMESTAMP",
messageParameters = Map(
"message" -> e.getMessage),
"message" -> e.getMessage,
"func" -> toSQLId(suggestedFunc)
),
context = Array.empty,
summary = "")
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [cast(gettimestamp(s#0, yyyy-MM-dd, TimestampType, Some(America/Los_Angeles), false) as date) AS to_date(s, yyyy-MM-dd)#0]
Project [cast(gettimestamp(s#0, yyyy-MM-dd, TimestampType, try_to_date, Some(America/Los_Angeles), false) as date) AS to_date(s, yyyy-MM-dd)#0]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [gettimestamp(g#0, g#0, TimestampType, Some(America/Los_Angeles), false) AS to_timestamp_ltz(g, g)#0]
Project [gettimestamp(g#0, g#0, TimestampType, try_to_timestamp, Some(America/Los_Angeles), false) AS to_timestamp_ltz(g, g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [gettimestamp(g#0, g#0, TimestampNTZType, Some(America/Los_Angeles), false) AS to_timestamp_ntz(g, g)#0]
Project [gettimestamp(g#0, g#0, TimestampNTZType, try_to_timestamp, Some(America/Los_Angeles), false) AS to_timestamp_ntz(g, g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, Some(America/Los_Angeles), false) AS to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS)#0]
Project [gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, try_to_timestamp, Some(America/Los_Angeles), false) AS to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS)#0]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [gettimestamp(g#0, g#0, TimestampType, Some(America/Los_Angeles), false) AS try_to_timestamp(g, g)#0]
Project [gettimestamp(g#0, g#0, TimestampType, try_to_timestamp, Some(America/Los_Angeles), false) AS try_to_timestamp(g, g)#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [unix_date(cast(gettimestamp(s#0, yyyy-MM-dd, TimestampType, Some(America/Los_Angeles), false) as date)) AS unix_date(to_date(s, yyyy-MM-dd))#0]
Project [unix_date(cast(gettimestamp(s#0, yyyy-MM-dd, TimestampType, try_to_date, Some(America/Los_Angeles), false) as date)) AS unix_date(to_date(s, yyyy-MM-dd))#0]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [unix_micros(gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, Some(America/Los_Angeles), false)) AS unix_micros(to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS))#0L]
Project [unix_micros(gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, try_to_timestamp, Some(America/Los_Angeles), false)) AS unix_micros(to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS))#0L]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [unix_millis(gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, Some(America/Los_Angeles), false)) AS unix_millis(to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS))#0L]
Project [unix_millis(gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, try_to_timestamp, Some(America/Los_Angeles), false)) AS unix_millis(to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS))#0L]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
Project [unix_seconds(gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, Some(America/Los_Angeles), false)) AS unix_seconds(to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS))#0L]
Project [unix_seconds(gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, try_to_timestamp, Some(America/Los_Angeles), false)) AS unix_seconds(to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS))#0L]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
1 change: 1 addition & 0 deletions sql/core/src/test/resources/sql-tests/results/date.sql.out
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_date`",
"message" : "Invalid date 'February 29' as '1970' is not a leap year"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_date`",
"message" : "Unparseable date: \"02-29\""
}
}
Expand Down Expand Up @@ -1584,6 +1585,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11:12.\""
}
}
Expand All @@ -1599,6 +1601,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11:12.0\""
}
}
Expand All @@ -1614,6 +1617,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11:12.1\""
}
}
Expand All @@ -1629,6 +1633,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11:12.12\""
}
}
Expand All @@ -1644,6 +1649,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11:12.123UTC\""
}
}
Expand All @@ -1659,6 +1665,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11:12.1234\""
}
}
Expand All @@ -1674,6 +1681,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11:12.12345CST\""
}
}
Expand All @@ -1689,6 +1697,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11:12.123456PST\""
}
}
Expand All @@ -1704,6 +1713,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11:12.1234567PST\""
}
}
Expand All @@ -1719,6 +1729,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"123456 2019-10-06 10:11:12.123456PST\""
}
}
Expand All @@ -1734,6 +1745,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"223456 2019-10-06 10:11:12.123456PST\""
}
}
Expand All @@ -1749,6 +1761,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11:12.1234\""
}
}
Expand All @@ -1764,6 +1777,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11:12.123\""
}
}
Expand All @@ -1779,6 +1793,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11:12\""
}
}
Expand All @@ -1794,6 +1809,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11:12.12\""
}
}
Expand All @@ -1809,6 +1825,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 10:11\""
}
}
Expand All @@ -1824,6 +1841,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06S10:11:12.12345\""
}
}
Expand All @@ -1839,6 +1857,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"12.12342019-10-06S10:11\""
}
}
Expand All @@ -1854,6 +1873,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"12.1232019-10-06S10:11\""
}
}
Expand All @@ -1869,6 +1889,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"12.1232019-10-06S10:11\""
}
}
Expand All @@ -1884,6 +1905,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"12.1234019-10-06S10:11\""
}
}
Expand Down Expand Up @@ -1955,6 +1977,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"02-29\""
}
}
Expand Down Expand Up @@ -2185,6 +2208,7 @@ org.apache.spark.SparkDateTimeException
"errorClass" : "CANNOT_PARSE_TIMESTAMP",
"sqlState" : "22007",
"messageParameters" : {
"func" : "`try_to_timestamp`",
"message" : "Unparseable date: \"2019-10-06 A\""
}
}
Expand Down
Loading

0 comments on commit be2da52

Please sign in to comment.