Skip to content

Commit

Permalink
[SPARK-46725][SQL] Add DAYNAME function
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Added a DAYNAME function that returns the three-letter abbreviated day name for the specified date to:
- Scala API
- Python API
- R API
- Spark Connect Scala Client
- Spark Connect Python Client

### Why are the changes needed?

For parity with Snowflake

### Does this PR introduce _any_ user-facing change?

Yes, a new function, DAYNAME, is added.

### How was this patch tested?

Tested on new unit tests

### Was this patch authored or co-authored using generative AI tooling?

No

Closes apache#44758 from PetarVasiljevic-DB/function_daytime.

Lead-authored-by: Petar Vasiljevic <[email protected]>
Co-authored-by: PetarVasiljevic-DB <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
  • Loading branch information
2 people authored and MaxGekk committed Jan 21, 2024
1 parent 45ec744 commit f7f424d
Show file tree
Hide file tree
Showing 19 changed files with 170 additions and 0 deletions.
14 changes: 14 additions & 0 deletions R/pkg/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,20 @@ setMethod("monthname",
column(jc)
})

#' @details
#' \code{dayname}: Extracts the three-letter abbreviated day name from a
#' given date/timestamp/string.
#'
#' @rdname column_datetime_functions
#' @aliases dayname dayname,Column-method
#' @note dayname since 4.0.0
setMethod("dayname",
          signature(x = "Column"),
          function(x) {
            # Delegate to the JVM-side org.apache.spark.sql.functions.dayname.
            column(callJStatic("org.apache.spark.sql.functions", "dayname", x@jc))
          })

#' @details
#' \code{decode}: Computes the first argument into a string from a binary using the provided
#' character set.
Expand Down
4 changes: 4 additions & 0 deletions R/pkg/R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -1024,6 +1024,10 @@ setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
#' @name NULL
setGeneric("monthname", function(x) { standardGeneric("monthname") })

#' @rdname column_datetime_functions
#' @name NULL
setGeneric("dayname", function(x) { standardGeneric("dayname") })  # S4 generic; method defined in functions.R

#' @rdname column_string_functions
#' @name NULL
setGeneric("decode", function(x, charset) { standardGeneric("decode") })
Expand Down
1 change: 1 addition & 0 deletions R/pkg/tests/fulltests/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -2063,6 +2063,7 @@ test_that("date functions on a DataFrame", {
expect_equal(collect(select(df, year(df$b)))[, 1], c(2012, 2013, 2014))
expect_equal(collect(select(df, month(df$b)))[, 1], c(12, 12, 12))
expect_equal(collect(select(df, monthname(df$b)))[, 1], c("Dec", "Dec", "Dec"))
expect_equal(collect(select(df, dayname(df$b)))[, 1], c("Thu", "Sat", "Mon"))
expect_equal(collect(select(df, last_day(df$b)))[, 1],
c(as.Date("2012-12-31"), as.Date("2013-12-31"), as.Date("2014-12-31")))
expect_equal(collect(select(df, next_day(df$b, "MONDAY")))[, 1],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5960,6 +5960,15 @@ object functions {
def monthname(timeExp: Column): Column =
Column.fn("monthname", timeExp)

/**
 * Extracts the three-letter abbreviated day name from a given date/timestamp/string.
 *
 * @group datetime_funcs
 * @since 4.0.0
 */
def dayname(timeExp: Column): Column =
  Column.fn("dayname", timeExp)

//////////////////////////////////////////////////////////////////////////////////////////////
// Collection functions
//////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2129,6 +2129,10 @@ class PlanGenerationTestSuite
fn.monthname(fn.col("d"))
}

// Golden-file plan test: dayname applied to the date column "d".
temporalFunctionTest("dayname") {
  fn.dayname(fn.col("d"))
}

temporalFunctionTest("next_day") {
fn.next_day(fn.col("d"), "Mon")
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [dayname(d#0) AS dayname(d)#0]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
}
},
"expressions": [{
"unresolvedFunction": {
"functionName": "dayname",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "d"
}
}]
}
}]
}
}
Binary file not shown.
1 change: 1 addition & 0 deletions python/docs/source/reference/pyspark.sql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ Date and Timestamp Functions
datediff
datepart
day
dayname
dayofmonth
dayofweek
dayofyear
Expand Down
7 changes: 7 additions & 0 deletions python/pyspark/sql/connect/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2996,6 +2996,13 @@ def monthname(col: "ColumnOrName") -> Column:
monthname.__doc__ = pysparkfuncs.monthname.__doc__


def dayname(col: "ColumnOrName") -> Column:
    # Connect client binding: forwards to the server-side "dayname" function.
    return _invoke_function_over_columns("dayname", col)


# Reuse the canonical docstring from pyspark.sql.functions.dayname.
dayname.__doc__ = pysparkfuncs.dayname.__doc__


def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column:
return _invoke_function_over_columns("extract", field, source)

Expand Down
33 changes: 33 additions & 0 deletions python/pyspark/sql/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7408,6 +7408,39 @@ def monthname(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("monthname", col)


@_try_remote_functions
def dayname(col: "ColumnOrName") -> Column:
    """
    Date and Timestamp Function: Returns the three-letter abbreviated day name from the given date.

    .. versionadded:: 4.0.0

    Parameters
    ----------
    col : :class:`~pyspark.sql.Column` or str
        target date/timestamp column to work on.

    Returns
    -------
    :class:`~pyspark.sql.Column`
        the three-letter abbreviation of day name for date/timestamp (Mon, Tue, Wed...)

    See Also
    --------
    :meth:`pyspark.sql.functions.dayofweek`
    :meth:`pyspark.sql.functions.monthname`

    Examples
    --------
    Example 1: Basic usage of dayname function.

    >>> import pyspark.sql.functions as sf
    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
    >>> df.select(sf.dayname('dt').alias('dayname')).show()
    +-------+
    |dayname|
    +-------+
    |    Wed|
    +-------+
    """
    return _invoke_function_over_columns("dayname", col)


@_try_remote_functions
def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column:
"""
Expand Down
6 changes: 6 additions & 0 deletions python/pyspark/sql/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,12 @@ def test_monthname(self):
row = df.select(F.monthname(df.date)).first()
self.assertEqual(row[0], "Nov")

def test_dayname(self):
    # 2017-11-06 falls on a Monday.
    df = self.spark.createDataFrame([Row(date=datetime.datetime(2017, 11, 6))])
    self.assertEqual(df.select(F.dayname(df.date)).first()[0], "Mon")

# Test added for SPARK-37738; change Python API to accept both col & int as input
def test_date_add_function(self):
dt = datetime.date(2021, 12, 27)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,7 @@ object FunctionRegistry {
expression[UnixTimestamp]("unix_timestamp"),
expression[DayOfWeek]("dayofweek"),
expression[WeekDay]("weekday"),
expression[DayName]("dayname"),
expression[WeekOfYear]("weekofyear"),
expression[Year]("year"),
expression[TimeWindow]("window"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,25 @@ case class MonthName(child: Expression) extends GetDateField {
copy(child = newChild)
}

@ExpressionDescription(
  usage = "_FUNC_(date) - Returns the three-letter abbreviated day name from the given date.",
  examples = """
    Examples:
      > SELECT _FUNC_(DATE('2008-02-20'));
       Wed
  """,
  group = "datetime_funcs",
  since = "4.0.0")
case class DayName(child: Expression) extends GetDateField {
  // Evaluation delegates to DateTimeUtils.getDayName (funcName is referenced by codegen).
  override val func = DateTimeUtils.getDayName
  override val funcName = "getDayName"

  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
  // Unlike the integer-valued date fields, the result is a string such as "Wed".
  override def dataType: DataType = StringType
  override protected def withNewChildInternal(newChild: Expression): DayName =
    copy(child = newChild)
}

// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(timestamp, fmt) - Converts `timestamp` to a value of string in the format specified by the date format `fmt`.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,17 @@ object DateTimeUtils extends SparkDateTimeUtils {
UTF8String.fromString(monthName)
}

/**
 * Returns the three-letter abbreviated day name for the given number of days since 1970-01-01.
 */
def getDayName(days: Int): UTF8String = {
  // getWeekDay appears to be 0-based; java.time.DayOfWeek.of expects 1 (Monday) .. 7 (Sunday).
  val dayOfWeek = DayOfWeek.of(getWeekDay(days) + 1)
  UTF8String.fromString(
    dayOfWeek.getDisplayName(TextStyle.SHORT, DateFormatter.defaultLocale))
}

/**
* Adds months to a timestamp at the given time zone. It converts the input timestamp to a local
* timestamp at the given time zone, adds months, and converts the resulted local timestamp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,17 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkConsistencyBetweenInterpretedAndCodegen(MonthName, DateType)
}

test("DayName") {
  // A null date input must propagate to a null result.
  checkEvaluation(DayName(Literal.create(null, DateType)), null)
  checkEvaluation(DayName(Literal(d)), "Wed")
  checkEvaluation(DayName(Cast(Literal(date), DateType, UTC_OPT)), "Wed")
  checkEvaluation(DayName(Cast(Literal(ts), DateType, UTC_OPT)), "Fri")
  checkEvaluation(DayName(Cast(Literal("2011-05-06"), DateType, UTC_OPT)), "Fri")
  checkEvaluation(DayName(Cast(Literal(LocalDate.parse("2017-05-27")), DateType, UTC_OPT)), "Sat")
  // 1582-10-15 is the Gregorian calendar cutover date, exercising the proleptic calendar path.
  checkEvaluation(DayName(Cast(Literal(LocalDate.parse("1582-10-15")), DateType, UTC_OPT)), "Fri")
  checkConsistencyBetweenInterpretedAndCodegen(DayName, DateType)
}

test("DateFormat") {
Seq("legacy", "corrected").foreach { legacyParserPolicy =>
withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy) {
Expand Down
9 changes: 9 additions & 0 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5747,6 +5747,15 @@ object functions {
def monthname(timeExp: Column): Column =
Column.fn("monthname", timeExp)

/**
 * Extracts the three-letter abbreviated day name from a given date/timestamp/string.
 *
 * @param timeExp a date, timestamp, or string that can be cast to a date.
 * @group datetime_funcs
 * @since 4.0.0
 */
def dayname(timeExp: Column): Column =
  Column.fn("dayname", timeExp)

//////////////////////////////////////////////////////////////////////////////////////////////
// Collection functions
//////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@
| org.apache.spark.sql.catalyst.expressions.DatePartExpressionBuilder | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct<date_part(YEAR, TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
| org.apache.spark.sql.catalyst.expressions.DatePartExpressionBuilder | datepart | SELECT datepart('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct<datepart(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
| org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT date_sub('2016-07-30', 1) | struct<date_sub(2016-07-30, 1):date> |
| org.apache.spark.sql.catalyst.expressions.DayName | dayname | SELECT dayname(DATE('2008-02-20')) | struct<dayname(2008-02-20):string> |
| org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT day('2009-07-30') | struct<day(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DayOfMonth | dayofmonth | SELECT dayofmonth('2009-07-30') | struct<dayofmonth(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DayOfWeek | dayofweek | SELECT dayofweek('2009-07-30') | struct<dayofweek(2009-07-30):int> |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,18 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
Row("Apr", "Apr", "Apr"))
}

test("dayname") {
  val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")

  // DataFrame API form over date, string, and timestamp inputs.
  checkAnswer(
    df.select(dayname($"a"), dayname($"b"), dayname($"c")),
    Row("Wed", "Wed", "Mon"))

  // SQL expression form must agree with the DataFrame API.
  checkAnswer(
    df.selectExpr("dayname(a)", "dayname(b)", "dayname(c)"),
    Row("Wed", "Wed", "Mon"))
}

test("extract") {
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")

Expand Down

0 comments on commit f7f424d

Please sign in to comment.