Skip to content

Commit

Permalink
[SPARK-46725][SQL] Add DAYNAME function
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Added a DAYNAME function that returns the three-letter abbreviated day name for the specified date to:
- Scala API
- Python API
- R API
- Spark Connect Scala Client
- Spark Connect Python Client

### Why are the changes needed?

For parity with Snowflake

### Does this PR introduce _any_ user-facing change?

Yes, a new function, DAYNAME, is added.

### How was this patch tested?

Tested on new unit tests

### Was this patch authored or co-authored using generative AI tooling?

No

Closes apache#44758 from PetarVasiljevic-DB/function_daytime.

Lead-authored-by: Petar Vasiljevic <[email protected]>
Co-authored-by: PetarVasiljevic-DB <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
  • Loading branch information
2 people authored and MaxGekk committed Jan 21, 2024
1 parent 45ec744 commit f7f424d
Show file tree
Hide file tree
Showing 19 changed files with 170 additions and 0 deletions.
14 changes: 14 additions & 0 deletions R/pkg/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,20 @@ setMethod("monthname",
column(jc)
})

#' @details
#' \code{dayname}: Extracts the three-letter abbreviated day name from a
#' given date/timestamp/string.
#'
#' @rdname column_datetime_functions
#' @aliases dayname dayname,Column-method
#' @note dayname since 4.0.0
setMethod("dayname",
          signature(x = "Column"),
          function(x) {
            # Delegate to the JVM-side org.apache.spark.sql.functions.dayname.
            column(callJStatic("org.apache.spark.sql.functions", "dayname", x@jc))
          })

#' @details
#' \code{decode}: Computes the first argument into a string from a binary using the provided
#' character set.
Expand Down
4 changes: 4 additions & 0 deletions R/pkg/R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -1024,6 +1024,10 @@ setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
#' @name NULL
setGeneric("monthname", function(x) { standardGeneric("monthname") })

#' @rdname column_datetime_functions
#' @name NULL
setGeneric("dayname", function(x) { standardGeneric("dayname") })  # S4 generic; method defined in functions.R

#' @rdname column_string_functions
#' @name NULL
setGeneric("decode", function(x, charset) { standardGeneric("decode") })
Expand Down
1 change: 1 addition & 0 deletions R/pkg/tests/fulltests/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -2063,6 +2063,7 @@ test_that("date functions on a DataFrame", {
expect_equal(collect(select(df, year(df$b)))[, 1], c(2012, 2013, 2014))
expect_equal(collect(select(df, month(df$b)))[, 1], c(12, 12, 12))
expect_equal(collect(select(df, monthname(df$b)))[, 1], c("Dec", "Dec", "Dec"))
expect_equal(collect(select(df, dayname(df$b)))[, 1], c("Thu", "Sat", "Mon"))
expect_equal(collect(select(df, last_day(df$b)))[, 1],
c(as.Date("2012-12-31"), as.Date("2013-12-31"), as.Date("2014-12-31")))
expect_equal(collect(select(df, next_day(df$b, "MONDAY")))[, 1],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5960,6 +5960,15 @@ object functions {
def monthname(timeExp: Column): Column =
Column.fn("monthname", timeExp)

/**
 * Extracts the three-letter abbreviated day name from a given date/timestamp/string.
 *
 * @group datetime_funcs
 * @since 4.0.0
 */
def dayname(timeExp: Column): Column =
  Column.fn("dayname", timeExp)

//////////////////////////////////////////////////////////////////////////////////////////////
// Collection functions
//////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2129,6 +2129,10 @@ class PlanGenerationTestSuite
fn.monthname(fn.col("d"))
}

// Golden-file plan test: dayname applied to the date column "d".
temporalFunctionTest("dayname") {
  fn.dayname(fn.col("d"))
}

temporalFunctionTest("next_day") {
fn.next_day(fn.col("d"), "Mon")
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Project [dayname(d#0) AS dayname(d)#0]
+- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"common": {
"planId": "1"
},
"project": {
"input": {
"common": {
"planId": "0"
},
"localRelation": {
"schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
}
},
"expressions": [{
"unresolvedFunction": {
"functionName": "dayname",
"arguments": [{
"unresolvedAttribute": {
"unparsedIdentifier": "d"
}
}]
}
}]
}
}
Binary file not shown.
1 change: 1 addition & 0 deletions python/docs/source/reference/pyspark.sql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ Date and Timestamp Functions
datediff
datepart
day
dayname
dayofmonth
dayofweek
dayofyear
Expand Down
7 changes: 7 additions & 0 deletions python/pyspark/sql/connect/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2996,6 +2996,13 @@ def monthname(col: "ColumnOrName") -> Column:
monthname.__doc__ = pysparkfuncs.monthname.__doc__


def dayname(col: "ColumnOrName") -> Column:
    # Connect client binding: forwards to the server-side "dayname" function.
    return _invoke_function_over_columns("dayname", col)


# Reuse the canonical docstring from pyspark.sql.functions.dayname.
dayname.__doc__ = pysparkfuncs.dayname.__doc__


def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column:
return _invoke_function_over_columns("extract", field, source)

Expand Down
33 changes: 33 additions & 0 deletions python/pyspark/sql/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7408,6 +7408,39 @@ def monthname(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("monthname", col)


@_try_remote_functions
def dayname(col: "ColumnOrName") -> Column:
    """
    Date and Timestamp Function: Returns the three-letter abbreviated day name from the given date.

    .. versionadded:: 4.0.0

    Parameters
    ----------
    col : :class:`~pyspark.sql.Column` or str
        target date/timestamp column to work on.

    Returns
    -------
    :class:`~pyspark.sql.Column`
        the three-letter abbreviation of day name for date/timestamp (Mon, Tue, Wed...)

    See Also
    --------
    :meth:`pyspark.sql.functions.dayofweek`
    :meth:`pyspark.sql.functions.monthname`

    Examples
    --------
    Example 1: Basic usage of dayname function.

    >>> import pyspark.sql.functions as sf
    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
    >>> df.select(sf.dayname('dt').alias('dayname')).show()
    +-------+
    |dayname|
    +-------+
    |    Wed|
    +-------+
    """
    return _invoke_function_over_columns("dayname", col)


@_try_remote_functions
def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column:
"""
Expand Down
6 changes: 6 additions & 0 deletions python/pyspark/sql/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,12 @@ def test_monthname(self):
row = df.select(F.monthname(df.date)).first()
self.assertEqual(row[0], "Nov")

def test_dayname(self):
    # 2017-11-06 falls on a Monday.
    df = self.spark.createDataFrame([Row(date=datetime.datetime(2017, 11, 6))])
    self.assertEqual(df.select(F.dayname(df.date)).first()[0], "Mon")

# Test added for SPARK-37738; change Python API to accept both col & int as input
def test_date_add_function(self):
dt = datetime.date(2021, 12, 27)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,7 @@ object FunctionRegistry {
expression[UnixTimestamp]("unix_timestamp"),
expression[DayOfWeek]("dayofweek"),
expression[WeekDay]("weekday"),
expression[DayName]("dayname"),
expression[WeekOfYear]("weekofyear"),
expression[Year]("year"),
expression[TimeWindow]("window"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,25 @@ case class MonthName(child: Expression) extends GetDateField {
copy(child = newChild)
}

@ExpressionDescription(
  usage = "_FUNC_(date) - Returns the three-letter abbreviated day name from the given date.",
  examples = """
    Examples:
      > SELECT _FUNC_(DATE('2008-02-20'));
       Wed
  """,
  group = "datetime_funcs",
  since = "4.0.0")
case class DayName(child: Expression) extends GetDateField {
  // Evaluation delegates to DateTimeUtils.getDayName (funcName is referenced by codegen).
  override val func = DateTimeUtils.getDayName
  override val funcName = "getDayName"

  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
  // Unlike the integer-valued date fields, the result is a string such as "Wed".
  override def dataType: DataType = StringType
  override protected def withNewChildInternal(newChild: Expression): DayName =
    copy(child = newChild)
}

// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(timestamp, fmt) - Converts `timestamp` to a value of string in the format specified by the date format `fmt`.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,17 @@ object DateTimeUtils extends SparkDateTimeUtils {
UTF8String.fromString(monthName)
}

/**
 * Returns the three-letter abbreviated day name for the given number of days since 1970-01-01.
 */
def getDayName(days: Int): UTF8String = {
  // getWeekDay appears to be 0-based; java.time.DayOfWeek.of expects 1 (Monday) .. 7 (Sunday).
  val dayOfWeek = DayOfWeek.of(getWeekDay(days) + 1)
  UTF8String.fromString(
    dayOfWeek.getDisplayName(TextStyle.SHORT, DateFormatter.defaultLocale))
}

/**
* Adds months to a timestamp at the given time zone. It converts the input timestamp to a local
* timestamp at the given time zone, adds months, and converts the resulted local timestamp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,17 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkConsistencyBetweenInterpretedAndCodegen(MonthName, DateType)
}

test("DayName") {
  // A null date input must propagate to a null result.
  checkEvaluation(DayName(Literal.create(null, DateType)), null)
  checkEvaluation(DayName(Literal(d)), "Wed")
  checkEvaluation(DayName(Cast(Literal(date), DateType, UTC_OPT)), "Wed")
  checkEvaluation(DayName(Cast(Literal(ts), DateType, UTC_OPT)), "Fri")
  checkEvaluation(DayName(Cast(Literal("2011-05-06"), DateType, UTC_OPT)), "Fri")
  checkEvaluation(DayName(Cast(Literal(LocalDate.parse("2017-05-27")), DateType, UTC_OPT)), "Sat")
  // 1582-10-15 is the Gregorian calendar cutover date, exercising the proleptic calendar path.
  checkEvaluation(DayName(Cast(Literal(LocalDate.parse("1582-10-15")), DateType, UTC_OPT)), "Fri")
  checkConsistencyBetweenInterpretedAndCodegen(DayName, DateType)
}

test("DateFormat") {
Seq("legacy", "corrected").foreach { legacyParserPolicy =>
withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy) {
Expand Down
9 changes: 9 additions & 0 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5747,6 +5747,15 @@ object functions {
def monthname(timeExp: Column): Column =
Column.fn("monthname", timeExp)

/**
 * Extracts the three-letter abbreviated day name from a given date/timestamp/string.
 *
 * @param timeExp a date, timestamp, or string that can be cast to a date.
 * @group datetime_funcs
 * @since 4.0.0
 */
def dayname(timeExp: Column): Column =
  Column.fn("dayname", timeExp)

//////////////////////////////////////////////////////////////////////////////////////////////
// Collection functions
//////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@
| org.apache.spark.sql.catalyst.expressions.DatePartExpressionBuilder | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct<date_part(YEAR, TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
| org.apache.spark.sql.catalyst.expressions.DatePartExpressionBuilder | datepart | SELECT datepart('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct<datepart(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
| org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT date_sub('2016-07-30', 1) | struct<date_sub(2016-07-30, 1):date> |
| org.apache.spark.sql.catalyst.expressions.DayName | dayname | SELECT dayname(DATE('2008-02-20')) | struct<dayname(2008-02-20):string> |
| org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT day('2009-07-30') | struct<day(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DayOfMonth | dayofmonth | SELECT dayofmonth('2009-07-30') | struct<dayofmonth(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DayOfWeek | dayofweek | SELECT dayofweek('2009-07-30') | struct<dayofweek(2009-07-30):int> |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,18 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
Row("Apr", "Apr", "Apr"))
}

test("dayname") {
  val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")

  // DataFrame API form over date, string, and timestamp inputs.
  checkAnswer(
    df.select(dayname($"a"), dayname($"b"), dayname($"c")),
    Row("Wed", "Wed", "Mon"))

  // SQL expression form must agree with the DataFrame API.
  checkAnswer(
    df.selectExpr("dayname(a)", "dayname(b)", "dayname(c)"),
    Row("Wed", "Wed", "Mon"))
}

test("extract") {
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")

Expand Down

0 comments on commit f7f424d

Please sign in to comment.