[SPARK-7118] [Python] Add the coalesce Spark SQL function available in PySpark

This patch adds a proxy call from PySpark to the Spark SQL coalesce function. It comes out of a discussion on dev@spark with rxin.
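
For reference, a minimal usage sketch once the patch is applied, assuming a local SparkContext and SQLContext are available (the app name is arbitrary, and the sample DataFrame mirrors the one in the added doctest):

from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.functions import coalesce, lit

sc = SparkContext(appName="coalesce-example")
sqlContext = SQLContext(sc)

# Two nullable columns; coalesce picks the first non-null value in each row
cDf = sqlContext.createDataFrame([(None, None), (1, None), (None, 2)], ("a", "b"))
cDf.select(coalesce(cDf["a"], cDf["b"])).show()   # rows: null, 1, 2
cDf.select(coalesce(cDf["a"], lit(0.0))).show()   # rows: 0.0, 1.0, 0.0

sc.stop()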

This contribution is my original work and I license the work to the project under the project's open source license.

Olivier.

Author: Olivier Girardot <[email protected]>

Closes apache#5698 from ogirardot/master and squashes the following commits:

d9a4439 [Olivier Girardot] SPARK-7118 Add the coalesce Spark SQL function available in PySpark
Olivier Girardot authored and rxin committed May 7, 2015
1 parent 9e2ffb1 commit 068c315
Showing 1 changed file with 37 additions and 0 deletions: python/pyspark/sql/functions.py
@@ -37,6 +37,7 @@
    'rand',
    'randn',
    'sparkPartitionId',
    'coalesce',
    'udf']


@@ -167,6 +168,42 @@ def approxCountDistinct(col, rsd=None):
    return Column(jc)


def coalesce(*cols):
    """Returns the first column that is not null.

    >>> cDf = sqlContext.createDataFrame([(None, None), (1, None), (None, 2)], ("a", "b"))
    >>> cDf.show()
    +----+----+
    |   a|   b|
    +----+----+
    |null|null|
    |   1|null|
    |null|   2|
    +----+----+

    >>> cDf.select(coalesce(cDf["a"], cDf["b"])).show()
    +-------------+
    |Coalesce(a,b)|
    +-------------+
    |         null|
    |            1|
    |            2|
    +-------------+

    >>> cDf.select('*', coalesce(cDf["a"], lit(0.0))).show()
    +----+----+---------------+
    |   a|   b|Coalesce(a,0.0)|
    +----+----+---------------+
    |null|null|            0.0|
    |   1|null|            1.0|
    |null|   2|            0.0|
    +----+----+---------------+
    """
    sc = SparkContext._active_spark_context
    jc = sc._jvm.functions.coalesce(_to_seq(sc, cols, _to_java_column))
    return Column(jc)


def countDistinct(col, *cols):
"""Returns a new :class:`Column` for distinct count of ``col`` or ``cols``.
