Skip to content

Commit

Permalink
[SPARK-5445][SQL] Consolidate Java and Scala DSL static methods.
Browse files Browse the repository at this point in the history
It turns out that Scala does generate static methods for the ones defined in a companion object, so there is finally no need to keep api.java.dsl and api.scala.dsl separate.

Author: Reynold Xin <[email protected]>

Closes apache#4276 from rxin/dsl and squashes the following commits:

30aa611 [Reynold Xin] Add all files.
1a9d215 [Reynold Xin] [SPARK-5445][SQL] Consolidate Java and Scala DSL static methods.
  • Loading branch information
rxin committed Jan 29, 2015
1 parent f9e5694 commit 7156322
Show file tree
Hide file tree
Showing 24 changed files with 42 additions and 141 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package org.apache.spark.examples.sql

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._

// One method for defining the schema of an RDD is to make a case class with the desired column
// names and types.
Expand Down
2 changes: 1 addition & 1 deletion mllib/src/main/scala/org/apache/spark/ml/Transformer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import org.apache.spark.Logging
import org.apache.spark.annotation.AlphaComponent
import org.apache.spark.ml.param._
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.types._

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
import org.apache.spark.mllib.linalg.{BLAS, Vector, VectorUDT}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.sql._
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
import org.apache.spark.storage.StorageLevel

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import org.apache.spark.ml.param._
import org.apache.spark.mllib.feature
import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
import org.apache.spark.sql._
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.types.{StructField, StructType}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Column, DataFrame}
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.types.{DoubleType, FloatType, IntegerType, StructField, StructType}
import org.apache.spark.util.Utils
import org.apache.spark.util.collection.{OpenHashMap, OpenHashSet, SortDataFormat, Sorter}
Expand Down
4 changes: 2 additions & 2 deletions python/pyspark/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -2342,7 +2342,7 @@ def sum(self):

def _create_column_from_literal(literal):
sc = SparkContext._active_spark_context
return sc._jvm.org.apache.spark.sql.api.java.dsl.lit(literal)
return sc._jvm.org.apache.spark.sql.Dsl.lit(literal)


def _create_column_from_name(name):
Expand Down Expand Up @@ -2515,7 +2515,7 @@ def _(col):
jcol = col._jc
else:
jcol = _create_column_from_name(col)
jc = getattr(sc._jvm.org.apache.spark.sql.api.java.dsl, name)(jcol)
jc = getattr(sc._jvm.org.apache.spark.sql.Dsl, name)(jcol)
return Column(jc)
return staticmethod(_)

Expand Down
5 changes: 2 additions & 3 deletions sql/core/src/main/scala/org/apache/spark/sql/Column.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package org.apache.spark.sql

import scala.language.implicitConversions

import org.apache.spark.sql.api.scala.dsl.lit
import org.apache.spark.sql.Dsl.lit
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, Star}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.{Project, LogicalPlan}
Expand All @@ -28,8 +28,7 @@ import org.apache.spark.sql.types._

object Column {
/**
* Creates a [[Column]] based on the given column name.
* Same as [[api.scala.dsl.col]] and [[api.java.dsl.col]].
* Creates a [[Column]] based on the given column name. Same as [[Dsl.col]].
*/
def apply(colName: String): Column = new Column(colName)

Expand Down
3 changes: 1 addition & 2 deletions sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ import org.apache.spark.util.Utils
* }}}
*
* Once created, it can be manipulated using the various domain-specific-language (DSL) functions
* defined in: [[DataFrame]] (this class), [[Column]], [[api.scala.dsl]] for Scala DSL, and
* [[api.java.dsl]] for Java DSL.
* defined in: [[DataFrame]] (this class), [[Column]], and [[Dsl]].
*
* To select a column from the data frame, use the apply method:
* {{{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,43 +15,38 @@
* limitations under the License.
*/

package org.apache.spark.sql.api.scala
package org.apache.spark.sql

import scala.language.implicitConversions
import scala.reflect.runtime.universe.{TypeTag, typeTag}

import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._


/**
* Scala version of the domain specific functions available for [[DataFrame]].
*
* The Java-version is at [[api.java.dsl]].
* Domain specific functions available for [[DataFrame]].
*/
package object dsl {
// NOTE: Update also the Java version when we update this version.
object Dsl {

/** An implicit conversion that turns a Scala `Symbol` into a [[Column]]. */
implicit def symbolToColumn(s: Symbol): ColumnName = new ColumnName(s.name)

// /**
// * An implicit conversion that turns an RDD of products into a [[DataFrame]].
// *
// * This method requires an implicit SQLContext in scope. For example:
// * {{{
// * implicit val sqlContext: SQLContext = ...
// * val rdd: RDD[(Int, String)] = ...
// * rdd.toDataFrame // triggers the implicit here
// * }}}
// */
// implicit def rddToDataFrame[A <: Product: TypeTag](rdd: RDD[A])(implicit context: SQLContext)
// : DataFrame = {
// context.createDataFrame(rdd)
// }
// /**
// * An implicit conversion that turns an RDD of products into a [[DataFrame]].
// *
// * This method requires an implicit SQLContext in scope. For example:
// * {{{
// * implicit val sqlContext: SQLContext = ...
// * val rdd: RDD[(Int, String)] = ...
// * rdd.toDataFrame // triggers the implicit here
// * }}}
// */
// implicit def rddToDataFrame[A <: Product: TypeTag](rdd: RDD[A])(implicit context: SQLContext)
// : DataFrame = {
// context.createDataFrame(rdd)
// }

/** Converts $"col name" into a [[Column]]. */
implicit class StringToColumn(val sc: StringContext) extends AnyVal {
Expand Down
92 changes: 0 additions & 92 deletions sql/core/src/main/scala/org/apache/spark/sql/api/java/dsl.java

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package org.apache.spark.sql

import org.apache.spark.sql.TestData._
import org.apache.spark.sql.columnar._
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.test.TestSQLContext._
import org.apache.spark.storage.{StorageLevel, RDDBlockId}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql

import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.test.TestSQLContext
import org.apache.spark.sql.types.{BooleanType, IntegerType, StructField, StructType}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql

import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.types._

/* Implicits */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.spark.sql
import org.scalatest.BeforeAndAfterEach

import org.apache.spark.sql.TestData._
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.execution.joins._
import org.apache.spark.sql.test.TestSQLContext._
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import java.util.TimeZone

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.types._
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.spark.sql
import java.sql.Timestamp

import org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.test._

/* Implicits */
Expand Down
4 changes: 2 additions & 2 deletions sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql

import org.apache.spark.sql.api.scala.dsl.StringToColumn
import org.apache.spark.sql.Dsl.StringToColumn
import org.apache.spark.sql.test._

/* Implicits */
Expand Down Expand Up @@ -45,7 +45,7 @@ class UDFSuite extends QueryTest {
test("struct UDF") {
udf.register("returnStruct", (f1: String, f2: String) => FunctionResult(f1, f2))

val result=
val result =
sql("SELECT returnStruct('test', 'test2') as ret")
.select($"ret.f1").head().getString(0)
assert(result === "test")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.spark.sql
import scala.beans.{BeanInfo, BeanProperty}

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.test.TestSQLContext._
import org.apache.spark.sql.types._

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.columnar

import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.TestData._
import org.apache.spark.sql.catalyst.expressions.Row
import org.apache.spark.sql.test.TestSQLContext._
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution
import org.scalatest.FunSuite

import org.apache.spark.sql.{SQLConf, execution}
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.TestData._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans._
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import java.sql.{Date, Timestamp}

import org.apache.spark.sql.TestData._
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.json.JsonRDD.{compatibleType, enforceCorrectType}
import org.apache.spark.sql.test.TestSQLContext
import org.apache.spark.sql.test.TestSQLContext._
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import parquet.schema.{MessageType, MessageTypeParser}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.{DataFrame, QueryTest, SQLConf}
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.expressions.Row
import org.apache.spark.sql.test.TestSQLContext
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.spark.{SparkFiles, SparkException}
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.catalyst.plans.logical.Project
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.hive._
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.Row
import org.apache.spark.sql.api.scala.dsl._
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._

Expand Down

0 comments on commit 7156322

Please sign in to comment.