[SPARK-9763][SQL] Minimize exposure of internal SQL classes.
There are a few changes in this pull request:

1. Moved all data sources to execution.datasources, except the public JDBC APIs.
2. In order to maintain backward compatibility from 1, added a backward compatibility translation map in data source resolution.
3. Moved ui and metric package into execution.
4. Added more documentation on some internal classes.
5. Renamed DataSourceRegister.format -> shortName.
6. Added "override" modifier on shortName.
7. Removed IntSQLMetric.

Author: Reynold Xin <[email protected]>

Closes apache#8056 from rxin/SPARK-9763 and squashes the following commits:

9df4801 [Reynold Xin] Removed hardcoded name in test cases.
d9babc6 [Reynold Xin] Shorten.
e484419 [Reynold Xin] Removed VisibleForTesting.
171b812 [Reynold Xin] MimaExcludes.
2041389 [Reynold Xin] Compile ...
79dda42 [Reynold Xin] Compile.
0818ba3 [Reynold Xin] Removed IntSQLMetric.
c46884f [Reynold Xin] Two more fixes.
f9aa88d [Reynold Xin] [SPARK-9763][SQL] Minimize exposure of internal SQL classes.

(cherry picked from commit 40ed2af)
Signed-off-by: Reynold Xin <[email protected]>
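For item 2, a minimal sketch of what such a translation could look like; the object and method names below are hypothetical and only illustrate remapping old provider names (as seen in the services file below) to their new execution.datasources locations.

// Hypothetical sketch of a backward-compatibility translation map for data source
// resolution: pre-move provider names resolve to the classes that now live under
// org.apache.spark.sql.execution.datasources.
object DataSourceCompat {
  private val backwardCompatibilityMap: Map[String, String] = Map(
    "org.apache.spark.sql.jdbc.DefaultSource" ->
      "org.apache.spark.sql.execution.datasources.jdbc.DefaultSource",
    "org.apache.spark.sql.json.DefaultSource" ->
      "org.apache.spark.sql.execution.datasources.json.DefaultSource",
    "org.apache.spark.sql.parquet.DefaultSource" ->
      "org.apache.spark.sql.execution.datasources.parquet.DefaultSource")

  /** Rewrites a user-supplied provider name to its post-refactoring location, if one exists. */
  def translate(provider: String): String =
    backwardCompatibilityMap.getOrElse(provider, provider)
}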
Showing 76 changed files with 1,114 additions and 966 deletions.
6 changes: 3 additions & 3 deletions
...core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -1,3 +1,3 @@
-org.apache.spark.sql.jdbc.DefaultSource
-org.apache.spark.sql.json.DefaultSource
-org.apache.spark.sql.parquet.DefaultSource
+org.apache.spark.sql.execution.datasources.jdbc.DefaultSource
+org.apache.spark.sql.execution.datasources.json.DefaultSource
+org.apache.spark.sql.execution.datasources.parquet.DefaultSource
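For orientation, this services file is the standard java.util.ServiceLoader registration mechanism: each line names a DataSourceRegister implementation so a short name (see shortName in the relocated DefaultSource below) can stand in for the fully qualified class name. A minimal, illustrative sketch of that kind of lookup, not Spark's actual resolution code:

import java.util.ServiceLoader

import scala.collection.JavaConverters._

import org.apache.spark.sql.sources.DataSourceRegister

// Illustrative only: scan every DataSourceRegister registered through
// META-INF/services and pick the one whose shortName matches, e.g. "jdbc".
object ShortNameLookup {
  def find(name: String): Option[DataSourceRegister] =
    ServiceLoader.load(classOf[DataSourceRegister]).asScala
      .find(_.shortName().equalsIgnoreCase(name))
}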
File renamed without changes.
File renamed without changes.
185 changes: 185 additions & 0 deletions
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DDLParser.scala
@@ -0,0 +1,185 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.execution.datasources

import scala.language.implicitConversions
import scala.util.matching.Regex

import org.apache.spark.Logging
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.catalyst.{TableIdentifier, AbstractSparkSQLParser}
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.types._


/**
 * A parser for foreign DDL commands.
 */
class DDLParser(parseQuery: String => LogicalPlan)
  extends AbstractSparkSQLParser with DataTypeParser with Logging {

  def parse(input: String, exceptionOnError: Boolean): LogicalPlan = {
    try {
      parse(input)
    } catch {
      case ddlException: DDLException => throw ddlException
      case _ if !exceptionOnError => parseQuery(input)
      case x: Throwable => throw x
    }
  }

  // `Keyword` is a convention of AbstractSparkSQLParser: all `Keyword` properties of this class
  // are scanned via reflection at runtime to construct the SqlLexical object.
  protected val CREATE = Keyword("CREATE")
  protected val TEMPORARY = Keyword("TEMPORARY")
  protected val TABLE = Keyword("TABLE")
  protected val IF = Keyword("IF")
  protected val NOT = Keyword("NOT")
  protected val EXISTS = Keyword("EXISTS")
  protected val USING = Keyword("USING")
  protected val OPTIONS = Keyword("OPTIONS")
  protected val DESCRIBE = Keyword("DESCRIBE")
  protected val EXTENDED = Keyword("EXTENDED")
  protected val AS = Keyword("AS")
  protected val COMMENT = Keyword("COMMENT")
  protected val REFRESH = Keyword("REFRESH")

  protected lazy val ddl: Parser[LogicalPlan] = createTable | describeTable | refreshTable

  protected def start: Parser[LogicalPlan] = ddl

  /**
   * `CREATE [TEMPORARY] TABLE avroTable [IF NOT EXISTS]
   * USING org.apache.spark.sql.avro
   * OPTIONS (path "../hive/src/test/resources/data/files/episodes.avro")`
   * or
   * `CREATE [TEMPORARY] TABLE avroTable(intField int, stringField string...) [IF NOT EXISTS]
   * USING org.apache.spark.sql.avro
   * OPTIONS (path "../hive/src/test/resources/data/files/episodes.avro")`
   * or
   * `CREATE [TEMPORARY] TABLE avroTable [IF NOT EXISTS]
   * USING org.apache.spark.sql.avro
   * OPTIONS (path "../hive/src/test/resources/data/files/episodes.avro")`
   * AS SELECT ...
   */
  protected lazy val createTable: Parser[LogicalPlan] = {
    // TODO: Support database.table.
    (CREATE ~> TEMPORARY.? <~ TABLE) ~ (IF ~> NOT <~ EXISTS).? ~ ident ~
      tableCols.? ~ (USING ~> className) ~ (OPTIONS ~> options).? ~ (AS ~> restInput).? ^^ {
      case temp ~ allowExisting ~ tableName ~ columns ~ provider ~ opts ~ query =>
        if (temp.isDefined && allowExisting.isDefined) {
          throw new DDLException(
            "a CREATE TEMPORARY TABLE statement does not allow IF NOT EXISTS clause.")
        }

        val options = opts.getOrElse(Map.empty[String, String])
        if (query.isDefined) {
          if (columns.isDefined) {
            throw new DDLException(
              "a CREATE TABLE AS SELECT statement does not allow column definitions.")
          }
          // When IF NOT EXISTS clause appears in the query, the save mode will be ignore.
          val mode = if (allowExisting.isDefined) {
            SaveMode.Ignore
          } else if (temp.isDefined) {
            SaveMode.Overwrite
          } else {
            SaveMode.ErrorIfExists
          }

          val queryPlan = parseQuery(query.get)
          CreateTableUsingAsSelect(tableName,
            provider,
            temp.isDefined,
            Array.empty[String],
            mode,
            options,
            queryPlan)
        } else {
          val userSpecifiedSchema = columns.flatMap(fields => Some(StructType(fields)))
          CreateTableUsing(
            tableName,
            userSpecifiedSchema,
            provider,
            temp.isDefined,
            options,
            allowExisting.isDefined,
            managedIfNoPath = false)
        }
    }
  }

  protected lazy val tableCols: Parser[Seq[StructField]] = "(" ~> repsep(column, ",") <~ ")"

  /*
   * describe [extended] table avroTable
   * This will display all columns of table `avroTable`, including column_name, column_type, and comment.
   */
  protected lazy val describeTable: Parser[LogicalPlan] =
    (DESCRIBE ~> opt(EXTENDED)) ~ (ident <~ ".").? ~ ident ^^ {
      case e ~ db ~ tbl =>
        val tblIdentifier = db match {
          case Some(dbName) =>
            Seq(dbName, tbl)
          case None =>
            Seq(tbl)
        }
        DescribeCommand(UnresolvedRelation(tblIdentifier, None), e.isDefined)
    }

  protected lazy val refreshTable: Parser[LogicalPlan] =
    REFRESH ~> TABLE ~> (ident <~ ".").? ~ ident ^^ {
      case maybeDatabaseName ~ tableName =>
        RefreshTable(TableIdentifier(tableName, maybeDatabaseName))
    }

  protected lazy val options: Parser[Map[String, String]] =
    "(" ~> repsep(pair, ",") <~ ")" ^^ { case s: Seq[(String, String)] => s.toMap }

  protected lazy val className: Parser[String] = repsep(ident, ".") ^^ { case s => s.mkString(".")}

  override implicit def regexToParser(regex: Regex): Parser[String] = acceptMatch(
    s"identifier matching regex $regex", {
      case lexical.Identifier(str) if regex.unapplySeq(str).isDefined => str
      case lexical.Keyword(str) if regex.unapplySeq(str).isDefined => str
    }
  )

  protected lazy val optionPart: Parser[String] = "[_a-zA-Z][_a-zA-Z0-9]*".r ^^ {
    case name => name
  }

  protected lazy val optionName: Parser[String] = repsep(optionPart, ".") ^^ {
    case parts => parts.mkString(".")
  }

  protected lazy val pair: Parser[(String, String)] =
    optionName ~ stringLit ^^ { case k ~ v => (k, v) }

  protected lazy val column: Parser[StructField] =
    ident ~ dataType ~ (COMMENT ~> stringLit).? ^^ { case columnName ~ typ ~ cm =>
      val meta = cm match {
        case Some(comment) =>
          new MetadataBuilder().putString(COMMENT.str.toLowerCase, comment).build()
        case None => Metadata.empty
      }

      StructField(columnName, typ, nullable = true, meta)
    }
}
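A minimal, illustrative sketch of driving this parser directly, based only on the constructor and parse signature shown above; the fallback function here is a stub standing in for Spark's real SQL parser:

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.datasources.DDLParser

object DDLParserExample {
  // Stub fallback: Spark normally passes its full SQL parser here so that
  // statements that are not data source DDL are still handled.
  private val fallback: String => LogicalPlan =
    sql => sys.error(s"not a data source DDL statement: $sql")

  private val ddlParser = new DDLParser(fallback)

  // Produces a CreateTableUsing logical plan; with exceptionOnError = false,
  // unrecognized input would be routed to the fallback instead of throwing.
  def parseExample(): LogicalPlan = ddlParser.parse(
    """CREATE TEMPORARY TABLE episodes
      |USING org.apache.spark.sql.avro
      |OPTIONS (path "src/test/resources/episodes.avro")""".stripMargin,
    exceptionOnError = true)
}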
64 changes: 64 additions & 0 deletions
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DefaultSource.scala
@@ -0,0 +1,64 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.execution.datasources

import java.util.Properties

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.execution.datasources.jdbc.{JDBCRelation, JDBCPartitioningInfo, DriverRegistry}
import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationProvider}


class DefaultSource extends RelationProvider with DataSourceRegister {

  override def shortName(): String = "jdbc"

  /** Returns a new base relation with the given parameters. */
  override def createRelation(
      sqlContext: SQLContext,
      parameters: Map[String, String]): BaseRelation = {
    val url = parameters.getOrElse("url", sys.error("Option 'url' not specified"))
    val driver = parameters.getOrElse("driver", null)
    val table = parameters.getOrElse("dbtable", sys.error("Option 'dbtable' not specified"))
    val partitionColumn = parameters.getOrElse("partitionColumn", null)
    val lowerBound = parameters.getOrElse("lowerBound", null)
    val upperBound = parameters.getOrElse("upperBound", null)
    val numPartitions = parameters.getOrElse("numPartitions", null)

    if (driver != null) DriverRegistry.register(driver)

    if (partitionColumn != null
      && (lowerBound == null || upperBound == null || numPartitions == null)) {
      sys.error("Partitioning incompletely specified")
    }

    val partitionInfo = if (partitionColumn == null) {
      null
    } else {
      JDBCPartitioningInfo(
        partitionColumn,
        lowerBound.toLong,
        upperBound.toLong,
        numPartitions.toInt)
    }
    val parts = JDBCRelation.columnPartition(partitionInfo)
    val properties = new Properties() // Additional properties that we will pass to getConnection
    parameters.foreach(kv => properties.setProperty(kv._1, kv._2))
    JDBCRelation(url, table, parts, properties)(sqlContext)
  }
}
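For context, a sketch of how this relocated provider is typically reached from the public API via DataFrameReader; the JDBC URL and table name are placeholders, and the partitioning options illustrate the all-or-nothing check enforced above:

import org.apache.spark.sql.{DataFrame, SQLContext}

object JdbcReadExample {
  // Placeholder URL and table; the partitioning options are optional but, as the
  // provider above enforces, must all be given when partitionColumn is set.
  def loadJdbcTable(sqlContext: SQLContext): DataFrame = {
    sqlContext.read
      .format("jdbc")
      .options(Map(
        "url" -> "jdbc:postgresql://localhost/testdb",
        "dbtable" -> "public.events",
        "partitionColumn" -> "id",
        "lowerBound" -> "0",
        "upperBound" -> "1000000",
        "numPartitions" -> "4"))
      .load()
  }
}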