[SPARK-33438][SQL] Eagerly init objects with defined SQL Confs for command `set -v`

### What changes were proposed in this pull request?
In Spark, `set -v` is defined as "Queries all properties that are defined in the SQLConf of the sparkSession".
But other external modules also define properties and register them with SQLConf. Those properties cannot be
displayed by `set -v` until the defining conf object has been initialized (i.e., referenced at least once).
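
As a minimal sketch of the underlying behavior (the object, config key, and builder usage here are illustrative, not taken from this PR): a config entry declared inside a Scala `object` is only registered with SQLConf when that object's initializer runs, i.e. when something references the object for the first time.

```scala
import org.apache.spark.sql.internal.SQLConf

// Hypothetical external module that defines a SQL config.
object MyModuleConf {
  // Registration happens in the object's initializer, so it only runs once
  // MyModuleConf is referenced for the first time.
  val MY_FLAG = SQLConf.buildConf("spark.sql.myModule.flag")
    .doc("Example config defined by an external module.")
    .booleanConf
    .createWithDefault(false)
}

// Until MyModuleConf is touched somewhere, `set -v` (which reads SQLConf's
// entry registry) will not list spark.sql.myModule.flag.
```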

In this PR, I propose to eagerly initialize all the objects that register properties to SQLConf, so that `set -v`
always outputs the complete set of defined properties.

### Why are the changes needed?
Improve the `set -v` command to produce complete and deterministic results.

### Does this PR introduce _any_ user-facing change?
The `set -v` command will dump more configs.
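
For illustration, assuming a running `SparkSession` named `spark`, the now more complete output can also be inspected programmatically (the row limit below is arbitrary):

```scala
// `SET -v` returns a DataFrame of (key, value, meaning); after this change it also
// includes configs registered by lazily-initialized modules such as the Hive module.
spark.sql("SET -v").show(numRows = 1000, truncate = false)
```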

### How was this patch tested?
Existing tests.

Closes apache#30363 from linhongliu-db/set-v.

Authored-by: Linhong Liu <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
linhongliu-db authored and HyukjinKwon committed Feb 8, 2021
1 parent a854906 commit 037bfb2
Showing 2 changed files with 25 additions and 14 deletions.
@@ -25,6 +25,7 @@ import java.util.zip.Deflater
import scala.collection.JavaConverters._
import scala.collection.immutable
import scala.util.Try
import scala.util.control.NonFatal
import scala.util.matching.Regex

import org.apache.hadoop.fs.Path
@@ -35,6 +36,7 @@ import org.apache.spark.internal.config._
import org.apache.spark.internal.config.{IGNORE_MISSING_FILES => SPARK_IGNORE_MISSING_FILES}
import org.apache.spark.network.util.ByteUnit
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.analysis.{HintErrorLogger, Resolver}
import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode
import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
@@ -3829,6 +3831,27 @@ class SQLConf extends Serializable with Logging {
}
}

  private var definedConfsLoaded = false
  /**
   * Init [[StaticSQLConf]] and [[org.apache.spark.sql.hive.HiveUtils]] so that all the defined
   * SQL configurations will be registered to SQLConf.
   */
  private def loadDefinedConfs(): Unit = {
    if (!definedConfsLoaded) {
      definedConfsLoaded = true
      // Force to register static SQL configurations
      StaticSQLConf
      try {
        // Force to register SQL configurations from Hive module
        val symbol = ScalaReflection.mirror.staticModule("org.apache.spark.sql.hive.HiveUtils")
        ScalaReflection.mirror.reflectModule(symbol).instance
      } catch {
        case NonFatal(e) =>
          logWarning("SQL configurations from the Hive module are not loaded", e)
      }
    }
  }

  /**
   * Return all the configuration properties that have been set (i.e. not the default).
   * This creates a new copy of the config properties in the form of a Map.
@@ -3841,6 +3864,7 @@ class SQLConf extends Serializable with Logging {
   * definition contains key, defaultValue and doc.
   */
  def getAllDefinedConfs: Seq[(String, String, String, String)] = sqlConfEntries.synchronized {
    loadDefinedConfs()
    sqlConfEntries.values.asScala.filter(_.isPublic).map { entry =>
      val displayValue = Option(getConfString(entry.key, null)).getOrElse(entry.defaultValueString)
      (entry.key, displayValue, entry.doc, entry.version)
@@ -20,20 +20,17 @@ package org.apache.spark.sql.api.python
import java.io.InputStream
import java.nio.channels.Channels

import scala.util.control.NonFatal

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.api.python.PythonRDDServer
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.execution.{ExplainMode, QueryExecution}
import org.apache.spark.sql.execution.arrow.ArrowConverters
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.DataType

private[sql] object PythonSQLUtils extends Logging {
@@ -46,16 +43,6 @@ private[sql] object PythonSQLUtils extends Logging {

  private def listAllSQLConfigs(): Seq[(String, String, String, String)] = {
    val conf = new SQLConf()
    // Force to build static SQL configurations
    StaticSQLConf
    // Force to build SQL configurations from Hive module
    try {
      val symbol = ScalaReflection.mirror.staticModule("org.apache.spark.sql.hive.HiveUtils")
      ScalaReflection.mirror.reflectModule(symbol).instance
    } catch {
      case NonFatal(e) =>
        logWarning("Cannot generated sql configurations from hive module", e)
    }
    conf.getAllDefinedConfs
  }

