[SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of `quoteIfNeeded`

### What changes were proposed in this pull request?
There are two implementations of the same name-quoting logic: `org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quote` and `OrcFiltersBase.quoteAttributeNameIfNeeded`. This PR consolidates them into a single `quoteIfNeeded` in `CatalogV2Implicits`.
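
For reference, a minimal sketch of the consolidated helper (its body matches the `quoteIfNeeded` kept in `CatalogV2Implicits` in the diff below; the wrapping object and sample inputs are illustrative only):

```scala
// Minimal sketch of the consolidated helper, mirroring the body kept in
// CatalogV2Implicits.quoteIfNeeded (see the diff below). The wrapping object
// and the sample inputs are illustrative, not part of the change.
object QuoteIfNeededSketch {
  // Backtick-quote a name part when it contains a dot or a backtick,
  // escaping embedded backticks by doubling them.
  def quoteIfNeeded(part: String): String = {
    if (part.contains(".") || part.contains("`")) {
      s"`${part.replace("`", "``")}`"
    } else {
      part
    }
  }

  def main(args: Array[String]): Unit = {
    println(quoteIfNeeded("col"))     // col
    println(quoteIfNeeded("a.b"))     // `a.b`
    println(quoteIfNeeded("we`ird"))  // `we``ird`
  }
}
```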

### Why are the changes needed?
Simplify the codebase.

### Does this PR introduce any user-facing change?
No

### How was this patch tested?
Existing UTs.

Closes apache#27814 from dbtsai/SPARK-31058.

Authored-by: DB Tsai <[email protected]>
Signed-off-by: DB Tsai <[email protected]>
dbtsai committed Mar 6, 2020
1 parent 8d5ef2f commit fe126a6
Showing 7 changed files with 34 additions and 45 deletions.

@@ -55,7 +55,7 @@ public String name() {
   @Override
   public String toString() {
     return Stream.concat(Stream.of(namespace), Stream.of(name))
-        .map(CatalogV2Implicits::quote)
+        .map(CatalogV2Implicits::quoteIfNeeded)
         .collect(Collectors.joining("."));
   }
 
@@ -84,15 +84,15 @@ private[sql] object CatalogV2Implicits {
   }
 
   implicit class NamespaceHelper(namespace: Array[String]) {
-    def quoted: String = namespace.map(quote).mkString(".")
+    def quoted: String = namespace.map(quoteIfNeeded).mkString(".")
   }
 
   implicit class IdentifierHelper(ident: Identifier) {
     def quoted: String = {
       if (ident.namespace.nonEmpty) {
-        ident.namespace.map(quote).mkString(".") + "." + quote(ident.name)
+        ident.namespace.map(quoteIfNeeded).mkString(".") + "." + quoteIfNeeded(ident.name)
       } else {
-        quote(ident.name)
+        quoteIfNeeded(ident.name)
       }
     }
 
@@ -122,10 +122,10 @@ private[sql] object CatalogV2Implicits {
         s"$quoted is not a valid TableIdentifier as it has more than 2 name parts.")
     }
 
-    def quoted: String = parts.map(quote).mkString(".")
+    def quoted: String = parts.map(quoteIfNeeded).mkString(".")
   }
 
-  def quote(part: String): String = {
+  def quoteIfNeeded(part: String): String = {
     if (part.contains(".") || part.contains("`")) {
       s"`${part.replace("`", "``")}`"
     } else {
 
@@ -24,6 +24,7 @@ import scala.collection.mutable
 
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded
 import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform}
 import org.apache.spark.sql.types.StructType
 
@@ -35,20 +36,12 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table {
     def quoted: String = {
       identifier.database match {
         case Some(db) =>
-          Seq(db, identifier.table).map(quote).mkString(".")
+          Seq(db, identifier.table).map(quoteIfNeeded).mkString(".")
         case _ =>
-          quote(identifier.table)
+          quoteIfNeeded(identifier.table)
 
       }
     }
-
-    private def quote(part: String): String = {
-      if (part.contains(".") || part.contains("`")) {
-        s"`${part.replace("`", "``")}`"
-      } else {
-        part
-      }
-    }
   }
 
   def catalogTable: CatalogTable = v1Table
 
@@ -118,7 +118,7 @@ class ResolveSessionCatalog(
           .map(_._2.dataType)
           .getOrElse {
             throw new AnalysisException(
-              s"ALTER COLUMN cannot find column ${quote(colName)} in v1 table. " +
+              s"ALTER COLUMN cannot find column ${quoteIfNeeded(colName)} in v1 table. " +
                 s"Available: ${v1Table.schema.fieldNames.mkString(", ")}")
           }
       }
 
@@ -36,16 +36,6 @@ trait OrcFiltersBase {
     }
   }
 
-  // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters
-  // in order to distinguish predicate pushdown for nested columns.
-  protected[sql] def quoteAttributeNameIfNeeded(name: String) : String = {
-    if (!name.contains("`") && name.contains(".")) {
-      s"`$name`"
-    } else {
-      name
-    }
-  }
-
   /**
   * Return true if this is a searchable type in ORC.
   * Both CharType and VarcharType are cleaned at AstBuilder.
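
As a side note (illustration only, not part of the change), the two helpers being consolidated differ slightly on names containing backticks; a minimal sketch using the bodies shown in this diff:

```scala
// Side-by-side sketch of the two quoting helpers from this diff.
// The wrapping object and sample inputs are illustrative only.
object QuotingComparisonSketch {
  // Removed: OrcFiltersBase.quoteAttributeNameIfNeeded
  def quoteAttributeNameIfNeeded(name: String): String =
    if (!name.contains("`") && name.contains(".")) s"`$name`" else name

  // Kept: CatalogV2Implicits.quoteIfNeeded
  def quoteIfNeeded(part: String): String =
    if (part.contains(".") || part.contains("`")) s"`${part.replace("`", "``")}`" else part

  def main(args: Array[String]): Unit = {
    println(quoteAttributeNameIfNeeded("a.b"))  // `a.b`
    println(quoteIfNeeded("a.b"))               // `a.b`
    println(quoteAttributeNameIfNeeded("a`b"))  // a`b    (left as-is)
    println(quoteIfNeeded("a`b"))               // `a``b` (quoted, backtick doubled)
  }
}
```
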
@@ -24,6 +24,7 @@ import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder
 import org.apache.orc.storage.serde2.io.HiveDecimalWritable
 
 import org.apache.spark.SparkException
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded
 import org.apache.spark.sql.sources.Filter
 import org.apache.spark.sql.types._
 
@@ -218,47 +219,49 @@ private[sql] object OrcFilters extends OrcFiltersBase {
     // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()`
     // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be
     // wrapped by a "parent" predicate (`And`, `Or`, or `Not`).
+    // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters
+    // in order to distinguish predicate pushdown for nested columns.
     expression match {
       case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().equals(quotedName, getType(attribute), castedValue).end())
 
       case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().nullSafeEquals(quotedName, getType(attribute), castedValue).end())
 
       case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().lessThan(quotedName, getType(attribute), castedValue).end())
 
       case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().lessThanEquals(quotedName, getType(attribute), castedValue).end())
 
       case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startNot().lessThanEquals(quotedName, getType(attribute), castedValue).end())
 
       case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startNot().lessThan(quotedName, getType(attribute), castedValue).end())
 
       case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         Some(builder.startAnd().isNull(quotedName, getType(attribute)).end())
 
       case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         Some(builder.startNot().isNull(quotedName, getType(attribute)).end())
 
       case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute)))
         Some(builder.startAnd().in(quotedName, getType(attribute),
           castedValues.map(_.asInstanceOf[AnyRef]): _*).end())
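
For illustration only (not part of this commit), a minimal sketch that builds an ORC `SearchArgument` by hand with the factory imported above. It shows the two points the comments above make: every leaf predicate must be wrapped in a parent predicate (`startAnd()`/`end()` here), and a dotted column name is handed to ORC already backtick-quoted so it is not read as a nested field. The literal value and `PredicateLeaf.Type.LONG` are arbitrary example choices:

```scala
// Illustrative sketch: hand-built SearchArgument using the ORC API imported above.
import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgumentFactory}

object SearchArgumentSketch {
  def main(args: Array[String]): Unit = {
    val quotedName = "`a.b`"  // what quoteIfNeeded("a.b") produces
    val sarg = SearchArgumentFactory
      .newBuilder()
      .startAnd()  // mandatory parent predicate around the leaf
      .equals(quotedName, PredicateLeaf.Type.LONG, java.lang.Long.valueOf(1L))
      .end()
      .build()
    println(sarg)
  }
}
```
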
@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable
 
 import org.apache.spark.SparkException
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded
 import org.apache.spark.sql.sources.Filter
 import org.apache.spark.sql.types._
 
@@ -218,47 +219,49 @@ private[sql] object OrcFilters extends OrcFiltersBase {
     // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()`
     // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be
     // wrapped by a "parent" predicate (`And`, `Or`, or `Not`).
+    // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters
+    // in order to distinguish predicate pushdown for nested columns.
     expression match {
       case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().equals(quotedName, getType(attribute), castedValue).end())
 
       case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().nullSafeEquals(quotedName, getType(attribute), castedValue).end())
 
       case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().lessThan(quotedName, getType(attribute), castedValue).end())
 
       case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().lessThanEquals(quotedName, getType(attribute), castedValue).end())
 
       case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startNot().lessThanEquals(quotedName, getType(attribute), castedValue).end())
 
       case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startNot().lessThan(quotedName, getType(attribute), castedValue).end())
 
       case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         Some(builder.startAnd().isNull(quotedName, getType(attribute)).end())
 
       case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         Some(builder.startNot().isNull(quotedName, getType(attribute)).end())
 
       case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute)))
         Some(builder.startAnd().in(quotedName, getType(attribute),
           castedValues.map(_.asInstanceOf[AnyRef]): _*).end())
