Skip to content

Commit

Permalink
[SPARK-25402][SQL] Null handling in BooleanSimplification
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?
This PR is to fix the null handling in BooleanSimplification. In the rule BooleanSimplification, there are two cases that do not properly handle null values. The optimization is not right if either side is null. This PR is to fix them.

## How was this patch tested?
Added test cases

Closes apache#22390 from gatorsmile/fixBooleanSimplification.

Authored-by: gatorsmile <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
  • Loading branch information
gatorsmile authored and cloud-fan committed Sep 12, 2018
1 parent 9f5c5b4 commit 79cc597
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -263,10 +263,15 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper {
case TrueLiteral Or _ => TrueLiteral
case _ Or TrueLiteral => TrueLiteral

case a And b if Not(a).semanticEquals(b) => FalseLiteral
case a Or b if Not(a).semanticEquals(b) => TrueLiteral
case a And b if a.semanticEquals(Not(b)) => FalseLiteral
case a Or b if a.semanticEquals(Not(b)) => TrueLiteral
case a And b if Not(a).semanticEquals(b) =>
If(IsNull(a), Literal.create(null, a.dataType), FalseLiteral)
case a And b if a.semanticEquals(Not(b)) =>
If(IsNull(b), Literal.create(null, b.dataType), FalseLiteral)

case a Or b if Not(a).semanticEquals(b) =>
If(IsNull(a), Literal.create(null, a.dataType), TrueLiteral)
case a Or b if a.semanticEquals(Not(b)) =>
If(IsNull(b), Literal.create(null, b.dataType), TrueLiteral)

case a And b if a.semanticEquals(b) => a
case a Or b if a.semanticEquals(b) => a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.BooleanType

class BooleanSimplificationSuite extends PlanTest with PredicateHelper {

Expand All @@ -37,6 +38,7 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
Batch("Constant Folding", FixedPoint(50),
NullPropagation,
ConstantFolding,
SimplifyConditionals,
BooleanSimplification,
PruneFilters) :: Nil
}
Expand All @@ -48,6 +50,14 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
testRelation.output, Seq(Row(1, 2, 3, "abc"))
)

val testNotNullableRelation = LocalRelation('a.int.notNull, 'b.int.notNull, 'c.int.notNull,
'd.string.notNull, 'e.boolean.notNull, 'f.boolean.notNull, 'g.boolean.notNull,
'h.boolean.notNull)

val testNotNullableRelationWithData = LocalRelation.fromExternalRows(
testNotNullableRelation.output, Seq(Row(1, 2, 3, "abc"))
)

private def checkCondition(input: Expression, expected: LogicalPlan): Unit = {
val plan = testRelationWithData.where(input).analyze
val actual = Optimize.execute(plan)
Expand All @@ -61,6 +71,13 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
comparePlans(actual, correctAnswer)
}

private def checkConditionInNotNullableRelation(
input: Expression, expected: LogicalPlan): Unit = {
val plan = testNotNullableRelationWithData.where(input).analyze
val actual = Optimize.execute(plan)
comparePlans(actual, expected)
}

test("a && a => a") {
checkCondition(Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a)
checkCondition(Literal(1) < 'a && Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a)
Expand Down Expand Up @@ -174,10 +191,30 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper {
}

test("Complementation Laws") {
checkCondition('a && !'a, testRelation)
checkCondition(!'a && 'a, testRelation)
checkConditionInNotNullableRelation('e && !'e, testNotNullableRelation)
checkConditionInNotNullableRelation(!'e && 'e, testNotNullableRelation)

checkConditionInNotNullableRelation('e || !'e, testNotNullableRelationWithData)
checkConditionInNotNullableRelation(!'e || 'e, testNotNullableRelationWithData)
}

test("Complementation Laws - null handling") {
checkCondition('e && !'e,
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze)
checkCondition(!'e && 'e,
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), false)).analyze)

checkCondition('e || !'e,
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze)
checkCondition(!'e || 'e,
testRelationWithData.where(If('e.isNull, Literal.create(null, BooleanType), true)).analyze)
}

test("Complementation Laws - negative case") {
checkCondition('e && !'f, testRelationWithData.where('e && !'f).analyze)
checkCondition(!'f && 'e, testRelationWithData.where(!'f && 'e).analyze)

checkCondition('a || !'a, testRelationWithData)
checkCondition(!'a || 'a, testRelationWithData)
checkCondition('e || !'f, testRelationWithData.where('e || !'f).analyze)
checkCondition(!'f || 'e, testRelationWithData.where(!'f || 'e).analyze)
}
}
10 changes: 10 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2569,4 +2569,14 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
check(lit(2).cast("int"), $"c" === 2, Seq(Row(1, 1, 2, 0), Row(1, 1, 2, 1)))
check(lit(2).cast("int"), $"c" =!= 2, Seq())
}

test("SPARK-25402 Null handling in BooleanSimplification") {
val schema = StructType.fromDDL("a boolean, b int")
val rows = Seq(Row(null, 1))

val rdd = sparkContext.parallelize(rows)
val df = spark.createDataFrame(rdd, schema)

checkAnswer(df.where("(NOT a) OR a"), Seq.empty)
}
}

0 comments on commit 79cc597

Please sign in to comment.