Skip to content

Commit

Permalink
[SPARK-15825] [SQL] Fix SMJ invalid results
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?
Code-generated `SortMergeJoin` returned wrong results when structs were used as join keys. The problem was (eventually) traced back to the use of the wrong row reference when comparing structs.

## How was this patch tested?
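A regression test that joins on struct keys ("SPARK-15822 - test structs as keys") was added to `InnerJoinSuite`.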

Author: Herman van Hovell <[email protected]>

Closes apache#13589 from hvanhovell/SPARK-15822.
  • Loading branch information
hvanhovell authored and davies committed Jun 10, 2016
1 parent 026eb90 commit e05a2fe
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ class CodegenContext {
addNewFunction(compareFunc, funcCode)
s"this.$compareFunc($c1, $c2)"
case schema: StructType =>
INPUT_ROW = "i"
val comparisons = GenerateOrdering.genComparisons(this, schema)
val compareFunc = freshName("compareStruct")
val funcCode: String =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,4 +271,19 @@ class InnerJoinSuite extends SparkPlanTest with SharedSQLContext {
)
)
}

{
  // Regression case: an inner join whose join key is a struct column.

  // Three rows with key = struct(id, id) and value = id. Kept as a `def` so that
  // every use evaluates the expression again instead of sharing one instance.
  // NOTE(review): presumably this keeps the two join sides from resolving to the
  // same underlying attributes -- confirm before changing it to a `val`.
  def structKeyed: DataFrame =
    spark.range(3).selectExpr("struct(id, id) as key", "id as value")

  // NOTE(review): presumably lazy so that `spark` is not touched while the suite
  // is still being constructed -- confirm.
  lazy val leftSide =
    structKeyed.selectExpr("key", "concat('L', value) as value").alias("left")
  lazy val rightSide =
    structKeyed.selectExpr("key", "concat('R', value) as value").alias("right")

  // Each key is expected to match exactly once, pairing the 'L'-prefixed value
  // from the left side with the 'R'-prefixed value from the right side.
  val expectedMatches = Seq(
    (Row(0, 0), "L0", Row(0, 0), "R0"),
    (Row(1, 1), "L1", Row(1, 1), "R1"),
    (Row(2, 2), "L2", Row(2, 2), "R2"))

  testInnerJoin(
    "SPARK-15822 - test structs as keys",
    leftSide,
    rightSide,
    // Equi-join condition on the struct-typed key column of both sides.
    () => (leftSide.col("key") === rightSide.col("key")).expr,
    expectedMatches)
}
}

0 comments on commit e05a2fe

Please sign in to comment.