Skip to content

Commit

Permalink
[SQL] Update Hive test harness for Hive 12 and 13
Browse files Browse the repository at this point in the history
As part of the upgrade I also copy the newest version of the query tests, and whitelist a bunch of new ones that are now passing.

Author: Michael Armbrust <[email protected]>

Closes apache#2936 from marmbrus/fix13tests and squashes the following commits:

d9cbdab [Michael Armbrust] Remove user specific tests
65801cd [Michael Armbrust] style and rat
8f6b09a [Michael Armbrust] Update test harness to work with both Hive 12 and 13.
f044843 [Michael Armbrust] Update Hive query tests and golden files to 0.13
  • Loading branch information
marmbrus authored and JoshRosen committed Oct 25, 2014
1 parent 898b22a commit 3a845d3
Show file tree
Hide file tree
Showing 8,166 changed files with 38,307 additions and 47,487 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
1 change: 1 addition & 0 deletions .rat-excludes
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ sbt-launch-lib.bash
plugins.sbt
work
.*\.q
.*\.qv
golden
test.out/*
.*iml
Expand Down
2 changes: 1 addition & 1 deletion dev/run-tests
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ CURRENT_BLOCK=$BLOCK_SPARK_UNIT_TESTS
# If the Spark SQL tests are enabled, run the tests with the Hive profiles enabled.
# This must be a single argument, as it is.
if [ -n "$_RUN_SQL_TESTS" ]; then
SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive -Phive-0.12.0"
SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"
fi

if [ -n "$_SQL_TESTS_ONLY" ]; then
Expand Down
6 changes: 5 additions & 1 deletion project/SparkBuild.scala
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,11 @@ object Hive {
|import org.apache.spark.sql.hive._
|import org.apache.spark.sql.hive.test.TestHive._
|import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin,
cleanupCommands in console := "sparkContext.stop()"
cleanupCommands in console := "sparkContext.stop()",
// Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce
// in order to generate golden files. This is only required for developers who are adding new
// new query tests.
fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") }
)

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ object HiveTypeCoercion {
*/
trait HiveTypeCoercion {

import HiveTypeCoercion._

val typeCoercionRules =
PropagateTypes ::
ConvertNaNs ::
Expand Down Expand Up @@ -340,6 +342,13 @@ trait HiveTypeCoercion {
// Skip nodes who's children have not been resolved yet.
case e if !e.childrenResolved => e

case a @ CreateArray(children) if !a.resolved =>
val commonType = a.childTypes.reduce(
(a,b) =>
findTightestCommonType(a,b).getOrElse(StringType))
CreateArray(
children.map(c => if (c.dataType == commonType) c else Cast(c, commonType)))

// Promote SUM, SUM DISTINCT and AVERAGE to largest types to prevent overflows.
case s @ Sum(e @ DecimalType()) => s // Decimal is already the biggest.
case Sum(e @ IntegralType()) if e.dataType != LongType => Sum(Cast(e, LongType))
Expand All @@ -356,6 +365,10 @@ trait HiveTypeCoercion {
Average(Cast(e, LongType))
case Average(e @ FractionalType()) if e.dataType != DoubleType =>
Average(Cast(e, DoubleType))

// Hive lets you do aggregation of timestamps... for some reason
case Sum(e @ TimestampType()) => Sum(Cast(e, DoubleType))
case Average(e @ TimestampType()) => Average(Cast(e, DoubleType))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,28 @@ case class GetField(child: Expression, fieldName: String) extends UnaryExpressio

override def toString = s"$child.$fieldName"
}

/**
* Returns an Array containing the evaluation of all children expressions.
*/
case class CreateArray(children: Seq[Expression]) extends Expression {
override type EvaluatedType = Any

lazy val childTypes = children.map(_.dataType).distinct

override lazy val resolved =
childrenResolved && childTypes.size <= 1

override def dataType: DataType = {
assert(resolved, s"Invalid dataType of mixed ArrayType ${childTypes.mkString(",")}")
ArrayType(childTypes.headOption.getOrElse(NullType))
}

override def nullable: Boolean = false

override def eval(input: Row): Any = {
children.map(_.eval(input))
}

override def toString = s"Array(${children.mkString(",")})"
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import org.apache.spark.sql.catalyst.optimizer.{Optimizer, DefaultOptimizer}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.catalyst.types.DataType
import org.apache.spark.sql.columnar.InMemoryRelation
import org.apache.spark.sql.execution.{SparkStrategies, _}
import org.apache.spark.sql.json._
import org.apache.spark.sql.parquet.ParquetRelation
Expand Down
Loading

0 comments on commit 3a845d3

Please sign in to comment.