
Commit 0a38637

[SPARK-11807] Remove support for Hadoop < 2.2

i.e. Hadoop 1 and Hadoop 2.0

Author: Reynold Xin <[email protected]>

Closes apache#10404 from rxin/SPARK-11807.

1 parent: 29cecd4

File tree: 9 files changed, with 9 additions and 62 deletions


core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala

1 addition, 9 deletions

@@ -663,16 +663,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
 
   // For testing.
   private[history] def isFsInSafeMode(dfs: DistributedFileSystem): Boolean = {
-    val hadoop1Class = "org.apache.hadoop.hdfs.protocol.FSConstants$SafeModeAction"
     val hadoop2Class = "org.apache.hadoop.hdfs.protocol.HdfsConstants$SafeModeAction"
-    val actionClass: Class[_] =
-      try {
-        getClass().getClassLoader().loadClass(hadoop2Class)
-      } catch {
-        case _: ClassNotFoundException =>
-          getClass().getClassLoader().loadClass(hadoop1Class)
-      }
-
+    val actionClass: Class[_] = getClass().getClassLoader().loadClass(hadoop2Class)
     val action = actionClass.getField("SAFEMODE_GET").get(null)
     val method = dfs.getClass().getMethod("setSafeMode", action.getClass())
     method.invoke(dfs, action).asInstanceOf[Boolean]
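
With the Hadoop 1 fallback removed, the reflective lookup always resolves to the Hadoop 2 class. As a rough sketch, this is what the reflection amounts to if Spark could link against HDFS directly; the direct call shown here is an assumption for illustration, not part of the commit, which keeps reflection so core avoids a compile-time dependency on these HDFS internals:

```scala
import org.apache.hadoop.hdfs.DistributedFileSystem
import org.apache.hadoop.hdfs.protocol.HdfsConstants

// Hypothetical direct equivalent of the reflective call above:
// SAFEMODE_GET queries the NameNode's safe-mode flag without changing it.
def isFsInSafeMode(dfs: DistributedFileSystem): Boolean =
  dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET)
```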

core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala

2 additions, 15 deletions

@@ -26,17 +26,13 @@ import org.apache.spark.util.Utils
 private[spark]
 trait SparkHadoopMapReduceUtil {
   def newJobContext(conf: Configuration, jobId: JobID): JobContext = {
-    val klass = firstAvailableClass(
-        "org.apache.hadoop.mapreduce.task.JobContextImpl",  // hadoop2, hadoop2-yarn
-        "org.apache.hadoop.mapreduce.JobContext")           // hadoop1
+    val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.JobContextImpl")
     val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID])
     ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
   }
 
   def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = {
-    val klass = firstAvailableClass(
-        "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl",  // hadoop2, hadoop2-yarn
-        "org.apache.hadoop.mapreduce.TaskAttemptContext")           // hadoop1
+    val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl")
     val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID])
     ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext]
   }

@@ -69,13 +65,4 @@ trait SparkHadoopMapReduceUtil
       }
     }
   }
-
-  private def firstAvailableClass(first: String, second: String): Class[_] = {
-    try {
-      Utils.classForName(first)
-    } catch {
-      case e: ClassNotFoundException =>
-        Utils.classForName(second)
-    }
-  }
 }
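
With only Hadoop 2 left, `firstAvailableClass` has no second candidate, so it is deleted and the callers name the Hadoop 2 classes directly. A minimal sketch of the reflection-free form these factories could take, assuming compile-time access to the `mapreduce.task` implementation classes (the commit itself still goes through `Utils.classForName`):

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.{JobContext, JobID, TaskAttemptContext, TaskAttemptID}
import org.apache.hadoop.mapreduce.task.{JobContextImpl, TaskAttemptContextImpl}

// Hypothetical reflection-free factories: construct the Hadoop 2
// implementation classes directly instead of loading them by name.
def newJobContext(conf: Configuration, jobId: JobID): JobContext =
  new JobContextImpl(conf, jobId)

def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext =
  new TaskAttemptContextImpl(conf, attemptId)
```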

dev/create-release/release-build.sh

3 deletions

@@ -166,9 +166,6 @@ if [[ "$1" == "package" ]]; then
 
   # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds
   # share the same Zinc server.
-  make_binary_release "hadoop1" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver" "3030" &
-  make_binary_release "hadoop1-scala2.11" "-Psparkr -Phadoop-1 -Phive -Dscala-2.11" "3031" &
-  make_binary_release "cdh4" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
   make_binary_release "hadoop2.3" "-Psparkr -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
   make_binary_release "hadoop2.4" "-Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
   make_binary_release "hadoop2.6" "-Psparkr -Phadoop-2.6 -Phive -Phive-thriftserver -Pyarn" "3034" &

dev/run-tests-jenkins.py

4 deletions

@@ -163,10 +163,6 @@ def main():
     if "test-maven" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_TOOL"] = "maven"
     # Switch the Hadoop profile based on the PR title:
-    if "test-hadoop1.0" in ghprb_pull_title:
-        os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop1.0"
-    if "test-hadoop2.0" in ghprb_pull_title:
-        os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.0"
     if "test-hadoop2.2" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.2"
     if "test-hadoop2.3" in ghprb_pull_title:
dev/run-tests.py

2 deletions

@@ -301,8 +301,6 @@ def get_hadoop_profiles(hadoop_version):
     """
 
     sbt_maven_hadoop_profiles = {
-        "hadoop1.0": ["-Phadoop-1", "-Dhadoop.version=1.2.1"],
-        "hadoop2.0": ["-Phadoop-1", "-Dhadoop.version=2.0.0-mr1-cdh4.1.1"],
         "hadoop2.2": ["-Pyarn", "-Phadoop-2.2"],
         "hadoop2.3": ["-Pyarn", "-Phadoop-2.3", "-Dhadoop.version=2.3.0"],
         "hadoop2.6": ["-Pyarn", "-Phadoop-2.6"],

docs/building-spark.md

4 additions, 14 deletions

@@ -33,13 +33,13 @@ to the `sharedSettings` val. See also [this PR](https://github.com/apache/spark/
 
 # Building a Runnable Distribution
 
-To create a Spark distribution like those distributed by the 
-[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as 
-to be runnable, use `make-distribution.sh` in the project root directory. It can be configured 
+To create a Spark distribution like those distributed by the
+[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as
+to be runnable, use `make-distribution.sh` in the project root directory. It can be configured
 with Maven profile settings and so on like the direct Maven build. Example:
 
     ./make-distribution.sh --name custom-spark --tgz -Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn
-
+
 For more information on usage, run `./make-distribution.sh --help`
 
 # Setting up Maven's Memory Usage

@@ -74,23 +74,13 @@ Because HDFS is not protocol-compatible across versions, if you want to read fro
     <tr><th>Hadoop version</th><th>Profile required</th></tr>
   </thead>
   <tbody>
-    <tr><td>1.x to 2.1.x</td><td>hadoop-1</td></tr>
     <tr><td>2.2.x</td><td>hadoop-2.2</td></tr>
     <tr><td>2.3.x</td><td>hadoop-2.3</td></tr>
     <tr><td>2.4.x</td><td>hadoop-2.4</td></tr>
     <tr><td>2.6.x and later 2.x</td><td>hadoop-2.6</td></tr>
   </tbody>
 </table>
 
-For Apache Hadoop versions 1.x, Cloudera CDH "mr1" distributions, and other Hadoop versions without YARN, use:
-
-{% highlight bash %}
-# Apache Hadoop 1.2.1
-mvn -Dhadoop.version=1.2.1 -Phadoop-1 -DskipTests clean package
-
-# Cloudera CDH 4.2.0 with MapReduce v1
-mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -Phadoop-1 -DskipTests clean package
-{% endhighlight %}
 
 You can enable the `yarn` profile and optionally set the `yarn.version` property if it is different from `hadoop.version`. Spark only supports YARN versions 2.2.0 and later.
make-distribution.sh

1 addition, 1 deletion

@@ -58,7 +58,7 @@ while (( "$#" )); do
     --hadoop)
       echo "Error: '--hadoop' is no longer supported:"
      echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead."
-      echo "Error: Related profiles include hadoop-1, hadoop-2.2, hadoop-2.3 and hadoop-2.4."
+      echo "Error: Related profiles include hadoop-2.2, hadoop-2.3 and hadoop-2.4."
      exit_with_usage
      ;;
    --with-yarn)

pom.xml

13 deletions

@@ -2442,19 +2442,6 @@
       http://hadoop.apache.org/docs/ra.b.c/hadoop-project-dist/hadoop-common/dependency-analysis.html
     -->
 
-    <profile>
-      <id>hadoop-1</id>
-      <properties>
-        <hadoop.version>1.2.1</hadoop.version>
-        <protobuf.version>2.4.1</protobuf.version>
-        <hbase.version>0.98.7-hadoop1</hbase.version>
-        <avro.mapred.classifier>hadoop1</avro.mapred.classifier>
-        <codehaus.jackson.version>1.8.8</codehaus.jackson.version>
-        <akka.group>org.spark-project.akka</akka.group>
-        <akka.version>2.3.4-spark</akka.version>
-      </properties>
-    </profile>
-
     <profile>
       <id>hadoop-2.2</id>
       <!-- SPARK-7249: Default hadoop profile. Uses global properties. -->

sql/README.md

1 addition, 1 deletion

@@ -20,7 +20,7 @@ If you are working with Hive 0.12.0, you will need to set several environmental
 ```
 export HIVE_HOME="<path to>/hive/build/dist"
 export HIVE_DEV_HOME="<path to>/hive/"
-export HADOOP_HOME="<path to>/hadoop-1.0.4"
+export HADOOP_HOME="<path to>/hadoop"
 ```
 
 If you are working with Hive 0.13.1, the following steps are needed:
