
Commit 0a38637

[SPARK-11807] Remove support for Hadoop < 2.2

i.e. Hadoop 1 and Hadoop 2.0

Author: Reynold Xin <[email protected]>

Closes apache#10404 from rxin/SPARK-11807.

1 parent: 29cecd4

File tree: 9 files changed, with 9 additions and 62 deletions


core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala

1 addition, 9 deletions

@@ -663,16 +663,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
 
   // For testing.
   private[history] def isFsInSafeMode(dfs: DistributedFileSystem): Boolean = {
-    val hadoop1Class = "org.apache.hadoop.hdfs.protocol.FSConstants$SafeModeAction"
     val hadoop2Class = "org.apache.hadoop.hdfs.protocol.HdfsConstants$SafeModeAction"
-    val actionClass: Class[_] =
-      try {
-        getClass().getClassLoader().loadClass(hadoop2Class)
-      } catch {
-        case _: ClassNotFoundException =>
-          getClass().getClassLoader().loadClass(hadoop1Class)
-      }
-
+    val actionClass: Class[_] = getClass().getClassLoader().loadClass(hadoop2Class)
     val action = actionClass.getField("SAFEMODE_GET").get(null)
     val method = dfs.getClass().getMethod("setSafeMode", action.getClass())
     method.invoke(dfs, action).asInstanceOf[Boolean]
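
With the Hadoop 1 fallback removed, the reflective lookup always resolves to the Hadoop 2 class. As a rough sketch, this is what the reflection amounts to if Spark could link against HDFS directly; the direct call shown here is an assumption for illustration, not part of the commit, which keeps reflection so core avoids a compile-time dependency on these HDFS internals:

```scala
import org.apache.hadoop.hdfs.DistributedFileSystem
import org.apache.hadoop.hdfs.protocol.HdfsConstants

// Hypothetical direct equivalent of the reflective call above:
// SAFEMODE_GET queries the NameNode's safe-mode flag without changing it.
def isFsInSafeMode(dfs: DistributedFileSystem): Boolean =
  dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET)
```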

core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala

2 additions, 15 deletions

@@ -26,17 +26,13 @@ import org.apache.spark.util.Utils
 private[spark]
 trait SparkHadoopMapReduceUtil {
   def newJobContext(conf: Configuration, jobId: JobID): JobContext = {
-    val klass = firstAvailableClass(
-        "org.apache.hadoop.mapreduce.task.JobContextImpl",  // hadoop2, hadoop2-yarn
-        "org.apache.hadoop.mapreduce.JobContext")           // hadoop1
+    val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.JobContextImpl")
     val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID])
     ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
   }
 
   def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = {
-    val klass = firstAvailableClass(
-        "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl",  // hadoop2, hadoop2-yarn
-        "org.apache.hadoop.mapreduce.TaskAttemptContext")           // hadoop1
+    val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl")
     val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID])
     ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext]
   }

@@ -69,13 +65,4 @@ trait SparkHadoopMapReduceUtil
       }
     }
   }
-
-  private def firstAvailableClass(first: String, second: String): Class[_] = {
-    try {
-      Utils.classForName(first)
-    } catch {
-      case e: ClassNotFoundException =>
-        Utils.classForName(second)
-    }
-  }
 }
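
With only Hadoop 2 left, `firstAvailableClass` has no second candidate, so it is deleted and the callers name the Hadoop 2 classes directly. A minimal sketch of the reflection-free form these factories could take, assuming compile-time access to the `mapreduce.task` implementation classes (the commit itself still goes through `Utils.classForName`):

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.{JobContext, JobID, TaskAttemptContext, TaskAttemptID}
import org.apache.hadoop.mapreduce.task.{JobContextImpl, TaskAttemptContextImpl}

// Hypothetical reflection-free factories: construct the Hadoop 2
// implementation classes directly instead of loading them by name.
def newJobContext(conf: Configuration, jobId: JobID): JobContext =
  new JobContextImpl(conf, jobId)

def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext =
  new TaskAttemptContextImpl(conf, attemptId)
```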

dev/create-release/release-build.sh

3 deletions

@@ -166,9 +166,6 @@ if [[ "$1" == "package" ]]; then
 
   # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds
   # share the same Zinc server.
-  make_binary_release "hadoop1" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver" "3030" &
-  make_binary_release "hadoop1-scala2.11" "-Psparkr -Phadoop-1 -Phive -Dscala-2.11" "3031" &
-  make_binary_release "cdh4" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
   make_binary_release "hadoop2.3" "-Psparkr -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
   make_binary_release "hadoop2.4" "-Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
   make_binary_release "hadoop2.6" "-Psparkr -Phadoop-2.6 -Phive -Phive-thriftserver -Pyarn" "3034" &

dev/run-tests-jenkins.py

4 deletions

@@ -163,10 +163,6 @@ def main():
     if "test-maven" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_TOOL"] = "maven"
     # Switch the Hadoop profile based on the PR title:
-    if "test-hadoop1.0" in ghprb_pull_title:
-        os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop1.0"
-    if "test-hadoop2.0" in ghprb_pull_title:
-        os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.0"
     if "test-hadoop2.2" in ghprb_pull_title:
         os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.2"
     if "test-hadoop2.3" in ghprb_pull_title:
dev/run-tests.py

2 deletions

@@ -301,8 +301,6 @@ def get_hadoop_profiles(hadoop_version):
     """
 
     sbt_maven_hadoop_profiles = {
-        "hadoop1.0": ["-Phadoop-1", "-Dhadoop.version=1.2.1"],
-        "hadoop2.0": ["-Phadoop-1", "-Dhadoop.version=2.0.0-mr1-cdh4.1.1"],
         "hadoop2.2": ["-Pyarn", "-Phadoop-2.2"],
         "hadoop2.3": ["-Pyarn", "-Phadoop-2.3", "-Dhadoop.version=2.3.0"],
         "hadoop2.6": ["-Pyarn", "-Phadoop-2.6"],

docs/building-spark.md

4 additions, 14 deletions

@@ -33,13 +33,13 @@ to the `sharedSettings` val. See also [this PR](https://github.com/apache/spark/
 
 # Building a Runnable Distribution
 
-To create a Spark distribution like those distributed by the 
-[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as 
-to be runnable, use `make-distribution.sh` in the project root directory. It can be configured 
+To create a Spark distribution like those distributed by the
+[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as
+to be runnable, use `make-distribution.sh` in the project root directory. It can be configured
 with Maven profile settings and so on like the direct Maven build. Example:
 
     ./make-distribution.sh --name custom-spark --tgz -Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn
-
+
 For more information on usage, run `./make-distribution.sh --help`
 
 # Setting up Maven's Memory Usage

@@ -74,23 +74,13 @@ Because HDFS is not protocol-compatible across versions, if you want to read fro
     <tr><th>Hadoop version</th><th>Profile required</th></tr>
   </thead>
   <tbody>
-    <tr><td>1.x to 2.1.x</td><td>hadoop-1</td></tr>
     <tr><td>2.2.x</td><td>hadoop-2.2</td></tr>
     <tr><td>2.3.x</td><td>hadoop-2.3</td></tr>
     <tr><td>2.4.x</td><td>hadoop-2.4</td></tr>
     <tr><td>2.6.x and later 2.x</td><td>hadoop-2.6</td></tr>
   </tbody>
 </table>
 
-For Apache Hadoop versions 1.x, Cloudera CDH "mr1" distributions, and other Hadoop versions without YARN, use:
-
-{% highlight bash %}
-# Apache Hadoop 1.2.1
-mvn -Dhadoop.version=1.2.1 -Phadoop-1 -DskipTests clean package
-
-# Cloudera CDH 4.2.0 with MapReduce v1
-mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -Phadoop-1 -DskipTests clean package
-{% endhighlight %}
 
 You can enable the `yarn` profile and optionally set the `yarn.version` property if it is different from `hadoop.version`. Spark only supports YARN versions 2.2.0 and later.
make-distribution.sh

1 addition, 1 deletion

@@ -58,7 +58,7 @@ while (( "$#" )); do
     --hadoop)
       echo "Error: '--hadoop' is no longer supported:"
      echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead."
-      echo "Error: Related profiles include hadoop-1, hadoop-2.2, hadoop-2.3 and hadoop-2.4."
+      echo "Error: Related profiles include hadoop-2.2, hadoop-2.3 and hadoop-2.4."
      exit_with_usage
      ;;
    --with-yarn)

pom.xml

13 deletions

@@ -2442,19 +2442,6 @@
       http://hadoop.apache.org/docs/ra.b.c/hadoop-project-dist/hadoop-common/dependency-analysis.html
     -->
 
-    <profile>
-      <id>hadoop-1</id>
-      <properties>
-        <hadoop.version>1.2.1</hadoop.version>
-        <protobuf.version>2.4.1</protobuf.version>
-        <hbase.version>0.98.7-hadoop1</hbase.version>
-        <avro.mapred.classifier>hadoop1</avro.mapred.classifier>
-        <codehaus.jackson.version>1.8.8</codehaus.jackson.version>
-        <akka.group>org.spark-project.akka</akka.group>
-        <akka.version>2.3.4-spark</akka.version>
-      </properties>
-    </profile>
-
     <profile>
       <id>hadoop-2.2</id>
       <!-- SPARK-7249: Default hadoop profile. Uses global properties. -->

sql/README.md

1 addition, 1 deletion

@@ -20,7 +20,7 @@ If you are working with Hive 0.12.0, you will need to set several environmental
 ```
 export HIVE_HOME="<path to>/hive/build/dist"
 export HIVE_DEV_HOME="<path to>/hive/"
-export HADOOP_HOME="<path to>/hadoop-1.0.4"
+export HADOOP_HOME="<path to>/hadoop"
 ```
 
 If you are working with Hive 0.13.1, the following steps are needed:
