Skip to content

Commit

Permalink
run world count failed
Browse files Browse the repository at this point in the history
  • Loading branch information
kiwenlau committed Jun 12, 2016
1 parent ba051f4 commit 9d1e5a9
Show file tree
Hide file tree
Showing 19 changed files with 313 additions and 10 deletions.
Binary file modified .DS_Store
Binary file not shown.
47 changes: 47 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
FROM ubuntu:14.04

MAINTAINER KiwenLau <[email protected]>

# install openssh-server, openjdk and wget
RUN apt-get update && apt-get install -y openssh-server openjdk-7-jdk wget

# install hadoop 2.7.2
RUN wget https://github.com/kiwenlau/compile-hadoop/releases/download/2.7.2/hadoop-2.7.2.tar.gz && \
tar -xzvf hadoop-2.7.2.tar.gz && \
mv hadoop-2.7.2 /usr/local/hadoop && \
rm hadoop-2.7.2.tar.gz

# copy hadoop configuration files
COPY config/* /tmp/

# ssh without key
RUN ssh-keygen -t rsa -f ~/.ssh/id_rsa -P '' && \
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && \
mv /tmp/ssh_config ~/.ssh/config

ENV HADOOP_INSTALL /usr/local/hadoop

RUN mkdir -p ~/hdfs/namenode && \
mkdir -p ~/hdfs/datanode && \
mkdir $HADOOP_INSTALL/logs

RUN mv /tmp/.bashrc ~/.bashrc && \
mv /tmp/hadoop-env.sh /usr/local/hadoop/etc/hadoop/hadoop-env.sh && \
mv /tmp/hdfs-site.xml $HADOOP_INSTALL/etc/hadoop/hdfs-site.xml && \
mv /tmp/core-site.xml $HADOOP_INSTALL/etc/hadoop/core-site.xml && \
mv /tmp/mapred-site.xml $HADOOP_INSTALL/etc/hadoop/mapred-site.xml && \
mv /tmp/yarn-site.xml $HADOOP_INSTALL/etc/hadoop/yarn-site.xml && \
mv /tmp/slaves $HADOOP_INSTALL/etc/hadoop/slaves && \
mv /tmp/start-hadoop.sh ~/start-hadoop.sh && \
mv /tmp/run-wordcount.sh ~/run-wordcount.sh

RUN chmod +x ~/start-hadoop.sh && \
chmod +x ~/run-wordcount.sh && \
chmod +x $HADOOP_INSTALL/sbin/start-dfs.sh && \
chmod +x $HADOOP_INSTALL/sbin/start-yarn.sh && \
chmod 1777 /tmp

# format namenode
RUN /usr/local/hadoop/bin/hdfs namenode -format

CMD [ "sh", "-c", "service ssh start; bash"]
21 changes: 13 additions & 8 deletions build-image.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
#!/bin/bash

echo -e "\nbuild docker hadoop-base image\n"
sudo docker build -f hadoop-base/Dockerfile -t kiwenlau/hadoop-base:1.0 ./hadoop-base
# echo -e "\nbuild docker hadoop-base image\n"
# sudo docker build -f hadoop-base/Dockerfile -t kiwenlau/hadoop-base:1.0 ./hadoop-base

echo ""
# echo ""


echo -e "\nbuild docker hadoop-master image\n"
sudo docker build -f hadoop-master/Dockerfile -t kiwenlau/hadoop-master:1.0 ./hadoop-master
# echo -e "\nbuild docker hadoop-master image\n"
# sudo docker build -f hadoop-master/Dockerfile -t kiwenlau/hadoop-master:1.0 ./hadoop-master

echo ""
# echo ""

echo -e "\nbuild docker hadoop-slave image\n"
sudo docker build -f hadoop-slave/Dockerfile -t kiwenlau/hadoop-slave:1.0 ./hadoop-slave
# echo -e "\nbuild docker hadoop-slave image\n"
# sudo docker build -f hadoop-slave/Dockerfile -t kiwenlau/hadoop-slave:1.0 ./hadoop-slave

# echo ""

echo -e "\nbuild docker hadoop image\n"
sudo docker build -t kiwenlau/hadoop:1.0 .

echo ""
Binary file added config/.DS_Store
Binary file not shown.
19 changes: 19 additions & 0 deletions config/.bashrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# java
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
export PATH=$PATH:$JAVA_HOME/bin


# hadoop
export HADOOP_INSTALL=/usr/local/hadoop
export HADOOP_HOME=$HADOOP_INSTALL
export HADOOP_MAPRED_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_HOME=$HADOOP_INSTALL
export HADOOP_HDFS_HOME=$HADOOP_INSTALL
export YARN_HOME=$HADOOP_INSTALL

export HADOOP_CONF_DIR=$HADOOP_INSTALL/etc/hadoop
export YARN_CONF_DIR=$HADOOP_INSTALL/etc/hadoop

export PATH=$PATH:$HADOOP_INSTALL/bin
export PATH=$PATH:$HADOOP_INSTALL/sbin

9 changes: 9 additions & 0 deletions config/core-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-master:9000/</value>
<description>NameNode URI</description>
</property>
</configuration>
98 changes: 98 additions & 0 deletions config/hadoop-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# The java implementation to use.
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64

# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}

export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}

# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
if [ "$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"

export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"

export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}

# Where log files are stored. $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER

# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""

###
# Advanced Users Only!
###

# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the user that will run the hadoop daemons. Otherwise there is the
# potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
34 changes: 34 additions & 0 deletions config/hdfs-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///root/hdfs/namenode</value>
<description>NameNode directory for namespace and transaction logs storage.</description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///root/hdfs/datanode</value>
<description>DataNode directory</description>
</property>

<property>
<name>dfs.replication</name>
<value>3</value>
</property>

<property>
<name>dfs.permissions</name>
<value>false</value>
</property>

<property>
<name>dfs.datanode.use.datanode.hostname</name>
<value>false</value>
</property>

<property>
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
</configuration>
9 changes: 9 additions & 0 deletions config/mapred-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
28 changes: 28 additions & 0 deletions config/run-wordcount.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash

# test the hadoop cluster by running wordcount

# create input files
mkdir input
echo "Hello Docker" >input/file2.txt
echo "Hello Hadoop" >input/file1.txt

# create input directory on HDFS
hadoop fs -mkdir -p input

# put input files to HDFS
hdfs dfs -put ./input/* input

# run wordcount
hadoop jar $HADOOP_INSTALL/share/hadoop/mapreduce/sources/hadoop-mapreduce-examples-2.7.2-sources.jar org.apache.hadoop.examples.WordCount input output

# print the input files
echo -e "\ninput file1.txt:"
hdfs dfs -cat input/file1.txt

echo -e "\ninput file2.txt:"
hdfs dfs -cat input/file2.txt

# print the output of wordcount
echo -e "\nwordcount output:"
hdfs dfs -cat output/part-r-00000
2 changes: 2 additions & 0 deletions config/slaves
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
hadoop-slave1
hadoop-slave2
9 changes: 9 additions & 0 deletions config/ssh_config
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Host localhost
StrictHostKeyChecking no

Host 0.0.0.0
StrictHostKeyChecking no

Host hadoop-*
StrictHostKeyChecking no
UserKnownHostsFile=/dev/null
12 changes: 12 additions & 0 deletions config/start-hadoop.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash

echo -e "\n"

$HADOOP_INSTALL/sbin/start-dfs.sh

echo -e "\n"

$HADOOP_INSTALL/sbin/start-yarn.sh

echo -e "\n"

31 changes: 31 additions & 0 deletions config/yarn-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<?xml version="1.0"?>
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>hadoop-master:8025</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>hadoop-master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>hadoop-master:8040</value>
</property>
<property>
<name>yarn.nodemanager.address</name>
<value>master:8050</value>
</property>
<property>
<name>yarn.nodemanager.localizer.address</name>
<value>master:8060</value>
</property>
</configuration>
Binary file added hadoop-base/.DS_Store
Binary file not shown.
Binary file added hadoop-base/config/.DS_Store
Binary file not shown.
Binary file added hadoop-master/.DS_Store
Binary file not shown.
Binary file added hadoop-master/files/.DS_Store
Binary file not shown.
4 changes: 2 additions & 2 deletions start-container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ sudo docker run -itd \
-p 19888:19888 \
--name hadoop-master \
--hostname hadoop-master \
kiwenlau/hadoop-master:1.0 &> /dev/null
kiwenlau/hadoop:1.0 &> /dev/null


# start hadoop slave container
Expand All @@ -27,7 +27,7 @@ do
--net=hadoop \
--name hadoop-slave$i \
--hostname hadoop-slave$i \
kiwenlau/hadoop-slave:1.0 &> /dev/null
kiwenlau/hadoop:1.0 &> /dev/null
i=$(( $i + 1 ))
done

Expand Down

0 comments on commit 9d1e5a9

Please sign in to comment.