Skip to content

Commit

Permalink
MAPREDUCE-6415. Create a tool to combine aggregated logs into HAR fil…
Browse files Browse the repository at this point in the history
…es. (Robert Kanter via kasha)
  • Loading branch information
kambatla committed Sep 10, 2015
1 parent 4014ce5 commit 119cc75
Show file tree
Hide file tree
Showing 12 changed files with 2,527 additions and 0 deletions.
1,308 changes: 1,308 additions & 0 deletions MAPREDUCE-6415.003.patch

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@
<include>*-sources.jar</include>
</includes>
</fileSet>
<fileSet>
<directory>../hadoop-archive-logs/target</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
<includes>
<include>*-sources.jar</include>
</includes>
</fileSet>
<fileSet>
<directory>../hadoop-datajoin/target</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
Expand Down
3 changes: 3 additions & 0 deletions hadoop-mapreduce-project/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,9 @@ Release 2.8.0 - UNRELEASED
MAPREDUCE-6304. Specifying node labels when submitting MR jobs.
(Naganarasimha G R via wangda)

MAPREDUCE-6415. Create a tool to combine aggregated logs into HAR files.
(Robert Kanter via kasha)

IMPROVEMENTS

MAPREDUCE-6291. Correct mapred queue usage command.
Expand Down
8 changes: 8 additions & 0 deletions hadoop-mapreduce-project/bin/mapred
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ MYNAME="${BASH_SOURCE-$0}"
function hadoop_usage
{
hadoop_add_subcommand "archive" "create a hadoop archive"
hadoop_add_subcommand "archive-logs" "combine aggregated logs into hadoop archives"
hadoop_add_subcommand "classpath" "prints the class path needed for running mapreduce subcommands"
hadoop_add_subcommand "distcp" "copy file or directories recursively"
hadoop_add_subcommand "historyserver" "run job history servers as a standalone daemon"
Expand Down Expand Up @@ -72,6 +73,13 @@ case ${COMMAND} in
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
archive-logs)
CLASS=org.apache.hadoop.tools.HadoopArchiveLogs
hadoop_debug "Injecting TOOL_PATH into CLASSPATH"
hadoop_add_classpath "${TOOL_PATH}"
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
classpath)
hadoop_do_classpath_subcommand CLASS "$@"
;;
Expand Down
5 changes: 5 additions & 0 deletions hadoop-project/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,11 @@
<artifactId>hadoop-archives</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-archive-logs</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-distcp</artifactId>
Expand Down
171 changes: 171 additions & 0 deletions hadoop-tools/hadoop-archive-logs/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-project</artifactId>
<version>3.0.0-SNAPSHOT</version>
<relativePath>../../hadoop-project</relativePath>
</parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-archive-logs</artifactId>
<version>3.0.0-SNAPSHOT</version>
<description>Apache Hadoop Archive Logs</description>
<name>Apache Hadoop Archive Logs</name>
<packaging>jar</packaging>

<properties>
<hadoop.log.dir>${project.build.directory}/log</hadoop.log.dir>
</properties>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-applications-distributedshell</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-tests</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-archives</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
<scope>provided</scope>
</dependency>
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<scope>test</scope>
</dependency>
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<scope>test</scope>
<type>test-jar</type>
</dependency>
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<scope>test</scope>
<type>test-jar</type>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<id>create-log-dir</id>
<phase>process-test-resources</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target>
<delete dir="${test.build.data}"/>
<mkdir dir="${test.build.data}"/>
<mkdir dir="${hadoop.log.dir}"/>
</target>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>org.apache.hadoop.tools.HadoopArchiveLogs</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>
Loading

0 comments on commit 119cc75

Please sign in to comment.