Skip to content

Commit

Permalink
Added HDFS Sink (apache#2409)
Browse files Browse the repository at this point in the history
* Added HDFS Sink

* Fixed issues identified during PR review

* Fixed comment

* Added HDFS Container to externalServices

* Ignoring HdfsSink test for now

* Removed HDFS Container from externalServices

* Fixed ASL licensing

* Fixed compile errors

* Added HDFS to SinkType Enum
  • Loading branch information
david-streamlio authored and srkukarni committed Sep 6, 2018
1 parent d734738 commit 261eab1
Show file tree
Hide file tree
Showing 34 changed files with 2,157 additions and 8 deletions.
50 changes: 44 additions & 6 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -495,12 +495,6 @@ flexible messaging model and an intuitive client API.</description>
<version>${log4j2.version}</version>
</dependency>

<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>

<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
Expand Down Expand Up @@ -1350,6 +1344,50 @@ flexible messaging model and an intuitive client API.</description>
<profile>
<id>docker</id>
</profile>

<profile>
  <!-- Checks style and licensing requirements. This is a good idea to run
       for contributions and for the release process. While it would be nice
       to always run these plugins, they can considerably slow the build and
       have proven to create unstable builds in our multi-module project and
       when building using multiple threads. The stability issues seen with
       Checkstyle in multi-module builds include false positives and false
       negatives. -->
  <id>contrib-check</id>
  <build>
    <plugins>
      <!-- License header audit, bound to the verify phase. -->
      <plugin>
        <groupId>org.apache.rat</groupId>
        <artifactId>apache-rat-plugin</artifactId>
        <executions>
          <execution>
            <goals>
              <goal>check</goal>
            </goals>
            <phase>verify</phase>
          </execution>
        </executions>
      </plugin>
      <!-- Checkstyle run, bound to the verify phase. Both rule files are
           referenced with the same relative path form. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-checkstyle-plugin</artifactId>
        <executions>
          <execution>
            <id>check-style</id>
            <phase>verify</phase>
            <configuration>
              <configLocation>./buildtools/src/main/resources/pulsar/checkstyle.xml</configLocation>
              <suppressionsLocation>./buildtools/src/main/resources/pulsar/suppressions.xml</suppressionsLocation>
              <encoding>UTF-8</encoding>
            </configuration>
            <goals>
              <goal>check</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</profile>
</profiles>

<repositories>
Expand Down
69 changes: 69 additions & 0 deletions pulsar-io/hdfs/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Module descriptor for the HDFS connector; inherits from pulsar-io. -->
  <parent>
    <groupId>org.apache.pulsar</groupId>
    <artifactId>pulsar-io</artifactId>
    <version>2.2.0-incubating-SNAPSHOT</version>
  </parent>
  <artifactId>pulsar-io-hdfs</artifactId>

  <dependencies>
    <!-- Sibling module, pinned to this project's own group and version. -->
    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>pulsar-io-core</artifactId>
      <version>${project.version}</version>
    </dependency>

    <!-- Jackson artifacts; no version is declared here, so it is presumably
         managed by a parent POM (confirm against the parent). -->
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-databind</artifactId>
    </dependency>

    <dependency>
      <groupId>com.fasterxml.jackson.dataformat</groupId>
      <artifactId>jackson-dataformat-yaml</artifactId>
    </dependency>

    <!-- Hadoop client library; the version is fixed in this module. -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>3.1.1</version>
    </dependency>

    <!-- Test-only dependency. -->
    <dependency>
      <groupId>org.testng</groupId>
      <artifactId>testng</artifactId>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- NAR plugin; its configuration is not declared here and is
           presumably inherited from a parent POM. -->
      <plugin>
        <groupId>org.apache.nifi</groupId>
        <artifactId>nifi-nar-maven-plugin</artifactId>
      </plugin>
    </plugins>
  </build>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pulsar.io.hdfs;

import java.io.Serializable;

import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;
import lombok.experimental.Accessors;

import org.apache.commons.lang.StringUtils;

/**
 * Base configuration holder shared by the HDFS components.
 *
 * <p>Accessors, {@code equals}/{@code hashCode} and {@code toString} are generated
 * by Lombok; setters are chainable because of {@code @Accessors(chain = true)}.
 */
@Data
@Setter
@Getter
@EqualsAndHashCode
@ToString
@Accessors(chain = true)
public abstract class AbstractHdfsConfig implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * A single file, or a comma separated list of files, containing the Hadoop
     * file system configuration, e.g. 'core-site.xml', 'hdfs-site.xml'.
     */
    private String hdfsConfigResources;

    /**
     * The HDFS directory that files are read from or written to.
     */
    private String directory;

    /**
     * The character encoding of the files, e.g. UTF-8, ASCII, etc.
     */
    private String encoding;

    /**
     * The codec used to compress/de-compress the files on HDFS.
     */
    private Compression compression;

    /**
     * The Kerberos user principal account used for authentication.
     */
    private String kerberosUserPrincipal;

    /**
     * The full pathname of the Kerberos keytab file used for authentication.
     */
    private String keytab;

    /**
     * Checks that the mandatory settings are present and that the Kerberos
     * settings are either both supplied or both omitted.
     *
     * @throws IllegalArgumentException if {@code hdfsConfigResources} or
     *         {@code directory} is null or empty, or if exactly one of
     *         {@code kerberosUserPrincipal} and {@code keytab} is set
     */
    public void validate() {
        final boolean resourcesMissing = StringUtils.isEmpty(hdfsConfigResources);
        final boolean directoryMissing = StringUtils.isEmpty(directory);
        if (resourcesMissing || directoryMissing) {
            throw new IllegalArgumentException("Required property not set.");
        }

        // The two Kerberos settings only make sense as a pair: reject the
        // configuration when one is present and the other is absent.
        final boolean principalMissing = StringUtils.isEmpty(kerberosUserPrincipal);
        final boolean keytabMissing = StringUtils.isEmpty(keytab);
        if (principalMissing != keytabMissing) {
            throw new IllegalArgumentException("Values for both kerberosUserPrincipal & keytab are required.");
        }
    }
}
Loading

0 comments on commit 261eab1

Please sign in to comment.