forked from apache/flink
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[FLINK-10365] [s3] Factor out Hadoop FS classes into pre-shaded artif…
…act and update to Hadoop 3
- Loading branch information
1 parent
3292dc5
commit b0e5642
Showing
9 changed files
with
9,591 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
This project bundles the minimal dependencies from Hadoop's | ||
FileSystem abstraction and shades them to avoid dependency conflicts. | ||
|
||
This project is the basis for the bundled file system adapters | ||
that are based on Hadoop code but still let Flink appear | ||
Hadoop-free from a dependency perspective. | ||
|
||
For this to work, however, we needed to adapt Hadoop's `Configuration` | ||
class to load a (shaded) `core-default-shaded.xml` configuration with the | ||
relocated class names of classes loaded via reflection. | ||
|
||
# Changing the Hadoop Version | ||
|
||
If you want to change the Hadoop version this project depends on, the following | ||
steps are required to keep the shading correct: | ||
|
||
1. From the respective Hadoop jar (currently 3.1.0): | ||
- copy `org/apache/hadoop/conf/Configuration.java` to `src/main/java/org/apache/hadoop/conf/` and | ||
- replace `core-default.xml` with `core-default-shaded.xml`. | ||
- copy `org/apache/hadoop/util/NativeCodeLoader.java` to `src/main/java/org/apache/hadoop/util/` and | ||
- replace the native method stubs as in the current setup (empty methods, or return false) | ||
- copy `core-default.xml` to `src/main/resources/core-default-shaded.xml` and | ||
- change every occurrence of `org.apache.hadoop` into `org.apache.flink.fs.shaded.hadoop3.org.apache.hadoop` | ||
- copy `core-site.xml` to `src/test/resources/core-site.xml` (as is) | ||
|
||
2. Verify that the shaded jar: | ||
- does not contain any unshaded classes | ||
- has all other classes under `org.apache.flink.fs.shaded.hadoop3` | ||
- has a `META-INF/services/org.apache.flink.core.fs.FileSystemFactory` file pointing to two classes: `org.apache.flink.fs.s3hadoop.S3FileSystemFactory` and `org.apache.flink.fs.s3hadoop.S3AFileSystemFactory` | ||
- has the names and contents of all other service files under `META-INF/services` in the relocated `org.apache.flink.fs.s3hadoop.shaded` package | ||
- contains a `core-default-shaded.xml` file | ||
- does not contain a `core-default.xml` or `core-site.xml` file |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,262 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!-- | ||
Licensed to the Apache Software Foundation (ASF) under one | ||
or more contributor license agreements. See the NOTICE file | ||
distributed with this work for additional information | ||
regarding copyright ownership. The ASF licenses this file | ||
to you under the Apache License, Version 2.0 (the | ||
"License"); you may not use this file except in compliance | ||
with the License. You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, | ||
software distributed under the License is distributed on an | ||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations | ||
under the License. | ||
--> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> | ||
|
||
<modelVersion>4.0.0</modelVersion> | ||
|
||
<parent> <!-- inherit from the flink-filesystems parent POM, resolved from the parent directory (see relativePath) --> | ||
<groupId>org.apache.flink</groupId> | ||
<artifactId>flink-filesystems</artifactId> | ||
<version>1.7-SNAPSHOT</version> | ||
<relativePath>..</relativePath> | ||
</parent> | ||
|
||
<artifactId>flink-fs-hadoop-shaded</artifactId> | ||
<name>flink-filesystems :: flink-fs-hadoop-shaded</name> | ||
|
||
<packaging>jar</packaging> | ||
|
||
<dependencies> | ||
<!-- The Hadoop file system abstraction (hadoop-common).
The version is taken from the fs.hadoopshaded.version property, which is not defined in this file.
The exclusions below remove transitive dependencies of hadoop-common so that they are not
pulled into this module: JDK tools, SSH (jsch), the Jersey / Jetty / servlet web stack, Avro,
logging bindings (slf4j-log4j12, log4j), Kerberos KDC, Curator / ZooKeeper, several
commons libraries, protobuf, gson, httpclient, and the JWT / JSON helpers.
NOTE(review): presumably none of these are needed by the file system code paths; confirm
before removing an exclusion or changing the Hadoop version. --> | ||
<dependency> | ||
<groupId>org.apache.hadoop</groupId> | ||
<artifactId>hadoop-common</artifactId> | ||
<version>${fs.hadoopshaded.version}</version> | ||
<exclusions> | ||
<exclusion> | ||
<groupId>jdk.tools</groupId> | ||
<artifactId>jdk.tools</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>com.jcraft</groupId> | ||
<artifactId>jsch</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>com.sun.jersey</groupId> | ||
<artifactId>jersey-core</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>com.sun.jersey</groupId> | ||
<artifactId>jersey-servlet</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>com.sun.jersey</groupId> | ||
<artifactId>jersey-json</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>com.sun.jersey</groupId> | ||
<artifactId>jersey-server</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.apache.avro</groupId> | ||
<artifactId>avro</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.slf4j</groupId> | ||
<artifactId>slf4j-log4j12</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>log4j</groupId> | ||
<artifactId>log4j</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.eclipse.jetty</groupId> | ||
<artifactId>jetty-server</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.eclipse.jetty</groupId> | ||
<artifactId>jetty-util</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.eclipse.jetty</groupId> | ||
<artifactId>jetty-servlet</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.eclipse.jetty</groupId> | ||
<artifactId>jetty-webapp</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>javax.servlet</groupId> | ||
<artifactId>javax.servlet-api</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>javax.servlet.jsp</groupId> | ||
<artifactId>jsp-api</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.apache.kerby</groupId> | ||
<artifactId>kerb-simplekdc</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.apache.curator</groupId> | ||
<artifactId>curator-client</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.apache.curator</groupId> | ||
<artifactId>curator-framework</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.apache.curator</groupId> | ||
<artifactId>curator-recipes</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.apache.zookeeper</groupId> | ||
<artifactId>zookeeper</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>commons-net</groupId> | ||
<artifactId>commons-net</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>commons-cli</groupId> | ||
<artifactId>commons-cli</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>commons-codec</groupId> | ||
<artifactId>commons-codec</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>com.google.protobuf</groupId> | ||
<artifactId>protobuf-java</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>com.google.code.gson</groupId> | ||
<artifactId>gson</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.apache.httpcomponents</groupId> | ||
<artifactId>httpclient</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.apache.commons</groupId> | ||
<artifactId>commons-compress</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>org.apache.commons</groupId> | ||
<artifactId>commons-math3</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>com.nimbusds</groupId> | ||
<artifactId>nimbus-jose-jwt</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>net.minidev</groupId> | ||
<artifactId>json-smart</artifactId> | ||
</exclusion> | ||
</exclusions> | ||
</dependency> | ||
</dependencies> | ||
|
||
<build> | ||
|
||
<!-- this is merely an intermediate build artifact and should not be --> | ||
<!-- deployed to maven central --> | ||
<plugins> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-deploy-plugin</artifactId> | ||
<configuration> | ||
<skip>true</skip> <!-- skip deployment for this module: it is only an intermediate build artifact --> | ||
</configuration> | ||
</plugin> | ||
|
||
<!-- Publish the core-site.xml for tests: the test-jar goal additionally packages this
module's test classes and test resources into a separate test jar.
NOTE(review): presumably consumed by the tests of the file system modules that build on
this artifact; confirm against their POMs. --> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-jar-plugin</artifactId> | ||
<executions> | ||
<execution> | ||
<goals> | ||
<goal>test-jar</goal> | ||
</goals> | ||
</execution> | ||
</executions> | ||
</plugin> | ||
|
||
<!-- Bundle all dependencies into this module's jar (artifactSet includes *:*) and relocate
a selected set of third-party packages under org.apache.flink.fs.shaded.hadoop3 to hide
them. Runs in the package phase; the test jar is left unshaded. --> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-shade-plugin</artifactId> | ||
<executions> | ||
<execution> | ||
<id>shade-flink</id> | ||
<phase>package</phase> | ||
<goals> | ||
<goal>shade</goal> | ||
</goals> | ||
<configuration> | ||
<shadeTestJar>false</shadeTestJar> | ||
<artifactSet> | ||
<includes> | ||
<include>*:*</include> | ||
</includes> | ||
</artifactSet> | ||
<relocations> | ||
<!-- We relocate only the parts that are internal to Hadoop and not used / exposed downstream.
Every pattern below is moved under the common org.apache.flink.fs.shaded.hadoop3 prefix. --> | ||
<relocation> | ||
<pattern>com.google.re2j</pattern> | ||
<shadedPattern>org.apache.flink.fs.shaded.hadoop3.com.google.re2j</shadedPattern> | ||
</relocation> | ||
<relocation> | ||
<pattern>org.apache.htrace</pattern> | ||
<shadedPattern>org.apache.flink.fs.shaded.hadoop3.org.apache.htrace</shadedPattern> | ||
</relocation> | ||
<relocation> | ||
<pattern>com.fasterxml</pattern> | ||
<shadedPattern>org.apache.flink.fs.shaded.hadoop3.com.fasterxml</shadedPattern> | ||
</relocation> | ||
<relocation> | ||
<pattern>org.codehaus</pattern> | ||
<shadedPattern>org.apache.flink.fs.shaded.hadoop3.org.codehaus</shadedPattern> | ||
</relocation> | ||
<relocation> | ||
<pattern>com.ctc</pattern> | ||
<shadedPattern>org.apache.flink.fs.shaded.hadoop3.com.ctc</shadedPattern> | ||
</relocation> | ||
</relocations> | ||
<filters> | ||
<!-- Remove the classes and resources from hadoop-common that this module replaces with
its own adapted versions (Configuration, NativeCodeLoader, core-default.xml).
NOTE(review): VersionInfo and the two .properties files are excluded as well,
presumably for the same reason; confirm that replacements exist in this module. --> | ||
<filter> | ||
<artifact>org.apache.hadoop:hadoop-common</artifact> | ||
<excludes> | ||
<exclude>org/apache/hadoop/conf/Configuration**</exclude> | ||
<exclude>org/apache/hadoop/util/NativeCodeLoader**</exclude> | ||
<exclude>org/apache/hadoop/util/VersionInfo**</exclude> | ||
<exclude>core-default.xml</exclude> | ||
<exclude>common-version-info.properties</exclude> | ||
<exclude>org.apache.hadoop.application-classloader.properties</exclude> | ||
</excludes> | ||
</filter> | ||
<filter> | ||
<artifact>*</artifact> <!-- applies to every bundled artifact: strip DTDs, Maven metadata, and javax.xml.stream service files --> | ||
<excludes> | ||
<exclude>properties.dtd</exclude> | ||
<exclude>PropertyList-1.0.dtd</exclude> | ||
<exclude>META-INF/maven/**</exclude> | ||
<exclude>META-INF/services/javax.xml.stream.*</exclude> | ||
</excludes> | ||
</filter> | ||
</filters> | ||
</configuration> | ||
</execution> | ||
</executions> | ||
</plugin> | ||
</plugins> | ||
</build> | ||
</project> |
Oops, something went wrong.