Skip to content

Commit

Permalink
Aliyun: Add OSSOutputStream (apache#3288)
Browse files Browse the repository at this point in the history
  • Loading branch information
xingbowu authored Oct 19, 2021
1 parent 34e72b5 commit 329fa5b
Show file tree
Hide file tree
Showing 9 changed files with 623 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.aliyun;

import java.io.Serializable;
import java.util.Map;
import org.apache.iceberg.util.PropertyUtil;

public class AliyunProperties implements Serializable {
/**
* Location to put staging files for uploading to OSS, defaults to the directory value of java.io.tmpdir.
*/
public static final String OSS_STAGING_DIRECTORY = "oss.staging-dir";
private final String ossStagingDirectory;

public AliyunProperties(Map<String, String> properties) {
this.ossStagingDirectory = PropertyUtil.propertyAsString(properties, OSS_STAGING_DIRECTORY,
System.getProperty("java.io.tmpdir"));
}

public String ossStagingDirectory() {
return ossStagingDirectory;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.aliyun.oss;

import com.aliyun.oss.OSS;
import com.aliyun.oss.model.ObjectMetadata;
import com.aliyun.oss.model.PutObjectRequest;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UncheckedIOException;
import java.util.Arrays;
import org.apache.iceberg.aliyun.AliyunProperties;
import org.apache.iceberg.exceptions.NotFoundException;
import org.apache.iceberg.io.PositionOutputStream;
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class OSSOutputStream extends PositionOutputStream {
private static final Logger LOG = LoggerFactory.getLogger(OSSOutputStream.class);
private final StackTraceElement[] createStack;

private final OSS client;
private final OSSURI uri;

private final File currentStagingFile;
private final OutputStream stream;
private long pos = 0;
private boolean closed = false;

OSSOutputStream(OSS client, OSSURI uri, AliyunProperties aliyunProperties) {
this.client = client;
this.uri = uri;
this.createStack = Thread.currentThread().getStackTrace();

this.currentStagingFile = newStagingFile(aliyunProperties.ossStagingDirectory());
this.stream = newStream(currentStagingFile);
}

private static File newStagingFile(String ossStagingDirectory) {
try {
File stagingFile = File.createTempFile("oss-file-io-", ".tmp", new File(ossStagingDirectory));
stagingFile.deleteOnExit();
return stagingFile;
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}

private static OutputStream newStream(File currentStagingFile) {
try {
return new BufferedOutputStream(new FileOutputStream(currentStagingFile));
} catch (FileNotFoundException e) {
throw new NotFoundException(e, "Failed to create file: %s", currentStagingFile);
}
}

private static InputStream uncheckedInputStream(File file) {
try {
return new FileInputStream(file);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}

@Override
public long getPos() {
return pos;
}

@Override
public void flush() throws IOException {
Preconditions.checkState(!closed, "Already closed.");
stream.flush();
}

@Override
public void write(int b) throws IOException {
Preconditions.checkState(!closed, "Already closed.");
stream.write(b);
pos += 1;
}

@Override
public void write(byte[] b, int off, int len) throws IOException {
Preconditions.checkState(!closed, "Already closed.");
stream.write(b, off, len);
pos += len;
}

@Override
public void close() throws IOException {
if (closed) {
return;
}

super.close();
closed = true;

try {
stream.close();
completeUploads();
} finally {
cleanUpStagingFiles();
}
}

private void completeUploads() {
long contentLength = currentStagingFile.length();
if (contentLength == 0) {
LOG.debug("Skipping empty upload to OSS");
return;
}

LOG.debug("Uploading {} staged bytes to OSS", contentLength);
InputStream contentStream = uncheckedInputStream(currentStagingFile);
ObjectMetadata metadata = new ObjectMetadata();
metadata.setContentLength(contentLength);

PutObjectRequest request = new PutObjectRequest(uri.bucket(), uri.key(), contentStream, metadata);
client.putObject(request);
}

private void cleanUpStagingFiles() {
if (!currentStagingFile.delete()) {
LOG.warn("Failed to delete staging file: {}", currentStagingFile);
}
}

@SuppressWarnings("checkstyle:NoFinalizer")
@Override
protected void finalize() throws Throwable {
super.finalize();
if (!closed) {
close(); // releasing resources is more important than printing the warning.
String trace = Joiner.on("\n\t").join(Arrays.copyOfRange(createStack, 1, createStack.length));
LOG.warn("Unclosed output stream created by:\n\t{}", trace);
}
}
}
109 changes: 109 additions & 0 deletions aliyun/src/main/java/org/apache/iceberg/aliyun/oss/OSSURI.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.aliyun.oss;

import com.aliyun.oss.internal.OSSUtils;
import java.util.Set;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;

/**
* This class represents a fully qualified location in OSS for input/output
* operations expressed as as URI. This implementation is provided to
* ensure compatibility with Hadoop Path implementations that may introduce
* encoding issues with native URI implementation.
*
* Note: Path-style access is deprecated and not supported by this
* implementation.
*/
public class OSSURI {
private static final String SCHEME_DELIM = "://";
private static final String PATH_DELIM = "/";
private static final String QUERY_DELIM = "\\?";
private static final String FRAGMENT_DELIM = "#";
private static final Set<String> VALID_SCHEMES = ImmutableSet.of("https", "oss");
private final String location;
private final String bucket;
private final String key;

/**
* Creates a new OSSURI based on the bucket and key parsed from the location
* The location in string form has the syntax as below, which refers to RFC2396:
* [scheme:][//bucket][object key][#fragment]
* [scheme:][//bucket][object key][?query][#fragment]
*
* It specifies precisely which characters are permitted in the various components of a URI reference
* in Aliyun OSS documentation as below:
* Bucket: https://help.aliyun.com/document_detail/257087.html
* Object: https://help.aliyun.com/document_detail/273129.html
* Scheme: https or oss
*
* <p>
* Supported access styles are https and oss://... URIs.
*
* @param location fully qualified URI.
*/
public OSSURI(String location) {
Preconditions.checkNotNull(location, "OSS location cannot be null.");

this.location = location;
String[] schemeSplit = location.split(SCHEME_DELIM, -1);
ValidationException.check(schemeSplit.length == 2, "Invalid OSS location: %s", location);

String scheme = schemeSplit[0];
ValidationException.check(VALID_SCHEMES.contains(scheme.toLowerCase()),
"Invalid scheme: %s in OSS location %s", scheme, location);

String[] authoritySplit = schemeSplit[1].split(PATH_DELIM, 2);
ValidationException.check(authoritySplit.length == 2,
"Invalid bucket or key in OSS location: %s", location);
ValidationException.check(!authoritySplit[1].trim().isEmpty(),
"Missing key in OSS location: %s", location);
this.bucket = authoritySplit[0];
OSSUtils.ensureBucketNameValid(bucket);

// Strip query and fragment if they exist
String path = authoritySplit[1];
path = path.split(QUERY_DELIM, -1)[0];
path = path.split(FRAGMENT_DELIM, -1)[0];
this.key = path;
OSSUtils.ensureObjectKeyValid(key);
}

/**
* Return OSS bucket name.
*/
public String bucket() {
return bucket;
}

/**
* Return OSS object key name.
*/
public String key() {
return key;
}

@Override
public String toString() {
return location;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.aliyun.oss;

import com.aliyun.oss.OSS;
import org.apache.iceberg.util.SerializableSupplier;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;

public abstract class AliyunOSSTestBase {
@ClassRule
public static final AliyunOSSTestRule OSS_TEST_RULE = AliyunOSSTestUtility.initialize();

private final SerializableSupplier<OSS> ossClient = OSS_TEST_RULE::createOSSClient;
private final String bucketName = OSS_TEST_RULE.testBucketName();
private final String keyPrefix = OSS_TEST_RULE.keyPrefix();

@Before
public void before() {
OSS_TEST_RULE.setUpBucket(bucketName);
}

@After
public void after() {
OSS_TEST_RULE.tearDownBucket(bucketName);
}

protected String location(String key) {
return String.format("oss://%s/%s%s", bucketName, keyPrefix, key);
}

protected SerializableSupplier<OSS> ossClient() {
return ossClient;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ public void evaluate() throws Throwable {
};
}

/**
* Returns the common key prefix for those newly created objects in test cases. For example, we set the test bucket
* to be 'oss-testing-bucket' and the key prefix to be 'iceberg-objects/', then the produced objects in test cases
* will be:
* <pre>
* oss://oss-testing-bucket/iceberg-objects/a.dat
* oss://oss-testing-bucket/iceberg-objects/b.dat
* ...
* </pre>
*/
String keyPrefix();

/**
* Start the Aliyun Object storage services application that the OSS client could connect to.
*/
Expand Down
Loading

0 comments on commit 329fa5b

Please sign in to comment.