Skip to content

Commit

Permalink
Aliyun: Add OSSInputStream (apache#3348)
Browse files Browse the repository at this point in the history
  • Loading branch information
xingbowu authored Oct 26, 2021
1 parent d55695e commit fbf048b
Show file tree
Hide file tree
Showing 2 changed files with 286 additions and 0 deletions.
153 changes: 153 additions & 0 deletions aliyun/src/main/java/org/apache/iceberg/aliyun/oss/OSSInputStream.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.aliyun.oss;

import com.aliyun.oss.OSS;
import com.aliyun.oss.model.GetObjectRequest;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import org.apache.iceberg.io.SeekableInputStream;
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.io.ByteStreams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link SeekableInputStream} that reads an Aliyun OSS object through ranged GET requests.
 *
 * <p>Seeks are lazy: {@link #seek(long)} only records the requested offset, and the underlying
 * object stream is positioned (or re-opened) on the next read. Short forward seeks are served by
 * skipping bytes on the already open stream instead of issuing a new request.
 *
 * <p>This class performs no synchronization of its own.
 */
public class OSSInputStream extends SeekableInputStream {
  private static final Logger LOG = LoggerFactory.getLogger(OSSInputStream.class);

  /** Forward seeks of at most this many bytes are served by skipping on the open stream. */
  private static final int SKIP_SIZE = 1024 * 1024;

  /** Stack trace captured at construction; reported by {@link #finalize()} if the stream leaks. */
  private final StackTraceElement[] createStack;
  private final OSS client;
  private final OSSURI uri;

  /** Currently open object stream, or {@code null} if none has been opened yet. */
  private InputStream stream = null;
  /** Offset within the object that {@link #stream} is currently positioned at. */
  private long pos = 0;
  /** Offset within the object that the next read should start from. */
  private long next = 0;
  private boolean closed = false;

  /**
   * Creates a stream over the given object. No request is issued until the first read.
   *
   * @param client OSS client used to fetch the object content
   * @param uri location (bucket and key) of the object to read
   */
  public OSSInputStream(OSS client, OSSURI uri) {
    this.client = client;
    this.uri = uri;
    this.createStack = Thread.currentThread().getStackTrace();
  }

  /**
   * Returns the offset that the next read will start from.
   *
   * @return the current logical position in the object
   */
  @Override
  public long getPos() {
    return next;
  }

  /**
   * Records a new read position; the underlying stream is repositioned lazily on the next read.
   *
   * @param newPos non-negative offset to read from next
   * @throws IllegalStateException if this stream has been closed
   * @throws IllegalArgumentException if {@code newPos} is negative
   */
  @Override
  public void seek(long newPos) {
    Preconditions.checkState(!closed, "Cannot seek: already closed");
    Preconditions.checkArgument(newPos >= 0, "Position is negative: %s", newPos);

    // this allows a seek beyond the end of the stream but the next read will fail
    next = newPos;
  }

  /**
   * Reads a single byte at the current position.
   *
   * @return the value returned by the underlying stream: the byte read, or {@code -1} at end of
   *     stream
   * @throws IOException if the underlying stream cannot be positioned or read
   * @throws IllegalStateException if this stream has been closed
   */
  @Override
  public int read() throws IOException {
    Preconditions.checkState(!closed, "Cannot read: already closed");
    positionStream();

    int value = stream.read();
    if (value >= 0) {
      // advance only when a byte was actually consumed, so that hitting end of stream
      // (or an exception from the read) does not move the reported position
      pos += 1;
      next += 1;
    }

    return value;
  }

  /**
   * Reads up to {@code len} bytes at the current position into {@code b}.
   *
   * @param b destination buffer
   * @param off offset in {@code b} at which to start storing bytes
   * @param len maximum number of bytes to read
   * @return the value returned by the underlying stream: the number of bytes read, or {@code -1}
   *     at end of stream
   * @throws IOException if the underlying stream cannot be positioned or read
   * @throws IllegalStateException if this stream has been closed
   */
  @Override
  public int read(byte[] b, int off, int len) throws IOException {
    Preconditions.checkState(!closed, "Cannot read: already closed");
    positionStream();

    int bytesRead = stream.read(b, off, len);
    if (bytesRead > 0) {
      // a -1 (end of stream) result must not be added, or the position would move backwards
      pos += bytesRead;
      next += bytesRead;
    }

    return bytesRead;
  }

  /**
   * Closes this stream and the underlying object stream, if one is open. Calling this method more
   * than once has no further effect.
   *
   * @throws IOException if closing the underlying stream fails
   */
  @Override
  public void close() throws IOException {
    if (closed) {
      return;
    }

    // mark as closed before releasing resources so that a failure while closing the
    // underlying stream still leaves this stream in the closed state
    closed = true;
    super.close();
    closeStream();
  }

  /**
   * Ensures the underlying stream is open and positioned at {@code next}, either by skipping
   * forward on the open stream or by opening a new stream at the requested offset.
   */
  private void positionStream() throws IOException {
    if ((stream != null) && (next == pos)) {
      // already at specified position.
      return;
    }

    if ((stream != null) && (next > pos)) {
      // seeking forwards
      long skip = next - pos;
      if (skip <= Math.max(stream.available(), SKIP_SIZE)) {
        // already buffered or seek is small enough
        LOG.debug("Read-through seek for {} from {} to offset {}", uri, pos, next);
        try {
          ByteStreams.skipFully(stream, skip);
          pos = next;
          return;
        } catch (IOException ignored) {
          // will retry by re-opening the stream.
        }
      }
    }

    // close the stream and open at desired position.
    LOG.debug("Seek with new stream for {} to offset {}", uri, next);
    pos = next;
    openStream();
  }

  /** Replaces the current stream with a new one that starts at offset {@code pos}. */
  private void openStream() throws IOException {
    closeStream();

    // NOTE(review): an end of -1 is expected to mean "through the end of the object";
    // confirm against the Aliyun SDK's GetObjectRequest range documentation.
    GetObjectRequest request = new GetObjectRequest(uri.bucket(), uri.key()).withRange(pos, -1);
    stream = client.getObject(request).getObjectContent();
  }

  /** Closes the underlying stream, if any, and always forgets the reference to it. */
  private void closeStream() throws IOException {
    if (stream != null) {
      try {
        stream.close();
      } finally {
        // drop the reference even if close() fails, so a broken stream is never reused
        // for a later read at a position it is not actually at
        stream = null;
      }
    }
  }

  /**
   * Safety net for leaked streams: closes the stream if the owner did not, and logs the stack
   * trace captured at construction so the leak can be located.
   */
  @SuppressWarnings("checkstyle:NoFinalizer")
  @Override
  protected void finalize() throws Throwable {
    super.finalize();
    if (!closed) {
      close(); // releasing resources is more important than printing the warning
      String trace = Joiner.on("\n\t").join(Arrays.copyOfRange(createStack, 1, createStack.length));
      LOG.warn("Unclosed input stream created by: \n\t{}", trace);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.aliyun.oss;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.commons.io.IOUtils;
import org.apache.iceberg.io.SeekableInputStream;
import org.junit.Test;

import static org.apache.iceberg.AssertHelpers.assertThrows;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;

/**
 * Tests for {@link OSSInputStream}: positional reads (single-byte and bulk), forward and backward
 * seeks, and rejection of seeks after the stream has been closed.
 */
public class TestOSSInputStream extends AliyunOSSTestBase {
  private final Random random = ThreadLocalRandom.current();

  /** Writes {@code data} as the content of the object identified by {@code uri}. */
  private void writeOSSData(OSSURI uri, byte[] data) {
    ossClient().get().putObject(uri.bucket(), uri.key(), new ByteArrayInputStream(data));
  }

  /** Returns a freshly allocated array of {@code size} random bytes. */
  private byte[] randomData(int size) {
    byte[] bytes = new byte[size];
    random.nextBytes(bytes);
    return bytes;
  }

  /**
   * Seeks to {@code rangeStart}, reads {@code size} bytes either in bulk ({@code buffered}) or one
   * byte at a time, and verifies both the reported positions and the bytes read.
   */
  private void readAndCheck(SeekableInputStream in, long rangeStart, int size, byte[] original, boolean buffered)
      throws IOException {
    in.seek(rangeStart);
    assertEquals("Should have the correct position", rangeStart, in.getPos());

    byte[] actual = new byte[size];
    if (!buffered) {
      // single-byte path: exercises read()
      for (int i = 0; i < size; i++) {
        actual[i] = (byte) in.read();
      }
    } else {
      // bulk path: exercises read(byte[], int, int)
      IOUtils.readFully(in, actual);
    }

    long rangeEnd = rangeStart + size;
    assertEquals("Should have the correct position", rangeEnd, in.getPos());

    byte[] wanted = Arrays.copyOfRange(original, (int) rangeStart, (int) rangeEnd);
    assertArrayEquals("Should have expected range data", wanted, actual);
  }

  @Test
  public void testRead() throws Exception {
    OSSURI uri = new OSSURI(location("read.dat"));
    int dataSize = 1024 * 1024 * 10;
    byte[] data = randomData(dataSize);
    writeOSSData(uri, data);

    int readSize = 1024;
    int seekSize = 1024;
    long seekNewStreamPosition = 2 * 1024 * 1024;

    try (SeekableInputStream in = new OSSInputStream(ossClient().get(), uri)) {
      // Sequential reads from the start of the object
      readAndCheck(in, in.getPos(), readSize, data, false);
      readAndCheck(in, in.getPos(), readSize, data, true);

      // Seek forward in current stream
      readAndCheck(in, in.getPos() + seekSize, readSize, data, false);
      readAndCheck(in, in.getPos() + seekSize, readSize, data, true);

      // Buffered read
      readAndCheck(in, in.getPos(), readSize, data, true);
      readAndCheck(in, in.getPos(), readSize, data, false);

      // Seek with new stream
      readAndCheck(in, in.getPos() + seekNewStreamPosition, readSize, data, true);
      readAndCheck(in, in.getPos() + seekNewStreamPosition, readSize, data, false);

      // Backseek and read
      readAndCheck(in, 0, readSize, data, true);
      readAndCheck(in, 0, readSize, data, false);
    }
  }

  @Test
  public void testClose() throws Exception {
    OSSURI uri = new OSSURI(location("closed.dat"));
    SeekableInputStream closed = new OSSInputStream(ossClient().get(), uri);
    closed.close();

    assertThrows(
        "Cannot seek the input stream after closed.",
        IllegalStateException.class,
        "Cannot seek: already closed",
        () -> {
          closed.seek(0);
          return null;
        });
  }

  @Test
  public void testSeek() throws Exception {
    OSSURI uri = new OSSURI(location("seek.dat"));
    byte[] expected = randomData(1024 * 1024);
    writeOSSData(uri, expected);

    int half = expected.length / 2;
    try (SeekableInputStream in = new OSSInputStream(ossClient().get(), uri)) {
      in.seek(half);

      // read the second half of the object in one go
      byte[] actual = new byte[half];
      IOUtils.readFully(in, actual);

      byte[] secondHalf = Arrays.copyOfRange(expected, half, expected.length);
      assertArrayEquals("Should have expected seeking stream", secondHalf, actual);
    }
  }
}

0 comments on commit fbf048b

Please sign in to comment.