forked from apache/iceberg
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Aliyun: Add OSSInputStream (apache#3348)
- Loading branch information
Showing 2 changed files with 286 additions and 0 deletions.
There are no files selected for viewing
153 changes: 153 additions & 0 deletions
153
aliyun/src/main/java/org/apache/iceberg/aliyun/oss/OSSInputStream.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.iceberg.aliyun.oss; | ||
|
||
import com.aliyun.oss.OSS; | ||
import com.aliyun.oss.model.GetObjectRequest; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.Arrays; | ||
import org.apache.iceberg.io.SeekableInputStream; | ||
import org.apache.iceberg.relocated.com.google.common.base.Joiner; | ||
import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||
import org.apache.iceberg.relocated.com.google.common.io.ByteStreams; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
public class OSSInputStream extends SeekableInputStream { | ||
private static final Logger LOG = LoggerFactory.getLogger(OSSInputStream.class); | ||
private static final int SKIP_SIZE = 1024 * 1024; | ||
|
||
private final StackTraceElement[] createStack; | ||
private final OSS client; | ||
private final OSSURI uri; | ||
|
||
private InputStream stream = null; | ||
private long pos = 0; | ||
private long next = 0; | ||
private boolean closed = false; | ||
|
||
public OSSInputStream(OSS client, OSSURI uri) { | ||
this.client = client; | ||
this.uri = uri; | ||
this.createStack = Thread.currentThread().getStackTrace(); | ||
} | ||
|
||
@Override | ||
public long getPos() { | ||
return next; | ||
} | ||
|
||
@Override | ||
public void seek(long newPos) { | ||
Preconditions.checkState(!closed, "Cannot seek: already closed"); | ||
Preconditions.checkArgument(newPos >= 0, "Position is negative: %s", newPos); | ||
|
||
// this allows a seek beyond the end of the stream but the next read will fail | ||
next = newPos; | ||
} | ||
|
||
@Override | ||
public int read() throws IOException { | ||
Preconditions.checkState(!closed, "Cannot read: already closed"); | ||
positionStream(); | ||
|
||
pos += 1; | ||
next += 1; | ||
|
||
return stream.read(); | ||
} | ||
|
||
@Override | ||
public int read(byte[] b, int off, int len) throws IOException { | ||
Preconditions.checkState(!closed, "Cannot read: already closed"); | ||
positionStream(); | ||
|
||
int bytesRead = stream.read(b, off, len); | ||
pos += bytesRead; | ||
next += bytesRead; | ||
|
||
return bytesRead; | ||
} | ||
|
||
@Override | ||
public void close() throws IOException { | ||
if (closed) { | ||
return; | ||
} | ||
|
||
super.close(); | ||
closeStream(); | ||
closed = true; | ||
} | ||
|
||
private void positionStream() throws IOException { | ||
if ((stream != null) && (next == pos)) { | ||
// already at specified position. | ||
return; | ||
} | ||
|
||
if ((stream != null) && (next > pos)) { | ||
// seeking forwards | ||
long skip = next - pos; | ||
if (skip <= Math.max(stream.available(), SKIP_SIZE)) { | ||
// already buffered or seek is small enough | ||
LOG.debug("Read-through seek for {} from {} to offset {}", uri, pos, next); | ||
try { | ||
ByteStreams.skipFully(stream, skip); | ||
pos = next; | ||
return; | ||
} catch (IOException ignored) { | ||
// will retry by re-opening the stream. | ||
} | ||
} | ||
} | ||
|
||
// close the stream and open at desired position. | ||
LOG.debug("Seek with new stream for {} to offset {}", uri, next); | ||
pos = next; | ||
openStream(); | ||
} | ||
|
||
private void openStream() throws IOException { | ||
closeStream(); | ||
|
||
GetObjectRequest request = new GetObjectRequest(uri.bucket(), uri.key()).withRange(pos, -1); | ||
stream = client.getObject(request).getObjectContent(); | ||
} | ||
|
||
private void closeStream() throws IOException { | ||
if (stream != null) { | ||
stream.close(); | ||
stream = null; | ||
} | ||
} | ||
|
||
@SuppressWarnings("checkstyle:NoFinalizer") | ||
@Override | ||
protected void finalize() throws Throwable { | ||
super.finalize(); | ||
if (!closed) { | ||
close(); // releasing resources is more important than printing the warning | ||
String trace = Joiner.on("\n\t").join(Arrays.copyOfRange(createStack, 1, createStack.length)); | ||
LOG.warn("Unclosed input stream created by: \n\t{}", trace); | ||
} | ||
} | ||
} |
133 changes: 133 additions & 0 deletions
133
aliyun/src/test/java/org/apache/iceberg/aliyun/oss/TestOSSInputStream.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.iceberg.aliyun.oss; | ||
|
||
import java.io.ByteArrayInputStream; | ||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.Random; | ||
import java.util.concurrent.ThreadLocalRandom; | ||
import org.apache.commons.io.IOUtils; | ||
import org.apache.iceberg.io.SeekableInputStream; | ||
import org.junit.Test; | ||
|
||
import static org.apache.iceberg.AssertHelpers.assertThrows; | ||
import static org.junit.Assert.assertArrayEquals; | ||
import static org.junit.Assert.assertEquals; | ||
|
||
public class TestOSSInputStream extends AliyunOSSTestBase { | ||
private final Random random = ThreadLocalRandom.current(); | ||
|
||
@Test | ||
public void testRead() throws Exception { | ||
OSSURI uri = new OSSURI(location("read.dat")); | ||
int dataSize = 1024 * 1024 * 10; | ||
byte[] data = randomData(dataSize); | ||
|
||
writeOSSData(uri, data); | ||
|
||
try (SeekableInputStream in = new OSSInputStream(ossClient().get(), uri)) { | ||
int readSize = 1024; | ||
|
||
readAndCheck(in, in.getPos(), readSize, data, false); | ||
readAndCheck(in, in.getPos(), readSize, data, true); | ||
|
||
// Seek forward in current stream | ||
int seekSize = 1024; | ||
readAndCheck(in, in.getPos() + seekSize, readSize, data, false); | ||
readAndCheck(in, in.getPos() + seekSize, readSize, data, true); | ||
|
||
// Buffered read | ||
readAndCheck(in, in.getPos(), readSize, data, true); | ||
readAndCheck(in, in.getPos(), readSize, data, false); | ||
|
||
// Seek with new stream | ||
long seekNewStreamPosition = 2 * 1024 * 1024; | ||
readAndCheck(in, in.getPos() + seekNewStreamPosition, readSize, data, true); | ||
readAndCheck(in, in.getPos() + seekNewStreamPosition, readSize, data, false); | ||
|
||
// Backseek and read | ||
readAndCheck(in, 0, readSize, data, true); | ||
readAndCheck(in, 0, readSize, data, false); | ||
} | ||
} | ||
|
||
private void readAndCheck(SeekableInputStream in, long rangeStart, int size, byte[] original, boolean buffered) | ||
throws IOException { | ||
in.seek(rangeStart); | ||
assertEquals("Should have the correct position", rangeStart, in.getPos()); | ||
|
||
long rangeEnd = rangeStart + size; | ||
byte[] actual = new byte[size]; | ||
|
||
if (buffered) { | ||
IOUtils.readFully(in, actual); | ||
} else { | ||
int read = 0; | ||
while (read < size) { | ||
actual[read++] = (byte) in.read(); | ||
} | ||
} | ||
|
||
assertEquals("Should have the correct position", rangeEnd, in.getPos()); | ||
|
||
assertArrayEquals("Should have expected range data", | ||
Arrays.copyOfRange(original, (int) rangeStart, (int) rangeEnd), actual); | ||
} | ||
|
||
@Test | ||
public void testClose() throws Exception { | ||
OSSURI uri = new OSSURI(location("closed.dat")); | ||
SeekableInputStream closed = new OSSInputStream(ossClient().get(), uri); | ||
closed.close(); | ||
assertThrows("Cannot seek the input stream after closed.", IllegalStateException.class, | ||
"Cannot seek: already closed", | ||
() -> { | ||
closed.seek(0); | ||
return null; | ||
}); | ||
} | ||
|
||
@Test | ||
public void testSeek() throws Exception { | ||
OSSURI uri = new OSSURI(location("seek.dat")); | ||
byte[] expected = randomData(1024 * 1024); | ||
|
||
writeOSSData(uri, expected); | ||
|
||
try (SeekableInputStream in = new OSSInputStream(ossClient().get(), uri)) { | ||
in.seek(expected.length / 2); | ||
byte[] actual = new byte[expected.length / 2]; | ||
IOUtils.readFully(in, actual); | ||
assertArrayEquals("Should have expected seeking stream", | ||
Arrays.copyOfRange(expected, expected.length / 2, expected.length), actual); | ||
} | ||
} | ||
|
||
private byte[] randomData(int size) { | ||
byte[] data = new byte[size]; | ||
random.nextBytes(data); | ||
return data; | ||
} | ||
|
||
private void writeOSSData(OSSURI uri, byte[] data) { | ||
ossClient().get().putObject(uri.bucket(), uri.key(), new ByteArrayInputStream(data)); | ||
} | ||
} |