Skip to content

Commit 237e991

Browse files
authored
Core: Add REST support for lazy snapshot loading (apache#6850)
1 parent 42d209f commit 237e991

File tree

3 files changed

+169
-5
lines changed

3 files changed

+169
-5
lines changed

core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java

+42-5
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import java.io.UncheckedIOException;
2727
import java.time.Duration;
2828
import java.util.List;
29+
import java.util.Locale;
2930
import java.util.Map;
3031
import java.util.Set;
3132
import java.util.concurrent.Future;
@@ -88,6 +89,7 @@ public class RESTSessionCatalog extends BaseSessionCatalog
8889
implements Configurable<Configuration>, Closeable {
8990
private static final Logger LOG = LoggerFactory.getLogger(RESTSessionCatalog.class);
9091
private static final String REST_METRICS_REPORTING_ENABLED = "rest-metrics-reporting-enabled";
92+
private static final String REST_SNAPSHOT_LOADING_MODE = "snapshot-loading-mode";
9193
private static final List<String> TOKEN_PREFERENCE_ORDER =
9294
ImmutableList.of(
9395
OAuth2Properties.ID_TOKEN_TYPE,
@@ -102,6 +104,7 @@ public class RESTSessionCatalog extends BaseSessionCatalog
102104
private boolean refreshAuthByDefault = false;
103105
private RESTClient client = null;
104106
private ResourcePaths paths = null;
107+
private SnapshotMode snapshotMode = null;
105108
private Object conf = null;
106109
private FileIO io = null;
107110
private MetricsReporter reporter = null;
@@ -110,6 +113,15 @@ public class RESTSessionCatalog extends BaseSessionCatalog
110113
// a lazy thread pool for token refresh
111114
private volatile ScheduledExecutorService refreshExecutor = null;
112115

116+
enum SnapshotMode {
117+
ALL,
118+
REFS;
119+
120+
Map<String, String> params() {
121+
return ImmutableMap.of("snapshots", this.name().toLowerCase(Locale.US));
122+
}
123+
}
124+
113125
public RESTSessionCatalog() {
114126
this(config -> HTTPClient.builder().uri(config.get(CatalogProperties.URI)).build());
115127
}
@@ -179,6 +191,13 @@ public void initialize(String name, Map<String, String> unresolved) {
179191
this.io =
180192
CatalogUtil.loadFileIO(
181193
ioImpl != null ? ioImpl : ResolvingFileIO.class.getName(), mergedProps, conf);
194+
195+
this.snapshotMode =
196+
SnapshotMode.valueOf(
197+
PropertyUtil.propertyAsString(
198+
mergedProps, REST_SNAPSHOT_LOADING_MODE, SnapshotMode.ALL.name())
199+
.toUpperCase(Locale.US));
200+
182201
String metricsReporterImpl = mergedProps.get(CatalogProperties.METRICS_REPORTER_IMPL);
183202
this.reporter =
184203
null != metricsReporterImpl
@@ -263,9 +282,11 @@ public void renameTable(SessionContext context, TableIdentifier from, TableIdent
263282
client.post(paths.rename(), request, null, headers(context), ErrorHandlers.tableErrorHandler());
264283
}
265284

266-
private LoadTableResponse loadInternal(SessionContext context, TableIdentifier identifier) {
285+
private LoadTableResponse loadInternal(
286+
SessionContext context, TableIdentifier identifier, SnapshotMode mode) {
267287
return client.get(
268288
paths.table(identifier),
289+
mode.params(),
269290
LoadTableResponse.class,
270291
headers(context),
271292
ErrorHandlers.tableErrorHandler());
@@ -279,7 +300,7 @@ public Table loadTable(SessionContext context, TableIdentifier identifier) {
279300
LoadTableResponse response;
280301
TableIdentifier loadedIdent;
281302
try {
282-
response = loadInternal(context, identifier);
303+
response = loadInternal(context, identifier, snapshotMode);
283304
loadedIdent = identifier;
284305
metadataType = null;
285306

@@ -289,7 +310,7 @@ public Table loadTable(SessionContext context, TableIdentifier identifier) {
289310
// attempt to load a metadata table using the identifier's namespace as the base table
290311
TableIdentifier baseIdent = TableIdentifier.of(identifier.namespace().levels());
291312
try {
292-
response = loadInternal(context, baseIdent);
313+
response = loadInternal(context, baseIdent, snapshotMode);
293314
loadedIdent = baseIdent;
294315
} catch (NoSuchTableException ignored) {
295316
// the base table does not exist
@@ -302,13 +323,29 @@ public Table loadTable(SessionContext context, TableIdentifier identifier) {
302323
}
303324

304325
AuthSession session = tableSession(response.config(), session(context));
326+
// The response may have been loaded from the base table rather than from the requested
// identifier (metadata-table fallback), so any follow-up request for the full snapshot list
// must target the identifier that was actually loaded. loadedIdent is assigned on more than
// one path and cannot be captured by a lambda directly; copy it into a dedicated local.
TableIdentifier lazyLoadIdent = loadedIdent;
TableMetadata tableMetadata;

if (snapshotMode == SnapshotMode.REFS) {
  // the response was requested with snapshots=refs; register a supplier that fetches the
  // complete snapshot list with a second load-table call using snapshots=all
  tableMetadata =
      TableMetadata.buildFrom(response.tableMetadata())
          .setSnapshotsSupplier(
              () ->
                  loadInternal(context, lazyLoadIdent, SnapshotMode.ALL)
                      .tableMetadata()
                      .snapshots())
          .discardChanges()
          .build();
} else {
  // all snapshots were requested up front, so the response metadata is used as-is
  tableMetadata = response.tableMetadata();
}
341+
305342
RESTTableOperations ops =
306343
new RESTTableOperations(
307344
client,
308345
paths.table(loadedIdent),
309346
session::headers,
310347
tableFileIO(response.config()),
311-
response.tableMetadata());
348+
tableMetadata);
312349

313350
TableIdentifier tableIdentifier = loadedIdent;
314351
BaseTable table =
@@ -588,7 +625,7 @@ public Transaction createTransaction() {
588625

589626
@Override
590627
public Transaction replaceTransaction() {
591-
LoadTableResponse response = loadInternal(context, ident);
628+
LoadTableResponse response = loadInternal(context, ident, snapshotMode);
592629
String fullName = fullTableName(ident);
593630

594631
AuthSession session = tableSession(response.config(), session(context));

core/src/test/java/org/apache/iceberg/rest/TestRESTCatalog.java

+114
Original file line numberDiff line numberDiff line change
@@ -23,24 +23,29 @@
2323
import static org.mockito.ArgumentMatchers.any;
2424
import static org.mockito.ArgumentMatchers.eq;
2525
import static org.mockito.Mockito.times;
26+
import static org.mockito.Mockito.verify;
2627

2728
import com.fasterxml.jackson.core.JsonProcessingException;
2829
import com.fasterxml.jackson.databind.ObjectMapper;
2930
import java.io.File;
3031
import java.io.IOException;
3132
import java.nio.file.Path;
3233
import java.util.Map;
34+
import java.util.Set;
3335
import java.util.UUID;
3436
import java.util.concurrent.atomic.AtomicInteger;
3537
import java.util.function.Consumer;
38+
import java.util.stream.Collectors;
3639
import org.apache.hadoop.conf.Configuration;
3740
import org.apache.iceberg.AssertHelpers;
3841
import org.apache.iceberg.CatalogProperties;
3942
import org.apache.iceberg.DataFiles;
4043
import org.apache.iceberg.FileScanTask;
4144
import org.apache.iceberg.PartitionSpec;
4245
import org.apache.iceberg.Schema;
46+
import org.apache.iceberg.SnapshotRef;
4347
import org.apache.iceberg.Table;
48+
import org.apache.iceberg.TableMetadata;
4449
import org.apache.iceberg.catalog.CatalogTests;
4550
import org.apache.iceberg.catalog.SessionCatalog;
4651
import org.apache.iceberg.catalog.TableIdentifier;
@@ -49,7 +54,9 @@
4954
import org.apache.iceberg.metrics.MetricsReport;
5055
import org.apache.iceberg.metrics.MetricsReporter;
5156
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
57+
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
5258
import org.apache.iceberg.rest.RESTCatalogAdapter.HTTPMethod;
59+
import org.apache.iceberg.rest.RESTSessionCatalog.SnapshotMode;
5360
import org.apache.iceberg.rest.auth.AuthSessionUtil;
5461
import org.apache.iceberg.rest.auth.OAuth2Properties;
5562
import org.apache.iceberg.rest.auth.OAuth2Util;
@@ -737,6 +744,113 @@ public void testTableCredential() {
737744
ImmutableMap.of("Authorization", "Bearer client-credentials-token:sub=table-user"));
738745
}
739746

747+
@Test
748+
public void testSnapshotParams() {
749+
assertThat(SnapshotMode.ALL.params()).isEqualTo(ImmutableMap.of("snapshots", "all"));
750+
751+
assertThat(SnapshotMode.REFS.params()).isEqualTo(ImmutableMap.of("snapshots", "refs"));
752+
}
753+
754+
@Test
755+
public void testTableSnapshotLoading() {
756+
RESTCatalogAdapter adapter = Mockito.spy(new RESTCatalogAdapter(backendCatalog));
757+
758+
RESTCatalog catalog =
759+
new RESTCatalog(SessionCatalog.SessionContext.createEmpty(), (config) -> adapter);
760+
catalog.initialize(
761+
"test",
762+
ImmutableMap.of(
763+
CatalogProperties.URI,
764+
"ignored",
765+
CatalogProperties.FILE_IO_IMPL,
766+
"org.apache.iceberg.io.InMemoryFileIO",
767+
// default loading to refs only
768+
"snapshot-loading-mode",
769+
"refs"));
770+
771+
// Create a table with multiple snapshots
772+
Table table = catalog.createTable(TABLE, SCHEMA);
773+
table
774+
.newFastAppend()
775+
.appendFile(
776+
DataFiles.builder(PartitionSpec.unpartitioned())
777+
.withPath("/path/to/data-a.parquet")
778+
.withFileSizeInBytes(10)
779+
.withRecordCount(2)
780+
.build())
781+
.commit();
782+
783+
table
784+
.newFastAppend()
785+
.appendFile(
786+
DataFiles.builder(PartitionSpec.unpartitioned())
787+
.withPath("/path/to/data-b.parquet")
788+
.withFileSizeInBytes(10)
789+
.withRecordCount(2)
790+
.build())
791+
.commit();
792+
793+
ResourcePaths paths = ResourcePaths.forCatalogProperties(Maps.newHashMap());
794+
795+
// Respond with only referenced snapshots
796+
Answer<?> refsAnswer =
797+
invocation -> {
798+
LoadTableResponse originalResponse = (LoadTableResponse) invocation.callRealMethod();
799+
TableMetadata fullTableMetadata = originalResponse.tableMetadata();
800+
801+
Set<Long> referencedSnapshotIds =
802+
fullTableMetadata.refs().values().stream()
803+
.map(SnapshotRef::snapshotId)
804+
.collect(Collectors.toSet());
805+
806+
TableMetadata refsMetadata =
807+
fullTableMetadata.removeSnapshotsIf(
808+
s -> !referencedSnapshotIds.contains(s.snapshotId()));
809+
810+
return LoadTableResponse.builder()
811+
.withTableMetadata(refsMetadata)
812+
.addAllConfig(originalResponse.config())
813+
.build();
814+
};
815+
816+
Mockito.doAnswer(refsAnswer)
817+
.when(adapter)
818+
.execute(
819+
eq(HTTPMethod.GET),
820+
eq(paths.table(TABLE)),
821+
eq(ImmutableMap.of("snapshots", "refs")),
822+
any(),
823+
eq(LoadTableResponse.class),
824+
any(),
825+
any());
826+
827+
Table refsTables = catalog.loadTable(TABLE);
828+
829+
assertThat(refsTables.currentSnapshot()).isEqualTo(table.currentSnapshot());
830+
// verify that the table was loaded with the refs argument
831+
verify(adapter, times(1))
832+
.execute(
833+
eq(HTTPMethod.GET),
834+
eq(paths.table(TABLE)),
835+
eq(ImmutableMap.of("snapshots", "refs")),
836+
any(),
837+
eq(LoadTableResponse.class),
838+
any(),
839+
any());
840+
841+
// verify that all snapshots are loaded when referenced
842+
assertThat(refsTables.snapshots()).containsExactlyInAnyOrderElementsOf(table.snapshots());
843+
verify(adapter, times(1))
844+
.execute(
845+
eq(HTTPMethod.GET),
846+
eq(paths.table(TABLE)),
847+
eq(ImmutableMap.of("snapshots", "all")),
848+
any(),
849+
eq(LoadTableResponse.class),
850+
any(),
851+
any());
852+
}
853+
740854
public void testTableAuth(
741855
String catalogToken,
742856
Map<String, String> credentials,

open-api/rest-catalog-open-api.yaml

+13
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,19 @@ paths:
523523
table. The configuration key "token" is used to pass an access token to be used as a bearer token
524524
for table requests. Otherwise, a token may be passed using a RFC 8693 token type as a configuration
525525
key. For example, "urn:ietf:params:oauth:token-type:jwt=<JWT-token>".
526+
parameters:
527+
- in: query
528+
name: snapshots
529+
description:
530+
The snapshots to return in the body of the metadata. Setting the value to `all` returns
531+
the full set of snapshots currently valid for the table. Setting the value to `refs`
532+
returns only the snapshots referenced by branches or tags.
533+
534+
If no parameter is provided, the default is `all`.
535+
required: false
536+
schema:
537+
type: string
538+
enum: [ all, refs ]
526539
responses:
527540
200:
528541
$ref: '#/components/responses/LoadTableResponse'

0 commit comments

Comments
 (0)