Skip to content

Commit

Permalink
Add initial Iceberg integration boilerplate and minor doc changes (#117)
Browse files Browse the repository at this point in the history
  • Loading branch information
jackye1995 authored Feb 24, 2025
1 parent 31f0e82 commit 6472f23
Show file tree
Hide file tree
Showing 13 changed files with 716 additions and 64 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
## Introduction

TrinityLake is an **Open Lakehouse Format** for Big Data Analytics, ML & AI.
It allows anyone to build a fully functional lakehouse with storage (e.g. Amazon S3) as the only dependency.
It allows anyone to build a fully functional lakehouse with storage (e.g. S3, HDFS) as the only dependency.

The TrinityLake format defines different objects in a lakehouse and
provides a consistent and efficient way for accessing and manipulating the interactions among these objects.
It offers the following key features:

- **Storage only** as a lakehouse solution that works exactly the same way locally, on premise and in the cloud
- **Multi-object multi-statement transactions** with standard SQL `BEGIN` and `COMMIT` semantics
- **Consistent time travel and snapshot export** across all objects in the Lakehouse
- **Consistent time travel and snapshot export** across all objects in the lakehouse
- **Distributed transactions** which can enable use cases like complicated write-audit-publish workflows
- **Compatibility with open table formats** like Apache Iceberg, supporting standard SQL `MANAGED` and `EXTERNAL` as well as federation-based access patterns
- **Compatibility with open catalog standards** like Apache Iceberg REST Catalog specification, serving as a highly scalable yet extremely lightweight backend implementation
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trinitylake.iceberg;

import io.trinitylake.relocated.com.google.common.collect.Maps;
import io.trinitylake.storage.LakehouseStorage;
import io.trinitylake.storage.LakehouseStorages;
import io.trinitylake.util.PropertyUtil;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.Transaction;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.NamespaceNotEmptyException;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;

/**
 * Iceberg {@link Catalog} and {@link SupportsNamespaces} implementation backed by a TrinityLake
 * {@link LakehouseStorage}.
 *
 * <p>This is initial boilerplate: apart from {@link #initialize(String, Map)}, every method either
 * delegates to the interface's default implementation or returns a fixed placeholder value.
 */
public class TrinityLakeIcebergCatalog implements Catalog, SupportsNamespaces {

  /** Storage handle created in {@link #initialize(String, Map)}; unset until then. */
  private LakehouseStorage storage;

  /** No-arg constructor; configuration happens in {@link #initialize(String, Map)}. */
  public TrinityLakeIcebergCatalog() {}

  /**
   * Builds the storage configuration from the catalog properties and initializes the storage.
   *
   * <p>The storage configuration consists of every catalog property selected by {@link
   * TrinityLakeIcebergCatalogProperties#STORAGE_OPS_PROPERTIES_PREFIX}, plus the storage type and
   * the warehouse location stored under the {@link LakehouseStorages} keys.
   *
   * @param name catalog name (currently unused)
   * @param properties catalog properties
   */
  @Override
  public void initialize(String name, Map<String, String> properties) {
    String warehouse =
        PropertyUtil.propertyAsString(properties, CatalogProperties.WAREHOUSE_LOCATION);
    String storageType =
        PropertyUtil.propertyAsString(properties, TrinityLakeIcebergCatalogProperties.STORAGE_TYPE);
    Map<String, String> storageOpsProperties =
        PropertyUtil.propertiesWithPrefix(
            properties, TrinityLakeIcebergCatalogProperties.STORAGE_OPS_PROPERTIES_PREFIX);

    Map<String, String> storageProperties = Maps.newHashMap();
    storageProperties.putAll(storageOpsProperties);
    storageProperties.put(LakehouseStorages.STORAGE_TYPE, storageType);
    storageProperties.put(LakehouseStorages.STORAGE_ROOT, warehouse);

    // Pass the assembled storage configuration rather than the raw catalog properties;
    // otherwise the STORAGE_TYPE / STORAGE_ROOT entries set above would never reach the storage.
    this.storage = LakehouseStorages.initialize(storageProperties);
  }

  /** Delegates to the {@link SupportsNamespaces} default implementation. */
  @Override
  public boolean namespaceExists(Namespace namespace) {
    return SupportsNamespaces.super.namespaceExists(namespace);
  }

  /** Delegates to the {@link SupportsNamespaces} default implementation. */
  @Override
  public List<Namespace> listNamespaces() {
    return SupportsNamespaces.super.listNamespaces();
  }

  /** Delegates to the {@link SupportsNamespaces} default implementation. */
  @Override
  public void createNamespace(Namespace namespace) {
    SupportsNamespaces.super.createNamespace(namespace);
  }

  /** Placeholder: performs no work and always returns {@code false}. */
  @Override
  public boolean removeProperties(Namespace namespace, Set<String> set)
      throws NoSuchNamespaceException {
    return false;
  }

  /** Placeholder: performs no work and always returns {@code false}. */
  @Override
  public boolean setProperties(Namespace namespace, Map<String, String> map)
      throws NoSuchNamespaceException {
    return false;
  }

  /** Placeholder: performs no work and always returns {@code false}. */
  @Override
  public boolean dropNamespace(Namespace namespace) throws NamespaceNotEmptyException {
    return false;
  }

  /** Placeholder: always returns an empty immutable map. */
  @Override
  public Map<String, String> loadNamespaceMetadata(Namespace namespace)
      throws NoSuchNamespaceException {
    return Map.of();
  }

  /** Placeholder: always returns an empty immutable list. */
  @Override
  public List<Namespace> listNamespaces(Namespace namespace) throws NoSuchNamespaceException {
    return List.of();
  }

  /** Placeholder: performs no work. */
  @Override
  public void createNamespace(Namespace namespace, Map<String, String> map) {}

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public TableBuilder buildTable(TableIdentifier identifier, Schema schema) {
    return Catalog.super.buildTable(identifier, schema);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public Table registerTable(TableIdentifier identifier, String metadataFileLocation) {
    return Catalog.super.registerTable(identifier, metadataFileLocation);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public void invalidateTable(TableIdentifier identifier) {
    Catalog.super.invalidateTable(identifier);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public boolean dropTable(TableIdentifier identifier) {
    return Catalog.super.dropTable(identifier);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public boolean tableExists(TableIdentifier identifier) {
    return Catalog.super.tableExists(identifier);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public Transaction newReplaceTableTransaction(
      TableIdentifier identifier, Schema schema, boolean orCreate) {
    return Catalog.super.newReplaceTableTransaction(identifier, schema, orCreate);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public Transaction newReplaceTableTransaction(
      TableIdentifier identifier, Schema schema, PartitionSpec spec, boolean orCreate) {
    return Catalog.super.newReplaceTableTransaction(identifier, schema, spec, orCreate);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public Transaction newReplaceTableTransaction(
      TableIdentifier identifier,
      Schema schema,
      PartitionSpec spec,
      Map<String, String> properties,
      boolean orCreate) {
    return Catalog.super.newReplaceTableTransaction(identifier, schema, spec, properties, orCreate);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public Transaction newReplaceTableTransaction(
      TableIdentifier identifier,
      Schema schema,
      PartitionSpec spec,
      String location,
      Map<String, String> properties,
      boolean orCreate) {
    return Catalog.super.newReplaceTableTransaction(
        identifier, schema, spec, location, properties, orCreate);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public Transaction newCreateTableTransaction(TableIdentifier identifier, Schema schema) {
    return Catalog.super.newCreateTableTransaction(identifier, schema);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public Transaction newCreateTableTransaction(
      TableIdentifier identifier, Schema schema, PartitionSpec spec) {
    return Catalog.super.newCreateTableTransaction(identifier, schema, spec);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public Transaction newCreateTableTransaction(
      TableIdentifier identifier,
      Schema schema,
      PartitionSpec spec,
      Map<String, String> properties) {
    return Catalog.super.newCreateTableTransaction(identifier, schema, spec, properties);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public Transaction newCreateTableTransaction(
      TableIdentifier identifier,
      Schema schema,
      PartitionSpec spec,
      String location,
      Map<String, String> properties) {
    return Catalog.super.newCreateTableTransaction(identifier, schema, spec, location, properties);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public Table createTable(TableIdentifier identifier, Schema schema) {
    return Catalog.super.createTable(identifier, schema);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public Table createTable(TableIdentifier identifier, Schema schema, PartitionSpec spec) {
    return Catalog.super.createTable(identifier, schema, spec);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public Table createTable(
      TableIdentifier identifier,
      Schema schema,
      PartitionSpec spec,
      Map<String, String> properties) {
    return Catalog.super.createTable(identifier, schema, spec, properties);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public Table createTable(
      TableIdentifier identifier,
      Schema schema,
      PartitionSpec spec,
      String location,
      Map<String, String> properties) {
    return Catalog.super.createTable(identifier, schema, spec, location, properties);
  }

  /** Delegates to the {@link Catalog} default implementation. */
  @Override
  public String name() {
    return Catalog.super.name();
  }

  /** Placeholder: table loading is not implemented yet and this always returns {@code null}. */
  @Override
  public Table loadTable(TableIdentifier tableIdentifier) {
    return null;
  }

  /** Placeholder: performs no work. */
  @Override
  public void renameTable(TableIdentifier tableIdentifier, TableIdentifier tableIdentifier1) {}

  /** Placeholder: performs no work and always returns {@code false}. */
  @Override
  public boolean dropTable(TableIdentifier tableIdentifier, boolean b) {
    return false;
  }

  /** Placeholder: always returns an empty immutable list. */
  @Override
  public List<TableIdentifier> listTables(Namespace namespace) {
    return List.of();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trinitylake.iceberg;

/**
 * Property keys used to configure the TrinityLake Iceberg catalog.
 *
 * <p>Constants-only holder; it cannot be instantiated.
 */
public class TrinityLakeIcebergCatalogProperties {

  // ---- Storage ----

  /** Catalog property key from which the storage type is read. */
  public static final String STORAGE_TYPE = "storage.type";

  /** Key prefix used to select the storage operation properties from the catalog properties. */
  public static final String STORAGE_OPS_PROPERTIES_PREFIX = "storage.ops.";

  // ---- Version ----

  /** Property key for the version namespace prefix. */
  public static final String VERSION_NAMESPACE_PREFIX = "vn.namespace-prefix";

  // ---- Transaction ----

  /** Property key for the transaction namespace prefix. */
  public static final String TRANSACTION_NAMESPACE_PREFIX = "txn.namespace-prefix";

  /** Property key for the transaction isolation level. */
  public static final String TRANSACTION_ISOLATION_LEVEL = "txn.isolation-level";

  /** Property key for the transaction time-to-live; the key name indicates milliseconds. */
  public static final String TRANSACTION_TTL_MILLIS = "txn.ttl-millis";

  /** Not instantiable. */
  private TrinityLakeIcebergCatalogProperties() {}
}
Loading

0 comments on commit 6472f23

Please sign in to comment.