Merge pull request databricks#38 from tabular-io/java-api-part-3
Introduction to Java API Part 3 examples
samredai authored Sep 26, 2022
2 parents 5d24c3f + 61358b4 commit 9ce8011
Showing 4 changed files with 129 additions and 69 deletions.
5 changes: 3 additions & 2 deletions spark/Dockerfile
@@ -47,8 +47,9 @@ RUN curl https://jdbc.postgresql.org/download/postgresql-42.2.24.jar -o postgres
&& rm postgresql-42.2.24.jar

# Download iceberg spark runtime
RUN curl https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/0.14.0/iceberg-spark-runtime-3.3_2.12-0.14.0.jar -Lo iceberg-spark-runtime-3.3_2.12-0.14.0.jar \
&& mv iceberg-spark-runtime-3.3_2.12-0.14.0.jar /opt/spark/jars
RUN curl https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/0.14.1/iceberg-spark-runtime-3.3_2.12-0.14.1.jar -Lo iceberg-spark-runtime-3.3_2.12-0.14.1.jar \
&& mv iceberg-spark-runtime-3.3_2.12-0.14.1.jar /opt/spark/jars
COPY tabular-client-runtime-0.28.2.jar /opt/spark/jars

# Download Java AWS SDK
RUN curl https://repo1.maven.org/maven2/software/amazon/awssdk/bundle/2.17.165/bundle-2.17.165.jar -Lo bundle-2.17.165.jar \
41 changes: 0 additions & 41 deletions spark/bootstrap/ddl.sql

This file was deleted.

24 changes: 0 additions & 24 deletions spark/bootstrap/load_movielens_data.py

This file was deleted.

@@ -321,13 +321,137 @@
"System.out.println(dataFile);"
]
},
{
"cell_type": "markdown",
"id": "41e9e10f",
"metadata": {},
"source": [
"## [Part 3 - Table Scans](https://tabular.io/blog/java-api-part-3/)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0240262c",
"id": "81033412",
"metadata": {},
"outputs": [],
"source": []
"source": [
"import org.apache.iceberg.Schema;\n",
"import org.apache.iceberg.types.Types;\n",
"import org.apache.iceberg.catalog.Namespace;\n",
"import org.apache.iceberg.catalog.TableIdentifier;\n",
"import org.apache.iceberg.PartitionSpec;\n",
"\n",
"Schema schema = new Schema(\n",
" Types.NestedField.optional(1, \"event_id\", Types.StringType.get()),\n",
" Types.NestedField.optional(2, \"username\", Types.StringType.get()),\n",
" Types.NestedField.optional(3, \"userid\", Types.IntegerType.get()),\n",
" Types.NestedField.optional(4, \"api_version\", Types.StringType.get()),\n",
" Types.NestedField.optional(5, \"command\", Types.StringType.get())\n",
" );\n",
"\n",
"Namespace webapp = Namespace.of(\"webapp\");\n",
"TableIdentifier name = TableIdentifier.of(webapp, \"user_events\");\n",
"catalog.createTable(name, schema, PartitionSpec.unpartitioned());"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12c45c6b",
"metadata": {},
"outputs": [],
"source": [
"import java.util.UUID;\n",
"import com.google.common.collect.ImmutableList;\n",
"import com.google.common.collect.ImmutableMap;\n",
"import org.apache.iceberg.data.GenericRecord;\n",
"\n",
"GenericRecord record = GenericRecord.create(schema);\n",
"ImmutableList.Builder<GenericRecord> builder = ImmutableList.builder();\n",
"builder.add(record.copy(ImmutableMap.of(\"event_id\", UUID.randomUUID().toString(), \"username\", \"Bruce\", \"userid\", 1, \"api_version\", \"1.0\", \"command\", \"grapple\")));\n",
"builder.add(record.copy(ImmutableMap.of(\"event_id\", UUID.randomUUID().toString(), \"username\", \"Wayne\", \"userid\", 1, \"api_version\", \"1.0\", \"command\", \"glide\")));\n",
"builder.add(record.copy(ImmutableMap.of(\"event_id\", UUID.randomUUID().toString(), \"username\", \"Clark\", \"userid\", 1, \"api_version\", \"2.0\", \"command\", \"fly\")));\n",
"builder.add(record.copy(ImmutableMap.of(\"event_id\", UUID.randomUUID().toString(), \"username\", \"Kent\", \"userid\", 1, \"api_version\", \"1.0\", \"command\", \"land\")));\n",
"ImmutableList<GenericRecord> records = builder.build();"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "83bc5319",
"metadata": {},
"outputs": [],
"source": [
"import org.apache.iceberg.Files;\n",
"import org.apache.iceberg.io.DataWriter;\n",
"import org.apache.iceberg.io.OutputFile;\n",
"import org.apache.iceberg.parquet.Parquet;\n",
"import org.apache.iceberg.data.parquet.GenericParquetWriter;\n",
"\n",
"File f = new File(\"/home/iceberg/warehouse/\" + UUID.randomUUID().toString());\n",
"OutputFile file = Files.localOutput(f);\n",
"DataWriter<GenericRecord> dataWriter =\n",
" Parquet.writeData(file)\n",
" .schema(schema)\n",
" .createWriterFunc(GenericParquetWriter::buildWriter)\n",
" .overwrite()\n",
" .withSpec(PartitionSpec.unpartitioned())\n",
" .build();\n",
"try {\n",
" for (GenericRecord record : builder.build()) {\n",
" dataWriter.write(record);\n",
" }\n",
"} finally {\n",
" dataWriter.close();\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "469e6af4",
"metadata": {},
"outputs": [],
"source": [
"import org.apache.iceberg.DataFile;\n",
"\n",
"DataFile dataFile = dataWriter.toDataFile();"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "142b6ed1",
"metadata": {},
"outputs": [],
"source": [
"import org.apache.iceberg.catalog.Namespace;\n",
"import org.apache.iceberg.catalog.TableIdentifier;\n",
"import org.apache.iceberg.Table;\n",
"\n",
"Namespace webapp = Namespace.of(\"webapp\");\n",
"TableIdentifier name = TableIdentifier.of(webapp, \"user_events\");\n",
"Table tbl = catalog.loadTable(name);\n",
"tbl.newAppend().appendFile(dataFile).commit()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c61e9e79",
"metadata": {},
"outputs": [],
"source": [
"import org.apache.iceberg.io.CloseableIterable;\n",
"import org.apache.iceberg.data.Record;\n",
"import org.apache.iceberg.data.IcebergGenerics;\n",
"\n",
"CloseableIterable<Record> result = IcebergGenerics.read(tbl).build();\n",
"for (Record r: result) {\n",
" System.out.println(r);\n",
"}"
]
}
],
"metadata": {
