Improve docs on converting Avro schemas (apache#587)

ElementAnalytics · Nov 20, 2019 · f74854f · f74854f
1 parent 94fdc73
commit f74854f
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 10 deletions.
diff --git a/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java b/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java
@@ -80,6 +80,11 @@ public static Type convert(Schema schema) {
     return AvroSchemaVisitor.visit(schema, new SchemaToType(schema));
   }
 
+  public static org.apache.iceberg.Schema toIceberg(Schema schema) {
+    final List<Types.NestedField> fields = convert(schema).asNestedType().asStructType().fields();
+    return new org.apache.iceberg.Schema(fields);
+  }
+
   static boolean hasIds(Schema schema) {
     return AvroCustomOrderSchemaVisitor.visit(schema, new HasIds());
   }

diff --git a/site/docs/api-quickstart.md b/site/docs/api-quickstart.md
@@ -42,6 +42,7 @@ val table = catalog.createTable(name, schema, spec)
 // write into the new logs table with Spark 2.4
 logsDF.write
     .format("iceberg")
+    .mode("append")
     .save("logging.logs")
 ```
 
@@ -56,13 +57,14 @@ To create a table in HDFS, use `HadoopTables`:
 ```scala
 import org.apache.iceberg.hadoop.HadoopTables
 
-val tables = new HadoopTables(conf)
+val tables = new HadoopTables(spark.sessionState.newHadoopConf())
 
 val table = tables.create(schema, spec, "hdfs:/tables/logging/logs")
 
 // write into the new logs table with Spark 2.4
 logsDF.write
     .format("iceberg")
+    .mode("append")
     .save("hdfs:/tables/logging/logs")
 ```
 
@@ -107,15 +109,12 @@ When a table is created, all IDs in the schema are re-assigned to ensure uniquen
 To create an Iceberg schema from an existing Avro schema, use converters in `AvroSchemaUtil`:
 
 ```scala
-import org.apache.iceberg.avro.AvroSchemaUtil
 import org.apache.avro.Schema.Parser
+import org.apache.iceberg.avro.AvroSchemaUtil
 
-val avroSchema = new Parser().parse(
-    """{ "type": "record", "name": "com.example.AvroType",
-      |  "fields": [ ... ]
-      |}""".stripMargin
+val avroSchema = new Parser().parse("""{"type": "record", ... }""")
 
-val schema = AvroSchemaUtil.convert(avroSchema)
+val icebergSchema = AvroSchemaUtil.toIceberg(avroSchema)
 ```
 
 ### Convert a schema from Spark

diff --git a/site/docs/spark.md b/site/docs/spark.md
@@ -17,7 +17,7 @@
 
 # Spark
 
-Iceberg uses Spark's DataSourceV2 API for data source and catalog implementations. Spark DSv2 is an evolving API with different levels of support in Spark versions.
+Iceberg uses Apache Spark's DataSourceV2 API for data source and catalog implementations. Spark DSv2 is an evolving API with different levels of support in Spark versions.
 
 | Feature support                              | Spark 2.4 | Spark 3.0 (unreleased) | Notes                                          |
 |----------------------------------------------|-----------|------------------------|------------------------------------------------|
@@ -44,6 +44,19 @@ To use Iceberg in Spark 2.4, add the `iceberg-spark-runtime` Jar to Spark's `jar
 
 Spark 2.4 is limited to reading and writing existing Iceberg tables. Use the [Iceberg API](../api) to create Iceberg tables.
 
+The recommended way is to include Iceberg's latest release using the `--packages` option:
+```sh
+spark-shell --packages org.apache.iceberg:iceberg-spark-runtime:0.7.0-incubating
+```
+
+You can also build Iceberg locally, and add the jar to Spark's classpath. This can be helpful to test unreleased features or while developing something new:
+
+```sh
+./gradlew assemble
+spark-shell --jars spark-runtime/build/libs/iceberg-spark-runtime-93990904.jar
+```
+
+Replace `93990904` with the git hash that you're using.
 
 ### Reading an Iceberg table
 
@@ -231,5 +244,3 @@ spark.read.format("iceberg").load("db.table.files").show(truncate = false)
 | s3:/.../table/data/00002-5-8d6d60e8-d427-4809-bcf0-f5d45a4aad96.parquet | PARQUET     | 1            | 597                | [1 -> 90, 2 -> 62] | [1 -> 1, 2 -> 1] | [1 -> 0, 2 -> 0]  | [1 -> , 2 -> a] | [1 -> , 2 -> a] | null         | [4]           |
 +-------------------------------------------------------------------------+-------------+--------------+--------------------+--------------------+------------------+-------------------+-----------------+-----------------+--------------+---------------+
 ```
-
-