GCS Destination: MLP documentation (Beta) (airbytehq#11499)

* updated gcs destination spec and documentation * fixed remarks * updated version * updated destination_specs yaml file
shursulei · Apr 5, 2022 · 99875c4 · 99875c4
1 parent 2f850b9
commit 99875c4
Show file tree

Hide file tree

Showing 5 changed files with 110 additions and 91 deletions.
diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml
@@ -83,7 +83,7 @@
 - name: Google Cloud Storage (GCS)
   destinationDefinitionId: ca8f6566-e555-4b40-943a-545bf123117a
   dockerRepository: airbyte/destination-gcs
-  dockerImageTag: 0.2.0
+  dockerImageTag: 0.2.1
   documentationUrl: https://docs.airbyte.io/integrations/destinations/gcs
   icon: googlecloudstorage.svg
   resourceRequirements:

diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml
@@ -1274,7 +1274,7 @@
     - "overwrite"
     - "append"
     supportsNamespaces: true
-- dockerImage: "airbyte/destination-gcs:0.2.0"
+- dockerImage: "airbyte/destination-gcs:0.2.1"
   spec:
     documentationUrl: "https://docs.airbyte.io/integrations/destinations/gcs"
     connectionSpecification:
@@ -1284,32 +1284,36 @@
       required:
       - "gcs_bucket_name"
       - "gcs_bucket_path"
-      - "gcs_bucket_region"
       - "credential"
       - "format"
       additionalProperties: false
       properties:
         gcs_bucket_name:
           title: "GCS Bucket Name"
+          order: 1
           type: "string"
-          description: "You can find the bucket name in the  App Engine Admin console\
-            \ Application Settings page, under the label  Google Cloud Storage Bucket."
+          description: "You can find the bucket name in the App Engine Admin console\
+            \ Application Settings page, under the label Google Cloud Storage Bucket.\
+            \ Read more <a href=\"https://cloud.google.com/storage/docs/naming-buckets\"\
+            >here</a>."
           examples:
           - "airbyte_sync"
         gcs_bucket_path:
+          title: "GCS Bucket Path"
           description: "GCS Bucket Path string Subdirectory under the above bucket\
             \ to sync the data into."
+          order: 2
           type: "string"
           examples:
           - "data_sync/test"
         gcs_bucket_region:
-          title: "GCS Bucket Region"
+          title: "GCS Bucket Region (Optional)"
           type: "string"
-          default: ""
-          description: "Select a Region of the GCS Bucket."
+          order: 3
+          default: "us"
+          description: "Select a Region of the GCS Bucket. Read more <a href=\"https://cloud.google.com/storage/docs/locations\"\
+            >here</a>."
           enum:
-          - ""
-          - "-- North America --"
           - "northamerica-northeast1"
           - "northamerica-northeast2"
           - "us-central1"
@@ -1319,18 +1323,15 @@
           - "us-west2"
           - "us-west3"
           - "us-west4"
-          - "-- South America --"
           - "southamerica-east1"
           - "southamerica-west1"
-          - "-- Europe --"
           - "europe-central2"
           - "europe-north1"
           - "europe-west1"
           - "europe-west2"
           - "europe-west3"
           - "europe-west4"
           - "europe-west6"
-          - "-- Asia --"
           - "asia-east1"
           - "asia-east2"
           - "asia-northeast1"
@@ -1340,20 +1341,22 @@
           - "asia-south2"
           - "asia-southeast1"
           - "asia-southeast2"
-          - "-- Australia --"
           - "australia-southeast1"
           - "australia-southeast2"
-          - "-- Multi-regions --"
           - "asia"
           - "eu"
           - "us"
-          - "-- Dual-regions --"
           - "asia1"
           - "eur4"
           - "nam4"
         credential:
-          title: "Credential"
+          title: "Authentication"
+          description: "An HMAC key is a type of credential and can be associated\
+            \ with a service account or a user account in Cloud Storage. Read more\
+            \ <a href=\"https://cloud.google.com/storage/docs/authentication/hmackeys\"\
+            >here</a>."
           type: "object"
+          order: 0
           oneOf:
           - title: "HMAC Key"
             required:
@@ -1368,25 +1371,35 @@
                 default: "HMAC_KEY"
               hmac_key_access_id:
                 type: "string"
-                description: "HMAC key access ID. When linked to a service account,\
-                  \ this ID is 61 characters long; when linked to a user account,\
-                  \ it is 24 characters long."
-                title: "HMAC Key Access ID"
+                description: "When linked to a service account, this ID is 61 characters\
+                  \ long; when linked to a user account, it is 24 characters long.\
+                  \ Read more <a href=\"https://cloud.google.com/storage/docs/authentication/hmackeys#overview\"\
+                  >here</a>."
+                title: "Access ID"
                 airbyte_secret: true
+                order: 0
                 examples:
                 - "1234567890abcdefghij1234"
               hmac_key_secret:
                 type: "string"
                 description: "The corresponding secret for the access ID. It is a\
-                  \ 40-character base-64 encoded string."
-                title: "HMAC Key Secret"
+                  \ 40-character base-64 encoded string.  Read more <a href=\"https://cloud.google.com/storage/docs/authentication/hmackeys#secrets\"\
+                  >here</a>."
+                title: "Secret"
                 airbyte_secret: true
+                order: 1
                 examples:
                 - "1234567890abcdefghij1234567890ABCDEFGHIJ"
         format:
           title: "Output Format"
           type: "object"
-          description: "Output data format"
+          description: "Output data format. One of the following formats must be selected\
+            \ - <a href=\"https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro#advantages_of_avro\"\
+            >AVRO</a> format, <a href=\"https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-parquet#parquet_schemas\"\
+            >PARQUET</a> format, <a href=\"https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-csv#loading_csv_data_into_a_table\"\
+            >CSV</a> format, or <a href=\"https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-json#loading_json_data_into_a_new_table\"\
+            >JSONL</a> format."
+          order: 4
           oneOf:
           - title: "Avro: Apache Avro"
             required:
@@ -1416,15 +1429,14 @@
                 - title: "Deflate"
                   required:
                   - "codec"
-                  - "compression_level"
                   properties:
                     codec:
                       type: "string"
                       enum:
                       - "Deflate"
                       default: "Deflate"
                     compression_level:
-                      title: "Deflate level"
+                      title: "Deflate level (Optional)"
                       description: "0: no compression & fastest, 9: best compression\
                         \ & slowest."
                       type: "integer"
@@ -1443,16 +1455,22 @@
                 - title: "xz"
                   required:
                   - "codec"
-                  - "compression_level"
                   properties:
                     codec:
                       type: "string"
                       enum:
                       - "xz"
                       default: "xz"
                     compression_level:
-                      title: "Compression Level"
-                      description: "See <a href=\"https://commons.apache.org/proper/commons-compress/apidocs/org/apache/commons/compress/compressors/xz/XZCompressorOutputStream.html#XZCompressorOutputStream-java.io.OutputStream-int-\"\
+                      title: "Compression Level (Optional)"
+                      description: "The presets 0-3 are fast presets with medium compression.\
+                        \ The presets 4-6 are fairly slow presets with high compression.\
+                        \ The default preset is 6. The presets 7-9 are like the preset\
+                        \ 6 but use bigger dictionaries and have higher compressor\
+                        \ and decompressor memory requirements. Unless the uncompressed\
+                        \ size of the file exceeds 8 MiB, 16 MiB, or 32 MiB, it is\
+                        \ waste of memory to use the presets 7, 8, or 9, respectively.\
+                        \ Read more <a href=\"https://commons.apache.org/proper/commons-compress/apidocs/org/apache/commons/compress/compressors/xz/XZCompressorOutputStream.html#XZCompressorOutputStream-java.io.OutputStream-int-\"\
                         >here</a> for details."
                       type: "integer"
                       default: 6
@@ -1461,15 +1479,14 @@
                 - title: "zstandard"
                   required:
                   - "codec"
-                  - "compression_level"
                   properties:
                     codec:
                       type: "string"
                       enum:
                       - "zstandard"
                       default: "zstandard"
                     compression_level:
-                      title: "Compression Level"
+                      title: "Compression Level (Optional)"
                       description: "Negative levels are 'fast' modes akin to lz4 or\
                         \ snappy, levels above 9 are generally for archival purposes,\
                         \ and levels above 18 use a lot of memory."
@@ -1478,7 +1495,7 @@
                       minimum: -5
                       maximum: 22
                     include_checksum:
-                      title: "Include Checksum"
+                      title: "Include Checksum (Optional)"
                       description: "If true, include a checksum with each data block."
                       type: "boolean"
                       default: false
@@ -1492,7 +1509,7 @@
                       - "snappy"
                       default: "snappy"
               part_size_mb:
-                title: "Block Size (MB) for GCS multipart upload"
+                title: "Block Size (MB) for GCS multipart upload (Optional)"
                 description: "This is the size of a \"Part\" being buffered in memory.\
                   \ It limits the memory usage when writing. Larger values will allow\
                   \ to upload a bigger files and improve the speed, but consumes9\
@@ -1504,7 +1521,6 @@
           - title: "CSV: Comma-Separated Values"
             required:
             - "format_type"
-            - "flattening"
             properties:
               format_type:
                 type: "string"
@@ -1513,15 +1529,15 @@
                 default: "CSV"
               flattening:
                 type: "string"
-                title: "Normalization (Flattening)"
-                description: "Whether the input json data should be normalized (flattened)\
+                title: "Normalization (Optional)"
+                description: "Whether the input JSON data should be normalized (flattened)\
                   \ in the output CSV. Please refer to docs for details."
                 default: "No flattening"
                 enum:
                 - "No flattening"
                 - "Root level flattening"
               part_size_mb:
-                title: "Block Size (MB) for GCS multipart upload"
+                title: "Block Size (MB) for GCS multipart upload (Optional)"
                 description: "This is the size of a \"Part\" being buffered in memory.\
                   \ It limits the memory usage when writing. Larger values will allow\
                   \ to upload a bigger files and improve the speed, but consumes9\
@@ -1540,7 +1556,7 @@
                 - "JSONL"
                 default: "JSONL"
               part_size_mb:
-                title: "Block Size (MB) for GCS multipart upload"
+                title: "Block Size (MB) for GCS multipart upload (Optional)"
                 description: "This is the size of a \"Part\" being buffered in memory.\
                   \ It limits the memory usage when writing. Larger values will allow\
                   \ to upload a bigger files and improve the speed, but consumes9\
@@ -1559,9 +1575,10 @@
                 - "Parquet"
                 default: "Parquet"
               compression_codec:
-                title: "Compression Codec"
+                title: "Compression Codec (Optional)"
                 description: "The compression algorithm used to compress data pages."
                 type: "string"
+                default: "UNCOMPRESSED"
                 enum:
                 - "UNCOMPRESSED"
                 - "SNAPPY"
@@ -1570,9 +1587,8 @@
                 - "BROTLI"
                 - "LZ4"
                 - "ZSTD"
-                default: "UNCOMPRESSED"
               block_size_mb:
-                title: "Block Size (Row Group Size) (MB)"
+                title: "Block Size (Row Group Size) (MB) (Optional)"
                 description: "This is the size of a row group being buffered in memory.\
                   \ It limits the memory usage when writing. Larger values will improve\
                   \ the IO when reading, but consume more memory when writing. Default:\
@@ -1582,15 +1598,15 @@
                 examples:
                 - 128
               max_padding_size_mb:
-                title: "Max Padding Size (MB)"
+                title: "Max Padding Size (MB) (Optional)"
                 description: "Maximum size allowed as padding to align row groups.\
                   \ This is also the minimum size of a row group. Default: 8 MB."
                 type: "integer"
                 default: 8
                 examples:
                 - 8
               page_size_kb:
-                title: "Page Size (KB)"
+                title: "Page Size (KB) (Optional)"
                 description: "The page size is for compression. A block is composed\
                   \ of pages. A page is the smallest unit that must be read fully\
                   \ to access a single record. If this value is too small, the compression\
@@ -1600,7 +1616,7 @@
                 examples:
                 - 1024
               dictionary_page_size_kb:
-                title: "Dictionary Page Size (KB)"
+                title: "Dictionary Page Size (KB) (Optional)"
                 description: "There is one dictionary page per column per row group\
                   \ when dictionary encoding is used. The dictionary page size works\
                   \ like the page size but for dictionary. Default: 1024 KB."
@@ -1609,7 +1625,7 @@
                 examples:
                 - 1024
               dictionary_encoding:
-                title: "Dictionary Encoding"
+                title: "Dictionary Encoding (Optional)"
                 description: "Default: true."
                 type: "boolean"
                 default: true

diff --git a/airbyte-integrations/connectors/destination-gcs/Dockerfile b/airbyte-integrations/connectors/destination-gcs/Dockerfile
@@ -16,5 +16,5 @@ ENV APPLICATION destination-gcs
 
 COPY --from=build /airbyte /airbyte
 
-LABEL io.airbyte.version=0.2.0
+LABEL io.airbyte.version=0.2.1
 LABEL io.airbyte.name=airbyte/destination-gcs