Skip to content

Commit

Permalink
GCS Destination: MLP documentation (Beta) (airbytehq#11499)
Browse files Browse the repository at this point in the history
* updated gcs destination spec and documentation

* fixed remarks

* updated version

* updated destination_specs yaml file
  • Loading branch information
andriikorotkov authored Apr 5, 2022
1 parent 2f850b9 commit 99875c4
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 91 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
- name: Google Cloud Storage (GCS)
destinationDefinitionId: ca8f6566-e555-4b40-943a-545bf123117a
dockerRepository: airbyte/destination-gcs
dockerImageTag: 0.2.0
dockerImageTag: 0.2.1
documentationUrl: https://docs.airbyte.io/integrations/destinations/gcs
icon: googlecloudstorage.svg
resourceRequirements:
Expand Down
104 changes: 60 additions & 44 deletions airbyte-config/init/src/main/resources/seed/destination_specs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1274,7 +1274,7 @@
- "overwrite"
- "append"
supportsNamespaces: true
- dockerImage: "airbyte/destination-gcs:0.2.0"
- dockerImage: "airbyte/destination-gcs:0.2.1"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/destinations/gcs"
connectionSpecification:
Expand All @@ -1284,32 +1284,36 @@
required:
- "gcs_bucket_name"
- "gcs_bucket_path"
- "gcs_bucket_region"
- "credential"
- "format"
additionalProperties: false
properties:
gcs_bucket_name:
title: "GCS Bucket Name"
order: 1
type: "string"
description: "You can find the bucket name in the App Engine Admin console\
\ Application Settings page, under the label Google Cloud Storage Bucket."
description: "You can find the bucket name in the App Engine Admin console\
\ Application Settings page, under the label Google Cloud Storage Bucket.\
\ Read more <a href=\"https://cloud.google.com/storage/docs/naming-buckets\"\
>here</a>."
examples:
- "airbyte_sync"
gcs_bucket_path:
title: "GCS Bucket Path"
description: "GCS Bucket Path string. Subdirectory under the above bucket\
\ to sync the data into."
order: 2
type: "string"
examples:
- "data_sync/test"
gcs_bucket_region:
title: "GCS Bucket Region"
title: "GCS Bucket Region (Optional)"
type: "string"
default: ""
description: "Select a Region of the GCS Bucket."
order: 3
default: "us"
description: "Select a Region of the GCS Bucket. Read more <a href=\"https://cloud.google.com/storage/docs/locations\"\
>here</a>."
enum:
- ""
- "-- North America --"
- "northamerica-northeast1"
- "northamerica-northeast2"
- "us-central1"
Expand All @@ -1319,18 +1323,15 @@
- "us-west2"
- "us-west3"
- "us-west4"
- "-- South America --"
- "southamerica-east1"
- "southamerica-west1"
- "-- Europe --"
- "europe-central2"
- "europe-north1"
- "europe-west1"
- "europe-west2"
- "europe-west3"
- "europe-west4"
- "europe-west6"
- "-- Asia --"
- "asia-east1"
- "asia-east2"
- "asia-northeast1"
Expand All @@ -1340,20 +1341,22 @@
- "asia-south2"
- "asia-southeast1"
- "asia-southeast2"
- "-- Australia --"
- "australia-southeast1"
- "australia-southeast2"
- "-- Multi-regions --"
- "asia"
- "eu"
- "us"
- "-- Dual-regions --"
- "asia1"
- "eur4"
- "nam4"
credential:
title: "Credential"
title: "Authentication"
description: "An HMAC key is a type of credential and can be associated\
\ with a service account or a user account in Cloud Storage. Read more\
\ <a href=\"https://cloud.google.com/storage/docs/authentication/hmackeys\"\
>here</a>."
type: "object"
order: 0
oneOf:
- title: "HMAC Key"
required:
Expand All @@ -1368,25 +1371,35 @@
default: "HMAC_KEY"
hmac_key_access_id:
type: "string"
description: "HMAC key access ID. When linked to a service account,\
\ this ID is 61 characters long; when linked to a user account,\
\ it is 24 characters long."
title: "HMAC Key Access ID"
description: "When linked to a service account, this ID is 61 characters\
\ long; when linked to a user account, it is 24 characters long.\
\ Read more <a href=\"https://cloud.google.com/storage/docs/authentication/hmackeys#overview\"\
>here</a>."
title: "Access ID"
airbyte_secret: true
order: 0
examples:
- "1234567890abcdefghij1234"
hmac_key_secret:
type: "string"
description: "The corresponding secret for the access ID. It is a\
\ 40-character base-64 encoded string."
title: "HMAC Key Secret"
\ 40-character base-64 encoded string. Read more <a href=\"https://cloud.google.com/storage/docs/authentication/hmackeys#secrets\"\
>here</a>."
title: "Secret"
airbyte_secret: true
order: 1
examples:
- "1234567890abcdefghij1234567890ABCDEFGHIJ"
format:
title: "Output Format"
type: "object"
description: "Output data format"
description: "Output data format. One of the following formats must be selected\
\ - <a href=\"https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro#advantages_of_avro\"\
>AVRO</a> format, <a href=\"https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-parquet#parquet_schemas\"\
>PARQUET</a> format, <a href=\"https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-csv#loading_csv_data_into_a_table\"\
>CSV</a> format, or <a href=\"https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-json#loading_json_data_into_a_new_table\"\
>JSONL</a> format."
order: 4
oneOf:
- title: "Avro: Apache Avro"
required:
Expand Down Expand Up @@ -1416,15 +1429,14 @@
- title: "Deflate"
required:
- "codec"
- "compression_level"
properties:
codec:
type: "string"
enum:
- "Deflate"
default: "Deflate"
compression_level:
title: "Deflate level"
title: "Deflate level (Optional)"
description: "0: no compression & fastest, 9: best compression\
\ & slowest."
type: "integer"
Expand All @@ -1443,16 +1455,22 @@
- title: "xz"
required:
- "codec"
- "compression_level"
properties:
codec:
type: "string"
enum:
- "xz"
default: "xz"
compression_level:
title: "Compression Level"
description: "See <a href=\"https://commons.apache.org/proper/commons-compress/apidocs/org/apache/commons/compress/compressors/xz/XZCompressorOutputStream.html#XZCompressorOutputStream-java.io.OutputStream-int-\"\
title: "Compression Level (Optional)"
description: "The presets 0-3 are fast presets with medium compression.\
\ The presets 4-6 are fairly slow presets with high compression.\
\ The default preset is 6. The presets 7-9 are like the preset\
\ 6 but use bigger dictionaries and have higher compressor\
\ and decompressor memory requirements. Unless the uncompressed\
\ size of the file exceeds 8 MiB, 16 MiB, or 32 MiB, it is\
\ a waste of memory to use the presets 7, 8, or 9, respectively.\
\ Read more <a href=\"https://commons.apache.org/proper/commons-compress/apidocs/org/apache/commons/compress/compressors/xz/XZCompressorOutputStream.html#XZCompressorOutputStream-java.io.OutputStream-int-\"\
>here</a> for details."
type: "integer"
default: 6
Expand All @@ -1461,15 +1479,14 @@
- title: "zstandard"
required:
- "codec"
- "compression_level"
properties:
codec:
type: "string"
enum:
- "zstandard"
default: "zstandard"
compression_level:
title: "Compression Level"
title: "Compression Level (Optional)"
description: "Negative levels are 'fast' modes akin to lz4 or\
\ snappy, levels above 9 are generally for archival purposes,\
\ and levels above 18 use a lot of memory."
Expand All @@ -1478,7 +1495,7 @@
minimum: -5
maximum: 22
include_checksum:
title: "Include Checksum"
title: "Include Checksum (Optional)"
description: "If true, include a checksum with each data block."
type: "boolean"
default: false
Expand All @@ -1492,7 +1509,7 @@
- "snappy"
default: "snappy"
part_size_mb:
title: "Block Size (MB) for GCS multipart upload"
title: "Block Size (MB) for GCS multipart upload (Optional)"
description: "This is the size of a \"Part\" being buffered in memory.\
\ It limits the memory usage when writing. Larger values will allow\
\ uploading bigger files and improve the speed, but consume\
Expand All @@ -1504,7 +1521,6 @@
- title: "CSV: Comma-Separated Values"
required:
- "format_type"
- "flattening"
properties:
format_type:
type: "string"
Expand All @@ -1513,15 +1529,15 @@
default: "CSV"
flattening:
type: "string"
title: "Normalization (Flattening)"
description: "Whether the input json data should be normalized (flattened)\
title: "Normalization (Optional)"
description: "Whether the input JSON data should be normalized (flattened)\
\ in the output CSV. Please refer to docs for details."
default: "No flattening"
enum:
- "No flattening"
- "Root level flattening"
part_size_mb:
title: "Block Size (MB) for GCS multipart upload"
title: "Block Size (MB) for GCS multipart upload (Optional)"
description: "This is the size of a \"Part\" being buffered in memory.\
\ It limits the memory usage when writing. Larger values will allow\
\ uploading bigger files and improve the speed, but consume\
Expand All @@ -1540,7 +1556,7 @@
- "JSONL"
default: "JSONL"
part_size_mb:
title: "Block Size (MB) for GCS multipart upload"
title: "Block Size (MB) for GCS multipart upload (Optional)"
description: "This is the size of a \"Part\" being buffered in memory.\
\ It limits the memory usage when writing. Larger values will allow\
\ uploading bigger files and improve the speed, but consume\
Expand All @@ -1559,9 +1575,10 @@
- "Parquet"
default: "Parquet"
compression_codec:
title: "Compression Codec"
title: "Compression Codec (Optional)"
description: "The compression algorithm used to compress data pages."
type: "string"
default: "UNCOMPRESSED"
enum:
- "UNCOMPRESSED"
- "SNAPPY"
Expand All @@ -1570,9 +1587,8 @@
- "BROTLI"
- "LZ4"
- "ZSTD"
default: "UNCOMPRESSED"
block_size_mb:
title: "Block Size (Row Group Size) (MB)"
title: "Block Size (Row Group Size) (MB) (Optional)"
description: "This is the size of a row group being buffered in memory.\
\ It limits the memory usage when writing. Larger values will improve\
\ the IO when reading, but consume more memory when writing. Default:\
Expand All @@ -1582,15 +1598,15 @@
examples:
- 128
max_padding_size_mb:
title: "Max Padding Size (MB)"
title: "Max Padding Size (MB) (Optional)"
description: "Maximum size allowed as padding to align row groups.\
\ This is also the minimum size of a row group. Default: 8 MB."
type: "integer"
default: 8
examples:
- 8
page_size_kb:
title: "Page Size (KB)"
title: "Page Size (KB) (Optional)"
description: "The page size is for compression. A block is composed\
\ of pages. A page is the smallest unit that must be read fully\
\ to access a single record. If this value is too small, the compression\
Expand All @@ -1600,7 +1616,7 @@
examples:
- 1024
dictionary_page_size_kb:
title: "Dictionary Page Size (KB)"
title: "Dictionary Page Size (KB) (Optional)"
description: "There is one dictionary page per column per row group\
\ when dictionary encoding is used. The dictionary page size works\
\ like the page size but for dictionary. Default: 1024 KB."
Expand All @@ -1609,7 +1625,7 @@
examples:
- 1024
dictionary_encoding:
title: "Dictionary Encoding"
title: "Dictionary Encoding (Optional)"
description: "Default: true."
type: "boolean"
default: true
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/destination-gcs/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ ENV APPLICATION destination-gcs

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=0.2.0
LABEL io.airbyte.version=0.2.1
LABEL io.airbyte.name=airbyte/destination-gcs
Loading

0 comments on commit 99875c4

Please sign in to comment.