Skip to content

Commit

Permalink
Update docstrings to adhere to sphinx standards (apache#14918)
Browse files Browse the repository at this point in the history
  • Loading branch information
jlaneve authored Mar 22, 2021
1 parent fc32d36 commit e172bd0
Show file tree
Hide file tree
Showing 19 changed files with 54 additions and 69 deletions.
48 changes: 27 additions & 21 deletions airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,34 +61,40 @@ class DynamoDBToS3Operator(BaseOperator):
To parallelize the replication, users can create multiple tasks of DynamoDBToS3Operator.
For instance to replicate with parallelism of 2, create two tasks like:
.. code-block::
op1 = DynamoDBToS3Operator(
task_id='replicator-1',
dynamodb_table_name='hello',
dynamodb_scan_kwargs={
'TotalSegments': 2,
'Segment': 0,
},
...
)
op2 = DynamoDBToS3Operator(
task_id='replicator-2',
dynamodb_table_name='hello',
dynamodb_scan_kwargs={
'TotalSegments': 2,
'Segment': 1,
},
...
)
.. code-block:: python
op1 = DynamoDBToS3Operator(
task_id='replicator-1',
dynamodb_table_name='hello',
dynamodb_scan_kwargs={
'TotalSegments': 2,
'Segment': 0,
},
...
)
op2 = DynamoDBToS3Operator(
task_id='replicator-2',
dynamodb_table_name='hello',
dynamodb_scan_kwargs={
'TotalSegments': 2,
'Segment': 1,
},
...
)
:param dynamodb_table_name: Dynamodb table to replicate data from
:type dynamodb_table_name: str
:param s3_bucket_name: S3 bucket to replicate data to
:type s3_bucket_name: str
:param file_size: Flush file to s3 if file size >= file_size
:type file_size: int
:param dynamodb_scan_kwargs: kwargs passed to <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb.html#DynamoDB.Table.scan> # noqa: E501 pylint: disable=line-too-long
:type dynamodb_scan_kwargs: Optional[Dict[str, Any]]
:param s3_key_prefix: Prefix of s3 object key
:type s3_key_prefix: Optional[str]
:param process_func: How we transform a dynamodb item to bytes. By default we dump the json
:type process_func: Callable[[Dict[str, Any]], bytes]
"""

@apply_defaults
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/amazon/aws/transfers/glacier_to_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class GlacierToGCSOperator(BaseOperator):
If set as a sequence, the identities from the list must grant
Service Account Token Creator IAM role to the directly preceding identity, with first
account from the list granting this role to the originating account (templated).
:type impersonation_chain: Union[str, Sequence[str]]
:type google_impersonation_chain: Union[str, Sequence[str]]
"""

template_fields = ("vault_name", "bucket_name", "object_name")
Expand Down
3 changes: 0 additions & 3 deletions airflow/providers/amazon/aws/transfers/s3_to_ftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@ class S3ToFTPOperator(BaseOperator):
:param ftp_path: The ftp remote path. This is the specified file path for
uploading file to the FTP server.
:type ftp_path: str
:param s3_conn_id: The s3 connection id. The name or identifier for
establishing a connection to S3.
:type s3_conn_id: str
:param s3_bucket: The targeted s3 bucket. This is the S3 bucket from
where the file is downloaded.
:type s3_bucket: str
Expand Down
4 changes: 2 additions & 2 deletions airflow/providers/google/cloud/operators/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,10 +283,10 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperat
:param days_back: number of days between ds and the ds we want to check
against. Defaults to 7 days
:type days_back: int
:param metrics_threshold: a dictionary of ratios indexed by metrics, for
:param metrics_thresholds: a dictionary of ratios indexed by metrics, for
example 'COUNT(*)': 1.5 would require a 50 percent or less difference
between the current day, and the prior days_back.
:type metrics_threshold: dict
:type metrics_thresholds: dict
:param use_legacy_sql: Whether to use legacy SQL (true)
or standard SQL (false).
:type use_legacy_sql: bool
Expand Down
7 changes: 0 additions & 7 deletions airflow/providers/google/cloud/operators/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,6 @@ class ComputeEngineStartInstanceOperator(ComputeEngineBaseOperator):
:param api_version: Optional, API version used (for example v1 - or beta). Defaults
to v1.
:type api_version: str
:param validate_body: Optional, If set to False, body validation is not performed.
Defaults to False.
:param impersonation_chain: Optional service account to impersonate using short-term
credentials, or chained list of accounts required to get the access_token
of the last account in the list, which will be impersonated in the request.
Expand Down Expand Up @@ -166,8 +164,6 @@ class ComputeEngineStopInstanceOperator(ComputeEngineBaseOperator):
:param api_version: Optional, API version used (for example v1 - or beta). Defaults
to v1.
:type api_version: str
:param validate_body: Optional, If set to False, body validation is not performed.
Defaults to False.
:param impersonation_chain: Optional service account to impersonate using short-term
credentials, or chained list of accounts required to get the access_token
of the last account in the list, which will be impersonated in the request.
Expand Down Expand Up @@ -553,9 +549,6 @@ class ComputeEngineInstanceGroupUpdateManagerTemplateOperator(ComputeEngineBaseO
:param api_version: Optional, API version used (for example v1 - or beta). Defaults
to v1.
:type api_version: str
:param validate_body: Optional, If set to False, body validation is not performed.
Defaults to False.
:type validate_body: bool
:param impersonation_chain: Optional service account to impersonate using short-term
credentials, or chained list of accounts required to get the access_token
of the last account in the list, which will be impersonated in the request.
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/google/cloud/operators/datacatalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -1413,7 +1413,7 @@ class CloudDataCatalogListTagsOperator(BaseOperator):
:type location: str
:param entry_group: Required. The entry group of the tags to get.
:type entry_group: str
:param entry_group: Required. The entry of the tags to get.
:param entry: Required. The entry of the tags to get.
:type entry: str
:param page_size: The maximum number of resources contained in the underlying API response. If page
streaming is performed per-resource, this parameter does not affect the return value. If page
Expand Down
5 changes: 3 additions & 2 deletions airflow/providers/google/cloud/operators/dataflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ class DataflowTemplatedJobStartOperator(BaseOperator):
:type template: str
:param job_name: The 'jobName' to use when executing the DataFlow template
(templated).
:type job_name: Optional[str]
:param options: Map of job runtime environment options.
It will update environment argument if passed.
Expand Down Expand Up @@ -529,7 +530,7 @@ class DataflowTemplatedJobStartOperator(BaseOperator):
Service Account Token Creator IAM role to the directly preceding identity, with first
account from the list granting this role to the originating account (templated).
:type impersonation_chain: Union[str, Sequence[str]]
:type environment: Optional, Map of job runtime environment options.
:param environment: Optional, Map of job runtime environment options.
.. seealso::
For more information on possible configurations, look at the API documentation
Expand Down Expand Up @@ -849,7 +850,7 @@ class DataflowStartSqlJobOperator(BaseOperator):
<gcloud beta dataflow sql query>`__
command reference
:param options: dict
:type options: dict
:param location: The location of the Dataflow job (for example europe-west1)
:type location: str
:param project_id: The ID of the GCP project that owns the job.
Expand Down
7 changes: 5 additions & 2 deletions airflow/providers/google/cloud/operators/dataprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,11 @@ class DataprepRunJobGroupOperator(BaseOperator):
For more information on how to use this operator, take a look at the guide:
:ref:`howto/operator:DataprepRunJobGroupOperator`
:param recipe_id: The identifier for the recipe you would like to run.
:type recipe_id: int
:param dataprep_conn_id: The Dataprep connection ID
:type dataprep_conn_id: str
:param body_request: Passed as the body_request to GoogleDataprepHook's run_job_group,
where it's the identifier for the recipe to run
:type body_request: dict
"""

template_fields = ("body_request",)
Expand Down
6 changes: 0 additions & 6 deletions airflow/providers/google/cloud/operators/dataproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1630,9 +1630,6 @@ class DataprocInstantiateWorkflowTemplateOperator(BaseOperator):
``Job`` created and stored in the backend is returned.
It is recommended to always set this value to a UUID.
:type request_id: str
:param parameters: Optional. Map from parameter names to values that should be used for those
parameters. Values may not exceed 100 characters.
:type parameters: Dict[str, str]
:param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
retried.
:type retry: google.api_core.retry.Retry
Expand Down Expand Up @@ -1733,9 +1730,6 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(BaseOperator):
``Job`` created and stored in the backend is returned.
It is recommended to always set this value to a UUID.
:type request_id: str
:param parameters: Optional. Map from parameter names to values that should be used for those
parameters. Values may not exceed 100 characters.
:type parameters: Dict[str, str]
:param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
retried.
:type retry: google.api_core.retry.Retry
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/google/cloud/operators/life_sciences.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class LifeSciencesRunPipelineOperator(BaseOperator):
:type location: str
:param project_id: ID of the Google Cloud project. If None, the
default project_id is used.
:param project_id: str
:type project_id: str
:param gcp_conn_id: The connection ID to use to connect to Google Cloud.
:type gcp_conn_id: str
:param api_version: API version used (for example v2beta).
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/google/cloud/operators/mlengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,7 +1104,7 @@ class MLEngineStartTrainingJobOperator(BaseOperator):
provided, master_type must be set as well. If a custom image is
specified, this is mutually exclusive with package_uris and
training_python_module. (templated)
:type master_type: dict
:type master_config: dict
:param runtime_version: The Google Cloud ML runtime version to use for
training. (templated)
:type runtime_version: str
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,6 @@ class AzureFileShareToGCSOperator(BaseOperator):
:type wasb_conn_id: str
:param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
:type gcp_conn_id: str
:param dest_gcs_conn_id: (Deprecated) The connection ID used to connect to Google Cloud.
This parameter has been deprecated. You should pass the gcp_conn_id parameter instead.
:type dest_gcs_conn_id: str
:param dest_gcs: The destination Google Cloud Storage bucket and prefix
where you want to store the files. (templated)
:type dest_gcs: str
Expand Down
4 changes: 0 additions & 4 deletions airflow/providers/google/cloud/transfers/bigquery_to_mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,6 @@ class BigQueryToMySqlOperator(BaseOperator):
:param dataset_table: A dotted ``<dataset>.<table>``: the big query table of origin
:type dataset_table: str
:param max_results: The maximum number of records (rows) to be fetched
from the table. (templated)
:type max_results: str
:param selected_fields: List of fields to return (comma-separated). If
unspecified, all fields are returned.
:type selected_fields: str
Expand All @@ -62,7 +59,6 @@ class BigQueryToMySqlOperator(BaseOperator):
if any. For this to work, the service account making the request must have
domain-wide delegation enabled.
:type delegate_to: str
:type delegate_to: str
:param mysql_conn_id: reference to a specific mysql hook
:type mysql_conn_id: str
:param database: name of the database, which overrides the one defined in the connection
Expand Down
10 changes: 4 additions & 6 deletions airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ class FacebookAdsReportToGcsOperator(BaseOperator):
For more information on how to use this operator, take a look at the guide:
:ref:`howto/operator:FacebookAdsReportToGcsOperator`
:param bucket: The GCS bucket to upload to
:type bucket: str
:param obj: GCS path to save the object. Must be the full file path (ex. `path/to/file.txt`)
:type obj: str
:param bucket_name: The GCS bucket to upload to
:type bucket_name: str
:param object_name: GCS path to save the object. Must be the full file path (ex. `path/to/file.txt`)
:type object_name: str
:param gcp_conn_id: Airflow Google Cloud connection ID
:type gcp_conn_id: str
:param facebook_conn_id: Airflow Facebook Ads connection ID
Expand All @@ -60,8 +60,6 @@ class FacebookAdsReportToGcsOperator(BaseOperator):
:param params: Parameters that determine the query for Facebook
https://developers.facebook.com/docs/marketing-api/insights/parameters/v6.0
:type params: Dict[str, Any]
:param sleep_time: Time to sleep when async call is happening
:type sleep_time: int
:param gzip: Option to compress local file or file data for upload
:type gzip: bool
:param impersonation_chain: Optional service account to impersonate using short-term
Expand Down
6 changes: 3 additions & 3 deletions airflow/providers/google/cloud/transfers/gcs_to_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ class GCSToLocalFilesystemOperator(BaseOperator):
:param bucket: The Google Cloud Storage bucket where the object is.
Must not contain 'gs://' prefix. (templated)
:type bucket: str
:param object: The name of the object to download in the Google cloud
:param object_name: The name of the object to download in the Google cloud
storage bucket. (templated)
:type object: str
:type object_name: str
:param filename: The file path, including filename, on the local file system (where the
operator is being executed) that the file should be downloaded to. (templated)
If no filename passed, the downloaded data will not be stored on the local file
Expand Down Expand Up @@ -75,7 +75,7 @@ class GCSToLocalFilesystemOperator(BaseOperator):

template_fields = (
'bucket',
'object',
'object_name',
'filename',
'store_to_xcom_key',
'impersonation_chain',
Expand Down
4 changes: 2 additions & 2 deletions airflow/providers/google/cloud/transfers/salesforce_to_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class SalesforceToGcsOperator(BaseOperator):
:type object_name: str
:param salesforce_conn_id: the name of the connection that has the parameters
we need to connect to Salesforce.
:type conn_id: str
:type salesforce_conn_id: str
:param include_deleted: True if the query should include deleted records.
:type include_deleted: bool
:param query_params: Additional optional arguments
Expand All @@ -57,7 +57,7 @@ class SalesforceToGcsOperator(BaseOperator):
:param gzip: Option to compress local file or file data for upload
:type gzip: bool
:param gcp_conn_id: the name of the connection that has the parameters we need to connect to GCS.
:type conn_id: str
:type gcp_conn_id: str
"""

template_fields = (
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/google/cloud/transfers/sheets_to_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class GoogleSheetsToGCSOperator(BaseOperator):
:type sheet_filter: List[str]
:param destination_bucket: The destination Google cloud storage bucket where the
report should be written to. (templated)
:param destination_bucket: str
:type destination_bucket: str
:param destination_path: The Google cloud storage URI array for the object created by the operator.
For example: ``path/to/my/files``.
:type destination_path: str
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ class GoogleDisplayVideo360GetSDFDownloadOperationSensor(BaseSensorOperator):
For more information on how to use this operator, take a look at the guide:
:ref:`howto/operator:GoogleDisplayVideo360GetSDFDownloadOperationSensor`
:param name: The name of the operation resource
:type name: Dict[str, Any]
:param operation_name: The name of the operation resource
:type operation_name: Dict[str, Any]
:param api_version: The version of the api that will be requested for example 'v1'.
:type api_version: str
:param gcp_conn_id: The connection ID to use when fetching connection info.
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/yandex/operators/yandexcloud_dataproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ class DataprocDeleteClusterOperator(BaseOperator):
"""Deletes Yandex.Cloud Data Proc cluster.
:param connection_id: ID of the Yandex.Cloud Airflow connection.
:type cluster_id: Optional[str]
:type connection_id: Optional[str]
:param cluster_id: ID of the cluster to remove. (templated)
:type cluster_id: Optional[str]
"""
Expand Down

0 comments on commit e172bd0

Please sign in to comment.