diff --git a/airflow/api/common/mark_tasks.py b/airflow/api/common/mark_tasks.py index 3feb87265956d..ad4c0a4fcae70 100644 --- a/airflow/api/common/mark_tasks.py +++ b/airflow/api/common/mark_tasks.py @@ -97,26 +97,16 @@ def set_state( on the schedule (but it will for subdag dag runs if needed). :param tasks: the iterable of tasks from which to work. task.task.dag needs to be set - :type tasks: list[airflow.models.baseoperator.BaseOperator] :param dag_run_id: the run_id of the dagrun to start looking from - :type dag_run_id: str :param execution_date: the execution date from which to start looking (deprecated) - :type execution_date: datetime.datetime :param upstream: Mark all parents (upstream tasks) - :type upstream: bool :param downstream: Mark all siblings (downstream tasks) of task_id, including SubDags - :type downstream: bool :param future: Mark all future tasks on the interval of the dag up until last execution date. - :type future: bool :param past: Retroactively mark all tasks starting from start_date of the DAG - :type past: bool :param state: State to which the tasks need to be set - :type state: str :param commit: Commit tasks to be altered to the database - :type commit: bool :param session: database session - :type session: sqlalchemy.orm.session.Session :return: list of tasks that have been created and updated """ if not tasks: diff --git a/airflow/api_connexion/exceptions.py b/airflow/api_connexion/exceptions.py index 6838ac8f2e492..0c6c4fa0d3a8f 100644 --- a/airflow/api_connexion/exceptions.py +++ b/airflow/api_connexion/exceptions.py @@ -36,11 +36,7 @@ def common_error_handler(exception: BaseException) -> flask.Response: - """ - Used to capture connexion exceptions and add link to the type field - - :type exception: Exception - """ + """Used to capture connexion exceptions and add link to the type field.""" if isinstance(exception, ProblemException): link = EXCEPTIONS_LINK_MAP.get(exception.status) diff --git a/airflow/configuration.py b/airflow/configuration.py index 0a113b4b694fa..e36917a08a42e 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -616,25 +616,19 @@ def as_dict( :param display_source: If False, the option value is returned. If True, a tuple of (option_value, source) is returned. Source is either 'airflow.cfg', 'default', 'env var', or 'cmd'. - :type display_source: bool :param display_sensitive: If True, the values of options set by env vars and bash commands will be displayed. If False, those options are shown as '< hidden >' - :type display_sensitive: bool :param raw: Should the values be output as interpolated values, or the "raw" form that can be fed back into ConfigParser - :type raw: bool :param include_env: Should the value of configuration from AIRFLOW__ environment variables be included or not - :type include_env: bool :param include_cmds: Should the result of calling any *_cmd config be set (True, default), or should the _cmd options be left as the command to run (False) - :type include_cmds: bool :param include_secret: Should the result of calling any *_secret config be set (True, default), or should the _secret options be left as the path to get the secret from (False) - :type include_secret: bool :rtype: Dict[str, Dict[str, str]] :return: Dictionary, where the key is the name of the section and the content is the dictionary with the name of the parameter and its value.
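A note for reviewers of the ``as_dict`` hunk above: only the ``:type`` lines are dropped, so the keyword arguments keep the documented behaviour. A minimal usage sketch, assuming a configured deployment where ``airflow.configuration.conf`` is the initialized ``AirflowConfigParser``; the section/option names and printed value below are illustrative, not taken from this diff:

from airflow.configuration import conf

# With display_source=True every option maps to a (value, source) tuple,
# where source is one of 'airflow.cfg', 'default', 'env var', or 'cmd'.
cfg = conf.as_dict(display_source=True, display_sensitive=False, raw=True)

# Exact output depends on the deployment, e.g. ('/opt/airflow/dags', 'airflow.cfg').
print(cfg["core"]["dags_folder"])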
diff --git a/airflow/dag_processing/manager.py b/airflow/dag_processing/manager.py index 54548f133398e..f8f0bd9ebb6cb 100644 --- a/airflow/dag_processing/manager.py +++ b/airflow/dag_processing/manager.py @@ -94,18 +94,12 @@ class DagFileProcessorAgent(LoggingMixin, MultiprocessingStartMethodMixin): :param dag_directory: Directory where DAG definitions are kept. All files in file_paths should be under this directory - :type dag_directory: str :param max_runs: The number of times to parse and schedule each file. -1 for unlimited. - :type max_runs: int :param processor_timeout: How long to wait before timing out a DAG file processor - :type processor_timeout: timedelta :param dag_ids: if specified, only schedule tasks with these DAG IDs - :type dag_ids: list[str] :param pickle_dags: whether to pickle DAGs. - :type: pickle_dags: bool :param async_mode: Whether to start agent in async mode - :type async_mode: bool """ def __init__( @@ -188,7 +183,6 @@ def send_callback_to_execute(self, request: CallbackRequest) -> None: Sends information about the callback to be executed by DagFileProcessor. :param request: Callback request to be executed. - :type request: CallbackRequest """ if not self._parent_signal_conn: raise ValueError("Process not started.") @@ -204,9 +198,7 @@ def send_sla_callback_request_to_execute(self, full_filepath: str, dag_id: str) Sends information about the SLA callback to be executed by DagFileProcessor. :param full_filepath: DAG File path - :type full_filepath: str :param dag_id: DAG ID - :type dag_id: str """ if not self._parent_signal_conn: raise ValueError("Process not started.") @@ -390,20 +382,13 @@ class DagFileProcessorManager(LoggingMixin): :param dag_directory: Directory where DAG definitions are kept. All files in file_paths should be under this directory - :type dag_directory: unicode :param max_runs: The number of times to parse and schedule each file. -1 for unlimited. - :type max_runs: int :param processor_timeout: How long to wait before timing out a DAG file processor - :type processor_timeout: timedelta :param signal_conn: connection to communicate signal with processor agent. - :type signal_conn: MultiprocessingConnection :param dag_ids: if specified, only schedule tasks with these DAG IDs - :type dag_ids: list[str] :param pickle_dags: whether to pickle DAGs. - :type pickle_dags: bool :param async_mode: whether to start the manager in async mode - :type async_mode: bool """ def __init__( @@ -709,7 +694,6 @@ def clear_nonexistent_import_errors(self, session): Clears import errors for files that no longer exist.
:param session: session for ORM operations - :type session: sqlalchemy.orm.session.Session """ query = session.query(errors.ImportError) if self._file_paths: @@ -723,7 +707,6 @@ def _log_file_processing_stats(self, known_file_paths): :param known_file_paths: a list of file paths that may contain Airflow DAG definitions - :type known_file_paths: list[unicode] :return: None """ # File Path: Path to the file containing the DAG definition @@ -786,7 +769,6 @@ def _log_file_processing_stats(self, known_file_paths): def get_pid(self, file_path): """ :param file_path: the path to the file that's being processed - :type file_path: unicode :return: the PID of the process processing the given file or None if the specified file is not being processed :rtype: int @@ -805,7 +787,6 @@ def get_all_pids(self): def get_last_runtime(self, file_path): """ :param file_path: the path to the file that was processed - :type file_path: unicode :return: the runtime (in seconds) of the process of the last run, or None if the file was never processed. :rtype: float @@ -816,7 +797,6 @@ def get_last_runtime(self, file_path): def get_last_dag_count(self, file_path): """ :param file_path: the path to the file that was processed - :type file_path: unicode :return: the number of dags loaded from that file, or None if the file was never processed. :rtype: int @@ -827,7 +807,6 @@ def get_last_dag_count(self, file_path): def get_last_error_count(self, file_path): """ :param file_path: the path to the file that was processed - :type file_path: unicode :return: the number of import errors from processing, or None if the file was never processed. :rtype: int @@ -838,7 +817,6 @@ def get_last_error_count(self, file_path): def get_last_finish_time(self, file_path): """ :param file_path: the path to the file that was processed - :type file_path: unicode :return: the finish time of the process of the last run, or None if the file was never processed. :rtype: datetime @@ -849,7 +827,6 @@ def get_last_finish_time(self, file_path): def get_start_time(self, file_path): """ :param file_path: the path to the file that's being processed - :type file_path: unicode :return: the start time of the process that's processing the specified file or None if the file is not currently being processed :rtype: datetime @@ -861,7 +838,6 @@ def get_start_time(self, file_path): def get_run_count(self, file_path): """ :param file_path: the path to the file that's being processed - :type file_path: unicode :return: the number of times the given file has been parsed :rtype: int """ @@ -873,7 +849,6 @@ def set_file_paths(self, new_file_paths): Update this with a new set of paths to DAG definition files. 
:param new_file_paths: list of paths to DAG definition files - :type new_file_paths: list[unicode] :return: None """ self._file_paths = new_file_paths diff --git a/airflow/dag_processing/processor.py b/airflow/dag_processing/processor.py index 4bb338380d5c9..14397686afe76 100644 --- a/airflow/dag_processing/processor.py +++ b/airflow/dag_processing/processor.py @@ -58,13 +58,9 @@ class DagFileProcessorProcess(LoggingMixin, MultiprocessingStartMethodMixin): """Runs DAG processing in a separate process using DagFileProcessor :param file_path: a Python file containing Airflow DAG definitions - :type file_path: str :param pickle_dags: whether to serialize the DAG objects to the DB - :type pickle_dags: bool :param dag_ids: If specified, only look at these DAG IDs - :type dag_ids: List[str] :param callback_requests: failure callback to execute - :type callback_requests: List[airflow.utils.callback_requests.CallbackRequest] """ # Counter that increments every time an instance of this class is created @@ -116,21 +112,14 @@ def _run_file_processor( Process the given file. :param result_channel: the connection to use for passing back the result - :type result_channel: multiprocessing.Connection :param parent_channel: the parent end of the channel to close in the child - :type parent_channel: multiprocessing.Connection :param file_path: the file to process - :type file_path: str :param pickle_dags: whether to pickle the DAGs found in the file and save them to the DB - :type pickle_dags: bool :param dag_ids: if specified, only examine DAG IDs that are in this list - :type dag_ids: list[str] :param thread_name: the name to use for the process that is launched - :type thread_name: str :param callback_requests: failure callback to execute - :type callback_requests: List[airflow.utils.callback_requests.CallbackRequest] :return: the process that was launched :rtype: multiprocessing.Process """ @@ -223,7 +212,6 @@ def terminate(self, sigkill: bool = False) -> None: Terminate (and then kill) the process launched to process the file. :param sigkill: whether to issue a SIGKILL if SIGTERM doesn't work. - :type sigkill: bool """ if self._process is None or self._parent_channel is None: raise AirflowException("Tried to call terminate before starting!") @@ -353,9 +341,7 @@ class DagFileProcessor(LoggingMixin): Returns a tuple of 'number of dags found' and 'the count of import errors' :param dag_ids: If specified, only look at these DAG IDs - :type dag_ids: List[str] :param log: Logger to save the processing process - :type log: logging.Logger """ UNIT_TEST_MODE: bool = conf.getboolean('core', 'UNIT_TEST_MODE') @@ -532,9 +518,7 @@ def update_import_errors(session: Session, dagbag: DagBag) -> None: Airflow UI so that users know that there are issues parsing DAGs. :param session: session for ORM operations - :type session: sqlalchemy.orm.session.Session :param dagbag: DagBag containing DAGs with import errors - :type dagbag: airflow.DagBag """ # Clear the errors of the processed files for dagbag_file in dagbag.file_last_changed: @@ -565,7 +549,6 @@ def execute_callbacks( :param dagbag: Dag Bag of dags :param callback_requests: failure callbacks to execute - :type callback_requests: List[airflow.utils.callback_requests.CallbackRequest] :param session: DB session. """ for request in callback_requests: @@ -628,14 +611,10 @@ def process_file( 6.
Record any errors importing the file into ORM :param file_path: the path to the Python file that should be executed - :type file_path: str :param callback_requests: failure callback to execute - :type callback_requests: List[airflow.utils.dag_processing.CallbackRequest] :param pickle_dags: whether to serialize the DAGs found in the file and save them to the DB - :type pickle_dags: bool :param session: Sqlalchemy ORM Session - :type session: Session :return: number of dags found, count of import errors :rtype: Tuple[int, int] """ diff --git a/airflow/decorators/__init__.pyi b/airflow/decorators/__init__.pyi index 00b79c977252e..eb9a7a09de8ce 100644 --- a/airflow/decorators/__init__.pyi +++ b/airflow/decorators/__init__.pyi @@ -46,17 +46,14 @@ class TaskDecoratorFactory: unrolled to multiple XCom values. List/Tuples will unroll to xcom values with index as key. Dict will unroll to xcom values with keys as XCom keys. Defaults to False. - :type multiple_outputs: bool :param templates_dict: a dictionary where the values are templates that will get templated by the Airflow engine sometime between ``__init__`` and ``execute`` takes place and are made available in your callable's context after the template has been applied - :type templates_dict: dict of str :param show_return_value_in_logs: a bool value whether to show return_value logs. Defaults to True, which allows return value log output. It can be set to False to prevent log output of return value when you return huge data such as transmitting a large amount of XCom to TaskAPI. - :type show_return_value_in_logs: bool """ # [START mixin_for_typing] @overload @@ -78,17 +75,14 @@ class TaskDecoratorFactory: unrolled to multiple XCom values. List/Tuples will unroll to xcom values with index as key. Dict will unroll to xcom values with keys as XCom keys. Defaults to False. - :type multiple_outputs: bool :param templates_dict: a dictionary where the values are templates that will get templated by the Airflow engine sometime between ``__init__`` and ``execute`` takes place and are made available in your callable's context after the template has been applied - :type templates_dict: dict of str :param show_return_value_in_logs: a bool value whether to show return_value logs. Defaults to True, which allows return value log output. It can be set to False to prevent log output of return value when you return huge data such as transmitting a large amount of XCom to TaskAPI. - :type show_return_value_in_logs: bool """ @overload def __call__(self, python_callable: F) -> F: ... @@ -113,31 +107,24 @@ class TaskDecoratorFactory: unrolled to multiple XCom values. List/Tuples will unroll to xcom values with index as key. Dict will unroll to xcom values with keys as XCom keys. Defaults to False. - :type multiple_outputs: bool :param requirements: Either a list of requirement strings, or a (templated) "requirements file" as specified by pip. - :type requirements: list[str] | str :param python_version: The Python version to run the virtualenv with. Note that both 2 and 2.7 are acceptable forms. - :type python_version: Optional[Union[str, int, float]] :param use_dill: Whether to use dill to serialize the args and result (pickle is default). This allows more complex types but requires you to include dill in your requirements. - :type use_dill: bool :param system_site_packages: Whether to include system_site_packages in your virtualenv. See virtualenv documentation for more information.
- :type system_site_packages: bool :param templates_dict: a dictionary where the values are templates that will get templated by the Airflow engine sometime between ``__init__`` and ``execute`` takes place and are made available in your callable's context after the template has been applied - :type templates_dict: dict of str :param show_return_value_in_logs: a bool value whether to show return_value logs. Defaults to True, which allows return value log output. It can be set to False to prevent log output of return value when you return huge data such as transmitting a large amount of XCom to TaskAPI. - :type show_return_value_in_logs: bool """ @overload def virtualenv(self, python_callable: F) -> F: ... @@ -188,87 +175,56 @@ class TaskDecoratorFactory: unrolled to multiple XCom values. List/Tuples will unroll to xcom values with index as key. Dict will unroll to xcom values with keys as XCom keys. Defaults to False. - :type multiple_outputs: bool :param use_dill: Whether to use dill or pickle for serialization - :type use_dill: bool :param image: Docker image from which to create the container. If image tag is omitted, "latest" will be used. - :type image: str :param api_version: Remote API version. Set to ``auto`` to automatically detect the server's version. - :type api_version: str :param container_name: Name of the container. Optional (templated) - :type container_name: str or None :param cpus: Number of CPUs to assign to the container. This value gets multiplied by 1024. - :type cpus: float :param docker_url: URL of the host running the docker daemon. Default is unix://var/run/docker.sock - :type docker_url: str :param environment: Environment variables to set in the container. (templated) - :type environment: dict :param private_environment: Private environment variables to set in the container. These are not templated, and hidden from the website. - :type private_environment: dict :param force_pull: Pull the docker image on every run. Default is False. - :type force_pull: bool :param mem_limit: Maximum amount of memory the container can use. Either a float value, which represents the limit in bytes, or a string like ``128m`` or ``1g``. - :type mem_limit: float or str :param host_tmp_dir: Specify the location of the temporary directory on the host which will be mapped to tmp_dir. If not provided defaults to using the standard system temp directory. - :type host_tmp_dir: str :param network_mode: Network mode for the container. - :type network_mode: str :param tls_ca_cert: Path to a PEM-encoded certificate authority to secure the docker connection. - :type tls_ca_cert: str :param tls_client_cert: Path to the PEM-encoded certificate used to authenticate docker client. - :type tls_client_cert: str :param tls_client_key: Path to the PEM-encoded key used to authenticate docker client. - :type tls_client_key: str :param tls_hostname: Hostname to match against the docker server certificate or False to disable the check. - :type tls_hostname: str or bool :param tls_ssl_version: Version of SSL to use when communicating with docker daemon. - :type tls_ssl_version: str :param tmp_dir: Mount point inside the container to a temporary directory created on the host by the operator. The path is also made available via the environment variable ``AIRFLOW_TMP_DIR`` inside the container. - :type tmp_dir: str :param user: Default user inside the docker container. - :type user: int or str :param mounts: List of mounts to mount into the container, e.g.
``['/host/path:/container/path', '/host/path2:/container/path2:ro']``. - :type mounts: list :param working_dir: Working directory to set on the container (equivalent to the -w switch of the docker client) - :type working_dir: str :param xcom_all: Push all the stdout or just the last line. The default is False (last line). - :type xcom_all: bool :param docker_conn_id: ID of the Airflow connection to use - :type docker_conn_id: str :param dns: Docker custom DNS servers - :type dns: list[str] :param dns_search: Docker custom DNS search domain - :type dns_search: list[str] :param auto_remove: Auto-removal of the container on daemon side when the container's process exits. The default is False. - :type auto_remove: bool :param shm_size: Size of ``/dev/shm`` in bytes. The size must be greater than 0. If omitted uses system default. - :type shm_size: int :param tty: Allocate pseudo-TTY to the container. This needs to be set to see logs of the Docker container. - :type tty: bool :param privileged: Give extended privileges to this container. - :type privileged: bool :param cap_add: Include container capabilities - :type cap_add: list[str] """ # [END decorator_signature] diff --git a/airflow/decorators/base.py b/airflow/decorators/base.py index e660b9d64ed30..8c013e51c9de0 100644 --- a/airflow/decorators/base.py +++ b/airflow/decorators/base.py @@ -110,20 +110,15 @@ class DecoratedOperator(BaseOperator): Wraps a Python callable and captures args/kwargs when called for execution. :param python_callable: A reference to an object that is callable - :type python_callable: python callable :param op_kwargs: a dictionary of keyword arguments that will get unpacked in your function (templated) - :type op_kwargs: dict :param op_args: a list of positional arguments that will get unpacked when calling your callable (templated) - :type op_args: list :param multiple_outputs: If set to True, the decorated function's return value will be unrolled to multiple XCom values. Dict will unroll to XCom values with its keys as XCom keys. Defaults to False. - :type multiple_outputs: bool :param kwargs_to_upstream: For certain operators, we might need to upstream certain arguments that would otherwise be absorbed by the DecoratedOperator (for example python_callable for the PythonOperator). This gives a user the option to upstream kwargs as needed. - :type kwargs_to_upstream: dict """ template_fields: Sequence[str] = ('op_args', 'op_kwargs') @@ -332,13 +327,10 @@ def task_decorator_factory( Accepts kwargs for operator kwarg. Can be reused in a single DAG. :param python_callable: Function to decorate - :type python_callable: Optional[Callable] :param multiple_outputs: If set to True, the decorated function's return value will be unrolled to multiple XCom values. Dict will unroll to XCom values with its keys as XCom keys. Defaults to False. - :type multiple_outputs: bool :param decorated_operator_class: The operator that executes the logic needed to run the python function in the correct environment - :type decorated_operator_class: BaseOperator """ if multiple_outputs is None: diff --git a/airflow/decorators/python.py b/airflow/decorators/python.py index 1f9c41c2b11da..df19e2dd96b4a 100644 --- a/airflow/decorators/python.py +++ b/airflow/decorators/python.py @@ -26,16 +26,12 @@ class _PythonDecoratedOperator(DecoratedOperator, PythonOperator): Wraps a Python callable and captures args/kwargs when called for execution.
:param python_callable: A reference to an object that is callable - :type python_callable: python callable :param op_kwargs: a dictionary of keyword arguments that will get unpacked in your function (templated) - :type op_kwargs: dict :param op_args: a list of positional arguments that will get unpacked when calling your callable (templated) - :type op_args: list :param multiple_outputs: If set to True, the decorated function's return value will be unrolled to multiple XCom values. Dict will unroll to XCom values with its keys as XCom keys. Defaults to False. - :type multiple_outputs: bool """ template_fields: Sequence[str] = ('op_args', 'op_kwargs') @@ -71,10 +67,8 @@ def python_task( Accepts kwargs for operator kwarg. Can be reused in a single DAG. :param python_callable: Function to decorate - :type python_callable: Optional[Callable] :param multiple_outputs: If set to True, the decorated function's return value will be unrolled to multiple XCom values. Dict will unroll to XCom values with its keys as XCom keys. Defaults to False. - :type multiple_outputs: bool """ return task_decorator_factory( python_callable=python_callable, diff --git a/airflow/decorators/python_virtualenv.py b/airflow/decorators/python_virtualenv.py index 4b72a21e4c49c..efd8f0cb62d6a 100644 --- a/airflow/decorators/python_virtualenv.py +++ b/airflow/decorators/python_virtualenv.py @@ -29,16 +29,12 @@ class _PythonVirtualenvDecoratedOperator(DecoratedOperator, PythonVirtualenvOper Wraps a Python callable and captures args/kwargs when called for execution. :param python_callable: A reference to an object that is callable - :type python_callable: python callable :param op_kwargs: a dictionary of keyword arguments that will get unpacked in your function (templated) - :type op_kwargs: dict :param op_args: a list of positional arguments that will get unpacked when calling your callable (templated) - :type op_args: list :param multiple_outputs: If set to True, the decorated function's return value will be unrolled to multiple XCom values. Dict will unroll to XCom values with its keys as XCom keys. Defaults to False. - :type multiple_outputs: bool """ template_fields: Sequence[str] = ('op_args', 'op_kwargs') @@ -83,11 +79,9 @@ def virtualenv_task( :meta private: :param python_callable: Function to decorate - :type python_callable: Optional[Callable] :param multiple_outputs: If set to True, the decorated function's return value will be unrolled to multiple XCom values. Dict will unroll to XCom values with its keys as XCom keys. Defaults to False. - :type multiple_outputs: bool """ return task_decorator_factory( python_callable=python_callable, diff --git a/airflow/example_dags/example_dag_decorator.py b/airflow/example_dags/example_dag_decorator.py index 122466f6d7057..fb653d237acb6 100644 --- a/airflow/example_dags/example_dag_decorator.py +++ b/airflow/example_dags/example_dag_decorator.py @@ -44,7 +44,6 @@ def example_dag_decorator(email: str = 'example@example.com'): DAG to send server IP to email. :param email: Email to send IP to. Defaults to example@example.com. 
- :type email: str """ get_ip = GetRequestOperator(task_id='get_ip', url="http://httpbin.org/get") diff --git a/airflow/example_dags/example_trigger_target_dag.py b/airflow/example_dags/example_trigger_target_dag.py index 41aecf1a1b613..b7943a3dfded0 100644 --- a/airflow/example_dags/example_trigger_target_dag.py +++ b/airflow/example_dags/example_trigger_target_dag.py @@ -34,7 +34,6 @@ def run_this_func(dag_run=None): Print the payload "message" passed to the DagRun conf attribute. :param dag_run: The DagRun object - :type dag_run: DagRun """ print(f"Remotely received value of {dag_run.conf.get('message')} for key=message") diff --git a/airflow/exceptions.py b/airflow/exceptions.py index a2d539d99dd83..254959b7eb675 100644 --- a/airflow/exceptions.py +++ b/airflow/exceptions.py @@ -63,7 +63,6 @@ class AirflowRescheduleException(AirflowException): Raise when the task should be re-scheduled at a later time. :param reschedule_date: The date when the task should be rescheduled - :type reschedule_date: datetime.datetime """ def __init__(self, reschedule_date): diff --git a/airflow/executors/celery_executor.py b/airflow/executors/celery_executor.py index b0e125fd44218..d472be4dbaefa 100644 --- a/airflow/executors/celery_executor.py +++ b/airflow/executors/celery_executor.py @@ -149,9 +149,7 @@ class ExceptionWithTraceback: Wrapper class used to propagate exceptions to parent processes from subprocesses. :param exception: The exception to wrap - :type exception: Exception :param exception_traceback: The stacktrace to wrap - :type exception_traceback: str """ def __init__(self, exception: Exception, exception_traceback: str): @@ -558,7 +556,6 @@ def fetch_celery_task_state(async_result: AsyncResult) -> Tuple[str, Union[str, :param async_result: a tuple of the Celery task key and the async Celery object used to fetch the task's state - :type async_result: tuple(str, celery.result.AsyncResult) :return: a tuple of the Celery task key and the Celery state and the celery info of the task :rtype: tuple[str, str, str] diff --git a/airflow/hooks/dbapi.py b/airflow/hooks/dbapi.py index 5fb4278b9e6ce..f86fe2681134c 100644 --- a/airflow/hooks/dbapi.py +++ b/airflow/hooks/dbapi.py @@ -57,7 +57,6 @@ class DbApiHook(BaseHook): :param schema: Optional DB schema that overrides the schema specified in the connection. Make sure that if you change the schema parameter value in the constructor of the derived Hook, such change should be done before calling the ``DBApiHook.__init__()``. - :type schema: Optional[str] """ # Override to provide the connection name. @@ -123,11 +122,8 @@ def get_pandas_df(self, sql, parameters=None, **kwargs): :param sql: the sql statement to be executed (str) or a list of sql statements to execute - :type sql: str or list :param parameters: The parameters to render the SQL query with. - :type parameters: dict or iterable :param kwargs: (optional) passed into pandas.io.sql.read_sql method - :type kwargs: dict """ try: from pandas.io import sql as psql @@ -143,9 +139,7 @@ def get_records(self, sql, parameters=None): :param sql: the sql statement to be executed (str) or a list of sql statements to execute - :type sql: str or list :param parameters: The parameters to render the SQL query with. 
- :type parameters: dict or iterable """ with closing(self.get_conn()) as conn: with closing(conn.cursor()) as cur: @@ -161,9 +155,7 @@ def get_first(self, sql, parameters=None): :param sql: the sql statement to be executed (str) or a list of sql statements to execute - :type sql: str or list :param parameters: The parameters to render the SQL query with. - :type parameters: dict or iterable """ with closing(self.get_conn()) as conn: with closing(conn.cursor()) as cur: @@ -181,14 +173,10 @@ def run(self, sql, autocommit=False, parameters=None, handler=None): :param sql: the sql statement to be executed (str) or a list of sql statements to execute - :type sql: str or list :param autocommit: What to set the connection's autocommit setting to before executing the query. - :type autocommit: bool :param parameters: The parameters to render the SQL query with. - :type parameters: dict or iterable :param handler: The result handler which is called with the result of each statement. - :type handler: callable :return: query results if handler was provided. """ scalar = isinstance(sql, str) @@ -249,7 +237,6 @@ def get_autocommit(self, conn): does not support autocommit. :param conn: Connection to get autocommit setting from. - :type conn: connection object. :return: connection autocommit setting. :rtype: bool """ @@ -266,13 +253,9 @@ def _generate_insert_sql(table, values, target_fields, replace, **kwargs): The REPLACE variant is specific to MySQL syntax. :param table: Name of the target table - :type table: str :param values: The row to insert into the table - :type values: tuple of cell values :param target_fields: The names of the columns to fill in the table - :type target_fields: iterable of strings :param replace: Whether to replace instead of insert - :type replace: bool :return: The generated INSERT or REPLACE SQL statement :rtype: str """ @@ -299,16 +282,11 @@ def insert_rows(self, table, rows, target_fields=None, commit_every=1000, replac a new transaction is created every commit_every rows :param table: Name of the target table - :type table: str :param rows: The rows to insert into the table - :type rows: iterable of tuples :param target_fields: The names of the columns to fill in the table - :type target_fields: iterable of strings :param commit_every: The maximum number of rows to insert in one transaction. Set to 0 to insert all rows in one transaction. - :type commit_every: int :param replace: Whether to replace instead of insert - :type replace: bool """ i = 0 with closing(self.get_conn()) as conn: @@ -339,9 +317,7 @@ def _serialize_cell(cell, conn=None): Returns the SQL literal of the cell as a string. 
:param cell: The cell to insert into the table - :type cell: object :param conn: The database connection - :type conn: connection object :return: The serialized cell :rtype: str """ @@ -356,9 +332,7 @@ def bulk_dump(self, table, tmp_file): Dumps a database table into a tab-delimited file :param table: The name of the source table - :type table: str :param tmp_file: The path of the target file - :type tmp_file: str """ raise NotImplementedError() @@ -367,9 +341,7 @@ def bulk_load(self, table, tmp_file): Loads a tab-delimited file into a database table :param table: The name of the target table - :type table: str :param tmp_file: The path of the file to load into the table - :type tmp_file: str """ raise NotImplementedError() diff --git a/airflow/jobs/backfill_job.py b/airflow/jobs/backfill_job.py index de994b03b3483..406c2ead66901 100644 --- a/airflow/jobs/backfill_job.py +++ b/airflow/jobs/backfill_job.py @@ -71,27 +71,16 @@ class _DagRunTaskStatus: it easier to pass it around. :param to_run: Tasks to run in the backfill - :type to_run: dict[tuple[TaskInstanceKey], airflow.models.TaskInstance] :param running: Maps running task instance key to task instance object - :type running: dict[tuple[TaskInstanceKey], airflow.models.TaskInstance] :param skipped: Tasks that have been skipped - :type skipped: set[tuple[TaskInstanceKey]] :param succeeded: Tasks that have succeeded so far - :type succeeded: set[tuple[TaskInstanceKey]] :param failed: Tasks that have failed - :type failed: set[tuple[TaskInstanceKey]] :param not_ready: Tasks not ready for execution - :type not_ready: set[tuple[TaskInstanceKey]] :param deadlocked: Deadlocked tasks - :type deadlocked: set[airflow.models.TaskInstance] :param active_runs: Active dag runs at a certain point in time - :type active_runs: list[DagRun] :param executed_dag_run_dates: Datetime objects for the executed dag runs - :type executed_dag_run_dates: set[datetime.datetime] :param finished_runs: Number of finished runs so far - :type finished_runs: int :param total_runs: Number of total dag runs able to run - :type total_runs: int """ # TODO(edgarRd): AIRFLOW-1444: Add consistency check on counts @@ -142,31 +131,18 @@ def __init__( ): """ :param dag: DAG object. - :type dag: airflow.models.DAG :param start_date: start date for the backfill date range. - :type start_date: datetime.datetime :param end_date: end date for the backfill date range. - :type end_date: datetime.datetime :param mark_success: whether to automatically mark the task as successful. - :type mark_success: bool :param donot_pickle: whether to avoid pickling the DAG - :type donot_pickle: bool :param ignore_first_depends_on_past: whether to ignore depends_on_past for the first dag run - :type ignore_first_depends_on_past: bool :param ignore_task_deps: whether to ignore the task dependencies - :type ignore_task_deps: bool :param pool: pool to backfill - :type pool: str :param delay_on_limit_secs: - :param verbose: - :type verbose: flag to whether display verbose message to backfill console + :param verbose: whether to display verbose messages to the backfill console :param conf: a dictionary of k-v pairs that the user can pass to the backfill - :type conf: dictionary :param rerun_failed_tasks: whether to auto-rerun failed tasks in the backfill - :type rerun_failed_tasks: bool :param run_backwards: Whether to process the dates from most to least recent - :type run_backwards bool :param run_at_least_once: If true, always run the DAG at least once even if no logical run exists within the time range. - :type: bool @@ -197,7 +174,6 @@ def _update_counters(self, ti_status, session=None): to tasks to run in case required.
:param ti_status: the internal status of the backfill job tasks - :type ti_status: BackfillJob._DagRunTaskStatus """ tis_to_be_scheduled = [] refreshed_tis = [] @@ -354,9 +330,7 @@ def _task_instances_for_dag_run(self, dag_run, session=None): run in the given dag run. :param dag_run: the dag run to get the tasks from - :type dag_run: airflow.models.DagRun :param session: the database session object - :type session: sqlalchemy.orm.session.Session """ tasks_to_run = {} @@ -416,15 +390,10 @@ def _process_backfill_task_instances( them in a backfill process. :param ti_status: the internal status of the job - :type ti_status: BackfillJob._DagRunTaskStatus :param executor: the executor to run the task instances - :type executor: BaseExecutor :param pickle_id: the pickle_id if dag is pickled, None otherwise - :type pickle_id: int :param start_date: the start date of the backfill job - :type start_date: datetime.datetime :param session: the current session object - :type session: sqlalchemy.orm.session.Session :return: the list of execution_dates for the finished dag runs :rtype: list """ @@ -710,17 +679,11 @@ def _execute_dagruns(self, dagrun_infos, ti_status, executor, pickle_id, start_d Returns a list of execution dates of the dag runs that were executed. :param dagrun_infos: Schedule information for dag runs - :type dagrun_infos: list[DagRunInfo] :param ti_status: internal BackfillJob status structure to track the progress of tis - :type ti_status: BackfillJob._DagRunTaskStatus :param executor: the executor to use, it must be previously started - :type executor: BaseExecutor :param pickle_id: numeric id of the pickled dag, None if not pickled - :type pickle_id: int :param start_date: backfill start date - :type start_date: datetime.datetime :param session: the current session object - :type session: sqlalchemy.orm.session.Session """ for dagrun_info in dagrun_infos: for dag in [self.dag] + self.dag.subdags: @@ -860,7 +823,6 @@ def reset_state_for_orphaned_tasks(self, filter_by_dag_run=None, session=None): are made in sequence. :param filter_by_dag_run: the dag_run we want to process, None if all - :type filter_by_dag_run: airflow.models.DagRun :return: the number of TIs reset :rtype: int """ diff --git a/airflow/jobs/base_job.py b/airflow/jobs/base_job.py index c50cc7bb4d3fa..2a3fe0f0fefff 100644 --- a/airflow/jobs/base_job.py +++ b/airflow/jobs/base_job.py @@ -137,7 +137,6 @@ def is_alive(self, grace_multiplier=2.1): :param grace_multiplier: multiplier of heartrate to require heart beat within - :type grace_multiplier: number :rtype: boolean """ return ( @@ -186,7 +185,6 @@ def heartbeat(self, only_if_necessary: bool = False): :param only_if_necessary: If the heartbeat is not yet due then do nothing (don't update column, don't call ``heartbeat_callback``) - :type only_if_necessary: boolean """ seconds_remaining = 0 if self.latest_heartbeat: diff --git a/airflow/jobs/scheduler_job.py b/airflow/jobs/scheduler_job.py index a69c0574f966e..e8f506dfeb9e2 100644 --- a/airflow/jobs/scheduler_job.py +++ b/airflow/jobs/scheduler_job.py @@ -79,22 +79,16 @@ class SchedulerJob(BaseJob): :param subdir: directory containing Python files with Airflow DAG definitions, or a specific path to a file - :type subdir: str :param num_runs: The number of times to run the scheduling loop. If you have a large number of DAG files this could complete before each file has been parsed. -1 for unlimited times. - :type num_runs: int :param num_times_parse_dags: The number of times to try to parse each DAG file.
-1 for unlimited times. - :type num_times_parse_dags: int :param scheduler_idle_sleep_time: The number of seconds to wait between polls of running processors - :type scheduler_idle_sleep_time: int :param do_pickle: once a DAG object is obtained by executing the Python file, whether to serialize the DAG object to the DB - :type do_pickle: bool :param log: override the default Logger - :type log: logging.Logger """ __mapper_args__ = {'polymorphic_identity': 'SchedulerJob'} @@ -217,7 +211,6 @@ def __get_concurrency_maps( Get the concurrency maps. :param states: List of states to query for - :type states: list[airflow.utils.state.State] :return: A map from (dag_id, task_id) to # of task instances and a map from (dag_id, task_id) to # of task instances in the given state list :rtype: tuple[dict[str, int], dict[tuple[str, str], int]] @@ -242,7 +235,6 @@ def _executable_task_instances_to_queued(self, max_tis: int, session: Session = dag max_active_tasks, executor state, and priority. :param max_tis: Maximum number of TIs to queue in this loop. - :type max_tis: int :return: list[airflow.models.TaskInstance] """ from airflow.utils.db import DBLocks @@ -486,9 +478,7 @@ def _enqueue_task_instances_with_queued_state( with the executor. :param task_instances: TaskInstances to enqueue - :type task_instances: list[TaskInstance] :param session: The session object - :type session: Session """ # actually enqueue them for ti in task_instances: @@ -528,7 +518,6 @@ def _critical_section_execute_task_instances(self, session: Session) -> int: MariaDB or MySQL 5.x) the other schedulers will wait for the lock before continuing. - :param session: - :type session: sqlalchemy.orm.Session + :param session: SQLAlchemy ORM session :return: Number of task instances with state changed. """ if self.max_tis_per_query == 0: diff --git a/airflow/kubernetes/k8s_model.py b/airflow/kubernetes/k8s_model.py index c604cb3fa81b5..01e294672aa52 100644 --- a/airflow/kubernetes/k8s_model.py +++ b/airflow/kubernetes/k8s_model.py @@ -38,7 +38,6 @@ class K8SModel(ABC): def attach_to_pod(self, pod: k8s.V1Pod) -> k8s.V1Pod: """ :param pod: A pod to attach this Kubernetes object to - :type pod: kubernetes.client.models.V1Pod :return: The pod with the object attached """ @@ -46,9 +45,7 @@ def append_to_pod(pod: k8s.V1Pod, k8s_objects: Optional[List[K8SModel]]): """ :param pod: A pod to attach a list of Kubernetes objects to - :type pod: kubernetes.client.models.V1Pod :param k8s_objects: a possibly-None list of K8SModels - :type k8s_objects: Optional[List[K8SModel]] :return: pod with the objects attached if they exist """ if not k8s_objects: diff --git a/airflow/kubernetes/kube_client.py b/airflow/kubernetes/kube_client.py index 36cfc8d20e904..97836be998608 100644 --- a/airflow/kubernetes/kube_client.py +++ b/airflow/kubernetes/kube_client.py @@ -118,11 +118,8 @@ def get_kube_client( Retrieves Kubernetes client :param in_cluster: whether we are in cluster - :type in_cluster: bool :param cluster_context: context of the cluster - :type cluster_context: str :param config_file: configuration file - :type config_file: str :return: kubernetes client :rtype: client.CoreV1Api """ diff --git a/airflow/kubernetes/pod_generator.py b/airflow/kubernetes/pod_generator.py index 96ee1ffa4f2e8..1de2108c9d291 100644 --- a/airflow/kubernetes/pod_generator.py +++ b/airflow/kubernetes/pod_generator.py @@ -94,11 +94,8 @@ class PodGenerator: the first container in the list of containers. :param pod: The fully specified pod.
Mutually exclusive with `path_or_string` - :type pod: Optional[kubernetes.client.models.V1Pod] :param pod_template_file: Path to YAML file. Mutually exclusive with `pod` - :type pod_template_file: Optional[str] :param extract_xcom: Whether to bring up a container for xcom - :type extract_xcom: bool """ def __init__( @@ -225,9 +222,7 @@ def reconcile_pods(base_pod: k8s.V1Pod, client_pod: Optional[k8s.V1Pod]) -> k8s. """ :param base_pod: has the base attributes which are overwritten if they exist in the client pod and remain if they do not exist in the client_pod - :type base_pod: k8s.V1Pod :param client_pod: the pod that the client wants to create. - :type client_pod: k8s.V1Pod :return: the merged pods This can't be done recursively as certain fields are overwritten, and some are concatenated. @@ -248,9 +243,7 @@ def reconcile_metadata(base_meta, client_meta): Merge kubernetes Metadata objects :param base_meta: has the base attributes which are overwritten if they exist in the client_meta and remain if they do not exist in the client_meta - :type base_meta: k8s.V1ObjectMeta :param client_meta: the spec that the client wants to create. - :type client_meta: k8s.V1ObjectMeta :return: the merged specs """ if base_meta and not client_meta: @@ -274,9 +267,7 @@ def reconcile_specs( """ :param base_spec: has the base attributes which are overwritten if they exist in the client_spec and remain if they do not exist in the client_spec - :type base_spec: k8s.V1PodSpec :param client_spec: the spec that the client wants to create. - :type client_spec: k8s.V1PodSpec :return: the merged specs """ if base_spec and not client_spec: @@ -300,9 +291,7 @@ def reconcile_containers( """ :param base_containers: has the base attributes which are overwritten if they exist in the client_containers and remain if they do not exist in the client_containers - :type base_containers: List[k8s.V1Container] :param client_containers: the containers that the client wants to create. - :type client_containers: List[k8s.V1Container] :return: the merged containers This runs recursively over the list of containers. @@ -504,7 +493,6 @@ def extend_object_field(base_obj, client_obj, field_name): :param client_obj: an object which has a property `field_name` that is a list. A copy of this object is returned with `field_name` modified :param field_name: the name of the list field - :type field_name: str :return: the client_obj with the property `field_name` being the two properties appended """ client_obj_cp = copy.deepcopy(client_obj) diff --git a/airflow/kubernetes/pod_generator_deprecated.py b/airflow/kubernetes/pod_generator_deprecated.py index c4d83d60a4e5a..e4655f8cafb2b 100644 --- a/airflow/kubernetes/pod_generator_deprecated.py +++ b/airflow/kubernetes/pod_generator_deprecated.py @@ -82,65 +82,37 @@ class PodGenerator: the first container in the list of containers.
:param image: The docker image - :type image: Optional[str] :param name: name in the metadata section (not the container name) - :type name: Optional[str] :param namespace: pod namespace - :type namespace: Optional[str] :param volume_mounts: list of kubernetes volumes mounts - :type volume_mounts: Optional[List[Union[k8s.V1VolumeMount, dict]]] :param envs: A dict containing the environment variables - :type envs: Optional[Dict[str, str]] :param cmds: The command to be run on the first container - :type cmds: Optional[List[str]] :param args: The arguments to be run on the pod - :type args: Optional[List[str]] :param labels: labels for the pod metadata - :type labels: Optional[Dict[str, str]] :param node_selectors: node selectors for the pod - :type node_selectors: Optional[Dict[str, str]] :param ports: list of ports. Applies to the first container. - :type ports: Optional[List[Union[k8s.V1ContainerPort, dict]]] :param volumes: Volumes to be attached to the first container - :type volumes: Optional[List[Union[k8s.V1Volume, dict]]] :param image_pull_policy: Specify a policy to cache or always pull an image - :type image_pull_policy: str :param restart_policy: The restart policy of the pod - :type restart_policy: str :param image_pull_secrets: Any image pull secrets to be given to the pod. If more than one secret is required, provide a comma separated list: secret_a,secret_b - :type image_pull_secrets: str :param init_containers: A list of init containers - :type init_containers: Optional[List[k8s.V1Container]] :param service_account_name: Identity for processes that run in a Pod - :type service_account_name: Optional[str] :param resources: Resource requirements for the first containers - :type resources: Optional[Union[k8s.V1ResourceRequirements, dict]] :param annotations: annotations for the pod - :type annotations: Optional[Dict[str, str]] :param affinity: A dict containing a group of affinity scheduling rules - :type affinity: Optional[dict] :param hostnetwork: If True enable host networking on the pod - :type hostnetwork: bool :param tolerations: A list of kubernetes tolerations - :type tolerations: Optional[list] :param security_context: A dict containing the security context for the pod - :type security_context: Optional[Union[k8s.V1PodSecurityContext, dict]] :param configmaps: Any configmap refs to envfrom. If more than one configmap is required, provide a comma separated list configmap_a,configmap_b - :type configmaps: List[str] :param dnspolicy: Specify a dnspolicy for the pod - :type dnspolicy: Optional[str] :param schedulername: Specify a schedulername for the pod - :type schedulername: Optional[str] :param pod: The fully specified pod. 
Mutually exclusive with `path_or_string` - :type pod: Optional[kubernetes.client.models.V1Pod] :param extract_xcom: Whether to bring up a container for xcom - :type extract_xcom: bool :param priority_class_name: priority class name for the launched Pod - :type priority_class_name: str """ def __init__( diff --git a/airflow/kubernetes/pod_launcher_deprecated.py b/airflow/kubernetes/pod_launcher_deprecated.py index de9c5f52fe564..3367aa0cc23ef 100644 --- a/airflow/kubernetes/pod_launcher_deprecated.py +++ b/airflow/kubernetes/pod_launcher_deprecated.py @@ -139,7 +139,6 @@ def monitor_pod(self, pod: V1Pod, get_logs: bool) -> Tuple[State, Optional[str]] Monitors a pod and returns the final state :param pod: pod spec that will be monitored - :type pod : V1Pod :param get_logs: whether to read the logs locally :return: Tuple[State, Optional[str]] """ @@ -180,7 +179,6 @@ def parse_log_line(self, line: str) -> Tuple[str, str]: Parse a K8s log line into a timestamp and a log message :param line: k8s log line - :type line: str :return: timestamp and log message :rtype: Tuple[str, str] """ diff --git a/airflow/kubernetes/secret.py b/airflow/kubernetes/secret.py index 1ca26111303dd..afb30916ff357 100644 --- a/airflow/kubernetes/secret.py +++ b/airflow/kubernetes/secret.py @@ -35,20 +35,15 @@ def __init__(self, deploy_type, deploy_target, secret, key=None, items=None): :param deploy_type: The type of secret deploy in Kubernetes, either `env` or `volume` - :type deploy_type: str :param deploy_target: (Optional) The environment variable in which to expose the secret when `deploy_type` is `env`, or the file path when `deploy_type` is `volume`. If `key` is not provided, deploy_target should be None. - :type deploy_target: str or None :param secret: Name of the secrets object in Kubernetes - :type secret: str :param key: (Optional) Key of the secret within the Kubernetes Secret; if not provided with `deploy_type` `env`, all secrets in the object are mounted - :type key: str or None :param items: (Optional) items that can be added to a volume secret for specifying projections of secret keys to paths https://kubernetes.io/docs/concepts/configuration/secret/#projection-of-secret-keys-to-specific-paths - :type items: List[k8s.V1KeyToPath] """ if deploy_type not in ('env', 'volume'): raise AirflowConfigException("deploy_type must be env or volume") diff --git a/airflow/lineage/backend.py b/airflow/lineage/backend.py index cde8d94d1f4c8..ca072f434105a 100644 --- a/airflow/lineage/backend.py +++ b/airflow/lineage/backend.py @@ -36,12 +36,8 @@ def send_lineage( Sends lineage metadata to a backend :param operator: the operator executing a transformation on the inlets and outlets - :type operator: airflow.models.baseoperator.BaseOperator :param inlets: the inlets to this operator - :type inlets: list :param outlets: the outlets from this operator - :type outlets: list :param context: the current context of the task instance - :type context: dict """ raise NotImplementedError() diff --git a/airflow/macros/__init__.py b/airflow/macros/__init__.py index 7d7322a018790..e1f27411e5069 100644 --- a/airflow/macros/__init__.py +++ b/airflow/macros/__init__.py @@ -32,9 +32,7 @@ def ds_add(ds: str, days: int) -> str: Add or subtract days from a YYYY-MM-DD :param ds: anchor date in ``YYYY-MM-DD`` format to add to - :type ds: str :param days: number of days to add to the ds, you can use negative values - :type days: int >>> ds_add('2015-01-01', 5) '2015-01-06' @@ -53,11 +51,8 @@ def ds_format(ds: str, input_format: str, output_format: str) -> str: as specified in the
output format :param ds: input string which contains a date - :type ds: str :param input_format: input string format. E.g. %Y-%m-%d - :type input_format: str :param output_format: output string format E.g. %Y-%m-%d - :type output_format: str >>> ds_format('2015-01-01', "%Y-%m-%d", "%m-%d-%y") '01-01-15' @@ -73,10 +68,8 @@ def datetime_diff_for_humans(dt: Any, since: Optional[DateTime] = None) -> str: one and now. :param dt: The datetime to display the diff for - :type dt: datetime.datetime :param since: When to display the date from. If ``None`` then the diff is between ``dt`` and now. - :type since: None or DateTime :rtype: str """ import pendulum diff --git a/airflow/macros/hive.py b/airflow/macros/hive.py index 39c66cdec1229..fe5685fcf2536 100644 --- a/airflow/macros/hive.py +++ b/airflow/macros/hive.py @@ -26,22 +26,17 @@ def max_partition( Gets the max partition for a table. :param schema: The hive schema the table lives in - :type schema: str :param table: The hive table you are interested in, supports the dot notation as in "my_database.my_table", if a dot is found, the schema param is disregarded - :type table: str :param metastore_conn_id: The hive connection you are interested in. If your default is set you don't need to use this parameter. - :type metastore_conn_id: str :param filter_map: partition_key:partition_value map used for partition filtering, e.g. {'key1': 'value1', 'key2': 'value2'}. Only partitions matching all partition_key:partition_value pairs will be considered as candidates of max partition. - :type filter_map: dict :param field: the field to get the max value from. If there's only one partition field, this will be inferred - :type field: str >>> max_partition('airflow.static_babynames_partitioned') '2015-01-01' @@ -60,11 +55,8 @@ def _closest_date(target_dt, date_list, before_target=None): An optional parameter can be given to get the closest before or after. :param target_dt: The target date - :type target_dt: datetime.date :param date_list: The list of dates to search - :type date_list: list[datetime.date] :param before_target: closest before or after the target - :type before_target: bool or None :returns: The closest date :rtype: datetime.date or None """ @@ -85,15 +77,10 @@ def closest_ds_partition(table, ds, before=True, schema="default", metastore_con An optional parameter can be given to get the closest before or after. :param table: A hive table name - :type table: str :param ds: A datestamp ``%Y-%m-%d`` e.g. ``yyyy-mm-dd`` - :type ds: list[datetime.date] :param before: closest before (True), after (False) or either side of ds - :type before: bool or None :param schema: table schema - :type schema: str :param metastore_conn_id: which metastore connection to use - :type metastore_conn_id: str :returns: The closest date :rtype: str or None diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py index c294bbbe31caa..c7b6b31881834 100644 --- a/airflow/models/baseoperator.py +++ b/airflow/models/baseoperator.py @@ -271,34 +271,25 @@ class derived from this one results in the creation of a task object, be set by using the set_upstream and/or set_downstream methods. :param task_id: a unique, meaningful id for the task - :type task_id: str :param owner: the owner of the task. Using a meaningful description (e.g. user/person/team/role name) to clarify ownership is recommended. - :type owner: str :param email: the 'to' email address(es) used in email alerts. This can be a single email or multiple ones. 
Multiple addresses can be specified as a comma or semi-colon separated string or by passing a list of strings. - :type email: str or list[str] :param email_on_retry: Indicates whether email alerts should be sent when a task is retried - :type email_on_retry: bool :param email_on_failure: Indicates whether email alerts should be sent when a task failed - :type email_on_failure: bool :param retries: the number of retries that should be performed before failing the task - :type retries: int :param retry_delay: delay between retries, can be set as ``timedelta`` or ``float`` seconds, which will be converted into ``timedelta``, the default is ``timedelta(seconds=300)``. - :type retry_delay: datetime.timedelta or float :param retry_exponential_backoff: allow progressively longer waits between retries by using exponential backoff algorithm on retry delay (delay will be converted into seconds) - :type retry_exponential_backoff: bool :param max_retry_delay: maximum delay interval between retries, can be set as ``timedelta`` or ``float`` seconds, which will be converted into ``timedelta``. - :type max_retry_delay: datetime.timedelta or float :param start_date: The ``start_date`` for the task, determines the ``execution_date`` for the first task instance. The best practice is to have the start_date rounded @@ -315,13 +306,10 @@ class derived from this one results in the creation of a task object, ``TimeSensor`` and ``TimeDeltaSensor``. We advise against using dynamic ``start_date`` and recommend using fixed ones. Read the FAQ entry about start_date for more information. - :type start_date: datetime.datetime :param end_date: if specified, the scheduler won't go beyond this date - :type end_date: datetime.datetime :param depends_on_past: when set to true, task instances will run sequentially and only if the previous instance has succeeded or has been skipped. The task instance for the start_date is allowed to run. - :type depends_on_past: bool :param wait_for_downstream: when set to true, an instance of task X will wait for tasks immediately downstream of the previous instance of task X to finish successfully or be skipped before it runs. This is useful if the @@ -330,14 +318,11 @@ class derived from this one results in the creation of a task object, is forced to True wherever wait_for_downstream is used. Also note that only tasks *immediately* downstream of the previous task instance are waited for; the statuses of any tasks further downstream are ignored. - :type wait_for_downstream: bool :param dag: a reference to the dag the task is attached to (if any) - :type dag: airflow.models.DAG :param priority_weight: priority weight of this task against other tasks. This allows the executor to trigger higher priority tasks before others when things get backed up. Set priority_weight as a higher number for more important tasks. - :type priority_weight: int :param weight_rule: weighting method used for the effective total priority weight of the task. Options are: ``{ downstream | upstream | absolute }`` default is ``downstream`` @@ -360,17 +345,13 @@ class derived from this one results in the creation of a task object, significantly speeding up the task creation process for very large DAGs. Options can be set as string or using the constants defined in the static class ``airflow.utils.WeightRule`` - :type weight_rule: str :param queue: which queue to target when running this job. Not all executors implement queue management; the CeleryExecutor does support targeting specific queues.
- :type queue: str :param pool: the slot pool this task should run in, slot pools are a way to limit concurrency for certain tasks - :type pool: str :param pool_slots: the number of pool slots this task should use (>= 1) Values less than 1 are not allowed. - :type pool_slots: int :param sla: time by which the job is expected to succeed. Note that this represents the ``timedelta`` after the period is closed. For example if you set an SLA of 1 hour, the scheduler would send an email @@ -382,37 +363,29 @@ class derived from this one results in the creation of a task object, for future reference. All tasks that share the same SLA time get bundled in a single email, sent soon after that time. SLA notification are sent once and only once for each task instance. - :type sla: datetime.timedelta :param execution_timeout: max time allowed for the execution of this task instance, if it goes beyond it will raise and fail. - :type execution_timeout: datetime.timedelta :param on_failure_callback: a function to be called when a task instance of this task fails. a context dictionary is passed as a single parameter to this function. Context contains references to related objects to the task instance and is documented under the macros section of the API. - :type on_failure_callback: TaskStateChangeCallback :param on_execute_callback: much like the ``on_failure_callback`` except that it is executed right before the task is executed. - :type on_execute_callback: TaskStateChangeCallback :param on_retry_callback: much like the ``on_failure_callback`` except that it is executed when retries occur. - :type on_retry_callback: TaskStateChangeCallback :param on_success_callback: much like the ``on_failure_callback`` except that it is executed when the task succeeds. - :type on_success_callback: TaskStateChangeCallback :param pre_execute: a function to be called immediately before task execution, receiving a context dictionary; raising an exception will prevent the task from being executed. |experimental| - :type pre_execute: TaskPreExecuteHook :param post_execute: a function to be called immediately after task execution, receiving a context dictionary and task result; raising an exception will prevent the task from succeeding. |experimental| - :type post_execute: TaskPostExecuteHook :param trigger_rule: defines the rule by which dependencies are applied for the task to get triggered. Options are: ``{ all_success | all_failed | all_done | one_success | @@ -420,15 +393,11 @@ class derived from this one results in the creation of a task object, default is ``all_success``. Options can be set as string or using the constants defined in the static class ``airflow.utils.TriggerRule`` - :type trigger_rule: str :param resources: A map of resource parameter names (the argument names of the Resources constructor) to their values. - :type resources: dict :param run_as_user: unix username to impersonate while running the task - :type run_as_user: str :param max_active_tis_per_dag: When set, a task will be able to limit the concurrent runs across execution_dates. - :type max_active_tis_per_dag: int :param executor_config: Additional task-level configuration parameters that are interpreted by a specific executor. Parameters are namespaced by the name of executor. 
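As an illustration of the callback parameters just described, a failure callback receives the same context dictionary documented under the macros section (task and function names are invented)::

    from airflow.operators.bash import BashOperator

    def notify_failure(context):
        # `context` carries references to the task instance and its run.
        ti = context["ti"]
        print(f"{ti.task_id} failed for {context['ds']}")

    guarded = BashOperator(
        task_id="guarded_step",
        bash_command="false",
        on_failure_callback=notify_failure,
        trigger_rule="all_done",  # run regardless of upstream outcomes
    )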
@@ -443,28 +412,20 @@ class derived from this one results in the creation of a task object, } ) - :type executor_config: dict :param do_xcom_push: if True, an XCom is pushed containing the Operator's result - :type do_xcom_push: bool :param task_group: The TaskGroup to which the task should belong. This is typically provided when not using a TaskGroup as a context manager. - :type task_group: airflow.utils.task_group.TaskGroup :param doc: Add documentation or notes to your Task objects that is visible in Task Instance details View in the Webserver - :type doc: str :param doc_md: Add documentation (in Markdown format) or notes to your Task objects that is visible in Task Instance details View in the Webserver - :type doc_md: str :param doc_rst: Add documentation (in RST format) or notes to your Task objects that is visible in Task Instance details View in the Webserver - :type doc_rst: str :param doc_json: Add documentation (in JSON format) or notes to your Task objects that is visible in Task Instance details View in the Webserver - :type doc_json: str :param doc_yaml: Add documentation (in YAML format) or notes to your Task objects that is visible in Task Instance details View in the Webserver - :type doc_yaml: str """ # For derived classes to define which fields will get jinjaified @@ -1108,9 +1069,7 @@ def render_template_fields( Template all attributes listed in template_fields. Note this operation is irreversible. :param context: Dict with values to apply on content - :type context: dict :param jinja_env: Jinja environment - :type jinja_env: jinja2.Environment """ if not jinja_env: jinja_env = self.get_template_env() @@ -1143,14 +1102,10 @@ def render_template( be templated recursively. :param content: Content to template. Only strings can be templated (may be inside collection). - :type content: Any :param context: Dict with values to apply on templated content - :type context: dict :param jinja_env: Jinja environment. Can be provided to avoid re-creating Jinja environments during recursion. - :type jinja_env: jinja2.Environment :param seen_oids: template fields already rendered (to avoid RecursionError on circular dependencies) - :type seen_oids: set :return: Templated content """ if not jinja_env: @@ -1459,14 +1414,11 @@ def xcom_push( :param context: Execution Context Dictionary :type: Any :param key: A key for the XCom - :type key: str :param value: A value for the XCom. The value is pickled and stored in the database. - :type value: any pickleable object :param execution_date: if provided, the XCom will not be visible until this date. This can be used, for example, to send a message to a task on a future date without it being immediately visible. - :type execution_date: datetime """ context['ti'].xcom_push(key=key, value=value, execution_date=execution_date) @@ -1497,17 +1449,13 @@ def xcom_pull( available as a constant XCOM_RETURN_KEY. This key is automatically given to XComs returned by tasks (as opposed to being pushed manually). To remove the filter, pass key=None. - :type key: str :param task_ids: Only XComs from tasks with matching ids will be pulled. Can pass None to remove the filter. - :type task_ids: str or iterable of strings (representing task_ids) :param dag_id: If provided, only pulls XComs from this DAG. If None (default), the DAG of the calling task is used. - :type dag_id: str :param include_prior_dates: If False, only XComs from the current execution_date are returned. If True, XComs from previous dates are returned as well. 
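The ``xcom_push``/``xcom_pull`` pair described above is typically exercised from inside task callables; a minimal sketch with invented task ids, assumed to live inside a DAG context::

    from airflow.operators.python import PythonOperator

    def producer(**context):
        context["ti"].xcom_push(key="row_count", value=42)

    def consumer(**context):
        rows = context["ti"].xcom_pull(task_ids="produce", key="row_count")
        print(f"upstream produced {rows} rows")

    produce = PythonOperator(task_id="produce", python_callable=producer)
    consume = PythonOperator(task_id="consume", python_callable=consumer)
    produce >> consume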
- :type include_prior_dates: bool """ return context['ti'].xcom_pull( key=key, task_ids=task_ids, dag_id=dag_id, include_prior_dates=include_prior_dates @@ -1995,9 +1943,6 @@ def chain(*tasks: Union[DependencyMixin, Sequence[DependencyMixin]]) -> None: :param tasks: Individual and/or list of tasks, EdgeModifiers, XComArgs, or TaskGroups to set dependencies - :type tasks: List[airflow.models.BaseOperator], airflow.models.BaseOperator, - List[airflow.utils.EdgeModifier], airflow.utils.EdgeModifier, List[airflow.models.XComArg], XComArg, - List[airflow.utils.TaskGroup], or airflow.utils.TaskGroup """ for index, up_task in enumerate(tasks[:-1]): down_task = tasks[index + 1] @@ -2115,9 +2060,7 @@ def cross_downstream( t3.set_downstream(x3) :param from_tasks: List of tasks or XComArgs to start from. - :type from_tasks: List[airflow.models.BaseOperator] or List[airflow.models.XComArg] :param to_tasks: List of tasks or XComArgs to set as downstream dependencies. - :type to_tasks: List[airflow.models.BaseOperator] or List[airflow.models.XComArg] """ for task in from_tasks: task.set_downstream(to_tasks) diff --git a/airflow/models/connection.py b/airflow/models/connection.py index 3f09d98b56b37..b6cde8d84998c 100644 --- a/airflow/models/connection.py +++ b/airflow/models/connection.py @@ -71,26 +71,16 @@ class Connection(Base, LoggingMixin): For more information on how to use this class, see: :doc:`/howto/connection` :param conn_id: The connection ID. - :type conn_id: str :param conn_type: The connection type. - :type conn_type: str :param description: The connection description. - :type description: str :param host: The host. - :type host: str :param login: The login. - :type login: str :param password: The password. - :type password: str :param schema: The schema. - :type schema: str :param port: The port number. - :type port: int :param extra: Extra metadata. Non-standard data such as private/SSH keys can be saved here. JSON encoded object. - :type extra: str :param uri: URI address describing connection parameters. - :type uri: str """ EXTRA_KEY = '__extra__' diff --git a/airflow/models/dag.py b/airflow/models/dag.py index b29be7dc3b659..048cbf96e05b2 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -195,90 +195,65 @@ class DAG(LoggingMixin): :param dag_id: The id of the DAG; must consist exclusively of alphanumeric characters, dashes, dots and underscores (all ASCII) - :type dag_id: str :param description: The description for the DAG to e.g. be shown on the webserver - :type description: str :param schedule_interval: Defines how often that DAG runs, this timedelta object gets added to your latest task instance's execution_date to figure out the next schedule - :type schedule_interval: datetime.timedelta or - dateutil.relativedelta.relativedelta or str that acts as a cron - expression :param timetable: Specify which timetable to use (in which case schedule_interval must not be set). See :doc:`/howto/timetable` for more information - :type timetable: airflow.timetables.base.Timetable :param start_date: The timestamp from which the scheduler will attempt to backfill - :type start_date: datetime.datetime :param end_date: A date beyond which your DAG won't run, leave to None for open ended scheduling - :type end_date: datetime.datetime :param template_searchpath: This list of folders (non relative) defines where jinja will look for your templates. Order matters. 
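The ``chain`` and ``cross_downstream`` helpers in the hunks above cut dependency-wiring boilerplate; for example (``DummyOperator`` used purely for illustration, inside a DAG context)::

    from airflow.models.baseoperator import chain, cross_downstream
    from airflow.operators.dummy import DummyOperator

    t1, t2, t3, t4 = (DummyOperator(task_id=f"t{i}") for i in range(1, 5))

    chain(t1, t2, t3)                     # same as t1 >> t2 >> t3
    cross_downstream([t1, t2], [t3, t4])  # t1 >> t3, t1 >> t4, t2 >> t3, t2 >> t4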
Note that jinja/airflow includes the path of your DAG file by default - :type template_searchpath: str or list[str] :param template_undefined: Template undefined type. - :type template_undefined: jinja2.StrictUndefined :param user_defined_macros: a dictionary of macros that will be exposed in your jinja templates. For example, passing ``dict(foo='bar')`` to this argument allows you to ``{{ foo }}`` in all jinja templates related to this DAG. Note that you can pass any type of object here. - :type user_defined_macros: dict :param user_defined_filters: a dictionary of filters that will be exposed in your jinja templates. For example, passing ``dict(hello=lambda name: 'Hello %s' % name)`` to this argument allows you to ``{{ 'world' | hello }}`` in all jinja templates related to this DAG. - :type user_defined_filters: dict :param default_args: A dictionary of default parameters to be used as constructor keyword parameters when initialising operators. Note that operators have the same hook, and precede those defined here, meaning that if your dict contains `'depends_on_past': True` here and `'depends_on_past': False` in the operator's call `default_args`, the actual value will be `False`. - :type default_args: dict :param params: a dictionary of DAG level parameters that are made accessible in templates, namespaced under `params`. These params can be overridden at the task level. - :type params: dict :param max_active_tasks: the number of task instances allowed to run concurrently - :type max_active_tasks: int :param max_active_runs: maximum number of active DAG runs, beyond this number of DAG runs in a running state, the scheduler won't create new active DAG runs - :type max_active_runs: int :param dagrun_timeout: specify how long a DagRun should be up before timing out / failing, so that new DagRuns can be created. The timeout is only enforced for scheduled DagRuns. - :type dagrun_timeout: datetime.timedelta :param sla_miss_callback: specify a function to call when reporting SLA timeouts. See :ref:`sla_miss_callback` for more information about the function signature and parameters that are passed to the callback. - :type sla_miss_callback: callable :param default_view: Specify DAG default view (tree, graph, duration, gantt, landing_times), default tree - :type default_view: str :param orientation: Specify DAG orientation in graph view (LR, TB, RL, BT), default LR - :type orientation: str :param catchup: Perform scheduler catchup (or only run latest)? Defaults to True - :type catchup: bool :param on_failure_callback: A function to be called when a DagRun of this dag fails. A context dictionary is passed as a single parameter to this function. - :type on_failure_callback: callable :param on_success_callback: Much like the ``on_failure_callback`` except that it is executed when the dag succeeds. - :type on_success_callback: callable :param access_control: Specify optional DAG-level actions, e.g., "{'role1': {'can_read'}, 'role2': {'can_read', 'can_edit'}}" - :type access_control: dict :param is_paused_upon_creation: Specifies if the dag is paused when created for the first time. If the dag exists already, this flag will be ignored. If this optional parameter is not specified, the global config setting will be used. 
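Pulling the DAG-level parameters above together, a representative (hypothetical) declaration might be::

    from datetime import datetime, timedelta
    from airflow import DAG

    etl = DAG(
        dag_id="etl_demo",
        schedule_interval="@daily",
        start_date=datetime(2021, 1, 1),
        catchup=False,
        max_active_runs=1,
        dagrun_timeout=timedelta(hours=2),
        default_args={"retries": 2},             # inherited by every operator
        user_defined_macros={"env": "staging"},  # usable as {{ env }} in templates
    )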
- :type is_paused_upon_creation: bool or None :param jinja_environment_kwargs: additional configuration options to be passed to Jinja ``Environment`` for template rendering @@ -294,13 +269,10 @@ class DAG(LoggingMixin): **See**: `Jinja Environment documentation `_ - :type jinja_environment_kwargs: dict :param render_template_as_native_obj: If True, uses a Jinja ``NativeEnvironment`` to render templates as native Python types. If False, a Jinja ``Environment`` is used to render templates as string values. - :type render_template_as_native_obj: bool :param tags: List of tags to help filtering DAGs in the UI. - :type tags: List[str] """ _comps = { @@ -870,9 +842,7 @@ def get_run_dates(self, start_date, end_date=None): dag's schedule interval. Returned dates can be used for execution dates. :param start_date: The start date of the interval. - :type start_date: datetime :param end_date: The end date of the interval. Defaults to ``timezone.utcnow()``. - :type end_date: datetime :return: A list of dates within the interval following the dag's schedule. :rtype: list """ @@ -1171,7 +1141,6 @@ def get_num_active_runs(self, external_trigger=None, only_running=True, session= Returns the number of active "running" dag runs :param external_trigger: True for externally triggered active dag runs - :type external_trigger: bool :param session: :return: number greater than 0 for active dag runs """ @@ -1651,23 +1620,14 @@ def set_task_instance_state( in failed or upstream_failed state. :param task_id: Task ID of the TaskInstance - :type task_id: str :param execution_date: Execution date of the TaskInstance - :type execution_date: Optional[datetime.datetime] :param dag_run_id: The run_id of the TaskInstance - :type dag_run_id: Optional[str] :param state: State to set the TaskInstance to - :type state: TaskInstanceState :param upstream: Include all upstream tasks of the given task_id - :type upstream: bool :param downstream: Include all downstream tasks of the given task_id - :type downstream: bool :param future: Include all future TaskInstances of the given task_id - :type future: bool :param commit: Commit changes - :type commit: bool :param past: Include all past TaskInstances of the given task_id - :type past: bool """ from airflow.api.common.mark_tasks import set_state @@ -1837,32 +1797,20 @@ def clear( a specified date range. :param task_ids: List of task ids to clear - :type task_ids: List[str] :param start_date: The minimum execution_date to clear - :type start_date: datetime.datetime or None :param end_date: The maximum execution_date to clear - :type end_date: datetime.datetime or None :param only_failed: Only clear failed tasks - :type only_failed: bool :param only_running: Only clear running tasks. - :type only_running: bool :param confirm_prompt: Ask for confirmation - :type confirm_prompt: bool :param include_subdags: Clear tasks in subdags and clear external tasks indicated by ExternalTaskMarker - :type include_subdags: bool :param include_parentdag: Clear tasks in the parent dag of the subdag. - :type include_parentdag: bool :param dag_run_state: state to set DagRun to. If set to False, dagrun state will not be changed. :param dry_run: Find the tasks to clear but don't clear them. 
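Reusing the ``etl`` DAG sketched earlier, the ``clear`` signature documented above could be exercised like this (task id and date window invented)::

    from datetime import datetime

    # Clear only the failed instances of one task over a one-week window.
    etl.clear(
        task_ids=["load_table"],
        start_date=datetime(2021, 1, 1),
        end_date=datetime(2021, 1, 7),
        only_failed=True,
        confirm_prompt=False,
    )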
- :type dry_run: bool :param session: The sqlalchemy session to use - :type session: sqlalchemy.orm.session.Session :param dag_bag: The DagBag used to find the dag's subdags (Optional) - :type dag_bag: airflow.models.dagbag.DagBag :param exclude_task_ids: A set of ``task_id`` that should not be cleared - :type exclude_task_ids: frozenset """ if get_tis: warnings.warn( @@ -2034,7 +1982,6 @@ def partial_subset( :param task_ids_or_regex: Either a list of task_ids, or a regex to match against task ids (as a string, or compiled regex pattern). - :type task_ids_or_regex: [str] or str or re.Pattern :param include_downstream: Include all downstream tasks of matched tasks, in addition to matched tasks. :param include_upstream: Include all upstream tasks of matched tasks, @@ -2175,7 +2122,6 @@ def add_task(self, task): Add a task to the DAG :param task: the task you want to add - :type task: task """ if not self.start_date and not task.start_date: raise AirflowException("DAG is missing the start_date parameter") @@ -2212,7 +2158,6 @@ def add_tasks(self, tasks): Add a list of tasks to the DAG :param tasks: a list of tasks you want to add - :type tasks: list of tasks """ for task in tasks: self.add_task(task) @@ -2249,31 +2194,19 @@ def run( Runs the DAG. :param start_date: the start date of the range to run - :type start_date: datetime.datetime :param end_date: the end date of the range to run - :type end_date: datetime.datetime :param mark_success: True to mark jobs as succeeded without running them - :type mark_success: bool :param local: True to run the tasks using the LocalExecutor - :type local: bool :param executor: The executor instance to run the tasks - :type executor: airflow.executor.base_executor.BaseExecutor :param donot_pickle: True to avoid pickling DAG object and send to workers - :type donot_pickle: bool :param ignore_task_deps: True to skip upstream tasks - :type ignore_task_deps: bool :param ignore_first_depends_on_past: True to ignore depends_on_past dependencies for the first set of tasks only - :type ignore_first_depends_on_past: bool :param pool: Resource pool to use - :type pool: str :param delay_on_limit_secs: Time in seconds to wait before next attempt to run dag run when max_active_runs limit has been reached - :type delay_on_limit_secs: float :param verbose: Make logging output more verbose - :type verbose: bool :param conf: user defined dictionary passed from CLI - :type conf: dict :param rerun_failed_tasks: :type: bool :param run_backwards: @@ -2341,27 +2274,16 @@ def create_dagrun( Returns the dag run.
:param run_id: defines the run id for this dag run - :type run_id: str :param run_type: type of DagRun - :type run_type: airflow.utils.types.DagRunType :param execution_date: the execution date of this dag run - :type execution_date: datetime.datetime :param state: the state of the dag run - :type state: airflow.utils.state.DagRunState :param start_date: the date this dag run should be evaluated - :type start_date: datetime :param external_trigger: whether this dag run is externally triggered - :type external_trigger: bool :param conf: Dict containing configuration/parameters to pass to the DAG - :type conf: dict :param creating_job_id: id of the job creating this DagRun - :type creating_job_id: int :param session: database session - :type session: sqlalchemy.orm.session.Session :param dag_hash: Hash of Serialized DAG - :type dag_hash: str :param data_interval: Data interval of the DagRun - :type data_interval: tuple[datetime, datetime] | None """ if run_id: # Infer run_type from run_id if needed. if not isinstance(run_id, str): @@ -2439,7 +2361,6 @@ def bulk_write_to_db(cls, dags: Collection["DAG"], session=NEW_SESSION): Note that this method can be called for both DAGs and SubDAGs. A SubDag is actually a SubDagOperator. :param dags: the DAG objects to save to the DB - :type dags: List[airflow.models.dag.DAG] :return: None """ if not dags: @@ -2570,7 +2491,6 @@ def deactivate_unknown_dags(active_dag_ids, session=NEW_SESSION): marked as active in the ORM :param active_dag_ids: list of DAG IDs that are active - :type active_dag_ids: list[unicode] :return: None """ if len(active_dag_ids) == 0: @@ -2589,7 +2509,6 @@ def deactivate_stale_dags(expiration_date, session=NEW_SESSION): :param expiration_date: set inactive DAGs that were touched before this time - :type expiration_date: datetime :return: None """ for dag in ( @@ -2614,11 +2533,8 @@ def get_num_task_instances(dag_id, task_ids=None, states=None, session=NEW_SESSI :param session: ORM session :param dag_id: ID of the DAG to get the task concurrency of - :type dag_id: unicode :param task_ids: A list of valid task IDs for the given DAG - :type task_ids: list[unicode] :param states: A list of states to filter by if supplied - :type states: list[state] :return: The number of running tasks :rtype: int """ @@ -3002,9 +2918,7 @@ def dag(*dag_args, **dag_kwargs): Accepts kwargs for operator kwarg. Can be used to parameterize DAGs. :param dag_args: Arguments for DAG object - :type dag_args: Any :param dag_kwargs: Kwargs for DAG object. - :type dag_kwargs: Any """ def wrapper(f: Callable): diff --git a/airflow/models/dagbag.py b/airflow/models/dagbag.py index 09e288a6ee783..eac62dcd39506 100644 --- a/airflow/models/dagbag.py +++ b/airflow/models/dagbag.py @@ -77,20 +77,15 @@ class DagBag(LoggingMixin): independent settings sets. :param dag_folder: the folder to scan to find DAGs - :type dag_folder: unicode :param include_examples: whether to include the examples that ship with airflow or not - :type include_examples: bool :param include_smart_sensor: whether to include the smart sensor native DAGs that create the smart sensor operators for whole cluster - :type include_smart_sensor: bool :param read_dags_from_db: Read DAGs from DB if ``True`` is passed. If ``False`` DAGs are read from python files. - :type read_dags_from_db: bool :param load_op_links: Should the extra operator link be loaded via plugins when de-serializing the DAG? 
This flag is set to False in Scheduler so that Extra Operator links are not loaded to not run User code in Scheduler. - :type load_op_links: bool """ DAGBAG_IMPORT_TIMEOUT = conf.getfloat('core', 'DAGBAG_IMPORT_TIMEOUT') @@ -173,7 +168,6 @@ def get_dag(self, dag_id, session: Session = None): Gets the DAG out of the dictionary, and refreshes it if expired :param dag_id: DAG Id - :type dag_id: str """ # Avoid circular import from airflow.models.dag import DagModel diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index a8e44c22c0aad..052ae0a657e3c 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -224,7 +224,6 @@ def refresh_from_db(self, session: Session = NEW_SESSION) -> None: Reloads the current dagrun from the database :param session: database session - :type session: Session """ dr = session.query(DagRun).filter(DagRun.dag_id == self.dag_id, DagRun.run_id == self.run_id).one() self.id = dr.id @@ -317,26 +316,16 @@ def find( Returns a set of dag runs for the given search criteria. :param dag_id: the dag_id or list of dag_id to find dag runs for - :type dag_id: str or list[str] :param run_id: defines the run id for this dag run - :type run_id: str :param run_type: type of DagRun - :type run_type: airflow.utils.types.DagRunType :param execution_date: the execution date - :type execution_date: datetime.datetime or list[datetime.datetime] :param state: the state of the dag run - :type state: DagRunState :param external_trigger: whether this dag run is externally triggered - :type external_trigger: bool :param no_backfills: return no backfills (True), return all (False). Defaults to False - :type no_backfills: bool :param session: database session - :type session: sqlalchemy.orm.session.Session :param execution_start_date: dag run that was executed from this date - :type execution_start_date: datetime.datetime :param execution_end_date: dag run that was executed until this date - :type execution_end_date: datetime.datetime """ qry = session.query(cls) dag_ids = [dag_id] if isinstance(dag_id, str) else dag_id @@ -383,13 +372,9 @@ def find_duplicate( *None* is returned if no such DAG run is found. :param dag_id: the dag_id to find duplicates for - :type dag_id: str :param run_id: defines the run id for this dag run - :type run_id: str :param execution_date: the execution date - :type execution_date: datetime.datetime :param session: database session - :type session: sqlalchemy.orm.session.Session """ return ( session.query(cls) @@ -451,9 +436,7 @@ def get_task_instance( Returns the task instance specified by task_id for this dag run :param task_id: the task id - :type task_id: str :param session: Sqlalchemy ORM Session - :type session: Session """ return ( session.query(TI) @@ -508,10 +491,8 @@ def update_state( of its TaskInstances. :param session: Sqlalchemy ORM Session - :type session: Session :param execute_callbacks: Should dag callbacks (success/failure, SLA etc) be invoked directly (default: true) or recorded as a pending request in the ``callback`` property - :type execute_callbacks: bool :return: Tuple containing tis that can be scheduled in the current loop & `callback` that needs to be executed """ @@ -785,7 +766,6 @@ def verify_integrity(self, session: Session = NEW_SESSION): database yet. It will set state to removed or add the task if required. 
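The ``DagRun.find`` criteria above compose into simple lookups; a small sketch (hypothetical ``dag_id``) using an explicit session::

    from airflow.models import DagRun
    from airflow.utils.session import create_session
    from airflow.utils.state import State

    with create_session() as session:
        # All currently running runs of one DAG.
        running = DagRun.find(dag_id="etl_demo", state=State.RUNNING, session=session)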
:param session: Sqlalchemy ORM Session - :type session: Session """ from airflow.settings import task_instance_mutation_hook @@ -864,11 +844,8 @@ def get_run(session: Session, dag_id: str, execution_date: datetime) -> Optional :meta private: :param session: Sqlalchemy ORM Session - :type session: Session :param dag_id: DAG ID - :type dag_id: unicode :param execution_date: execution date - :type execution_date: datetime :return: DagRun corresponding to the given dag_id and execution date if one exists. None otherwise. :rtype: airflow.models.DagRun diff --git a/airflow/models/param.py b/airflow/models/param.py index f6812c5ac4883..ba33b87a6696b 100644 --- a/airflow/models/param.py +++ b/airflow/models/param.py @@ -34,12 +34,9 @@ class Param: it always validates and returns the default value. :param default: The value this Param object holds - :type default: Any :param description: Optional help text for the Param - :type description: str :param schema: The validation schema of the Param, if not given then all kwargs except default & description will form the schema - :type schema: dict """ CLASS_IDENTIFIER = '__class' @@ -61,10 +58,8 @@ def resolve(self, value: Any = NOTSET, suppress_exception: bool = False) -> Any: In future release we will require the value to be json-serializable. :param value: The value to be updated for the Param - :type value: Any :param suppress_exception: To raise an exception or not when the validations fails. If true and validations fails, the return value would be None. - :type suppress_exception: bool """ try: json.dumps(value) @@ -111,9 +106,7 @@ class ParamsDict(MutableMapping[str, Any]): def __init__(self, dict_obj: Optional[Dict] = None, suppress_exception: bool = False): """ :param dict_obj: A dict or dict like object to init ParamsDict - :type dict_obj: Optional[dict] :param suppress_exception: Flag to suppress value exceptions while initializing the ParamsDict - :type suppress_exception: bool """ params_dict: Dict[str, Param] = {} dict_obj = dict_obj or {} @@ -149,10 +142,8 @@ def __setitem__(self, key: str, value: Any) -> None: Param's type only. :param key: A key which needs to be inserted or updated in the dict - :type key: str :param value: A value which needs to be set against the key. It could be of any type but will be converted and stored as a Param object eventually. - :type value: Any """ if isinstance(value, Param): param = value @@ -174,7 +165,6 @@ def __getitem__(self, key: str) -> Any: resolve method as well on the Param object. :param key: The key to fetch - :type key: str """ param = self.__dict[key] return param.resolve(suppress_exception=self.suppress_exception) @@ -227,11 +217,8 @@ class DagParam: EmailOperator(subject=dag.param('subject', 'Hi from Airflow!')) :param current_dag: Dag being used for parameter. - :type current_dag: airflow.models.DAG :param name: key value which is used to set the parameter - :type name: str :param default: Default value used if no parameter was set. - :type default: Any """ def __init__(self, current_dag, name: str, default: Optional[Any] = None): diff --git a/airflow/models/sensorinstance.py b/airflow/models/sensorinstance.py index f8e6ef82919c0..78892f2ade20f 100644 --- a/airflow/models/sensorinstance.py +++ b/airflow/models/sensorinstance.py @@ -81,7 +81,6 @@ def get_classpath(obj): Get the object dotted class path. Used for getting operator classpath. 
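The ``Param`` behaviour described in the hunk above can be seen with a small JSON-schema example (values invented; kwargs other than ``default`` and ``description`` form the schema)::

    from airflow.models.param import Param

    batch_size = Param(5, type="integer", minimum=1, description="rows per batch")

    batch_size.resolve()                                  # -> 5, the validated default
    batch_size.resolve(value=10)                          # -> 10
    batch_size.resolve(value=0, suppress_exception=True)  # -> None instead of raising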
:param obj: - :type obj: :return: The class path of input object :rtype: str """ @@ -99,12 +98,9 @@ def register(cls, ti, poke_context, execution_context, session=None): :param ti: The task instance for the sensor to be registered. :type: ti: :param poke_context: Context used for sensor poke function. - :type poke_context: dict :param execution_context: Context used for execute sensor such as timeout setting and email configuration. - :type execution_context: dict :param session: SQLAlchemy ORM Session - :type session: Session :return: True if the ti was registered successfully. :rtype: Boolean """ diff --git a/airflow/models/serialized_dag.py b/airflow/models/serialized_dag.py index 0a68587259e34..7b7893692f0dc 100644 --- a/airflow/models/serialized_dag.py +++ b/airflow/models/serialized_dag.py @@ -250,9 +250,7 @@ def bulk_sync_to_db(dags: List[DAG], session: Session = None): DAG is saved in a separate database query. :param dags: the DAG objects to save to the DB - :type dags: List[airflow.models.dag.DAG] :param session: ORM Session - :type session: Session :return: None """ for dag in dags: @@ -269,9 +267,7 @@ def get_last_updated_datetime(cls, dag_id: str, session: Session = None) -> Opti in serialized_dag table :param dag_id: DAG ID - :type dag_id: str :param session: ORM Session - :type session: Session """ return session.query(cls.last_updated).filter(cls.dag_id == dag_id).scalar() @@ -282,7 +278,6 @@ def get_max_last_updated_datetime(cls, session: Session = None) -> Optional[date Get the maximum date when any DAG was last updated in serialized_dag table :param session: ORM Session - :type session: Session """ return session.query(func.max(cls.last_updated)).scalar() @@ -293,9 +288,7 @@ def get_latest_version_hash(cls, dag_id: str, session: Session = None) -> Option Get the latest DAG version for a given DAG ID. :param dag_id: DAG ID - :type dag_id: str :param session: ORM Session - :type session: Session :return: DAG Hash, or None if the DAG is not found :rtype: str | None """ @@ -308,7 +301,6 @@ def get_dag_dependencies(cls, session: Session = None) -> Dict[str, List['DagDep Get the dependencies between DAGs :param session: ORM Session - :type session: Session """ if session.bind.dialect.name in ["sqlite", "mysql"]: query = session.query(cls.dag_id, func.json_extract(cls.data, "$.dag.dag_dependencies")) diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index 48a4da0160d46..075027ce81566 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -629,39 +629,24 @@ def generate_command( Generates the shell command required to execute this task instance. :param dag_id: DAG ID - :type dag_id: str :param task_id: Task ID - :type task_id: str :param run_id: The run_id of this task's DagRun - :type run_id: datetime :param mark_success: Whether to mark the task as successful - :type mark_success: bool :param ignore_all_deps: Ignore all ignorable dependencies. Overrides the other ignore_* parameters. - :type ignore_all_deps: bool :param ignore_depends_on_past: Ignore depends_on_past parameter of DAGs (e.g. 
for Backfills) - :type ignore_depends_on_past: bool :param ignore_task_deps: Ignore task-specific dependencies such as depends_on_past and trigger rule - :type ignore_task_deps: bool :param ignore_ti_state: Ignore the task instance's previous failure/success - :type ignore_ti_state: bool :param local: Whether to run the task locally - :type local: bool :param pickle_id: If the DAG was serialized to the DB, the ID associated with the pickled DAG - :type pickle_id: Optional[int] :param file_path: path to the file containing the DAG definition - :type file_path: Optional[str] :param raw: raw mode (needs more details) - :type raw: Optional[bool] :param job_id: job ID (needs more details) - :type job_id: Optional[int] :param pool: the Airflow pool that the task should run in - :type pool: Optional[str] :param cfg_path: the Path to the configuration file - :type cfg_path: Optional[str] :return: shell command that can be used to run the task instance :rtype: list[str] """ @@ -724,7 +709,6 @@ def current_state(self, session=NEW_SESSION) -> str: a new session is used. :param session: SQLAlchemy ORM Session - :type session: Session """ ti = ( session.query(TaskInstance) @@ -747,7 +731,6 @@ def error(self, session=NEW_SESSION): Forces the task instance's state to FAILED in the database. :param session: SQLAlchemy ORM Session - :type session: Session """ self.log.error("Recording the task instance as FAILED") self.state = State.FAILED @@ -760,11 +743,9 @@ def refresh_from_db(self, session=NEW_SESSION, lock_for_update=False) -> None: Refreshes the task instance from the database based on the primary key :param session: SQLAlchemy ORM Session - :type session: Session :param lock_for_update: if True, indicates that the database should lock the TaskInstance (issuing a FOR UPDATE clause) until the session is committed. - :type lock_for_update: bool """ self.log.debug("Refreshing TaskInstance %s from DB", self) @@ -817,9 +798,7 @@ def refresh_from_task(self, task: "BaseOperator", pool_override=None): Copy common attributes from the given task. :param task: The task object to copy from - :type task: airflow.models.BaseOperator :param pool_override: Use the pool_override instead of task's pool - :type pool_override: str """ self.task = task self.queue = task.queue @@ -837,7 +816,6 @@ def clear_xcom_data(self, session=NEW_SESSION): Clears all XCom data from the database for the task instance :param session: SQLAlchemy ORM Session - :type session: Session """ self.log.debug("Clearing XCom data") XCom.clear( @@ -859,9 +837,7 @@ def set_state(self, state: Optional[str], session=NEW_SESSION): Set TaskInstance state. :param state: State to set for the TI - :type state: str :param session: SQLAlchemy ORM Session - :type session: Session """ current_time = timezone.utcnow() self.log.debug("Setting task state for %s to %s", self, state) @@ -892,7 +868,6 @@ def are_dependents_done(self, session=NEW_SESSION): if the task DROPs and recreates a table. :param session: SQLAlchemy ORM Session - :type session: Session """ task = self.task @@ -1046,12 +1021,9 @@ def are_dependencies_met(self, dep_context=None, session=NEW_SESSION, verbose=Fa :param dep_context: The execution context that determines the dependencies that should be evaluated. 
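The refresh/state methods in the hunks above combine naturally; assuming ``ti`` is a ``TaskInstance`` obtained elsewhere, a hypothetical inspection could read::

    from airflow.utils.state import State

    ti.refresh_from_db()
    if ti.current_state() == State.UP_FOR_RETRY:
        ti.set_state(State.FAILED)  # force-fail instead of waiting on a retry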
- :type dep_context: DepContext :param session: database session - :type session: sqlalchemy.orm.session.Session :param verbose: whether log details on failed dependencies on info or debug log level - :type verbose: bool """ dep_context = dep_context or DepContext() failed = False @@ -1181,27 +1153,16 @@ def check_and_change_state_before_execution( executed, in preparation for _run_raw_task :param verbose: whether to turn on more verbose logging - :type verbose: bool :param ignore_all_deps: Ignore all of the non-critical dependencies, just runs - :type ignore_all_deps: bool :param ignore_depends_on_past: Ignore depends_on_past DAG attribute - :type ignore_depends_on_past: bool :param ignore_task_deps: Don't check the dependencies of this TaskInstance's task - :type ignore_task_deps: bool :param ignore_ti_state: Disregards previous task instance state - :type ignore_ti_state: bool :param mark_success: Don't run the task, mark its state as success - :type mark_success: bool :param test_mode: Doesn't record success or failure in the DB - :type test_mode: bool :param job_id: Job (BackfillJob / LocalTaskJob / SchedulerJob) ID - :type job_id: str :param pool: specifies the pool to use to run the task instance - :type pool: str :param external_executor_id: The identifier of the celery executor - :type external_executor_id: str :param session: SQLAlchemy ORM Session - :type session: Session :return: whether the state was changed to running or not :rtype: bool """ @@ -1342,13 +1303,9 @@ def _run_raw_task( only after another function changes the state to running. :param mark_success: Don't run the task, mark its state as success - :type mark_success: bool :param test_mode: Doesn't record success or failure in the DB - :type test_mode: bool :param pool: specifies the pool to use to run the task instance - :type pool: str :param session: SQLAlchemy ORM Session - :type session: Session """ self.test_mode = test_mode self.refresh_from_task(self.task, pool_override=pool) @@ -2163,12 +2120,10 @@ def xcom_push( Make an XCom available for tasks to pull. :param key: Key to store the value under. - :type key: str :param value: Value to store. What types are possible depends on whether ``enable_xcom_pickling`` is true or not. If so, this can be any picklable object; only be JSON-serializable may be used otherwise. :param execution_date: Deprecated parameter that has no effect. - :type execution_date: datetime """ if execution_date is not None: self_execution_date = self.get_dagrun(session).execution_date @@ -2216,19 +2171,14 @@ def xcom_pull( available as a constant XCOM_RETURN_KEY. This key is automatically given to XComs returned by tasks (as opposed to being pushed manually). To remove the filter, pass key=None. - :type key: str :param task_ids: Only XComs from tasks with matching ids will be pulled. Can pass None to remove the filter. - :type task_ids: str or iterable of strings (representing task_ids) :param dag_id: If provided, only pulls XComs from this DAG. If None (default), the DAG of the calling task is used. - :type dag_id: str :param include_prior_dates: If False, only XComs from the current execution_date are returned. If True, XComs from previous dates are returned as well. 
- :type include_prior_dates: bool :param session: Sqlalchemy ORM Session - :type session: Session """ if dag_id is None: dag_id = self.dag_id diff --git a/airflow/models/taskreschedule.py b/airflow/models/taskreschedule.py index 01f3aaeeab655..518f1e77ff65f 100644 --- a/airflow/models/taskreschedule.py +++ b/airflow/models/taskreschedule.py @@ -98,14 +98,10 @@ def query_for_task_instance(task_instance, descending=False, session=None, try_n Returns query for task reschedules for a given the task instance. :param session: the database session object - :type session: sqlalchemy.orm.session.Session :param task_instance: the task instance to find task reschedules for - :type task_instance: airflow.models.TaskInstance :param descending: If True then records are returned in descending order - :type descending: bool :param try_number: Look for TaskReschedule of the given try_number. Default is None which looks for the same try_number of the given task_instance. - :type try_number: int """ if try_number is None: try_number = task_instance.try_number @@ -130,12 +126,9 @@ def find_for_task_instance(task_instance, session=None, try_number=None): in ascending order. :param session: the database session object - :type session: sqlalchemy.orm.session.Session :param task_instance: the task instance to find task reschedules for - :type task_instance: airflow.models.TaskInstance :param try_number: Look for TaskReschedule of the given try_number. Default is None which looks for the same try_number of the given task_instance. - :type try_number: int """ return TaskReschedule.query_for_task_instance( task_instance, session=session, try_number=try_number diff --git a/airflow/models/variable.py b/airflow/models/variable.py index 6a478edca842b..0904ddb23e6e9 100644 --- a/airflow/models/variable.py +++ b/airflow/models/variable.py @@ -100,10 +100,8 @@ def setdefault(cls, key, default, description=None, deserialize_json=False): for a key, and if it isn't there, stores the default value and returns it. :param key: Dict key for this Variable - :type key: str :param default: Default value to set and return if the variable isn't already in the DB - :type default: Mixed :param deserialize_json: Store this as a JSON encoded value in the DB and un-encode it when retrieving a value :return: Mixed diff --git a/airflow/models/xcom.py b/airflow/models/xcom.py index 109540826b869..4402fb4aa61e7 100644 --- a/airflow/models/xcom.py +++ b/airflow/models/xcom.py @@ -116,7 +116,6 @@ def set( :param run_id: DAG run ID for the task. :param session: Database session. If not given, a new session will be created for this function. - :type session: sqlalchemy.orm.session.Session """ @overload @@ -215,7 +214,6 @@ def get_one( returned regardless of the run it belongs to. :param session: Database session. If not given, a new session will be created for this function. - :type session: sqlalchemy.orm.session.Session """ @overload @@ -310,7 +308,6 @@ def get_many( returned regardless of the run it belongs to. :param session: Database session. If not given, a new session will be created for this function. - :type session: sqlalchemy.orm.session.Session """ @overload @@ -410,7 +407,6 @@ def clear(cls, *, dag_id: str, task_id: str, run_id: str, session: Optional[Sess :param run_id: ID of DAG run to clear the XCom for. :param session: Database session. If not given, a new session will be created for this function. 
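``Variable.setdefault`` from the hunk above behaves like its dict namesake; a small illustrative use (key and default invented)::

    from airflow.models import Variable

    # Returns the stored value, or stores and returns the default on first use.
    threshold = Variable.setdefault("score_threshold", default=0.75)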
- :type session: sqlalchemy.orm.session.Session """ @overload diff --git a/airflow/models/xcom_arg.py b/airflow/models/xcom_arg.py index 772a3c8127fe9..1e37e91f0f978 100644 --- a/airflow/models/xcom_arg.py +++ b/airflow/models/xcom_arg.py @@ -56,9 +56,7 @@ class XComArg(DependencyMixin): op2 = MyOperator(my_text_message=f"the value is {xcomarg['topic']}") :param operator: operator to which the XComArg belongs - :type operator: airflow.models.baseoperator.BaseOperator :param key: key value which is used for xcom_pull (key in the XCom table) - :type key: str """ def __init__(self, operator: "Union[BaseOperator, MappedOperator]", key: str = XCOM_RETURN_KEY): diff --git a/airflow/operators/bash.py b/airflow/operators/bash.py index 757214300a4ca..0f4c8b305b95f 100644 --- a/airflow/operators/bash.py +++ b/airflow/operators/bash.py @@ -39,26 +39,20 @@ class BashOperator(BaseOperator): :param bash_command: The command, set of commands or reference to a bash script (must be '.sh') to be executed. (templated) - :type bash_command: str :param env: If env is not None, it must be a dict that defines the environment variables for the new process; these are used instead of inheriting the current process environment, which is the default behavior. (templated) - :type env: dict :param append_env: If False (default), uses the environment variables passed in env params and does not inherit the current process environment. If True, inherits the environment variables from the current process, and the environment variables passed by the user will either update the existing inherited variables or be appended to them - :type append_env: bool :param output_encoding: Output encoding of bash command - :type output_encoding: str :param skip_exit_code: If task exits with this exit code, leave the task in ``skipped`` state (default: 99). If set to ``None``, any non-zero exit code will be treated as a failure. - :type skip_exit_code: int :param cwd: Working directory to execute the command in. If None (default), the command is run in a temporary directory. - :type cwd: str Airflow will evaluate the exit code of the bash command. In general, a non-zero exit code will result in task failure and zero will result in task success. Exit code ``99`` (or another set in ``skip_exit_code``) diff --git a/airflow/operators/branch.py b/airflow/operators/branch.py index 516d75a958273..cdd546feca469 100644 --- a/airflow/operators/branch.py +++ b/airflow/operators/branch.py @@ -45,7 +45,6 @@ def choose_branch(self, context: Context) -> Union[str, Iterable[str]]: task_ids. :param context: Context dictionary as passed to execute() - :type context: dict """ raise NotImplementedError diff --git a/airflow/operators/datetime.py b/airflow/operators/datetime.py index 47021c1730952..c37a4f9d50c11 100644 --- a/airflow/operators/datetime.py +++ b/airflow/operators/datetime.py @@ -35,17 +35,12 @@ class BranchDateTimeOperator(BaseBranchOperator): :param follow_task_ids_if_true: task id or task ids to follow if ``datetime.datetime.now()`` falls above target_lower and below ``target_upper``. - :type follow_task_ids_if_true: str or list[str] :param follow_task_ids_if_false: task id or task ids to follow if ``datetime.datetime.now()`` falls below target_lower or above ``target_upper``. - :type follow_task_ids_if_false: str or list[str] :param target_lower: target lower bound. - :type target_lower: Optional[datetime.datetime] :param target_upper: target upper bound.
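The BashOperator parameters in the hunk above interact as follows; a hypothetical task (paths invented) that skips itself when its input file is empty::

    from airflow.operators.bash import BashOperator

    check = BashOperator(
        task_id="maybe_skip",
        bash_command="test -s /tmp/input.csv || exit 99",  # 99 -> skipped, not failed
        env={"LC_ALL": "C"},  # replaces the inherited environment (append_env=False)
        skip_exit_code=99,
        cwd="/tmp",
    )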
- :type target_upper: Optional[datetime.datetime] :param use_task_execution_date: If ``True``, uses task's execution day to compare with targets. Execution date is useful for backfilling. If ``False``, uses system's date. - :type use_task_execution_date: bool """ def __init__( diff --git a/airflow/operators/email.py b/airflow/operators/email.py index c8cb61b1a80a8..220bafa944b12 100644 --- a/airflow/operators/email.py +++ b/airflow/operators/email.py @@ -27,25 +27,16 @@ class EmailOperator(BaseOperator): Sends an email. :param to: list of emails to send the email to. (templated) - :type to: list or string (comma or semicolon delimited) :param subject: subject line for the email. (templated) - :type subject: str :param html_content: content of the email, html markup is allowed. (templated) - :type html_content: str :param files: file names to attach in email (templated) - :type files: list :param cc: list of recipients to be added in CC field - :type cc: list or string (comma or semicolon delimited) :param bcc: list of recipients to be added in BCC field - :type bcc: list or string (comma or semicolon delimited) :param mime_subtype: MIME sub content type - :type mime_subtype: str :param mime_charset: character set parameter added to the Content-Type header. - :type mime_charset: str :param custom_headers: additional headers to add to the MIME message. - :type custom_headers: dict """ template_fields: Sequence[str] = ('to', 'subject', 'html_content', 'files') diff --git a/airflow/operators/generic_transfer.py b/airflow/operators/generic_transfer.py index d38a573819d75..2a42859d17e74 100644 --- a/airflow/operators/generic_transfer.py +++ b/airflow/operators/generic_transfer.py @@ -32,18 +32,12 @@ class GenericTransfer(BaseOperator): This is meant to be used on small-ish datasets that fit in memory. :param sql: SQL query to execute against the source database. (templated) - :type sql: str :param destination_table: target table. (templated) - :type destination_table: str :param source_conn_id: source connection - :type source_conn_id: str :param destination_conn_id: destination connection - :type destination_conn_id: str :param preoperator: sql statement or list of statements to be executed prior to loading the data. (templated) - :type preoperator: str or list[str] :param insert_args: extra params for `insert_rows` method. - :type insert_args: dict """ template_fields: Sequence[str] = ('sql', 'destination_table', 'preoperator') diff --git a/airflow/operators/python.py b/airflow/operators/python.py index 49ebef9d5a784..c89d043e2a692 100644 --- a/airflow/operators/python.py +++ b/airflow/operators/python.py @@ -49,17 +49,13 @@ def task(python_callable: Optional[Callable] = None, multiple_outputs: Optional[ def my_task() :param python_callable: A reference to an object that is callable - :type python_callable: python callable :param op_kwargs: a dictionary of keyword arguments that will get unpacked in your function (templated) - :type op_kwargs: dict :param op_args: a list of positional arguments that will get unpacked when calling your callable (templated) - :type op_args: list :param multiple_outputs: if set, function return value will be unrolled to multiple XCom values. Dict will unroll to xcom values with keys as keys. Defaults to False. 
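``multiple_outputs`` as documented above splits a returned dict into separate XComs; a minimal TaskFlow sketch (function and keys invented)::

    from airflow.decorators import task

    @task(multiple_outputs=True)
    def split_email(raw: str) -> dict:
        user, domain = raw.split("@")
        return {"user": user, "domain": domain}  # each key becomes its own XCom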
- :type multiple_outputs: bool :return: """ # To maintain backwards compatibility, we import the task object into this file @@ -109,26 +105,20 @@ def my_python_callable(**kwargs): :param python_callable: A reference to an object that is callable - :type python_callable: python callable :param op_kwargs: a dictionary of keyword arguments that will get unpacked in your function - :type op_kwargs: dict (templated) :param op_args: a list of positional arguments that will get unpacked when calling your callable - :type op_args: list (templated) :param templates_dict: a dictionary where the values are templates that will get templated by the Airflow engine sometime between ``__init__`` and ``execute`` takes place and are made available in your callable's context after the template has been applied. (templated) - :type templates_dict: dict[str] :param templates_exts: a list of file extensions to resolve while processing templated fields, for examples ``['.sql', '.hql']`` - :type templates_exts: list[str] :param show_return_value_in_logs: a bool value whether to show return_value logs. Defaults to True, which allows return value log output. It can be set to False to prevent log output of return value when you return huge data such as transmission a large amount of XCom to TaskAPI. - :type show_return_value_in_logs: bool """ template_fields: Sequence[str] = ('templates_dict', 'op_args', 'op_kwargs') @@ -283,37 +273,27 @@ class PythonVirtualenvOperator(PythonOperator): :param python_callable: A python function with no references to outside variables, defined with def, which will be run in a virtualenv - :type python_callable: function :param requirements: Either a list of requirement strings, or a (templated) "requirements file" as specified by pip. - :type requirements: list[str] | str :param python_version: The Python version to run the virtualenv with. Note that both 2 and 2.7 are acceptable forms. - :type python_version: Optional[Union[str, int, float]] :param use_dill: Whether to use dill to serialize the args and result (pickle is default). This allow more complex types but requires you to include dill in your requirements. - :type use_dill: bool :param system_site_packages: Whether to include system_site_packages in your virtualenv. See virtualenv documentation for more information. - :type system_site_packages: bool :param op_args: A list of positional arguments to pass to python_callable. - :type op_args: list :param op_kwargs: A dict of keyword arguments to pass to python_callable. - :type op_kwargs: dict :param string_args: Strings that are present in the global var virtualenv_string_args, available to python_callable at runtime as a list[str]. Note that args are split by newline. - :type string_args: list[str] :param templates_dict: a dictionary where the values are templates that will get templated by the Airflow engine sometime between ``__init__`` and ``execute`` takes place and are made available in your callable's context after the template has been applied - :type templates_dict: dict of str :param templates_exts: a list of file extensions to resolve while processing templated fields, for examples ``['.sql', '.hql']`` - :type templates_exts: list[str] """ template_fields: Sequence[str] = ('requirements',) diff --git a/airflow/operators/sql.py b/airflow/operators/sql.py index b642772d13917..9facfc12f1ad0 100644 --- a/airflow/operators/sql.py +++ b/airflow/operators/sql.py @@ -117,11 +117,8 @@ class SQLCheckOperator(BaseSQLOperator): without stopping the progress of the DAG. 
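For example, a check like the following (connection id and table invented) fails the task when any value in the first returned row is falsy, such as a zero count::

    from airflow.operators.sql import SQLCheckOperator

    rows_exist = SQLCheckOperator(
        task_id="rows_exist",
        conn_id="warehouse",
        sql="SELECT COUNT(*) FROM fact_orders WHERE ds = '{{ ds }}'",
    )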
:param sql: the sql to be executed. (templated) - :type sql: str :param conn_id: the connection ID used to connect to the database. - :type conn_id: str :param database: name of database which overwrite the defined one in connection - :type database: str """ template_fields: Sequence[str] = ("sql",) @@ -156,7 +153,6 @@ def _convert_to_float_if_possible(s): if appropriate :param s: the string to be converted - :type s: str """ try: ret = float(s) @@ -170,11 +166,8 @@ class SQLValueCheckOperator(BaseSQLOperator): Performs a simple value check using sql code. :param sql: the sql to be executed. (templated) - :type sql: str :param conn_id: the connection ID used to connect to the database. - :type conn_id: str :param database: name of database which overwrite the defined one in connection - :type database: str """ __mapper_args__ = {"polymorphic_identity": "SQLValueCheckOperator"} @@ -263,16 +256,11 @@ class SQLIntervalCheckOperator(BaseSQLOperator): a certain tolerance of the ones from days_back before. :param table: the table name - :type table: str :param conn_id: the connection ID used to connect to the database. - :type conn_id: str :param database: name of database which will overwrite the defined one in connection - :type database: Optional[str] :param days_back: number of days between ds and the ds we want to check against. Defaults to 7 days - :type days_back: Optional[int] :param date_filter_column: The column name for the dates to filter on. Defaults to 'ds' - :type date_filter_column: Optional[str] :param ratio_formula: which formula to use to compute the ratio between the two metrics. Assuming cur is the metric of today and ref is the metric to today - days_back. @@ -281,11 +269,8 @@ class SQLIntervalCheckOperator(BaseSQLOperator): relative_diff: computes abs(cur-ref) / ref Default: 'max_over_min' - :type ratio_formula: str :param ignore_zero: whether we should ignore zero metrics - :type ignore_zero: bool :param metrics_thresholds: a dictionary of ratios indexed by metrics - :type metrics_thresholds: dict """ __mapper_args__ = {"polymorphic_identity": "SQLIntervalCheckOperator"} @@ -407,15 +392,10 @@ class SQLThresholdCheckOperator(BaseSQLOperator): value OR a sql statement that results a numeric. :param sql: the sql to be executed. (templated) - :type sql: str :param conn_id: the connection ID used to connect to the database. - :type conn_id: str :param database: name of database which overwrite the defined one in connection - :type database: str :param min_threshold: numerical value or min threshold sql to be executed (templated) - :type min_threshold: numeric or str :param max_threshold: numerical value or max threshold sql to be executed (templated) - :type max_threshold: numeric or str """ template_fields: Sequence[str] = ("sql", "min_threshold", "max_threshold") @@ -489,20 +469,14 @@ class BranchSQLOperator(BaseSQLOperator, SkipMixin): Allows a DAG to "branch" or follow a specified path based on the results of a SQL query. :param sql: The SQL code to be executed, should return true or false (templated) - :type sql: Can receive a str representing a sql statement or reference to a template file. - Template reference are recognized by str ending in '.sql'. - Expected SQL query to return Boolean (True/False), integer (0 = False, Otherwise = 1) - or string (true/y/yes/1/on/false/n/no/0/off). + Template reference are recognized by str ending in '.sql'. 
+ Expected SQL query to return Boolean (True/False), integer (0 = False, Otherwise = 1) + or string (true/y/yes/1/on/false/n/no/0/off). :param follow_task_ids_if_true: task id or task ids to follow if query returns true - :type follow_task_ids_if_true: str or list :param follow_task_ids_if_false: task id or task ids to follow if query returns false - :type follow_task_ids_if_false: str or list :param conn_id: the connection ID used to connect to the database. - :type conn_id: str :param database: name of database which overwrite the defined one in connection - :type database: str :param parameters: (optional) the parameters to render the SQL query with. - :type parameters: mapping or iterable """ template_fields: Sequence[str] = ("sql",) diff --git a/airflow/operators/subdag.py b/airflow/operators/subdag.py index df27e12c5b492..217ec5c7cd77a 100644 --- a/airflow/operators/subdag.py +++ b/airflow/operators/subdag.py @@ -61,7 +61,6 @@ class SubDagOperator(BaseSensorOperator): :param subdag: the DAG object to run as a subdag of the current DAG. :param session: sqlalchemy session :param conf: Configuration for the subdag - :type conf: dict :param propagate_skipped_state: by setting this argument you can define whether the skipped state of leaf task(s) should be propagated to the parent dag's downstream task. diff --git a/airflow/operators/trigger_dagrun.py b/airflow/operators/trigger_dagrun.py index c14f2d438df9e..259f4039be087 100644 --- a/airflow/operators/trigger_dagrun.py +++ b/airflow/operators/trigger_dagrun.py @@ -59,28 +59,19 @@ class TriggerDagRunOperator(BaseOperator): Triggers a DAG run for a specified ``dag_id`` :param trigger_dag_id: The dag_id to trigger (templated). - :type trigger_dag_id: str :param trigger_run_id: The run ID to use for the triggered DAG run (templated). If not provided, a run ID will be automatically generated. - :type trigger_run_id: str :param conf: Configuration for the DAG run. - :type conf: dict :param execution_date: Execution date for the dag (templated). - :type execution_date: str or datetime.datetime :param reset_dag_run: Whether or not clear existing dag run if already exists. This is useful when backfill or rerun an existing dag run. When reset_dag_run=False and dag run exists, DagRunAlreadyExists will be raised. When reset_dag_run=True and dag run exists, existing dag run will be cleared to rerun. - :type reset_dag_run: bool :param wait_for_completion: Whether or not wait for dag run completion. (default: False) - :type wait_for_completion: bool :param poke_interval: Poke interval to check dag run status when wait_for_completion=True. (default: 60) - :type poke_interval: int :param allowed_states: List of allowed states, default is ``['success']``. - :type allowed_states: list :param failed_states: List of failed or dis-allowed states, default is ``None``. 
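Combining the TriggerDagRunOperator parameters above, a hypothetical hand-off to a downstream DAG (ids invented) might look like::

    from airflow.operators.trigger_dagrun import TriggerDagRunOperator

    kick_off = TriggerDagRunOperator(
        task_id="kick_off_downstream",
        trigger_dag_id="downstream_dag",
        conf={"source": "upstream"},
        wait_for_completion=True,  # block until the triggered run finishes
        poke_interval=30,
        allowed_states=["success"],
    )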
- :type failed_states: list """ template_fields: Sequence[str] = ("trigger_dag_id", "trigger_run_id", "execution_date", "conf") diff --git a/airflow/operators/weekday.py b/airflow/operators/weekday.py index 56bbfa62189ce..fb35079fe0d3e 100644 --- a/airflow/operators/weekday.py +++ b/airflow/operators/weekday.py @@ -31,9 +31,7 @@ class BranchDayOfWeekOperator(BaseBranchOperator): :ref:`howto/operator:BranchDayOfWeekOperator` :param follow_task_ids_if_true: task id or task ids to follow if criteria met - :type follow_task_ids_if_true: str or list[str] :param follow_task_ids_if_false: task id or task ids to follow if criteria does not met - :type follow_task_ids_if_false: str or list[str] :param week_day: Day of the week to check (full name). Optionally, a set of days can also be provided using a set. Example values: @@ -43,11 +41,9 @@ class BranchDayOfWeekOperator(BaseBranchOperator): * ``{WeekDay.TUESDAY}`` * ``{WeekDay.SATURDAY, WeekDay.SUNDAY}`` - :type week_day: iterable or airflow.utils.weekday.WeekDay :param use_task_execution_day: If ``True``, uses task's execution day to compare with is_today. Execution Date is Useful for backfilling. If ``False``, uses system's day of the week. - :type use_task_execution_day: bool """ def __init__( diff --git a/airflow/plugins_manager.py b/airflow/plugins_manager.py index e8aaaa206e8d9..87709d5286d13 100644 --- a/airflow/plugins_manager.py +++ b/airflow/plugins_manager.py @@ -483,7 +483,6 @@ def get_plugin_info(attrs_to_dump: Optional[Iterable[str]] = None) -> List[Dict[ Dump plugins attributes :param attrs_to_dump: A list of plugin attributes to dump - :type attrs_to_dump: List """ ensure_plugins_loaded() integrate_executor_plugins() diff --git a/airflow/providers/airbyte/hooks/airbyte.py b/airflow/providers/airbyte/hooks/airbyte.py index 8ea82b2950f52..b1f6317530514 100644 --- a/airflow/providers/airbyte/hooks/airbyte.py +++ b/airflow/providers/airbyte/hooks/airbyte.py @@ -28,9 +28,7 @@ class AirbyteHook(HttpHook): :param airbyte_conn_id: Required. The name of the Airflow connection to get connection information for Airbyte. - :type airbyte_conn_id: str :param api_version: Optional. Airbyte API version. - :type api_version: str """ conn_name_attr = 'airbyte_conn_id' @@ -57,12 +55,9 @@ def wait_for_job( Helper method which polls a job to check if it finishes. :param job_id: Required. Id of the Airbyte job - :type job_id: str :param wait_seconds: Optional. Number of seconds between checks. - :type wait_seconds: float :param timeout: Optional. How many seconds wait for job to be ready. Used only if ``asynchronous`` is False. - :type timeout: float """ state = None start = time.monotonic() @@ -93,7 +88,6 @@ def submit_sync_connection(self, connection_id: str) -> Any: Submits a job to a Airbyte server. :param connection_id: Required. The ConnectionId of the Airbyte Connection. - :type connection_id: str """ return self.run( endpoint=f"api/{self.api_version}/connections/sync", @@ -106,7 +100,6 @@ def get_job(self, job_id: int) -> Any: Gets the resource representation for a job in Airbyte. :param job_id: Required. 
Id of the Airbyte job - :type job_id: int """ return self.run( endpoint=f"api/{self.api_version}/jobs/get", diff --git a/airflow/providers/airbyte/operators/airbyte.py b/airflow/providers/airbyte/operators/airbyte.py index 73be9830248bf..ef2e2c1559902 100644 --- a/airflow/providers/airbyte/operators/airbyte.py +++ b/airflow/providers/airbyte/operators/airbyte.py @@ -35,20 +35,14 @@ class AirbyteTriggerSyncOperator(BaseOperator): :param airbyte_conn_id: Required. The name of the Airflow connection to get connection information for Airbyte. - :type airbyte_conn_id: str :param connection_id: Required. The Airbyte ConnectionId UUID between a source and destination. - :type connection_id: str :param asynchronous: Optional. Flag to get job_id after submitting the job to the Airbyte API. This is useful for submitting long running jobs and waiting on them asynchronously using the AirbyteJobSensor. - :type asynchronous: bool :param api_version: Optional. Airbyte API version. - :type api_version: str :param wait_seconds: Optional. Number of seconds between checks. Only used when ``asynchronous`` is False. - :type wait_seconds: float :param timeout: Optional. The amount of time, in seconds, to wait for the request to complete. Only used when ``asynchronous`` is False. - :type timeout: float """ template_fields: Sequence[str] = ('connection_id',) diff --git a/airflow/providers/airbyte/sensors/airbyte.py b/airflow/providers/airbyte/sensors/airbyte.py index 6640dd0fb4c2e..10c5954ee3a79 100644 --- a/airflow/providers/airbyte/sensors/airbyte.py +++ b/airflow/providers/airbyte/sensors/airbyte.py @@ -31,12 +31,9 @@ class AirbyteJobSensor(BaseSensorOperator): Check for the state of a previously submitted Airbyte job. :param airbyte_job_id: Required. Id of the Airbyte job - :type airbyte_job_id: str :param airbyte_conn_id: Required. The name of the Airflow connection to get connection information for Airbyte. - :type airbyte_conn_id: str :param api_version: Optional. Airbyte API version. - :type api_version: str """ template_fields: Sequence[str] = ('airbyte_job_id',) diff --git a/airflow/providers/alibaba/cloud/hooks/oss.py b/airflow/providers/alibaba/cloud/hooks/oss.py index 76725a2a77518..3f04eee6eedae 100644 --- a/airflow/providers/alibaba/cloud/hooks/oss.py +++ b/airflow/providers/alibaba/cloud/hooks/oss.py @@ -125,9 +125,7 @@ def object_exists(self, key: str, bucket_name: Optional[str] = None) -> bool: Check if object exists. :param key: the path of the object - :type key: str :param bucket_name: the name of the bucket - :type bucket_name: str :return: True if it exists and False if not. :rtype: bool """ @@ -143,7 +141,6 @@ def get_bucket(self, bucket_name: Optional[str] = None) -> oss2.api.Bucket: Returns a oss2.Bucket object :param bucket_name: the name of the bucket - :type bucket_name: str :return: the bucket object to the bucket name. :rtype: oss2.api.Bucket """ @@ -156,11 +153,8 @@ def load_string(self, key: str, content: str, bucket_name: Optional[str] = None) Loads a string to OSS :param key: the path of the object - :type key: str :param content: str to set as content for the key. - :type content: str :param bucket_name: the name of the bucket - :type bucket_name: str """ try: self.get_bucket(bucket_name).put_object(key, content) @@ -178,11 +172,8 @@ def upload_local_file( Upload a local file to OSS :param key: the OSS path of the object - :type key: str :param file: local file to upload. 
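The ``asynchronous`` flag on AirbyteTriggerSyncOperator pairs naturally with AirbyteJobSensor: the operator submits the sync and returns the job id immediately, and the sensor does the waiting. A minimal sketch, assuming an ``airbyte_default`` connection and a placeholder ConnectionId UUID:

```python
from datetime import datetime

from airflow import DAG
from airflow.providers.airbyte.operators.airbyte import AirbyteTriggerSyncOperator
from airflow.providers.airbyte.sensors.airbyte import AirbyteJobSensor

with DAG(
    dag_id="airbyte_async_sync",
    start_date=datetime(2022, 1, 1),
    schedule_interval=None,
) as dag:
    trigger = AirbyteTriggerSyncOperator(
        task_id="trigger_sync",
        airbyte_conn_id="airbyte_default",
        connection_id="15bc3800-82e4-48c3-a32d-620661273f28",  # placeholder UUID
        asynchronous=True,  # return the job id instead of blocking the worker
    )
    wait = AirbyteJobSensor(
        task_id="wait_for_sync",
        airbyte_conn_id="airbyte_default",
        airbyte_job_id=trigger.output,  # job id pushed to XCom by the trigger task
    )
    trigger >> wait
```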
- :type file: str :param bucket_name: the name of the bucket - :type bucket_name: str """ try: self.get_bucket(bucket_name).put_object_from_file(key, file) @@ -200,11 +191,8 @@ def download_file( Download file from OSS :param key: key of the file-like object to download. - :type key: str :param local_file: local path + file name to save. - :type local_file: str :param bucket_name: the name of the bucket - :type bucket_name: str :return: the file name. :rtype: str """ @@ -225,9 +213,7 @@ def delete_object( Delete object from OSS :param key: key of the object to delete. - :type key: str :param bucket_name: the name of the bucket - :type bucket_name: str """ try: self.get_bucket(bucket_name).delete_object(key) @@ -245,9 +231,7 @@ def delete_objects( Delete objects from OSS :param key: keys list of the objects to delete. - :type key: list of str :param bucket_name: the name of the bucket - :type bucket_name: str """ try: self.get_bucket(bucket_name).batch_delete_objects(key) @@ -264,7 +248,6 @@ def delete_bucket( Delete bucket from OSS :param bucket_name: the name of the bucket - :type bucket_name: str """ try: self.get_bucket(bucket_name).delete_bucket() @@ -281,7 +264,6 @@ def create_bucket( Create bucket :param bucket_name: the name of the bucket - :type bucket_name: str """ try: self.get_bucket(bucket_name).create_bucket() diff --git a/airflow/providers/alibaba/cloud/operators/oss.py b/airflow/providers/alibaba/cloud/operators/oss.py index ff21c84ac25c2..8ec9b4b13975e 100644 --- a/airflow/providers/alibaba/cloud/operators/oss.py +++ b/airflow/providers/alibaba/cloud/operators/oss.py @@ -31,11 +31,8 @@ class OSSCreateBucketOperator(BaseOperator): This operator creates an OSS bucket :param region: OSS region you want to create bucket - :type region: str :param bucket_name: This is bucket name you want to create - :type bucket_name: str :param oss_conn_id: The Airflow connection used for OSS credentials. - :type oss_conn_id: Optional[str] """ def __init__( @@ -60,11 +57,8 @@ class OSSDeleteBucketOperator(BaseOperator): This operator to delete an OSS bucket :param region: OSS region you want to create bucket - :type region: str :param bucket_name: This is bucket name you want to delete - :type bucket_name: str :param oss_conn_id: The Airflow connection used for OSS credentials. - :type oss_conn_id: Optional[str] """ def __init__( @@ -89,15 +83,10 @@ class OSSUploadObjectOperator(BaseOperator): This operator to upload an file-like object :param key: the OSS path of the object - :type key: str :param file: local file to upload. - :type file: str :param region: OSS region you want to create bucket - :type region: str :param bucket_name: This is bucket name you want to create - :type bucket_name: str :param oss_conn_id: The Airflow connection used for OSS credentials. - :type oss_conn_id: Optional[str] """ def __init__( @@ -126,15 +115,10 @@ class OSSDownloadObjectOperator(BaseOperator): This operator to Download an OSS object :param key: key of the object to download. - :type key: str :param local_file: local path + file name to save. - :type local_file: str :param region: OSS region - :type region: str :param bucket_name: OSS bucket name - :type bucket_name: str :param oss_conn_id: The Airflow connection used for OSS credentials. - :type oss_conn_id: Optional[str] """ def __init__( @@ -163,13 +147,9 @@ class OSSDeleteBatchObjectOperator(BaseOperator): This operator to delete OSS objects :param key: key list of the objects to delete. 
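A short sketch of the OSS hook round trip described above (upload, existence check, download), assuming the hook accepts a region and an ``oss_default`` connection id as shown; bucket and key names are placeholders:

```python
from airflow.providers.alibaba.cloud.hooks.oss import OSSHook

hook = OSSHook(oss_conn_id="oss_default", region="cn-hangzhou")  # placeholder region

# Upload a string, verify it landed, then pull it back down.
hook.load_string(key="reports/latest.txt", content="hello", bucket_name="my-bucket")
if hook.object_exists(key="reports/latest.txt", bucket_name="my-bucket"):
    hook.download_file(
        key="reports/latest.txt",
        local_file="/tmp/latest.txt",
        bucket_name="my-bucket",
    )
```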
- :type key: str :param region: OSS region - :type region: str :param bucket_name: OSS bucket name - :type bucket_name: str :param oss_conn_id: The Airflow connection used for OSS credentials. - :type oss_conn_id: Optional[str] """ def __init__( @@ -196,13 +176,9 @@ class OSSDeleteObjectOperator(BaseOperator): This operator to delete an OSS object :param key: key of the object to delete. - :type key: str :param region: OSS region - :type region: str :param bucket_name: OSS bucket name - :type bucket_name: str :param oss_conn_id: The Airflow connection used for OSS credentials. - :type oss_conn_id: Optional[str] """ def __init__( diff --git a/airflow/providers/alibaba/cloud/sensors/oss_key.py b/airflow/providers/alibaba/cloud/sensors/oss_key.py index 2788ea777d027..a53dcbb3f7cd1 100644 --- a/airflow/providers/alibaba/cloud/sensors/oss_key.py +++ b/airflow/providers/alibaba/cloud/sensors/oss_key.py @@ -42,13 +42,9 @@ class OSSKeySensor(BaseSensorOperator): :param bucket_key: The key being waited on. Supports full oss:// style url or relative path from root level. When it's specified as a full oss:// url, please leave bucket_name as `None`. - :type bucket_key: str :param region: OSS region - :type region: str :param bucket_name: OSS bucket name - :type bucket_name: str :param oss_conn_id: The Airflow connection used for OSS credentials. - :type oss_conn_id: Optional[str] """ template_fields: Sequence[str] = ('bucket_key', 'bucket_name') diff --git a/airflow/providers/amazon/aws/hooks/athena.py b/airflow/providers/amazon/aws/hooks/athena.py index 39ebe62684318..1c5c42fa7a06d 100644 --- a/airflow/providers/amazon/aws/hooks/athena.py +++ b/airflow/providers/amazon/aws/hooks/athena.py @@ -16,7 +16,13 @@ # specific language governing permissions and limitations # under the License. -"""This module contains AWS Athena hook""" +""" +This module contains AWS Athena hook. + +.. spelling:: + + PageIterator +""" import warnings from time import sleep from typing import Any, Dict, Optional @@ -37,7 +43,6 @@ class AthenaHook(AwsBaseHook): :class:`~airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook` :param sleep_time: Time (in seconds) to wait between two consecutive calls to check query status on Athena - :type sleep_time: int """ INTERMEDIATE_STATES = ( @@ -66,15 +71,10 @@ def run_query( Run Presto query on athena with provided config and return submitted query_execution_id :param query: Presto query to run - :type query: str :param query_context: Context in which query need to be run - :type query_context: dict :param result_configuration: Dict with path to store results in and config related to encryption - :type result_configuration: dict :param client_request_token: Unique token created by user to avoid multiple executions of same query - :type client_request_token: str :param workgroup: Athena workgroup name, when not specified, will be 'primary' - :type workgroup: str :return: str """ params = { @@ -94,7 +94,6 @@ def check_query_status(self, query_execution_id: str) -> Optional[str]: Fetch the status of submitted athena query. Returns None or one of valid query states. :param query_execution_id: Id of submitted athena query - :type query_execution_id: str :return: str """ response = self.get_conn().get_query_execution(QueryExecutionId=query_execution_id) @@ -113,7 +112,6 @@ def get_state_change_reason(self, query_execution_id: str) -> Optional[str]: Fetch the reason for a state change (e.g. error message). Returns None or reason string. 
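Putting the AthenaHook methods together, a minimal sketch that submits a query and blocks until a terminal state; the database name and S3 output location are placeholders, and an ``aws_default`` connection is assumed:

```python
from airflow.providers.amazon.aws.hooks.athena import AthenaHook

hook = AthenaHook(aws_conn_id="aws_default", sleep_time=30)

query_execution_id = hook.run_query(
    query="SELECT * FROM logs LIMIT 10",
    query_context={"Database": "my_database"},  # placeholder Glue database
    result_configuration={"OutputLocation": "s3://my-athena-results/"},
)
final_state = hook.poll_query_status(query_execution_id, max_tries=20)
if final_state == "SUCCEEDED":
    results = hook.get_query_results(query_execution_id)
```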
:param query_execution_id: Id of submitted athena query - :type query_execution_id: str :return: str """ response = self.get_conn().get_query_execution(QueryExecutionId=query_execution_id) @@ -135,11 +133,8 @@ def get_query_results( failed/cancelled state else dict of query output :param query_execution_id: Id of submitted athena query - :type query_execution_id: str :param next_token_id: The token that specifies where to start pagination. - :type next_token_id: str :param max_results: The maximum number of results (rows) to return in this request. - :type max_results: int :return: dict """ query_state = self.check_query_status(query_execution_id) @@ -167,13 +162,9 @@ def get_query_results_paginator( wish to get all results at once, call build_full_result() on the returned PageIterator :param query_execution_id: Id of submitted athena query - :type query_execution_id: str :param max_items: The total number of items to return. - :type max_items: int :param page_size: The size of each page. - :type page_size: int :param starting_token: A token to specify where to start paginating. - :type starting_token: str :return: PageIterator """ query_state = self.check_query_status(query_execution_id) @@ -200,9 +191,7 @@ def poll_query_status(self, query_execution_id: str, max_tries: Optional[int] = Returns one of the final states :param query_execution_id: Id of submitted athena query - :type query_execution_id: str :param max_tries: Number of times to poll for query state before function exits - :type max_tries: int :return: str """ try_number = 1 @@ -234,7 +223,6 @@ def get_output_location(self, query_execution_id: str) -> str: in s3 uri format. :param query_execution_id: Id of submitted athena query - :type query_execution_id: str :return: str """ output_location = None @@ -259,7 +247,6 @@ def stop_query(self, query_execution_id: str) -> Dict: Cancel the submitted athena query :param query_execution_id: Id of submitted athena query - :type query_execution_id: str :return: dict """ return self.get_conn().stop_query_execution(QueryExecutionId=query_execution_id) diff --git a/airflow/providers/amazon/aws/hooks/base_aws.py b/airflow/providers/amazon/aws/hooks/base_aws.py index 2d004e5ed708f..d7ca0a69523ee 100644 --- a/airflow/providers/amazon/aws/hooks/base_aws.py +++ b/airflow/providers/amazon/aws/hooks/base_aws.py @@ -367,19 +367,13 @@ class AwsBaseHook(BaseHook): running Airflow in a distributed manner and aws_conn_id is None or empty, then default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates. https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html - :type verify: Union[bool, str, None] :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used. - :type region_name: Optional[str] :param client_type: boto3.client client_type. Eg 's3', 'emr' etc - :type client_type: Optional[str] :param resource_type: boto3.resource resource_type. Eg 'dynamodb' etc - :type resource_type: Optional[str] :param config: Configuration for botocore client. (https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html) - :type config: Optional[botocore.client.Config] """ conn_name_attr = 'aws_conn_id' @@ -599,12 +593,9 @@ def _parse_s3_config( parse boto, s3cmd.conf and AWS SDK config formats :param config_file_name: path to the config file - :type config_file_name: str :param config_format: config type. 
One of "boto", "s3cmd" or "aws". Defaults to "boto" - :type config_format: str :param profile: profile name in AWS type config file - :type profile: str """ config = configparser.ConfigParser() if config.read(config_file_name): # pragma: no cover diff --git a/airflow/providers/amazon/aws/hooks/batch_client.py b/airflow/providers/amazon/aws/hooks/batch_client.py index 793357ae5d37d..3b10012b3943f 100644 --- a/airflow/providers/amazon/aws/hooks/batch_client.py +++ b/airflow/providers/amazon/aws/hooks/batch_client.py @@ -56,7 +56,6 @@ def describe_jobs(self, jobs: List[str]) -> Dict: Get job descriptions from AWS Batch :param jobs: a list of JobId to describe - :type jobs: List[str] :return: an API response to describe jobs :rtype: Dict @@ -70,7 +69,6 @@ def get_waiter(self, waiterName: str) -> botocore.waiter.Waiter: :param waiterName: The name of the waiter. The name should match the name (including the casing) of the key name in the waiter model file (typically this is CamelCasing). - :type waiterName: str :return: a waiter object for the named AWS Batch service :rtype: botocore.waiter.Waiter @@ -105,25 +103,18 @@ def submit_job( Submit a Batch job :param jobName: the name for the AWS Batch job - :type jobName: str :param jobQueue: the queue name on AWS Batch - :type jobQueue: str :param jobDefinition: the job definition name on AWS Batch - :type jobDefinition: str :param arrayProperties: the same parameter that boto3 will receive - :type arrayProperties: Dict :param parameters: the same parameter that boto3 will receive - :type parameters: Dict :param containerOverrides: the same parameter that boto3 will receive - :type containerOverrides: Dict :param tags: the same parameter that boto3 will receive - :type tags: Dict :return: an API response :rtype: Dict @@ -135,10 +126,8 @@ def terminate_job(self, jobId: str, reason: str) -> Dict: Terminate a Batch job :param jobId: a job ID to terminate - :type jobId: str :param reason: a reason to terminate job ID - :type reason: str :return: an API response :rtype: Dict @@ -156,11 +145,9 @@ class BatchClientHook(AwsBaseHook): :param max_retries: exponential back-off retries, 4200 = 48 hours; polling is only used when waiters is None - :type max_retries: Optional[int] :param status_retries: number of HTTP retries to get job status, 10; polling is only used when waiters is None - :type status_retries: Optional[int] .. note:: Several methods use a default random delay to check or poll for job status, i.e. @@ -228,10 +215,8 @@ def terminate_job(self, job_id: str, reason: str) -> Dict: Terminate a Batch job :param job_id: a job ID to terminate - :type job_id: str :param reason: a reason to terminate job ID - :type reason: str :return: an API response :rtype: Dict @@ -246,7 +231,6 @@ def check_job_success(self, job_id: str) -> bool: 'SUCCEEDED', else raise an AirflowException :param job_id: a Batch job ID - :type job_id: str :rtype: bool @@ -272,10 +256,8 @@ def wait_for_job(self, job_id: str, delay: Union[int, float, None] = None) -> No Wait for Batch job to complete :param job_id: a Batch job ID - :type job_id: str :param delay: a delay before polling for job status - :type delay: Optional[Union[int, float]] :raises: AirflowException """ @@ -297,10 +279,8 @@ def poll_for_job_running(self, job_id: str, delay: Union[int, float, None] = Non quickly from STARTING to RUNNING to completed (often a failure). 
:param job_id: a Batch job ID - :type job_id: str :param delay: a delay before polling for job status - :type delay: Optional[Union[int, float]] :raises: AirflowException """ @@ -317,10 +297,8 @@ def poll_for_job_complete(self, job_id: str, delay: Union[int, float, None] = No 'SUBMITTED'>'PENDING'>'RUNNABLE'>'STARTING'>'RUNNING'>'SUCCEEDED'|'FAILED' :param job_id: a Batch job ID - :type job_id: str :param delay: a delay before polling for job status - :type delay: Optional[Union[int, float]] :raises: AirflowException """ @@ -333,11 +311,9 @@ def poll_job_status(self, job_id: str, match_status: List[str]) -> bool: Poll for job status using an exponential back-off strategy (with max_retries). :param job_id: a Batch job ID - :type job_id: str :param match_status: a list of job status to match; the Batch job status are: 'SUBMITTED'|'PENDING'|'RUNNABLE'|'STARTING'|'RUNNING'|'SUCCEEDED'|'FAILED' - :type match_status: List[str] :rtype: bool @@ -377,7 +353,6 @@ def get_job_description(self, job_id: str) -> Dict: Get job description (using status_retries). :param job_id: a Batch job ID - :type job_id: str :return: an API response for describe jobs :rtype: Dict @@ -420,10 +395,8 @@ def parse_job_description(job_id: str, response: Dict) -> Dict: Parse job description to extract description for job_id :param job_id: a Batch job ID - :type job_id: str :param response: an API response for describe jobs - :type response: Dict :return: an API response to describe job_id :rtype: Dict @@ -450,15 +423,12 @@ def add_jitter( :param delay: number of seconds to pause; delay is assumed to be a positive number - :type delay: Union[int, float] :param width: delay +/- width for random jitter; width is assumed to be a positive number - :type width: Union[int, float] :param minima: minimum delay allowed; minima is assumed to be a non-negative number - :type minima: Union[int, float] :return: uniform(delay - width, delay + width) jitter and it is a non-negative number @@ -478,7 +448,6 @@ def delay(delay: Union[int, float, None] = None) -> None: :param delay: a delay to pause execution using ``time.sleep(delay)``; a small 1 second jitter is applied to the delay. - :type delay: Optional[Union[int, float]] .. note:: This method uses a default random delay, i.e. @@ -500,7 +469,6 @@ def exponential_delay(tries: int) -> float: This is used in the :py:meth:`.poll_for_job_status` method. :param tries: Number of tries - :type tries: int :rtype: float diff --git a/airflow/providers/amazon/aws/hooks/batch_waiters.py b/airflow/providers/amazon/aws/hooks/batch_waiters.py index 79d0cd20987b3..29c10e4f464d5 100644 --- a/airflow/providers/amazon/aws/hooks/batch_waiters.py +++ b/airflow/providers/amazon/aws/hooks/batch_waiters.py @@ -94,16 +94,13 @@ class BatchWaitersHook(BatchClientHook): - https://github.com/boto/botocore/issues/1915 :param waiter_config: a custom waiter configuration for AWS Batch services - :type waiter_config: Optional[Dict] :param aws_conn_id: connection id of AWS credentials / region name. If None, credential boto3 strategy will be used (http://boto3.readthedocs.io/en/latest/guide/configuration.html). - :type aws_conn_id: Optional[str] :param region_name: region name to use in AWS client. Override the AWS region in connection (if provided) - :type region_name: Optional[str] """ def __init__(self, *args, waiter_config: Optional[Dict] = None, **kwargs) -> None: @@ -180,7 +177,6 @@ def get_waiter(self, waiter_name: str) -> botocore.waiter.Waiter: :param waiter_name: The name of the waiter. 
The name should match the name (including the casing) of the key name in the waiter model file (typically this is CamelCasing); see ``.list_waiters``. - :type waiter_name: str :return: a waiter object for the named AWS Batch service :rtype: botocore.waiter.Waiter @@ -203,10 +199,8 @@ def wait_for_job(self, job_id: str, delay: Union[int, float, None] = None) -> No following names: "JobExists", "JobRunning" and "JobComplete". :param job_id: a Batch job ID - :type job_id: str :param delay: A delay before polling for job status - :type delay: Union[int, float, None] :raises: AirflowException diff --git a/airflow/providers/amazon/aws/hooks/cloud_formation.py b/airflow/providers/amazon/aws/hooks/cloud_formation.py index e5a7b7f0500e8..e96f397628ed9 100644 --- a/airflow/providers/amazon/aws/hooks/cloud_formation.py +++ b/airflow/providers/amazon/aws/hooks/cloud_formation.py @@ -58,9 +58,7 @@ def create_stack(self, stack_name: str, cloudformation_parameters: dict) -> None Create stack in CloudFormation. :param stack_name: stack_name. - :type stack_name: str :param cloudformation_parameters: parameters to be passed to CloudFormation. - :type cloudformation_parameters: dict """ if 'StackName' not in cloudformation_parameters: cloudformation_parameters['StackName'] = stack_name @@ -71,9 +69,7 @@ def delete_stack(self, stack_name: str, cloudformation_parameters: Optional[dict Delete stack in CloudFormation. :param stack_name: stack_name. - :type stack_name: str :param cloudformation_parameters: parameters to be passed to CloudFormation (optional). - :type cloudformation_parameters: dict """ cloudformation_parameters = cloudformation_parameters or {} if 'StackName' not in cloudformation_parameters: diff --git a/airflow/providers/amazon/aws/hooks/datasync.py b/airflow/providers/amazon/aws/hooks/datasync.py index bc7cdb85186a5..b75123d6fc6ec 100644 --- a/airflow/providers/amazon/aws/hooks/datasync.py +++ b/airflow/providers/amazon/aws/hooks/datasync.py @@ -38,7 +38,6 @@ class DataSyncHook(AwsBaseHook): :param wait_interval_seconds: Time to wait between two consecutive calls to check TaskExecution status. Defaults to 30 seconds. - :type wait_interval_seconds: Optional[int] :raises ValueError: If wait_interval_seconds is not between 0 and 15*60 seconds. """ diff --git a/airflow/providers/amazon/aws/hooks/dms.py b/airflow/providers/amazon/aws/hooks/dms.py index f0f5c90b5fa3f..a1bd19daf3129 100644 --- a/airflow/providers/amazon/aws/hooks/dms.py +++ b/airflow/providers/amazon/aws/hooks/dms.py @@ -60,9 +60,7 @@ def find_replication_tasks_by_arn( """ Find and describe replication tasks by task ARN :param replication_task_arn: Replication task arn - :type replication_task_arn: str :param without_settings: Indicates whether to return task information with settings. - :type without_settings: Optional[bool] :return: list of replication tasks that match the ARN """ @@ -83,7 +81,6 @@ def get_task_status(self, replication_task_arn: str) -> Optional[str]: Retrieve task status. 
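For the CloudFormationHook methods above, a minimal sketch of creating and later deleting a stack; the stack name and the inline template body are placeholders:

```python
from airflow.providers.amazon.aws.hooks.cloud_formation import CloudFormationHook

hook = CloudFormationHook(aws_conn_id="aws_default")

hook.create_stack(
    stack_name="example-stack",
    cloudformation_parameters={
        # Inline template creating a single S3 bucket (placeholder).
        "TemplateBody": '{"Resources": {"Bucket": {"Type": "AWS::S3::Bucket"}}}',
        "TimeoutInMinutes": 10,
    },
)
# ... and tear it down again once it is no longer needed.
hook.delete_stack(stack_name="example-stack")
```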
:param replication_task_arn: Replication task ARN - :type replication_task_arn: str :return: Current task status """ replication_tasks = self.find_replication_tasks_by_arn( @@ -113,17 +110,11 @@ def create_replication_task( Create DMS replication task :param replication_task_id: Replication task id - :type replication_task_id: str :param source_endpoint_arn: Source endpoint ARN - :type source_endpoint_arn: str :param target_endpoint_arn: Target endpoint ARN - :type target_endpoint_arn: str :param replication_instance_arn: Replication instance ARN - :type replication_instance_arn: str :param table_mappings: Table mappings - :type table_mappings: dict :param migration_type: Migration type ('full-load'|'cdc'|'full-load-and-cdc'), full-load by default. - :type migration_type: str :return: Replication task ARN """ dms_client = self.get_conn() @@ -152,10 +143,8 @@ def start_replication_task( Starts replication task. :param replication_task_arn: Replication task ARN - :type replication_task_arn: str :param start_replication_task_type: Replication task start type (default='start-replication') ('start-replication'|'resume-processing'|'reload-target') - :type start_replication_task_type: str """ dms_client = self.get_conn() dms_client.start_replication_task( @@ -169,7 +158,6 @@ def stop_replication_task(self, replication_task_arn): Stops replication task. :param replication_task_arn: Replication task ARN - :type replication_task_arn: str """ dms_client = self.get_conn() dms_client.stop_replication_task(ReplicationTaskArn=replication_task_arn) @@ -179,7 +167,6 @@ def delete_replication_task(self, replication_task_arn): Starts replication task deletion and waits for it to be deleted :param replication_task_arn: Replication task ARN - :type replication_task_arn: str """ dms_client = self.get_conn() dms_client.delete_replication_task(ReplicationTaskArn=replication_task_arn) @@ -192,9 +179,7 @@ def wait_for_task_status(self, replication_task_arn: str, status: DmsTaskWaiterS Supported statuses: deleted, ready, running, stopped. :param status: Status to wait for - :type status: DmsTaskWaiterStatus :param replication_task_arn: Replication task ARN - :type replication_task_arn: str """ if not isinstance(status, DmsTaskWaiterStatus): raise TypeError('Status must be an instance of DmsTaskWaiterStatus') diff --git a/airflow/providers/amazon/aws/hooks/dynamodb.py b/airflow/providers/amazon/aws/hooks/dynamodb.py index 3fb55072841e6..7b298ee15ca62 100644 --- a/airflow/providers/amazon/aws/hooks/dynamodb.py +++ b/airflow/providers/amazon/aws/hooks/dynamodb.py @@ -36,9 +36,7 @@ class DynamoDBHook(AwsBaseHook): :class:`~airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook` :param table_keys: partition key and sort key - :type table_keys: list :param table_name: target DynamoDB table - :type table_name: str """ def __init__( diff --git a/airflow/providers/amazon/aws/hooks/ec2.py b/airflow/providers/amazon/aws/hooks/ec2.py index 9d97292f8912b..96dbaf541051d 100644 --- a/airflow/providers/amazon/aws/hooks/ec2.py +++ b/airflow/providers/amazon/aws/hooks/ec2.py @@ -75,9 +75,7 @@ def get_instance(self, instance_id: str, filters: Optional[List] = None): Get EC2 instance by id and return it. :param instance_id: id of the AWS EC2 instance - :type instance_id: str :param filters: List of filters to specify instances to get - :type filters: list :return: Instance object :rtype: ec2.Instance """ @@ -169,7 +167,6 @@ def get_instance_state(self, instance_id: str) -> str: Get EC2 instance state by id and return it. 
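The DmsHook methods above form a small lifecycle: create the task, wait until it is ready, then start it. A sketch with placeholder ARNs and a permissive selection rule for the table mappings:

```python
from airflow.providers.amazon.aws.hooks.dms import DmsHook, DmsTaskWaiterStatus

hook = DmsHook(aws_conn_id="aws_default")

task_arn = hook.create_replication_task(
    replication_task_id="example-task",
    source_endpoint_arn="arn:aws:dms:us-east-1:123456789012:endpoint:SOURCE",
    target_endpoint_arn="arn:aws:dms:us-east-1:123456789012:endpoint:TARGET",
    replication_instance_arn="arn:aws:dms:us-east-1:123456789012:rep:INSTANCE",
    table_mappings={
        "rules": [
            {
                "rule-type": "selection",
                "rule-id": "1",
                "rule-name": "include-everything",
                "object-locator": {"schema-name": "public", "table-name": "%"},
                "rule-action": "include",
            }
        ]
    },
)
hook.wait_for_task_status(task_arn, DmsTaskWaiterStatus.READY)
hook.start_replication_task(task_arn)
```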
:param instance_id: id of the AWS EC2 instance - :type instance_id: str :return: current state of the instance :rtype: str """ @@ -183,12 +180,9 @@ def wait_for_state(self, instance_id: str, target_state: str, check_interval: fl Wait EC2 instance until its state is equal to the target_state. :param instance_id: id of the AWS EC2 instance - :type instance_id: str :param target_state: target state of instance - :type target_state: str :param check_interval: time in seconds that the job should wait in between each instance state checks until operation is completed - :type check_interval: float :return: None :rtype: None """ diff --git a/airflow/providers/amazon/aws/hooks/eks.py b/airflow/providers/amazon/aws/hooks/eks.py index 1f689d77e0fa4..d21d905929d95 100644 --- a/airflow/providers/amazon/aws/hooks/eks.py +++ b/airflow/providers/amazon/aws/hooks/eks.py @@ -103,12 +103,9 @@ def create_cluster( Creates an Amazon EKS control plane. :param name: The unique name to give to your Amazon EKS Cluster. - :type name: str :param roleArn: The Amazon Resource Name (ARN) of the IAM role that provides permissions for the Kubernetes control plane to make calls to AWS API operations on your behalf. - :type roleArn: str :param resourcesVpcConfig: The VPC configuration used by the cluster control plane. - :type resourcesVpcConfig: Dict :return: Returns descriptive information about the created EKS Cluster. :rtype: Dict @@ -136,15 +133,10 @@ def create_nodegroup( Creates an Amazon EKS managed node group for an Amazon EKS Cluster. :param clusterName: The name of the Amazon EKS cluster to create the EKS Managed Nodegroup in. - :type clusterName: str :param nodegroupName: The unique name to give your managed nodegroup. - :type nodegroupName: str :param subnets: The subnets to use for the Auto Scaling group that is created for your nodegroup. - :type subnets: List[str] :param nodeRole: The Amazon Resource Name (ARN) of the IAM role to associate with your nodegroup. - :type nodeRole: str :param tags: Optional tags to apply to your nodegroup. - :type tags: Dict :return: Returns descriptive information about the created EKS Managed Nodegroup. :rtype: Dict @@ -187,14 +179,10 @@ def create_fargate_profile( Creates an AWS Fargate profile for an Amazon EKS cluster. :param clusterName: The name of the Amazon EKS cluster to apply the Fargate profile to. - :type clusterName: str :param fargateProfileName: The name of the Fargate profile. - :type fargateProfileName: str :param podExecutionRoleArn: The Amazon Resource Name (ARN) of the pod execution role to use for pods that match the selectors in the Fargate profile. - :type podExecutionRoleArn: str :param selectors: The selectors to match for pods to use this Fargate profile. - :type selectors: List :return: Returns descriptive information about the created Fargate profile. :rtype: Dict @@ -221,7 +209,6 @@ def delete_cluster(self, name: str) -> Dict: Deletes the Amazon EKS Cluster control plane. :param name: The name of the cluster to delete. - :type name: str :return: Returns descriptive information about the deleted EKS Cluster. :rtype: Dict @@ -238,9 +225,7 @@ def delete_nodegroup(self, clusterName: str, nodegroupName: str) -> Dict: Deletes an Amazon EKS managed node group from a specified cluster. :param clusterName: The name of the Amazon EKS Cluster that is associated with your nodegroup. - :type clusterName: str :param nodegroupName: The name of the nodegroup to delete. 
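A sketch of the EKS control-plane calls above, assuming the hook class is exposed as ``EksHook`` at this point in the codebase; the role ARN and subnet ids are placeholders:

```python
from airflow.providers.amazon.aws.hooks.eks import ClusterStates, EksHook

hook = EksHook(aws_conn_id="aws_default", region_name="us-east-1")

hook.create_cluster(
    name="example-cluster",
    roleArn="arn:aws:iam::123456789012:role/eks-cluster-role",  # placeholder
    resourcesVpcConfig={"subnetIds": ["subnet-12345", "subnet-67890"]},
)
state = hook.get_cluster_state(clusterName="example-cluster")
if state == ClusterStates.ACTIVE:
    print("control plane is ready")
```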
- :type nodegroupName: str :return: Returns descriptive information about the deleted EKS Managed Nodegroup. :rtype: Dict @@ -261,9 +246,7 @@ def delete_fargate_profile(self, clusterName: str, fargateProfileName: str) -> D Deletes an AWS Fargate profile from a specified Amazon EKS cluster. :param clusterName: The name of the Amazon EKS cluster associated with the Fargate profile to delete. - :type clusterName: str :param fargateProfileName: The name of the Fargate profile to delete. - :type fargateProfileName: str :return: Returns descriptive information about the deleted Fargate profile. :rtype: Dict @@ -286,9 +269,7 @@ def describe_cluster(self, name: str, verbose: bool = False) -> Dict: Returns descriptive information about an Amazon EKS Cluster. :param name: The name of the cluster to describe. - :type name: str :param verbose: Provides additional logging if set to True. Defaults to False. - :type verbose: bool :return: Returns descriptive information about a specific EKS Cluster. :rtype: Dict @@ -310,11 +291,8 @@ def describe_nodegroup(self, clusterName: str, nodegroupName: str, verbose: bool Returns descriptive information about an Amazon EKS managed node group. :param clusterName: The name of the Amazon EKS Cluster associated with the nodegroup. - :type clusterName: str :param nodegroupName: The name of the nodegroup to describe. - :type nodegroupName: str :param verbose: Provides additional logging if set to True. Defaults to False. - :type verbose: bool :return: Returns descriptive information about a specific EKS Nodegroup. :rtype: Dict @@ -343,11 +321,8 @@ def describe_fargate_profile( Returns descriptive information about an AWS Fargate profile. :param clusterName: The name of the Amazon EKS Cluster associated with the Fargate profile. - :type clusterName: str :param fargateProfileName: The name of the Fargate profile to describe. - :type fargateProfileName: str :param verbose: Provides additional logging if set to True. Defaults to False. - :type verbose: bool :return: Returns descriptive information about an AWS Fargate profile. :rtype: Dict @@ -375,7 +350,6 @@ def get_cluster_state(self, clusterName: str) -> ClusterStates: Returns the current status of a given Amazon EKS Cluster. :param clusterName: The name of the cluster to check. - :type clusterName: str :return: Returns the current status of a given Amazon EKS Cluster. :rtype: ClusterStates @@ -394,9 +368,7 @@ def get_fargate_profile_state(self, clusterName: str, fargateProfileName: str) - Returns the current status of a given AWS Fargate profile. :param clusterName: The name of the Amazon EKS Cluster associated with the Fargate profile. - :type clusterName: str :param fargateProfileName: The name of the Fargate profile to check. - :type fargateProfileName: str :return: Returns the current status of a given AWS Fargate profile. :rtype: AWS FargateProfileStates @@ -421,9 +393,7 @@ def get_nodegroup_state(self, clusterName: str, nodegroupName: str) -> Nodegroup Returns the current status of a given Amazon EKS managed node group. :param clusterName: The name of the Amazon EKS Cluster associated with the nodegroup. - :type clusterName: str :param nodegroupName: The name of the nodegroup to check. - :type nodegroupName: str :return: Returns the current status of a given Amazon EKS Nodegroup. :rtype: NodegroupStates @@ -449,7 +419,6 @@ def list_clusters( Lists all Amazon EKS Clusters in your AWS account. :param verbose: Provides additional logging if set to True. Defaults to False. 
- :type verbose: bool :return: A List containing the cluster names. :rtype: List @@ -468,9 +437,7 @@ def list_nodegroups( Lists all Amazon EKS managed node groups associated with the specified cluster. :param clusterName: The name of the Amazon EKS Cluster containing nodegroups to list. - :type clusterName: str :param verbose: Provides additional logging if set to True. Defaults to False. - :type verbose: bool :return: A List of nodegroup names within the given cluster. :rtype: List @@ -489,9 +456,7 @@ def list_fargate_profiles( Lists all AWS Fargate profiles associated with the specified cluster. :param clusterName: The name of the Amazon EKS Cluster containing Fargate profiles to list. - :type clusterName: str :param verbose: Provides additional logging if set to True. Defaults to False. - :type verbose: bool :return: A list of Fargate profile names within a given cluster. :rtype: List @@ -508,11 +473,8 @@ def _list_all(self, api_call: Callable, response_key: str, verbose: bool) -> Lis Repeatedly calls a provided boto3 API Callable and collates the responses into a List. :param api_call: The api command to execute. - :type api_call: Callable :param response_key: Which dict key to collect into the final list. - :type response_key: str :param verbose: Provides additional logging if set to True. Defaults to False. - :type verbose: bool :return: A List of the combined results of the provided API call. :rtype: List @@ -544,9 +506,7 @@ def generate_config_file( Writes the kubeconfig file given an EKS Cluster. :param eks_cluster_name: The name of the cluster to generate kubeconfig file for. - :type eks_cluster_name: str :param pod_namespace: The namespace to run within kubernetes. - :type pod_namespace: str """ if pod_username: warnings.warn( diff --git a/airflow/providers/amazon/aws/hooks/elasticache_replication_group.py b/airflow/providers/amazon/aws/hooks/elasticache_replication_group.py index d6ddb9cb661c5..47af28845d1aa 100644 --- a/airflow/providers/amazon/aws/hooks/elasticache_replication_group.py +++ b/airflow/providers/amazon/aws/hooks/elasticache_replication_group.py @@ -28,13 +28,10 @@ class ElastiCacheReplicationGroupHook(AwsBaseHook): :param max_retries: Max retries for checking availability of and deleting replication group If this is not supplied then this is defaulted to 10 - :type max_retries: int :param exponential_back_off_factor: Multiplication factor for deciding next sleep time If this is not supplied then this is defaulted to 1 - :type exponential_back_off_factor: float :param initial_poke_interval: Initial sleep time in seconds If this is not supplied then this is defaulted to 60 seconds - :type initial_poke_interval: float """ TERMINAL_STATES = frozenset({"available", "create-failed", "deleting"}) @@ -59,7 +56,6 @@ def create_replication_group(self, config: dict) -> dict: Call ElastiCache API for creating a replication group :param config: Configuration for creating the replication group - :type config: dict :return: Response from ElastiCache create replication group API :rtype: dict """ @@ -70,7 +66,6 @@ def delete_replication_group(self, replication_group_id: str) -> dict: Call ElastiCache API for deleting a replication group :param replication_group_id: ID of replication group to delete - :type replication_group_id: str :return: Response from ElastiCache delete replication group API :rtype: dict """ @@ -81,7 +76,6 @@ def describe_replication_group(self, replication_group_id: str) -> dict: Call ElastiCache API for describing a replication group :param 
replication_group_id: ID of replication group to describe - :type replication_group_id: str :return: Response from ElastiCache describe replication group API :rtype: dict """ @@ -92,7 +86,6 @@ def get_replication_group_status(self, replication_group_id: str) -> str: Get current status of replication group :param replication_group_id: ID of replication group to check for status - :type replication_group_id: str :return: Current status of replication group :rtype: str """ @@ -103,7 +96,6 @@ def is_replication_group_available(self, replication_group_id: str) -> bool: Helper for checking if replication group is available or not :param replication_group_id: ID of replication group to check for availability - :type replication_group_id: str :return: True if available else False :rtype: bool """ @@ -120,16 +112,12 @@ def wait_for_availability( Check if replication group is available or not by performing a describe over it :param replication_group_id: ID of replication group to check for availability - :type replication_group_id: str :param initial_sleep_time: Initial sleep time in seconds If this is not supplied then this is defaulted to class level value - :type initial_sleep_time: float :param exponential_back_off_factor: Multiplication factor for deciding next sleep time If this is not supplied then this is defaulted to class level value - :type exponential_back_off_factor: float :param max_retries: Max retries for checking availability of replication group If this is not supplied then this is defaulted to class level value - :type max_retries: int :return: True if replication is available else False :rtype: bool """ @@ -179,16 +167,12 @@ def wait_for_deletion( Helper for deleting a replication group ensuring it is either deleted or can't be deleted :param replication_group_id: ID of replication to delete - :type replication_group_id: str :param initial_sleep_time: Initial sleep time in second If this is not supplied then this is defaulted to class level value - :type initial_sleep_time: float :param exponential_back_off_factor: Multiplication factor for deciding next sleep time If this is not supplied then this is defaulted to class level value - :type exponential_back_off_factor: float :param max_retries: Max retries for checking availability of replication group If this is not supplied then this is defaulted to class level value - :type max_retries: int :return: Response from ElastiCache delete replication group API and flag to identify if deleted or not :rtype: (dict, bool) """ @@ -259,16 +243,12 @@ def ensure_delete_replication_group( Delete a replication group ensuring it is either deleted or can't be deleted :param replication_group_id: ID of replication to delete - :type replication_group_id: str :param initial_sleep_time: Initial sleep time in second If this is not supplied then this is defaulted to class level value - :type initial_sleep_time: float :param exponential_back_off_factor: Multiplication factor for deciding next sleep time If this is not supplied then this is defaulted to class level value - :type exponential_back_off_factor: float :param max_retries: Max retries for checking availability of replication group If this is not supplied then this is defaulted to class level value - :type max_retries: int :return: Response from ElastiCache delete replication group API :rtype: dict :raises AirflowException: If replication group is not deleted diff --git a/airflow/providers/amazon/aws/hooks/emr.py b/airflow/providers/amazon/aws/hooks/emr.py index 22e3d1901341b..fa56920a064b7 
100644 --- a/airflow/providers/amazon/aws/hooks/emr.py +++ b/airflow/providers/amazon/aws/hooks/emr.py @@ -52,9 +52,7 @@ def get_cluster_id_by_name(self, emr_cluster_name: str, cluster_states: List[str Will return only if single id is found. :param emr_cluster_name: Name of a cluster to find - :type emr_cluster_name: str :param cluster_states: State(s) of cluster to find - :type cluster_states: list :return: id of the EMR cluster """ response = self.get_conn().list_clusters(ClusterStates=cluster_states) @@ -103,7 +101,6 @@ class EmrContainerHook(AwsBaseHook): :class:`~airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook` :param virtual_cluster_id: Cluster ID of the EMR on EKS virtual cluster - :type virtual_cluster_id: str """ INTERMEDIATE_STATES = ( @@ -138,19 +135,13 @@ def submit_job( See: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/emr-containers.html#EMRContainers.Client.start_job_run # noqa: E501 :param name: The name of the job run. - :type name: str :param execution_role_arn: The IAM role ARN associated with the job run. - :type execution_role_arn: str :param release_label: The Amazon EMR release version to use for the job run. - :type release_label: str :param job_driver: Job configuration details, e.g. the Spark job parameters. - :type job_driver: dict :param configuration_overrides: The configuration overrides for the job run, specifically either application configuration or monitoring configuration. - :type configuration_overrides: dict :param client_request_token: The client idempotency token of the job run request. Use this if you want to specify a unique ID to prevent two jobs from getting started. - :type client_request_token: str :return: Job ID """ params = { @@ -181,7 +172,6 @@ def get_job_failure_reason(self, job_id: str) -> Optional[str]: Fetch the reason for a job failure (e.g. error message). Returns None or reason string. :param job_id: Id of submitted job run - :type job_id: str :return: str """ # We absorb any errors if we can't retrieve the job status @@ -207,7 +197,6 @@ def check_query_status(self, job_id: str) -> Optional[str]: Fetch the status of submitted job run. Returns None or one of valid query states. See: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/emr-containers.html#EMRContainers.Client.describe_job_run # noqa: E501 :param job_id: Id of submitted job run - :type job_id: str :return: str """ try: @@ -232,11 +221,8 @@ def poll_query_status( Returns one of the final states. 
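A sketch combining ``EmrContainerHook.submit_job`` with the polling described above; the virtual cluster id, execution role ARN, and script location are placeholders:

```python
from airflow.providers.amazon.aws.hooks.emr import EmrContainerHook

hook = EmrContainerHook(aws_conn_id="aws_default", virtual_cluster_id="abcdef123456")

job_id = hook.submit_job(
    name="example-spark-job",
    execution_role_arn="arn:aws:iam::123456789012:role/emr-eks-job-role",
    release_label="emr-6.3.0-latest",
    job_driver={
        "sparkSubmitJobDriver": {"entryPoint": "s3://my-bucket/scripts/job.py"}
    },
    configuration_overrides={},
)
final_state = hook.poll_query_status(job_id, max_tries=30, poll_interval=60)
```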
:param job_id: Id of submitted job run - :type job_id: str :param max_tries: Number of times to poll for query state before function exits - :type max_tries: int :param poll_interval: Time (in seconds) to wait between calls to check query status on EMR - :type poll_interval: int :return: str """ try_number = 1 @@ -267,7 +253,6 @@ def stop_query(self, job_id: str) -> Dict: Cancel the submitted job_run :param job_id: Id of submitted job_run - :type job_id: str :return: dict """ return self.conn.cancel_job_run( diff --git a/airflow/providers/amazon/aws/hooks/glacier.py b/airflow/providers/amazon/aws/hooks/glacier.py index c2e0509ef2f8a..00c4b884ae262 100644 --- a/airflow/providers/amazon/aws/hooks/glacier.py +++ b/airflow/providers/amazon/aws/hooks/glacier.py @@ -34,7 +34,6 @@ def retrieve_inventory(self, vault_name: str) -> Dict[str, Any]: Initiate an Amazon Glacier inventory-retrieval job :param vault_name: the Glacier vault on which job is executed - :type vault_name: str """ job_params = {'Type': 'inventory-retrieval'} self.log.info("Retrieving inventory for vault: %s", vault_name) @@ -48,9 +47,7 @@ def retrieve_inventory_results(self, vault_name: str, job_id: str) -> Dict[str, Retrieve the results of an Amazon Glacier inventory-retrieval job :param vault_name: the Glacier vault on which job is executed - :type vault_name: string :param job_id: the job ID was returned by retrieve_inventory() - :type job_id: str """ self.log.info("Retrieving the job results for vault: %s...", vault_name) response = self.get_conn().get_job_output(vaultName=vault_name, jobId=job_id) @@ -62,9 +59,7 @@ def describe_job(self, vault_name: str, job_id: str) -> Dict[str, Any]: inventory-retrieval job :param vault_name: the Glacier vault on which job is executed - :type vault_name: string :param job_id: the job ID was returned by retrieve_inventory() - :type job_id: str """ self.log.info("Retrieving status for vault: %s and job %s", vault_name, job_id) response = self.get_conn().describe_job(vaultName=vault_name, jobId=job_id) diff --git a/airflow/providers/amazon/aws/hooks/glue.py b/airflow/providers/amazon/aws/hooks/glue.py index 3fbb1fa7555b9..f0170f358f73a 100644 --- a/airflow/providers/amazon/aws/hooks/glue.py +++ b/airflow/providers/amazon/aws/hooks/glue.py @@ -29,25 +29,15 @@ class GlueJobHook(AwsBaseHook): Interact with AWS Glue - create job, trigger, crawler :param s3_bucket: S3 bucket where logs and local etl script will be uploaded - :type s3_bucket: Optional[str] :param job_name: unique job name per AWS account - :type job_name: Optional[str] :param desc: job description - :type desc: Optional[str] :param concurrent_run_limit: The maximum number of concurrent runs allowed for a job - :type concurrent_run_limit: int :param script_location: path to etl script on s3 - :type script_location: Optional[str] :param retry_limit: Maximum number of times to retry this job if it fails - :type retry_limit: int :param num_of_dpus: Number of AWS Glue DPUs to allocate to this Job - :type num_of_dpus: int :param region_name: aws region name (example: us-east-1) - :type region_name: Optional[str] :param iam_role_name: AWS IAM Role for Glue Job Execution - :type iam_role_name: Optional[str] :param create_job_kwargs: Extra arguments for Glue Job Creation - :type create_job_kwargs: Optional[dict] """ JOB_POLL_INTERVAL = 6 # polls job status after every JOB_POLL_INTERVAL seconds @@ -139,9 +129,7 @@ def get_job_state(self, job_name: str, run_id: str) -> str: Get state of the Glue job. 
The job state can be running, finished, failed, stopped or timeout. :param job_name: unique job name per AWS account - :type job_name: str :param run_id: The job-run ID of the predecessor job run - :type run_id: str :return: State of the Glue job """ glue_client = self.get_conn() @@ -155,9 +143,7 @@ def job_completion(self, job_name: str, run_id: str) -> Dict[str, str]: fails and return final state if finished. Raises AirflowException when the job failed :param job_name: unique job name per AWS account - :type job_name: str :param run_id: The job-run ID of the predecessor job run - :type run_id: str :return: Dict of JobRunState and JobRunId """ failed_states = ['FAILED', 'TIMEOUT'] diff --git a/airflow/providers/amazon/aws/hooks/glue_catalog.py b/airflow/providers/amazon/aws/hooks/glue_catalog.py index 2771499a7df2e..fc9c353e084b0 100644 --- a/airflow/providers/amazon/aws/hooks/glue_catalog.py +++ b/airflow/providers/amazon/aws/hooks/glue_catalog.py @@ -49,17 +49,12 @@ def get_partitions( Retrieves the partition values for a table. :param database_name: The name of the catalog database where the partitions reside. - :type database_name: str :param table_name: The name of the partitions' table. - :type table_name: str :param expression: An expression filtering the partitions to be returned. Please see official AWS documentation for further information. https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html#aws-glue-api-catalog-partitions-GetPartitions - :type expression: str :param page_size: pagination size - :type page_size: int :param max_items: maximum items to return - :type max_items: int :return: set of partition values where each value is a tuple since a partition may be composed of multiple columns. For example: ``{('2018-01-01','1'), ('2018-01-01','2')}`` @@ -86,12 +81,9 @@ def check_for_partition(self, database_name: str, table_name: str, expression: s Checks whether a partition exists :param database_name: Name of hive database (schema) @table belongs to - :type database_name: str :param table_name: Name of hive table @partition belongs to - :type table_name: str :expression: Expression that matches the partitions to check for (eg `a = 'b' AND c = 'd'`) - :type expression: str :rtype: bool >>> hook = GlueCatalogHook() @@ -108,9 +100,7 @@ def get_table(self, database_name: str, table_name: str) -> dict: Get the information of the table :param database_name: Name of hive database (schema) @table belongs to - :type database_name: str :param table_name: Name of hive table - :type table_name: str :rtype: dict >>> hook = GlueCatalogHook() @@ -126,9 +116,7 @@ def get_table_location(self, database_name: str, table_name: str) -> str: Get the physical location of the table :param database_name: Name of hive database (schema) @table belongs to - :type database_name: str :param table_name: Name of hive table - :type table_name: str :return: str """ table = self.get_table(database_name, table_name) diff --git a/airflow/providers/amazon/aws/hooks/glue_crawler.py b/airflow/providers/amazon/aws/hooks/glue_crawler.py index 9194785aeda32..00d438aaf56aa 100644 --- a/airflow/providers/amazon/aws/hooks/glue_crawler.py +++ b/airflow/providers/amazon/aws/hooks/glue_crawler.py @@ -53,7 +53,6 @@ def has_crawler(self, crawler_name) -> bool: Checks if the crawler already exists :param crawler_name: unique crawler name per AWS account - :type crawler_name: str :return: Returns True if the crawler already exists and False if not. 
""" self.log.info("Checking if crawler already exists: %s", crawler_name) @@ -69,7 +68,6 @@ def get_crawler(self, crawler_name: str) -> dict: Gets crawler configurations :param crawler_name: unique crawler name per AWS account - :type crawler_name: str :return: Nested dictionary of crawler configurations """ return self.glue_client.get_crawler(Name=crawler_name)['Crawler'] @@ -79,7 +77,6 @@ def update_crawler(self, **crawler_kwargs) -> bool: Updates crawler configurations :param crawler_kwargs: Keyword args that define the configurations used for the crawler - :type crawler_kwargs: any :return: True if crawler was updated and false otherwise """ crawler_name = crawler_kwargs['Name'] @@ -101,7 +98,6 @@ def create_crawler(self, **crawler_kwargs) -> str: Creates an AWS Glue Crawler :param crawler_kwargs: Keyword args that define the configurations used to create the crawler - :type crawler_kwargs: any :return: Name of the crawler """ crawler_name = crawler_kwargs['Name'] @@ -114,7 +110,6 @@ def start_crawler(self, crawler_name: str) -> dict: Triggers the AWS Glue crawler :param crawler_name: unique crawler name per AWS account - :type crawler_name: str :return: Empty dictionary """ self.log.info("Starting crawler %s", crawler_name) @@ -128,9 +123,7 @@ def wait_for_crawler_completion(self, crawler_name: str, poll_interval: int = 5) Raises AirflowException if the crawler fails or is cancelled. :param crawler_name: unique crawler name per AWS account - :type crawler_name: str :param poll_interval: Time (in seconds) to wait between two consecutive calls to check crawler status - :type poll_interval: int :return: Crawler's status """ failed_status = ['FAILED', 'CANCELLED'] diff --git a/airflow/providers/amazon/aws/hooks/kinesis.py b/airflow/providers/amazon/aws/hooks/kinesis.py index d3864477fe7bc..8f26b54d64b12 100644 --- a/airflow/providers/amazon/aws/hooks/kinesis.py +++ b/airflow/providers/amazon/aws/hooks/kinesis.py @@ -34,7 +34,6 @@ class FirehoseHook(AwsBaseHook): :class:`~airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook` :param delivery_stream: Name of the delivery stream - :type delivery_stream: str """ def __init__(self, delivery_stream: str, *args, **kwargs) -> None: diff --git a/airflow/providers/amazon/aws/hooks/lambda_function.py b/airflow/providers/amazon/aws/hooks/lambda_function.py index d2dfcc3a74752..56ea7ba81a25f 100644 --- a/airflow/providers/amazon/aws/hooks/lambda_function.py +++ b/airflow/providers/amazon/aws/hooks/lambda_function.py @@ -33,13 +33,9 @@ class LambdaHook(AwsBaseHook): :class:`~airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook` :param function_name: AWS Lambda Function Name - :type function_name: str :param log_type: Tail Invocation Request - :type log_type: str :param qualifier: AWS Lambda Function Version or Alias Name - :type qualifier: str :param invocation_type: AWS Lambda Invocation Type (RequestResponse, Event etc) - :type invocation_type: str """ def __init__( diff --git a/airflow/providers/amazon/aws/hooks/logs.py b/airflow/providers/amazon/aws/hooks/logs.py index bb22ae13d7337..05e49f1f9f181 100644 --- a/airflow/providers/amazon/aws/hooks/logs.py +++ b/airflow/providers/amazon/aws/hooks/logs.py @@ -53,17 +53,12 @@ def get_log_events( items that are available at the current moment. :param log_group: The name of the log group. - :type log_group: str :param log_stream_name: The name of the specific stream. - :type log_stream_name: str :param start_time: The time stamp value to start reading the logs from (default: 0). 
- :type start_time: int :param skip: The number of log entries to skip at the start (default: 0). This is for when there are multiple entries at the same timestamp. - :type skip: int :param start_from_head: whether to start from the beginning (True) of the log or at the end of the log (False). - :type start_from_head: bool :rtype: dict :return: | A CloudWatch log event with the following key-value pairs: | 'timestamp' (int): The time in milliseconds of the event. diff --git a/airflow/providers/amazon/aws/hooks/redshift_cluster.py b/airflow/providers/amazon/aws/hooks/redshift_cluster.py index aa45d77175011..88b0e85308ab7 100644 --- a/airflow/providers/amazon/aws/hooks/redshift_cluster.py +++ b/airflow/providers/amazon/aws/hooks/redshift_cluster.py @@ -31,7 +31,6 @@ class RedshiftHook(AwsBaseHook): :class:`~airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook` :param aws_conn_id: The Airflow connection used for AWS credentials. - :type aws_conn_id: str """ def __init__(self, *args, **kwargs) -> None: @@ -44,11 +43,6 @@ def cluster_status(self, cluster_identifier: str) -> str: Return status of a cluster :param cluster_identifier: unique identifier of a cluster - :type cluster_identifier: str - :param skip_final_cluster_snapshot: determines cluster snapshot creation - :type skip_final_cluster_snapshot: bool - :param final_cluster_snapshot_identifier: Optional[str] - :type final_cluster_snapshot_identifier: Optional[str] """ try: response = self.get_conn().describe_clusters(ClusterIdentifier=cluster_identifier)['Clusters'] @@ -66,11 +62,8 @@ def delete_cluster( Delete a cluster and optionally create a snapshot :param cluster_identifier: unique identifier of a cluster - :type cluster_identifier: str :param skip_final_cluster_snapshot: determines cluster snapshot creation - :type skip_final_cluster_snapshot: bool :param final_cluster_snapshot_identifier: name of final cluster snapshot - :type final_cluster_snapshot_identifier: str """ final_cluster_snapshot_identifier = final_cluster_snapshot_identifier or '' @@ -86,7 +79,6 @@ def describe_cluster_snapshots(self, cluster_identifier: str) -> Optional[List[s Gets a list of snapshots for a cluster :param cluster_identifier: unique identifier of a cluster - :type cluster_identifier: str """ response = self.get_conn().describe_cluster_snapshots(ClusterIdentifier=cluster_identifier) if 'Snapshots' not in response: @@ -101,9 +93,7 @@ def restore_from_cluster_snapshot(self, cluster_identifier: str, snapshot_identi Restores a cluster from its snapshot :param cluster_identifier: unique identifier of a cluster - :type cluster_identifier: str :param snapshot_identifier: unique identifier for a snapshot of a cluster - :type snapshot_identifier: str """ response = self.get_conn().restore_from_cluster_snapshot( ClusterIdentifier=cluster_identifier, SnapshotIdentifier=snapshot_identifier @@ -115,9 +105,7 @@ def create_cluster_snapshot(self, snapshot_identifier: str, cluster_identifier: Creates a snapshot of a cluster :param snapshot_identifier: unique identifier for a snapshot of a cluster - :type snapshot_identifier: str :param cluster_identifier: unique identifier of a cluster - :type cluster_identifier: str """ response = self.get_conn().create_cluster_snapshot( SnapshotIdentifier=snapshot_identifier, diff --git a/airflow/providers/amazon/aws/hooks/redshift_sql.py b/airflow/providers/amazon/aws/hooks/redshift_sql.py index 739b7b57411ed..0b889063cc10a 100644 --- a/airflow/providers/amazon/aws/hooks/redshift_sql.py +++
b/airflow/providers/amazon/aws/hooks/redshift_sql.py @@ -39,7 +39,6 @@ class RedshiftSQLHook(DbApiHook): :param redshift_conn_id: reference to :ref:`Amazon Redshift connection id` - :type redshift_conn_id: str .. note:: get_sqlalchemy_engine() and get_uri() depend on sqlalchemy-amazon-redshift @@ -108,9 +107,7 @@ def get_table_primary_key(self, table: str, schema: Optional[str] = "public") -> """ Helper method that returns the table primary key :param table: Name of the target table - :type table: str - :param table: Name of the target schema, public by default + :param schema: Name of the target schema, public by default - :type table: str :return: Primary key columns list :rtype: List[str] """ diff --git a/airflow/providers/amazon/aws/hooks/s3.py b/airflow/providers/amazon/aws/hooks/s3.py index 141487042dfb8..0f3c0451a42f4 100644 --- a/airflow/providers/amazon/aws/hooks/s3.py +++ b/airflow/providers/amazon/aws/hooks/s3.py @@ -152,7 +152,6 @@ def check_for_bucket(self, bucket_name: Optional[str] = None) -> bool: Check if bucket_name exists. :param bucket_name: the name of the bucket - :type bucket_name: str :return: True if it exists and False if not. :rtype: bool """ @@ -169,7 +168,6 @@ def get_bucket(self, bucket_name: Optional[str] = None) -> str: Returns a boto3.S3.Bucket object :param bucket_name: the name of the bucket - :type bucket_name: str :return: the bucket object to the bucket name. :rtype: boto3.S3.Bucket """ @@ -190,9 +188,7 @@ def create_bucket(self, bucket_name: Optional[str] = None, region_name: Optional Creates an Amazon S3 bucket. :param bucket_name: The name of the bucket - :type bucket_name: str :param region_name: The name of the aws region in which to create the bucket. - :type region_name: str """ if not region_name: region_name = self.get_conn().meta.region_name @@ -209,11 +205,8 @@ def check_for_prefix(self, prefix: str, delimiter: str, bucket_name: Optional[st Checks that a prefix exists in a bucket :param bucket_name: the name of the bucket - :type bucket_name: str :param prefix: a key prefix - :type prefix: str :param delimiter: the delimiter marks key hierarchy. - :type delimiter: str :return: False if the prefix does not exist in the bucket and True if it does. :rtype: bool """ @@ -236,15 +229,10 @@ def list_prefixes( Lists prefixes in a bucket under prefix :param bucket_name: the name of the bucket - :type bucket_name: str :param prefix: a key prefix - :type prefix: str :param delimiter: the delimiter marks key hierarchy. - :type delimiter: str :param page_size: pagination size - :type page_size: int :param max_items: maximum items to return - :type max_items: int :return: a list of matched prefixes :rtype: list """ @@ -281,15 +269,10 @@ def list_keys( Lists keys in a bucket under prefix and not containing delimiter :param bucket_name: the name of the bucket - :type bucket_name: str :param prefix: a key prefix - :type prefix: str :param delimiter: the delimiter marks key hierarchy. - :type delimiter: str :param page_size: pagination size - :type page_size: int :param max_items: maximum items to return - :type max_items: int :return: a list of matched keys :rtype: list """ @@ -320,9 +303,7 @@ def check_for_key(self, key: str, bucket_name: Optional[str] = None) -> bool: Checks if a key exists in a bucket :param key: S3 key that will point to the file - :type key: str :param bucket_name: Name of the bucket in which the file is stored - :type bucket_name: str :return: True if the key exists and False if not.
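As a rough illustration of the existence checks and listing helpers above (bucket, prefix, and key names are hypothetical):

from airflow.providers.amazon.aws.hooks.s3 import S3Hook

hook = S3Hook(aws_conn_id="aws_default")
if hook.check_for_bucket("my-bucket"):
    # Keys under a prefix, capped by max_items via the underlying paginator.
    keys = hook.list_keys(bucket_name="my-bucket", prefix="data/", max_items=100)
    if hook.check_for_key("data/part-0000.csv", bucket_name="my-bucket"):
        print(f"{len(keys)} keys listed; sample key present")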
:rtype: bool """ @@ -342,9 +323,7 @@ def get_key(self, key: str, bucket_name: Optional[str] = None) -> S3Transfer: Returns a boto3.s3.Object :param key: the path to the key - :type key: str :param bucket_name: the name of the bucket - :type bucket_name: str :return: the key object from the bucket :rtype: boto3.s3.Object """ @@ -368,9 +347,7 @@ def read_key(self, key: str, bucket_name: Optional[str] = None) -> str: Reads a key from S3 :param key: S3 key that will point to the file - :type key: str :param bucket_name: Name of the bucket in which the file is stored - :type bucket_name: str :return: the content of the key :rtype: str """ @@ -392,17 +369,11 @@ def select_key( Reads a key with S3 Select. :param key: S3 key that will point to the file - :type key: str :param bucket_name: Name of the bucket in which the file is stored - :type bucket_name: str :param expression: S3 Select expression - :type expression: str :param expression_type: S3 Select expression type - :type expression_type: str :param input_serialization: S3 Select input data serialization format - :type input_serialization: dict :param output_serialization: S3 Select output data serialization format - :type output_serialization: dict :return: retrieved subset of original data by S3 Select :rtype: str @@ -440,11 +411,8 @@ def check_for_wildcard_key( Checks that a key matching a wildcard expression exists in a bucket :param wildcard_key: the path to the key - :type wildcard_key: str :param bucket_name: the name of the bucket - :type bucket_name: str :param delimiter: the delimiter marks key hierarchy - :type delimiter: str :return: True if a key exists and False if not. :rtype: bool """ @@ -462,11 +430,8 @@ def get_wildcard_key( Returns a boto3.s3.Object object matching the wildcard expression :param wildcard_key: the path to the key - :type wildcard_key: str :param bucket_name: the name of the bucket - :type bucket_name: str :param delimiter: the delimiter marks key hierarchy - :type delimiter: str :return: the key object from the bucket or None if none has been found. :rtype: boto3.s3.Object """ @@ -493,23 +458,16 @@ def load_file( Loads a local file to S3 :param filename: path to the file to load. - :type filename: Union[Path, str] :param key: S3 key that will point to the file - :type key: str :param bucket_name: Name of the bucket in which to store the file - :type bucket_name: str :param replace: A flag to decide whether or not to overwrite the key if it already exists. If replace is False and the key exists, an error will be raised. - :type replace: bool :param encrypt: If True, the file will be encrypted on the server-side by S3 and will be stored in an encrypted form while at rest in S3. - :type encrypt: bool :param gzip: If True, the file will be compressed locally - :type gzip: bool :param acl_policy: String specifying the canned ACL policy for the file being uploaded to the S3 bucket. - :type acl_policy: str """ filename = str(filename) if not replace and self.check_for_key(key, bucket_name): @@ -550,24 +508,16 @@ def load_string( boto infrastructure to ship a file to s3. :param string_data: str to set as content for the key. 
- :type string_data: str :param key: S3 key that will point to the file - :type key: str :param bucket_name: Name of the bucket in which to store the file - :type bucket_name: str :param replace: A flag to decide whether or not to overwrite the key if it already exists - :type replace: bool :param encrypt: If True, the file will be encrypted on the server-side by S3 and will be stored in an encrypted form while at rest in S3. - :type encrypt: bool :param encoding: The string to byte encoding - :type encoding: str :param acl_policy: The string to specify the canned ACL policy for the object to be uploaded - :type acl_policy: str :param compression: Type of compression to use, currently only gzip is supported. - :type compression: str """ encoding = encoding or 'utf-8' @@ -606,20 +556,14 @@ def load_bytes( boto infrastructure to ship a file to s3. :param bytes_data: bytes to set as content for the key. - :type bytes_data: bytes :param key: S3 key that will point to the file - :type key: str :param bucket_name: Name of the bucket in which to store the file - :type bucket_name: str :param replace: A flag to decide whether or not to overwrite the key if it already exists - :type replace: bool :param encrypt: If True, the file will be encrypted on the server-side by S3 and will be stored in an encrypted form while at rest in S3. - :type encrypt: bool :param acl_policy: The string to specify the canned ACL policy for the object to be uploaded - :type acl_policy: str """ file_obj = io.BytesIO(bytes_data) self._upload_file_obj(file_obj, key, bucket_name, replace, encrypt, acl_policy) @@ -640,20 +584,14 @@ def load_file_obj( Loads a file object to S3 :param file_obj: The file-like object to set as the content for the S3 key. - :type file_obj: file-like object :param key: S3 key that will point to the file - :type key: str :param bucket_name: Name of the bucket in which to store the file - :type bucket_name: str :param replace: A flag that indicates whether to overwrite the key if it already exists. - :type replace: bool :param encrypt: If True, S3 encrypts the file on the server, and the file is stored in encrypted form at rest in S3. - :type encrypt: bool :param acl_policy: The string to specify the canned ACL policy for the object to be uploaded - :type acl_policy: str """ self._upload_file_obj(file_obj, key, bucket_name, replace, encrypt, acl_policy) @@ -704,25 +642,19 @@ def copy_object( It can be either full s3:// style url or relative path from root level. When it's specified as a full s3:// url, please omit source_bucket_name. - :type source_bucket_key: str :param dest_bucket_key: The key of the object to copy to. The convention to specify `dest_bucket_key` is the same as `source_bucket_key`. - :type dest_bucket_key: str :param source_bucket_name: Name of the S3 bucket where the source object is in. It should be omitted when `source_bucket_key` is provided as a full s3:// url. - :type source_bucket_name: str :param dest_bucket_name: Name of the S3 bucket to where the object is copied. It should be omitted when `dest_bucket_key` is provided as a full s3:// url. - :type dest_bucket_name: str :param source_version_id: Version ID of the source object (OPTIONAL) - :type source_version_id: str :param acl_policy: The string to specify the canned ACL policy for the object to be copied which is private by default. 
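A short sketch of the upload and copy helpers above, with hypothetical bucket and key names; note that copy_object accepts full s3:// URLs, in which case the separate bucket-name arguments are omitted:

from airflow.providers.amazon.aws.hooks.s3 import S3Hook

hook = S3Hook()
hook.load_string("hello", key="data/greeting.txt", bucket_name="my-bucket", replace=True)
print(hook.read_key("data/greeting.txt", bucket_name="my-bucket"))  # -> "hello"
hook.copy_object(
    source_bucket_key="s3://my-bucket/data/greeting.txt",
    dest_bucket_key="s3://my-backup-bucket/data/greeting.txt",
)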
- :type acl_policy: str """ acl_policy = acl_policy or 'private' @@ -760,9 +692,7 @@ def delete_bucket(self, bucket_name: str, force_delete: bool = False) -> None: To delete s3 bucket, delete all s3 bucket objects and then delete the bucket. :param bucket_name: Bucket name - :type bucket_name: str :param force_delete: Enable this to delete bucket even if not empty - :type force_delete: bool :return: None :rtype: None """ @@ -777,7 +707,6 @@ def delete_objects(self, bucket: str, keys: Union[str, list]) -> None: Delete keys from the bucket. :param bucket: Name of the bucket in which you are going to delete object(s) - :type bucket: str :param keys: The key(s) to delete from S3 bucket. When ``keys`` is a string, it's supposed to be the key name of @@ -785,7 +714,6 @@ def delete_objects(self, bucket: str, keys: Union[str, list]) -> None: When ``keys`` is a list, it's supposed to be the list of the keys to delete. - :type keys: str or list """ if isinstance(keys, str): keys = [keys] @@ -812,12 +740,9 @@ def download_file( Downloads a file from the S3 location to the local file system. :param key: The key path in S3. - :type key: str :param bucket_name: The specific bucket to use. - :type bucket_name: Optional[str] :param local_path: The local path to the downloaded file. If no path is provided it will use the system's temporary directory. - :type local_path: Optional[str] :return: the file name. :rtype: str """ @@ -849,15 +774,11 @@ def generate_presigned_url( Generate a presigned url given a client, its method, and arguments :param client_method: The client method to presign for. - :type client_method: str :param params: The parameters normally passed to ClientMethod. - :type params: dict :param expires_in: The number of seconds the presigned url is valid for. By default it expires in an hour (3600 seconds). - :type expires_in: int :param http_method: The http method to use on the generated url. By default, the http method is whatever is used in the method's model. - :type http_method: str :return: The presigned url. :rtype: str """ @@ -877,7 +798,6 @@ def get_bucket_tagging(self, bucket_name: Optional[str] = None) -> Optional[List Gets a List of tags from a bucket. :param bucket_name: The name of the bucket. - :type bucket_name: str :return: A List containing the key/value pairs for the tags :rtype: Optional[List[Dict[str, str]]] """ @@ -902,13 +822,9 @@ def put_bucket_tagging( Overwrites the existing TagSet with provided tags. Must provide either a TagSet or a key/value pair. :param tag_set: A List containing the key/value pairs for the tags. - :type tag_set: List[Dict[str, str]] :param key: The Key for the new TagSet entry. - :type key: str :param value: The Value for the new TagSet entry. - :type value: str :param bucket_name: The name of the bucket. - :type bucket_name: str :return: None :rtype: None """ @@ -935,7 +851,6 @@ def delete_bucket_tagging(self, bucket_name: Optional[str] = None) -> None: Deletes all tags from a bucket. :param bucket_name: The name of the bucket. - :type bucket_name: str :return: None :rtype: None """ diff --git a/airflow/providers/amazon/aws/hooks/sagemaker.py b/airflow/providers/amazon/aws/hooks/sagemaker.py index 9f5caed5c2242..612842462c2e4 100644 --- a/airflow/providers/amazon/aws/hooks/sagemaker.py +++ b/airflow/providers/amazon/aws/hooks/sagemaker.py @@ -68,9 +68,7 @@ def secondary_training_status_changed(current_job_description: dict, prev_job_de Returns true if training job's secondary status message has changed. 
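Before the SageMaker hook, a quick hedged sketch of the S3 housekeeping helpers above (generate_presigned_url and delete_objects); the bucket, key, and expiry values are hypothetical:

from airflow.providers.amazon.aws.hooks.s3 import S3Hook

hook = S3Hook()
# Share a download link without exposing credentials; expires in 10 minutes.
url = hook.generate_presigned_url(
    client_method="get_object",
    params={"Bucket": "my-bucket", "Key": "data/report.csv"},
    expires_in=600,
)
# Then clean up; `keys` accepts a single key name or a list of keys.
hook.delete_objects(bucket="my-bucket", keys=["data/report.csv"])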
:param current_job_description: Current job description, returned from DescribeTrainingJob call. - :type current_job_description: dict :param prev_job_description: Previous job description, returned from DescribeTrainingJob call. - :type prev_job_description: dict :return: Whether the secondary status message of a training job changed or not. """ @@ -101,9 +99,7 @@ def secondary_training_status_message( Returns a string contains start time and the secondary training job status message. :param job_description: Returned response from DescribeTrainingJob call - :type job_description: dict :param prev_description: Previous job description from DescribeTrainingJob call - :type prev_description: dict :return: Job status string to be printed. """ @@ -156,11 +152,8 @@ def tar_and_s3_upload(self, path: str, key: str, bucket: str) -> None: Tar the local file or directory and upload to s3 :param path: local file or directory - :type path: str :param key: s3 key - :type key: str :param bucket: s3 bucket - :type bucket: str :return: None """ with tempfile.TemporaryFile() as temp_file: @@ -179,7 +172,6 @@ def configure_s3_resources(self, config: dict) -> None: Extract the S3 operations from the configuration and execute them. :param config: config of SageMaker operation - :type config: dict :rtype: dict """ s3_operations = config.pop('S3Operations', None) @@ -200,7 +192,6 @@ def check_s3_url(self, s3url: str) -> bool: Check if an S3 URL exists :param s3url: S3 url - :type s3url: str :rtype: bool """ bucket, key = S3Hook.parse_s3_url(s3url) @@ -224,7 +215,6 @@ def check_training_config(self, training_config: dict) -> None: Check if a training configuration is valid :param training_config: training_config - :type training_config: dict :return: None """ if "InputDataConfig" in training_config: @@ -237,7 +227,6 @@ def check_tuning_config(self, tuning_config: dict) -> None: Check if a tuning configuration is valid :param tuning_config: tuning_config - :type tuning_config: dict :return: None """ for channel in tuning_config['TrainingJobDefinition']['InputDataConfig']: @@ -280,13 +269,10 @@ def multi_stream_iter(self, log_group: str, streams: list, positions=None) -> Ge interleaving the events from each stream so they're yielded in timestamp order. :param log_group: The name of the log group. - :type log_group: str :param streams: A list of the log stream names. The position of the stream in this list is the stream number. - :type streams: list :param positions: A list of pairs of (timestamp, skip) which represents the last record read from each stream. - :type positions: list :return: A tuple of (stream number, cloudwatch log event). """ positions = positions or {s: Position(timestamp=0, skip=0) for s in streams} @@ -324,16 +310,12 @@ def create_training_job( Create a training job :param config: the config for training - :type config: dict :param wait_for_completion: if the program should keep running until job finishes - :type wait_for_completion: bool :param check_interval: the time interval in seconds which the operator will check the status of any SageMaker job - :type check_interval: int :param max_ingestion_time: the maximum ingestion time in seconds. Any SageMaker jobs that run longer than this will fail. Setting this to None implies no timeout for any SageMaker job. 
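The create_* methods above all share the wait_for_completion / check_interval / max_ingestion_time polling contract. A hedged sketch of create_training_job; the config is a deliberately abbreviated stand-in for a full boto3 CreateTrainingJob payload, and every name and ARN in it is hypothetical:

from airflow.providers.amazon.aws.hooks.sagemaker import SageMakerHook

hook = SageMakerHook(aws_conn_id="aws_default")
config = {
    # Illustrative skeleton only; a real call needs the remaining
    # CreateTrainingJob fields (InputDataConfig, OutputDataConfig, ...).
    "TrainingJobName": "my-training-job",
    "RoleArn": "arn:aws:iam::123456789012:role/sagemaker-role",
    "AlgorithmSpecification": {
        "TrainingImage": "123456789012.dkr.ecr.us-east-1.amazonaws.com/my-image:latest",
        "TrainingInputMode": "File",
    },
    "ResourceConfig": {"InstanceCount": 1, "InstanceType": "ml.m5.xlarge", "VolumeSizeInGB": 30},
    "StoppingCondition": {"MaxRuntimeInSeconds": 3600},
}
response = hook.create_training_job(
    config,
    wait_for_completion=True,  # poll every check_interval seconds until done
    check_interval=60,
    max_ingestion_time=None,   # None = no client-side timeout
)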
- :type max_ingestion_time: int :return: A response to training job creation """ self.check_training_config(config) @@ -375,16 +357,12 @@ def create_tuning_job( Create a tuning job :param config: the config for tuning - :type config: dict :param wait_for_completion: if the program should keep running until job finishes - :type wait_for_completion: bool :param check_interval: the time interval in seconds which the operator will check the status of any SageMaker job - :type check_interval: int :param max_ingestion_time: the maximum ingestion time in seconds. Any SageMaker jobs that run longer than this will fail. Setting this to None implies no timeout for any SageMaker job. - :type max_ingestion_time: int :return: A response to tuning job creation """ self.check_tuning_config(config) @@ -411,16 +389,12 @@ def create_transform_job( Create a transform job :param config: the config for transform job - :type config: dict :param wait_for_completion: if the program should keep running until job finishes - :type wait_for_completion: bool :param check_interval: the time interval in seconds which the operator will check the status of any SageMaker job - :type check_interval: int :param max_ingestion_time: the maximum ingestion time in seconds. Any SageMaker jobs that run longer than this will fail. Setting this to None implies no timeout for any SageMaker job. - :type max_ingestion_time: int :return: A response to transform job creation """ if "S3DataSource" in config['TransformInput']['DataSource']: @@ -448,16 +422,12 @@ def create_processing_job( Create a processing job :param config: the config for processing job - :type config: dict :param wait_for_completion: if the program should keep running until job finishes - :type wait_for_completion: bool :param check_interval: the time interval in seconds which the operator will check the status of any SageMaker job - :type check_interval: int :param max_ingestion_time: the maximum ingestion time in seconds. Any SageMaker jobs that run longer than this will fail. Setting this to None implies no timeout for any SageMaker job. - :type max_ingestion_time: int :return: A response to transform job creation """ response = self.get_conn().create_processing_job(**config) @@ -476,7 +446,6 @@ def create_model(self, config: dict): Create a model job :param config: the config for model - :type config: dict :return: A response to model creation """ return self.get_conn().create_model(**config) @@ -486,7 +455,6 @@ def create_endpoint_config(self, config: dict): Create an endpoint config :param config: the config for endpoint-config - :type config: dict :return: A response to endpoint config creation """ return self.get_conn().create_endpoint_config(**config) @@ -502,16 +470,12 @@ def create_endpoint( Create an endpoint :param config: the config for endpoint - :type config: dict :param wait_for_completion: if the program should keep running until job finishes - :type wait_for_completion: bool :param check_interval: the time interval in seconds which the operator will check the status of any SageMaker job - :type check_interval: int :param max_ingestion_time: the maximum ingestion time in seconds. Any SageMaker jobs that run longer than this will fail. Setting this to None implies no timeout for any SageMaker job. 
- :type max_ingestion_time: int :return: A response to endpoint creation """ response = self.get_conn().create_endpoint(**config) @@ -537,16 +501,12 @@ def update_endpoint( Update an endpoint :param config: the config for endpoint - :type config: dict :param wait_for_completion: if the program should keep running until job finishes - :type wait_for_completion: bool :param check_interval: the time interval in seconds which the operator will check the status of any SageMaker job - :type check_interval: int :param max_ingestion_time: the maximum ingestion time in seconds. Any SageMaker jobs that run longer than this will fail. Setting this to None implies no timeout for any SageMaker job. - :type max_ingestion_time: int :return: A response to endpoint update """ response = self.get_conn().update_endpoint(**config) @@ -566,7 +526,6 @@ def describe_training_job(self, name: str): Return the training job info associated with the name :param name: the name of the training job - :type name: str :return: A dict contains all the training job info """ return self.get_conn().describe_training_job(TrainingJobName=name) @@ -637,7 +596,6 @@ def describe_tuning_job(self, name: str) -> dict: Return the tuning job info associated with the name :param name: the name of the tuning job - :type name: str :return: A dict contains all the tuning job info """ return self.get_conn().describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=name) @@ -647,7 +605,6 @@ def describe_model(self, name: str) -> dict: Return the SageMaker model info associated with the name :param name: the name of the SageMaker model - :type name: str :return: A dict contains all the model info """ return self.get_conn().describe_model(ModelName=name) @@ -657,7 +614,6 @@ def describe_transform_job(self, name: str) -> dict: Return the transform job info associated with the name :param name: the name of the transform job - :type name: str :return: A dict contains all the transform job info """ return self.get_conn().describe_transform_job(TransformJobName=name) @@ -667,7 +623,6 @@ def describe_processing_job(self, name: str) -> dict: Return the processing job info associated with the name :param name: the name of the processing job - :type name: str :return: A dict contains all the processing job info """ return self.get_conn().describe_processing_job(ProcessingJobName=name) @@ -677,7 +632,6 @@ def describe_endpoint_config(self, name: str) -> dict: Return the endpoint config info associated with the name :param name: the name of the endpoint config - :type name: str :return: A dict contains all the endpoint config info """ return self.get_conn().describe_endpoint_config(EndpointConfigName=name) @@ -685,7 +639,6 @@ def describe_endpoint_config(self, name: str) -> dict: def describe_endpoint(self, name: str) -> dict: """ :param name: the name of the endpoint - :type name: str :return: A dict contains all the endpoint info """ return self.get_conn().describe_endpoint(EndpointName=name) @@ -703,22 +656,16 @@ def check_status( Check status of a SageMaker job :param job_name: name of the job to check status - :type job_name: str :param key: the key of the response dict that points to the state - :type key: str :param describe_function: the function used to retrieve the status - :type describe_function: python callable :param args: the arguments for the function :param check_interval: the time interval in seconds which the operator will check the status of any SageMaker job - :type check_interval: int :param max_ingestion_time: the maximum 
ingestion time in seconds. Any SageMaker jobs that run longer than this will fail. Setting this to None implies no timeout for any SageMaker job. - :type max_ingestion_time: int :param non_terminal_states: the set of nonterminal states - :type non_terminal_states: set :return: response of describe call after job is done """ if not non_terminal_states: @@ -769,20 +716,14 @@ def check_training_status_with_log( job is complete. :param job_name: name of the training job to check status and display logs for - :type job_name: str :param non_terminal_states: the set of non_terminal states - :type non_terminal_states: set :param failed_states: the set of failed states - :type failed_states: set :param wait_for_completion: Whether to keep looking for new log entries until the job completes - :type wait_for_completion: bool :param check_interval: The interval in seconds between polling for new log entries and job completion - :type check_interval: int :param max_ingestion_time: the maximum ingestion time in seconds. Any SageMaker jobs that run longer than this will fail. Setting this to None implies no timeout for any SageMaker job. - :type max_ingestion_time: int :return: None """ sec = 0 diff --git a/airflow/providers/amazon/aws/hooks/secrets_manager.py b/airflow/providers/amazon/aws/hooks/secrets_manager.py index 3d0289a3a7eb5..f1596d4961f4e 100644 --- a/airflow/providers/amazon/aws/hooks/secrets_manager.py +++ b/airflow/providers/amazon/aws/hooks/secrets_manager.py @@ -44,7 +44,6 @@ def get_secret(self, secret_name: str) -> Union[str, bytes]: reflecting format it stored in the AWS Secrets Manager :param secret_name: name of the secrets. - :type secret_name: str :return: Union[str, bytes] with the information about the secrets :rtype: Union[str, bytes] """ @@ -62,7 +61,6 @@ def get_secret_as_dict(self, secret_name: str) -> dict: Retrieve secret value from AWS Secrets Manager in a dict representation :param secret_name: name of the secrets. - :type secret_name: str :return: dict with the information about the secrets :rtype: dict """ diff --git a/airflow/providers/amazon/aws/hooks/sns.py b/airflow/providers/amazon/aws/hooks/sns.py index e26f5fb52f52d..94c83d8d31300 100644 --- a/airflow/providers/amazon/aws/hooks/sns.py +++ b/airflow/providers/amazon/aws/hooks/sns.py @@ -63,11 +63,8 @@ def publish_to_target( Publish a message to a topic or an endpoint. :param target_arn: either a TopicArn or an EndpointArn - :type target_arn: str :param message: the default message you want to send - :param message: str :param subject: subject of message - :type subject: str :param message_attributes: additional attributes to publish for message filtering. This should be a flat dict; the DataType to be sent depends on the type of the value: - str = String - int, float = Number - iterable = String.Array - :type message_attributes: dict """ publish_kwargs: Dict[str, Union[str, dict]] = { 'TargetArn': target_arn, diff --git a/airflow/providers/amazon/aws/hooks/sqs.py b/airflow/providers/amazon/aws/hooks/sqs.py index 89d1399d27ded..c6de9f904879a 100644 --- a/airflow/providers/amazon/aws/hooks/sqs.py +++ b/airflow/providers/amazon/aws/hooks/sqs.py @@ -43,10 +43,8 @@ def create_queue(self, queue_name: str, attributes: Optional[Dict] = None) -> Di Create queue using connection object :param queue_name: name of the queue.
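The typing rules for message_attributes above translate mechanically into boto3 MessageAttributeValue entries. A small hedged sketch; the target ARN and attribute values are hypothetical:

from airflow.providers.amazon.aws.hooks.sns import SnsHook

hook = SnsHook()
hook.publish_to_target(
    target_arn="arn:aws:sns:us-east-1:123456789012:my-topic",  # hypothetical
    message="Nightly export finished",
    subject="airflow-notify",
    message_attributes={
        "environment": "prod",      # str -> String
        "attempt": 1,               # int -> Number
        "dag_tags": ["etl", "s3"],  # iterable -> String.Array
    },
)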
- :type queue_name: str :param attributes: additional attributes for the queue (default: None) For details of the attributes parameter see :py:meth:`SQS.create_queue` - :type attributes: dict :return: dict with the information about the queue For details of the returned value see :py:meth:`SQS.create_queue` @@ -65,14 +63,10 @@ def send_message( Send message to the queue :param queue_url: queue url - :type queue_url: str :param message_body: the contents of the message - :type message_body: str :param delay_seconds: seconds to delay the message - :type delay_seconds: int :param message_attributes: additional attributes for the message (default: None) For details of the attributes parameter see :py:meth:`botocore.client.SQS.send_message` - :type message_attributes: dict :return: dict with the information about the message sent For details of the returned value see :py:meth:`botocore.client.SQS.send_message` diff --git a/airflow/providers/amazon/aws/hooks/step_function.py b/airflow/providers/amazon/aws/hooks/step_function.py index 4af591840014c..97ffb10c04147 100644 --- a/airflow/providers/amazon/aws/hooks/step_function.py +++ b/airflow/providers/amazon/aws/hooks/step_function.py @@ -47,11 +47,8 @@ def start_execution( https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/stepfunctions.html#SFN.Client.start_execution :param state_machine_arn: AWS Step Function State Machine ARN - :type state_machine_arn: str :param name: The name of the execution. - :type name: Optional[str] :param state_machine_input: JSON data input to pass to the State Machine - :type state_machine_input: Union[Dict[str, any], str, None] :return: Execution ARN :rtype: str """ @@ -75,7 +72,6 @@ def describe_execution(self, execution_arn: str) -> dict: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/stepfunctions.html#SFN.Client.describe_execution :param execution_arn: ARN of the State Machine Execution - :type execution_arn: str :return: Dict with Execution details :rtype: dict """ diff --git a/airflow/providers/amazon/aws/log/cloudwatch_task_handler.py b/airflow/providers/amazon/aws/log/cloudwatch_task_handler.py index fcfa4c8c3f649..c975a2cb83fc6 100644 --- a/airflow/providers/amazon/aws/log/cloudwatch_task_handler.py +++ b/airflow/providers/amazon/aws/log/cloudwatch_task_handler.py @@ -37,12 +37,9 @@ class CloudwatchTaskHandler(FileTaskHandler, LoggingMixin): It extends airflow FileTaskHandler and uploads to and reads from Cloudwatch. 
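Returning to the Step Functions hook for a moment, start_execution and describe_execution pair naturally; the state-machine ARN and input below are hypothetical:

from airflow.providers.amazon.aws.hooks.step_function import StepFunctionHook

hook = StepFunctionHook()
execution_arn = hook.start_execution(
    state_machine_arn="arn:aws:states:us-east-1:123456789012:stateMachine:my-sm",
    state_machine_input={"run_date": "2022-01-01"},  # dict or JSON string
)
print(hook.describe_execution(execution_arn)["status"])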
:param base_log_folder: base folder to store logs locally - :type base_log_folder: str :param log_group_arn: ARN of the Cloudwatch log group for remote log storage with format ``arn:aws:logs:{region name}:{account id}:log-group:{group name}`` - :type log_group_arn: str :param filename_template: template for file name (local storage) or log stream name (remote) - :type filename_template: str """ def __init__(self, base_log_folder: str, log_group_arn: str, filename_template: str): diff --git a/airflow/providers/amazon/aws/log/s3_task_handler.py b/airflow/providers/amazon/aws/log/s3_task_handler.py index 4566156847162..574607e0632d3 100644 --- a/airflow/providers/amazon/aws/log/s3_task_handler.py +++ b/airflow/providers/amazon/aws/log/s3_task_handler.py @@ -141,7 +141,6 @@ def s3_log_exists(self, remote_log_location: str) -> bool: Check if remote_log_location exists in remote storage :param remote_log_location: log's location in remote storage - :type remote_log_location: str :return: True if location exists else False """ return self.hook.check_for_key(remote_log_location) @@ -152,10 +151,8 @@ def s3_read(self, remote_log_location: str, return_error: bool = False) -> str: logs are found or there is an error. :param remote_log_location: the log's location in remote storage - :type remote_log_location: str (path) :param return_error: if True, returns a string error message if an error occurs. Otherwise returns '' when an error occurs. - :type return_error: bool :return: the log found at the remote_log_location """ try: @@ -174,12 +171,9 @@ def s3_write(self, log: str, remote_log_location: str, append: bool = True): was created. :param log: the log to write to the remote_log_location - :type log: str :param remote_log_location: the log's location in remote storage - :type remote_log_location: str (path) :param append: if False, any existing log file is overwritten. If True, the new log is appended to any existing logs. - :type append: bool """ try: if append and self.s3_log_exists(remote_log_location): diff --git a/airflow/providers/amazon/aws/operators/athena.py b/airflow/providers/amazon/aws/operators/athena.py index 73e56a44a19bb..6febe2a917532 100644 --- a/airflow/providers/amazon/aws/operators/athena.py +++ b/airflow/providers/amazon/aws/operators/athena.py @@ -41,25 +41,15 @@ class AthenaOperator(BaseOperator): :ref:`howto/operator:AthenaOperator` :param query: Presto to be run on athena. (templated) - :type query: str :param database: Database to select. (templated) - :type database: str :param output_location: s3 path to write the query results into. 
(templated) - :type output_location: str :param aws_conn_id: aws connection to use - :type aws_conn_id: str :param client_request_token: Unique token created by user to avoid multiple executions of same query - :type client_request_token: str :param workgroup: Athena workgroup in which query will be run - :type workgroup: str :param query_execution_context: Context in which query need to be run - :type query_execution_context: dict :param result_configuration: Dict with path to store results in and config related to encryption - :type result_configuration: dict :param sleep_time: Time (in seconds) to wait between two consecutive calls to check query status on Athena - :type sleep_time: int :param max_tries: Number of times to poll for query state before function exits - :type max_tries: int """ ui_color = '#44b5e2' diff --git a/airflow/providers/amazon/aws/operators/batch.py b/airflow/providers/amazon/aws/operators/batch.py index d524ad1598fdb..06fb244bf6bb3 100644 --- a/airflow/providers/amazon/aws/operators/batch.py +++ b/airflow/providers/amazon/aws/operators/batch.py @@ -42,50 +42,37 @@ class BatchOperator(BaseOperator): Execute a job on AWS Batch :param job_name: the name for the job that will run on AWS Batch (templated) - :type job_name: str :param job_definition: the job definition name on AWS Batch - :type job_definition: str :param job_queue: the queue name on AWS Batch - :type job_queue: str :param overrides: the `containerOverrides` parameter for boto3 (templated) - :type overrides: Optional[dict] :param array_properties: the `arrayProperties` parameter for boto3 - :type array_properties: Optional[dict] :param parameters: the `parameters` for boto3 (templated) - :type parameters: Optional[dict] :param job_id: the job ID, usually unknown (None) until the submit_job operation gets the jobId defined by AWS Batch - :type job_id: Optional[str] :param waiters: an :py:class:`.BatchWaiters` object (see note below); if None, polling is used with max_retries and status_retries. - :type waiters: Optional[BatchWaiters] :param max_retries: exponential back-off retries, 4200 = 48 hours; polling is only used when waiters is None - :type max_retries: int :param status_retries: number of HTTP retries to get job status, 10; polling is only used when waiters is None - :type status_retries: int :param aws_conn_id: connection id of AWS credentials / region name. If None, credential boto3 strategy will be used. - :type aws_conn_id: str :param region_name: region name to use in AWS Hook. Override the region_name in connection (if provided) - :type region_name: str :param tags: collection of tags to apply to the AWS Batch job submission if None, no tags are submitted - :type tags: dict .. note:: Any custom waiters must return a waiter for these calls: diff --git a/airflow/providers/amazon/aws/operators/cloud_formation.py b/airflow/providers/amazon/aws/operators/cloud_formation.py index 04e198773faf6..51dcb20c67de7 100644 --- a/airflow/providers/amazon/aws/operators/cloud_formation.py +++ b/airflow/providers/amazon/aws/operators/cloud_formation.py @@ -30,14 +30,11 @@ class CloudFormationCreateStackOperator(BaseOperator): An operator that creates a CloudFormation stack. :param stack_name: stack name (templated) - :type stack_name: str :param cloudformation_parameters: parameters to be passed to CloudFormation. .. 
seealso:: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudformation.html#CloudFormation.Client.create_stack - :type cloudformation_parameters: dict :param aws_conn_id: aws connection to uses - :type aws_conn_id: str """ template_fields: Sequence[str] = ('stack_name',) @@ -64,14 +61,12 @@ class CloudFormationDeleteStackOperator(BaseOperator): An operator that deletes a CloudFormation stack. :param stack_name: stack name (templated) - :type stack_name: str :param cloudformation_parameters: parameters to be passed to CloudFormation. .. seealso:: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudformation.html#CloudFormation.Client.delete_stack :type cloudformation_parameters: dict :param aws_conn_id: aws connection to uses - :type aws_conn_id: str """ template_fields: Sequence[str] = ('stack_name',) diff --git a/airflow/providers/amazon/aws/operators/datasync.py b/airflow/providers/amazon/aws/operators/datasync.py index c677ceb86604e..932f7b31abc25 100644 --- a/airflow/providers/amazon/aws/operators/datasync.py +++ b/airflow/providers/amazon/aws/operators/datasync.py @@ -45,63 +45,49 @@ class DataSyncOperator(BaseOperator): execute the Task if there was 1 Task, or fail if there were many Tasks. :param aws_conn_id: AWS connection to use. - :type aws_conn_id: str :param wait_interval_seconds: Time to wait between two consecutive calls to check TaskExecution status. - :type wait_interval_seconds: int :param max_iterations: Maximum number of consecutive calls to check TaskExecution status. - :type max_iterations: int :param task_arn: AWS DataSync TaskArn to use. If None, then this operator will attempt to either search for an existing Task or attempt to create a new Task. - :type task_arn: str :param source_location_uri: Source location URI to search for. All DataSync Tasks with a LocationArn with this URI will be considered. Example: ``smb://server/subdir`` - :type source_location_uri: str :param destination_location_uri: Destination location URI to search for. All DataSync Tasks with a LocationArn with this URI will be considered. Example: ``s3://airflow_bucket/stuff`` - :type destination_location_uri: str :param allow_random_task_choice: If multiple Tasks match, one must be chosen to execute. If allow_random_task_choice is True then a random one is chosen. - :type allow_random_task_choice: bool :param allow_random_location_choice: If multiple Locations match, one must be chosen when creating a task. If allow_random_location_choice is True then a random one is chosen. - :type allow_random_location_choice: bool :param create_task_kwargs: If no suitable TaskArn is identified, it will be created if ``create_task_kwargs`` is defined. ``create_task_kwargs`` is then used internally like this: ``boto3.create_task(**create_task_kwargs)`` Example: ``{'Name': 'xyz', 'Options': ..., 'Excludes': ..., 'Tags': ...}`` - :type create_task_kwargs: dict :param create_source_location_kwargs: If no suitable LocationArn is found, a Location will be created if ``create_source_location_kwargs`` is defined. 
``create_source_location_kwargs`` is then used internally like this: ``boto3.create_location_xyz(**create_source_location_kwargs)`` The xyz is determined from the prefix of source_location_uri, eg ``smb:/...`` or ``s3:/...`` Example: ``{'Subdirectory': ..., 'ServerHostname': ..., ...}`` - :type create_source_location_kwargs: dict :param create_destination_location_kwargs: If no suitable LocationArn is found, a Location will be created if ``create_destination_location_kwargs`` is defined. ``create_destination_location_kwargs`` is used internally like this: ``boto3.create_location_xyz(**create_destination_location_kwargs)`` The xyz is determined from the prefix of destination_location_uri, eg ``smb:/...` or ``s3:/...`` Example: ``{'S3BucketArn': ..., 'S3Config': {'BucketAccessRoleArn': ...}, ...}`` - :type create_destination_location_kwargs: dict :param update_task_kwargs: If a suitable TaskArn is found or created, it will be updated if ``update_task_kwargs`` is defined. ``update_task_kwargs`` is used internally like this: ``boto3.update_task(TaskArn=task_arn, **update_task_kwargs)`` Example: ``{'Name': 'xyz', 'Options': ..., 'Excludes': ...}`` - :type update_task_kwargs: dict :param task_execution_kwargs: Additional kwargs passed directly when starting the Task execution, used internally like this: ``boto3.start_task_execution(TaskArn=task_arn, **task_execution_kwargs)`` - :type task_execution_kwargs: dict :param delete_task_after_execution: If True then the TaskArn which was executed will be deleted from AWS DataSync on successful completion. - :type delete_task_after_execution: bool :raises AirflowException: If ``task_arn`` was not specified, or if either ``source_location_uri`` or ``destination_location_uri`` were not specified. diff --git a/airflow/providers/amazon/aws/operators/dms.py b/airflow/providers/amazon/aws/operators/dms.py index d78404944a502..aad16155525e8 100644 --- a/airflow/providers/amazon/aws/operators/dms.py +++ b/airflow/providers/amazon/aws/operators/dms.py @@ -35,25 +35,17 @@ class DmsCreateTaskOperator(BaseOperator): :ref:`howto/operator:DmsCreateTaskOperator` :param replication_task_id: Replication task id - :type replication_task_id: str :param source_endpoint_arn: Source endpoint ARN - :type source_endpoint_arn: str :param target_endpoint_arn: Target endpoint ARN - :type target_endpoint_arn: str :param replication_instance_arn: Replication instance ARN - :type replication_instance_arn: str :param table_mappings: Table mappings - :type table_mappings: dict :param migration_type: Migration type ('full-load'|'cdc'|'full-load-and-cdc'), full-load by default. - :type migration_type: str :param create_task_kwargs: Extra arguments for DMS replication task creation. - :type create_task_kwargs: Optional[dict] :param aws_conn_id: The Airflow connection used for AWS credentials. If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: Optional[str] """ template_fields: Sequence[str] = ( @@ -125,13 +117,11 @@ class DmsDeleteTaskOperator(BaseOperator): :ref:`howto/operator:DmsDeleteTaskOperator` :param replication_task_arn: Replication task ARN - :type replication_task_arn: str :param aws_conn_id: The Airflow connection used for AWS credentials. If this is None or empty then the default boto3 behaviour is used. 
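For the DMS operators above, a hedged instantiation of DmsCreateTaskOperator; all identifiers and ARNs are hypothetical, and an enclosing DAG context is assumed:

from airflow.providers.amazon.aws.operators.dms import DmsCreateTaskOperator

create_task = DmsCreateTaskOperator(
    task_id="create_dms_task",
    replication_task_id="my-replication-task",  # hypothetical
    source_endpoint_arn="arn:aws:dms:us-east-1:123456789012:endpoint:SRC",
    target_endpoint_arn="arn:aws:dms:us-east-1:123456789012:endpoint:TGT",
    replication_instance_arn="arn:aws:dms:us-east-1:123456789012:rep:INST",
    table_mappings={
        "rules": [{
            "rule-type": "selection",
            "rule-id": "1",
            "rule-name": "1",
            "object-locator": {"schema-name": "public", "table-name": "%"},
            "rule-action": "include",
        }]
    },
    migration_type="full-load",
)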
If running Airflow in a distributed manner and aws_conn_id is None or empty, then default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: Optional[str] """ template_fields: Sequence[str] = ('replication_task_arn',) @@ -165,13 +155,11 @@ class DmsDescribeTasksOperator(BaseOperator): Describes AWS DMS replication tasks. :param describe_tasks_kwargs: Describe tasks command arguments - :type describe_tasks_kwargs: Optional[dict] :param aws_conn_id: The Airflow connection used for AWS credentials. If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: Optional[str] """ template_fields: Sequence[str] = ('describe_tasks_kwargs',) @@ -209,18 +197,14 @@ class DmsStartTaskOperator(BaseOperator): :ref:`howto/operator:DmsStartTaskOperator` :param replication_task_arn: Replication task ARN - :type replication_task_arn: str :param start_replication_task_type: Replication task start type (default='start-replication') ('start-replication'|'resume-processing'|'reload-target') - :type start_replication_task_type: str :param start_task_kwargs: Extra start replication task arguments - :type start_task_kwargs: Optional[dict] :param aws_conn_id: The Airflow connection used for AWS credentials. If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: Optional[str] """ template_fields: Sequence[str] = ( @@ -267,13 +251,11 @@ class DmsStopTaskOperator(BaseOperator): Stops AWS DMS replication task. :param replication_task_arn: Replication task ARN - :type replication_task_arn: str :param aws_conn_id: The Airflow connection used for AWS credentials. If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: Optional[str] """ template_fields: Sequence[str] = ('replication_task_arn',) diff --git a/airflow/providers/amazon/aws/operators/ec2.py b/airflow/providers/amazon/aws/operators/ec2.py index fa6886acd2447..033760be5b560 100644 --- a/airflow/providers/amazon/aws/operators/ec2.py +++ b/airflow/providers/amazon/aws/operators/ec2.py @@ -31,14 +31,10 @@ class EC2StartInstanceOperator(BaseOperator): Start AWS EC2 instance using boto3. :param instance_id: id of the AWS EC2 instance - :type instance_id: str :param aws_conn_id: aws connection to use - :type aws_conn_id: str :param region_name: (optional) aws region name associated with the client - :type region_name: Optional[str] :param check_interval: time in seconds that the job should wait in between each instance state checks until operation is completed - :type check_interval: float """ template_fields: Sequence[str] = ("instance_id", "region_name") @@ -77,14 +73,10 @@ class EC2StopInstanceOperator(BaseOperator): Stop AWS EC2 instance using boto3. 
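Both EC2 operators above follow the same shape; a DAG-level sketch in which the dag id, task ids, and instance id are hypothetical:

from datetime import datetime

from airflow import DAG
from airflow.providers.amazon.aws.operators.ec2 import (
    EC2StartInstanceOperator,
    EC2StopInstanceOperator,
)

with DAG(dag_id="ec2_bounce", start_date=datetime(2022, 1, 1), schedule_interval=None) as dag:
    start = EC2StartInstanceOperator(
        task_id="start_instance",
        instance_id="i-0123456789abcdef0",  # hypothetical
        check_interval=15,
    )
    stop = EC2StopInstanceOperator(task_id="stop_instance", instance_id="i-0123456789abcdef0")
    start >> stop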
:param instance_id: id of the AWS EC2 instance - :type instance_id: str :param aws_conn_id: aws connection to use - :type aws_conn_id: str :param region_name: (optional) aws region name associated with the client - :type region_name: Optional[str] :param check_interval: time in seconds that the job should wait in between each instance state checks until operation is completed - :type check_interval: float """ template_fields: Sequence[str] = ("instance_id", "region_name") diff --git a/airflow/providers/amazon/aws/operators/ecs.py b/airflow/providers/amazon/aws/operators/ecs.py index 0bbc648fe296e..9dfdb4406fa37 100644 --- a/airflow/providers/amazon/aws/operators/ecs.py +++ b/airflow/providers/amazon/aws/operators/ecs.py @@ -161,68 +161,48 @@ class EcsOperator(BaseOperator): :ref:`howto/operator:EcsOperator` :param task_definition: the task definition name on Elastic Container Service - :type task_definition: str :param cluster: the cluster name on Elastic Container Service - :type cluster: str :param overrides: the same parameter that boto3 will receive (templated): https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ecs.html#ECS.Client.run_task - :type overrides: dict :param aws_conn_id: connection id of AWS credentials / region name. If None, credential boto3 strategy will be used (http://boto3.readthedocs.io/en/latest/guide/configuration.html). - :type aws_conn_id: str :param region_name: region name to use in AWS Hook. Override the region_name in connection (if provided) - :type region_name: str :param launch_type: the launch type on which to run your task ('EC2' or 'FARGATE') - :type launch_type: str :param capacity_provider_strategy: the capacity provider strategy to use for the task. When capacity_provider_strategy is specified, the launch_type parameter is omitted. If no capacity_provider_strategy or launch_type is specified, the default capacity provider strategy for the cluster is used. - :type capacity_provider_strategy: list :param group: the name of the task group associated with the task - :type group: str :param placement_constraints: an array of placement constraint objects to use for the task - :type placement_constraints: list :param placement_strategy: an array of placement strategy objects to use for the task - :type placement_strategy: list :param platform_version: the platform version on which your task is running - :type platform_version: str :param network_configuration: the network configuration for the task - :type network_configuration: dict :param tags: a dictionary of tags in the form of {'tagKey': 'tagValue'}. - :type tags: dict :param awslogs_group: the CloudWatch group where your ECS container logs are stored. Only required if you want logs to be shown in the Airflow UI after your job has finished. - :type awslogs_group: str :param awslogs_region: the region in which your CloudWatch logs are stored. If None, this is the same as the `region_name` parameter. If that is also None, this is the default AWS region based on your connection settings. - :type awslogs_region: str :param awslogs_stream_prefix: the stream prefix that is used for the CloudWatch logs. This is usually based on some custom name combined with the name of the container. Only required if you want logs to be shown in the Airflow UI after your job has finished. - :type awslogs_stream_prefix: str :param awslogs_fetch_interval: the interval that the ECS task log fetcher should wait in between each Cloudwatch logs fetches. 
- :type awslogs_fetch_interval: timedelta :param quota_retry: Config if and how to retry the launch of a new ECS task, to handle transient errors. - :type quota_retry: dict :param reattach: If set to True, will check if the task previously launched by the task_instance is already running. If so, the operator will attach to it instead of starting a new task. This is to avoid relaunching a new task when the connection drops between Airflow and ECS while the task is running (when the Airflow worker is restarted for example). - :type reattach: bool :param number_logs_exception: Number of lines from the last Cloudwatch logs to return in the AirflowException if an ECS task is stopped (to receive Airflow alerts with the logs of what failed in the code running in ECS). - :type number_logs_exception: int """ ui_color = '#f0ede4' diff --git a/airflow/providers/amazon/aws/operators/eks.py b/airflow/providers/amazon/aws/operators/eks.py index ccf0250be9862..cb17cdfb0ab02 100644 --- a/airflow/providers/amazon/aws/operators/eks.py +++ b/airflow/providers/amazon/aws/operators/eks.py @@ -70,15 +70,11 @@ class EksCreateClusterOperator(BaseOperator): :ref:`howto/operator:EksCreateClusterOperator` :param cluster_name: The unique name to give to your Amazon EKS Cluster. (templated) - :type cluster_name: str :param cluster_role_arn: The Amazon Resource Name (ARN) of the IAM role that provides permissions for the Kubernetes control plane to make calls to AWS API operations on your behalf. (templated) - :type cluster_role_arn: str :param resources_vpc_config: The VPC configuration used by the cluster control plane. (templated) - :type resources_vpc_config: Dict :param compute: The type of compute architecture to generate along with the cluster. (templated) Defaults to 'nodegroup' to generate an EKS Managed Nodegroup. - :type compute: str :param create_cluster_kwargs: Optional parameters to pass to the CreateCluster API (templated) :type: Dict :param aws_conn_id: The Airflow connection used for AWS credentials. (templated) @@ -86,18 +82,14 @@ class EksCreateClusterOperator(BaseOperator): running Airflow in a distributed manner and aws_conn_id is None or empty, then the default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: str :param region: Which AWS region the connection should use. (templated) If this is None or empty then the default boto3 behaviour is used. - :type region: str If compute is assigned the value of 'nodegroup': :param nodegroup_name: *REQUIRED* The unique name to give your Amazon EKS managed node group. (templated) - :type nodegroup_name: str :param nodegroup_role_arn: *REQUIRED* The Amazon Resource Name (ARN) of the IAM role to associate with the Amazon EKS managed node group. (templated) - :type nodegroup_role_arn: str :param create_nodegroup_kwargs: Optional parameters to pass to the CreateNodegroup API (templated) :type: Dict @@ -105,12 +97,9 @@ class EksCreateClusterOperator(BaseOperator): If compute is assigned the value of 'fargate': :param fargate_profile_name: *REQUIRED* The unique name to give your AWS Fargate profile. (templated) - :type fargate_profile_name: str :param fargate_pod_execution_role_arn: *REQUIRED* The Amazon Resource Name (ARN) of the pod execution role to use for pods that match the selectors in the AWS Fargate profile. (templated) - :type podExecutionRoleArn: str :param fargate_selectors: The selectors to match for pods to use this AWS Fargate profile. 
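Pulling the EcsOperator parameters above together into one hedged example; the cluster, task definition, container, subnet, and log names are all hypothetical, and an enclosing DAG context is assumed:

from airflow.providers.amazon.aws.operators.ecs import EcsOperator

run_task = EcsOperator(
    task_id="run_ecs_task",
    task_definition="my-task-def",  # hypothetical
    cluster="my-cluster",
    launch_type="FARGATE",
    overrides={"containerOverrides": [{"name": "my-container", "command": ["echo", "hello"]}]},
    network_configuration={"awsvpcConfiguration": {"subnets": ["subnet-0123456789abcdef0"]}},
    # The awslogs_* settings are only needed to surface container logs in the Airflow UI.
    awslogs_group="/ecs/my-task",
    awslogs_stream_prefix="ecs/my-container",
)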
(templated) - :type fargate_selectors: List :param create_fargate_profile_kwargs: Optional parameters to pass to the CreateFargateProfile API (templated) :type: Dict @@ -245,15 +234,11 @@ class EksCreateNodegroupOperator(BaseOperator): :ref:`howto/operator:EksCreateNodegroupOperator` :param cluster_name: The name of the Amazon EKS Cluster to create the managed nodegroup in. (templated) - :type cluster_name: str :param nodegroup_name: The unique name to give your managed nodegroup. (templated) - :type nodegroup_name: str :param nodegroup_subnets: The subnets to use for the Auto Scaling group that is created for the managed nodegroup. (templated) - :type nodegroup_subnets: List[str] :param nodegroup_role_arn: The Amazon Resource Name (ARN) of the IAM role to associate with the managed nodegroup. (templated) - :type nodegroup_role_arn: str :param create_nodegroup_kwargs: Optional parameters to pass to the Create Nodegroup API (templated) :type: Dict :param aws_conn_id: The Airflow connection used for AWS credentials. (templated) @@ -261,10 +246,8 @@ class EksCreateNodegroupOperator(BaseOperator): running Airflow in a distributed manner and aws_conn_id is None or empty, then the default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: str - :param region: Which AWS region the connection should use. (templated) + :param region: Which AWS region the connection should use. (templated) If this is None or empty then the default boto3 behaviour is used. - :type region: str """ @@ -322,14 +305,10 @@ class EksCreateFargateProfileOperator(BaseOperator): :ref:`howto/operator:EksCreateFargateProfileOperator` :param cluster_name: The name of the Amazon EKS cluster to apply the AWS Fargate profile to. (templated) - :type cluster_name: str :param pod_execution_role_arn: The Amazon Resource Name (ARN) of the pod execution role to use for pods that match the selectors in the AWS Fargate profile. (templated) - :type pod_execution_role_arn: str :param selectors: The selectors to match for pods to use this AWS Fargate profile. (templated) - :type selectors: List :param fargate_profile_name: The unique name to give your AWS Fargate profile. (templated) - :type fargate_profile_name: str :param create_fargate_profile_kwargs: Optional parameters to pass to the CreateFargate Profile API (templated) :type: Dict @@ -339,10 +318,8 @@ class EksCreateFargateProfileOperator(BaseOperator): running Airflow in a distributed manner and aws_conn_id is None or empty, then the default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: str :param region: Which AWS region the connection should use. (templated) If this is None or empty then the default boto3 behaviour is used. - :type region: str """ template_fields: Sequence[str] = ( @@ -399,19 +376,15 @@ class EksDeleteClusterOperator(BaseOperator): :ref:`howto/operator:EksDeleteClusterOperator` :param cluster_name: The name of the Amazon EKS Cluster to delete. (templated) - :type cluster_name: str :param force_delete_compute: If True, will delete any attached resources. (templated) Defaults to False. - :type force_delete_compute: bool :param aws_conn_id: The Airflow connection used for AWS credentials. (templated) If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then the default boto3 configuration would be used (and must be maintained on each worker node). 
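With compute='nodegroup' (the default), EksCreateClusterOperator above provisions both the control plane and a managed nodegroup in one task. A hedged sketch; names, ARNs, and subnets are hypothetical, and an enclosing DAG context is assumed:

from airflow.providers.amazon.aws.operators.eks import EksCreateClusterOperator

create_cluster = EksCreateClusterOperator(
    task_id="create_eks_cluster",
    cluster_name="my-cluster",
    cluster_role_arn="arn:aws:iam::123456789012:role/eks-cluster-role",
    resources_vpc_config={"subnetIds": ["subnet-0123", "subnet-4567"]},
    compute="nodegroup",
    nodegroup_name="my-nodegroup",
    nodegroup_role_arn="arn:aws:iam::123456789012:role/eks-node-role",
)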
- :type aws_conn_id: str :param region: Which AWS region the connection should use. (templated) If this is None or empty then the default boto3 behaviour is used. - :type region: str """ @@ -524,18 +497,14 @@ class EksDeleteNodegroupOperator(BaseOperator): :ref:`howto/operator:EksDeleteNodegroupOperator` :param cluster_name: The name of the Amazon EKS Cluster associated with your nodegroup. (templated) - :type cluster_name: str :param nodegroup_name: The name of the nodegroup to delete. (templated) - :type nodegroup_name: str :param aws_conn_id: The Airflow connection used for AWS credentials. (templated) If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then the default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: str :param region: Which AWS region the connection should use. (templated) If this is None or empty then the default boto3 behaviour is used. - :type region: str """ @@ -578,18 +547,14 @@ class EksDeleteFargateProfileOperator(BaseOperator): :ref:`howto/operator:EksDeleteFargateProfileOperator` :param cluster_name: The name of the Amazon EKS cluster associated with your Fargate profile. (templated) - :type cluster_name: str :param fargate_profile_name: The name of the AWS Fargate profile to delete. (templated) - :type fargate_profile_name: str :param aws_conn_id: The Airflow connection used for AWS credentials. (templated) If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then the default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: str :param region: Which AWS region the connection should use. (templated) If this is None or empty then the default boto3 behaviour is used. - :type region: str """ template_fields: Sequence[str] = ( @@ -633,32 +598,24 @@ class EksPodOperator(KubernetesPodOperator): :ref:`howto/operator:EksPodOperator` :param cluster_name: The name of the Amazon EKS Cluster to execute the task on. (templated) - :type cluster_name: str :param cluster_role_arn: The Amazon Resource Name (ARN) of the IAM role that provides permissions for the Kubernetes control plane to make calls to AWS API operations on your behalf. (templated) - :type cluster_role_arn: str :param in_cluster: If True, look for config inside the cluster; if False look for a local file path. - :type in_cluster: bool :param namespace: The namespace in which to execute the pod. (templated) - :type namespace: str :param pod_name: The unique name to give the pod. (templated) - :type pod_name: str :param aws_profile: The named profile containing the credentials for the AWS CLI tool to use. - :type aws_profile: str :param region: Which AWS region the connection should use. (templated) If this is None or empty then the default boto3 behaviour is used. - :type region: str :param aws_conn_id: The Airflow connection used for AWS credentials. (templated) If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then the default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: str :param is_delete_operator_pod: What to do when the pod reaches its final state, or the execution is interrupted. If True, delete the pod; if False, leave the pod.
Current default is False, but this will be changed in the next major release of this provider. - :type is_delete_operator_pod: bool """ diff --git a/airflow/providers/amazon/aws/operators/emr.py b/airflow/providers/amazon/aws/operators/emr.py index 31abb03f6facf..390259cc07165 100644 --- a/airflow/providers/amazon/aws/operators/emr.py +++ b/airflow/providers/amazon/aws/operators/emr.py @@ -42,21 +42,15 @@ class EmrAddStepsOperator(BaseOperator): An operator that adds steps to an existing EMR job_flow. :param job_flow_id: id of the JobFlow to add steps to. (templated) - :type job_flow_id: Optional[str] :param job_flow_name: name of the JobFlow to add steps to. Use as an alternative to passing job_flow_id. The operator will search for the id of a JobFlow with a matching name in one of the states given in cluster_states; exactly one such cluster must exist, or the task will fail. (templated) - :type job_flow_name: Optional[str] :param cluster_states: Acceptable cluster states when searching for JobFlow id by job_flow_name. (templated) - :type cluster_states: list :param aws_conn_id: aws connection to use - :type aws_conn_id: str :param steps: boto3 style steps or reference to a steps file (must be '.json') to be added to the jobflow. (templated) - :type steps: list|str :param do_xcom_push: if True, job_flow_id is pushed to XCom with key job_flow_id. - :type do_xcom_push: bool """ template_fields: Sequence[str] = ('job_flow_id', 'job_flow_name', 'cluster_states', 'steps') @@ -124,29 +118,19 @@ class EmrContainerOperator(BaseOperator): An operator that submits jobs to EMR on EKS virtual clusters. :param name: The name of the job run. - :type name: str :param virtual_cluster_id: The EMR on EKS virtual cluster ID - :type virtual_cluster_id: str :param execution_role_arn: The IAM role ARN associated with the job run. - :type execution_role_arn: str :param release_label: The Amazon EMR release version to use for the job run. - :type release_label: str :param job_driver: Job configuration details, e.g. the Spark job parameters. - :type job_driver: dict :param configuration_overrides: The configuration overrides for the job run, specifically either application configuration or monitoring configuration. - :type configuration_overrides: dict :param client_request_token: The client idempotency token of the job run request. Use this if you want to specify a unique ID to prevent two jobs from getting started. If no token is provided, a UUIDv4 token will be generated for you. - :type client_request_token: str :param aws_conn_id: The Airflow connection used for AWS credentials. - :type aws_conn_id: str :param poll_interval: Time (in seconds) to wait between two consecutive calls to check query status on EMR - :type poll_interval: int :param max_tries: Maximum number of times to wait for the job run to finish. Defaults to None, which will poll until the job is *not* in a pending, submitted, or running state. - :type max_tries: int """ template_fields: Sequence[str] = ( @@ -270,14 +254,10 @@ class EmrCreateJobFlowOperator(BaseOperator): the config from the connection. :param aws_conn_id: aws connection to use - :type aws_conn_id: str :param emr_conn_id: emr connection to use - :type emr_conn_id: str :param job_flow_overrides: boto3 style arguments or reference to an arguments file (must be '.json') to override emr_connection extra.
(templated) - :type job_flow_overrides: dict|str :param region_name: Region name passed to EmrHook - :type region_name: Optional[str] """ template_fields: Sequence[str] = ('job_flow_overrides',) @@ -330,13 +310,9 @@ class EmrModifyClusterOperator(BaseOperator): """ An operator that modifies an existing EMR cluster. :param cluster_id: cluster identifier - :type cluster_id: str :param step_concurrency_level: Concurrency of the cluster - :type step_concurrency_level: int :param aws_conn_id: aws connection to use - :type aws_conn_id: str :param do_xcom_push: if True, cluster_id is pushed to XCom with key cluster_id. - :type do_xcom_push: bool """ template_fields: Sequence[str] = ('cluster_id', 'step_concurrency_level') @@ -378,9 +354,7 @@ class EmrTerminateJobFlowOperator(BaseOperator): Operator to terminate EMR JobFlows. :param job_flow_id: id of the JobFlow to terminate. (templated) - :type job_flow_id: str :param aws_conn_id: aws connection to use - :type aws_conn_id: str """ template_fields: Sequence[str] = ('job_flow_id',) diff --git a/airflow/providers/amazon/aws/operators/glacier.py b/airflow/providers/amazon/aws/operators/glacier.py index 285813995eea3..27904e4d1d0a3 100644 --- a/airflow/providers/amazon/aws/operators/glacier.py +++ b/airflow/providers/amazon/aws/operators/glacier.py @@ -33,9 +33,7 @@ class GlacierCreateJobOperator(BaseOperator): :ref:`howto/operator:GlacierCreateJobOperator` :param aws_conn_id: The reference to the AWS connection details - :type aws_conn_id: str :param vault_name: the Glacier vault on which job is executed - :type vault_name: str """ template_fields: Sequence[str] = ("vault_name",) diff --git a/airflow/providers/amazon/aws/operators/glue.py b/airflow/providers/amazon/aws/operators/glue.py index 2ea0eb1cead39..75f989e30194f 100644 --- a/airflow/providers/amazon/aws/operators/glue.py +++ b/airflow/providers/amazon/aws/operators/glue.py @@ -35,31 +35,18 @@ class GlueJobOperator(BaseOperator): Language support: Python and Scala :param job_name: unique job name per AWS Account - :type job_name: Optional[str] :param script_location: location of ETL script. Must be a local or S3 path - :type script_location: Optional[str] :param job_desc: job description details - :type job_desc: Optional[str] :param concurrent_run_limit: The maximum number of concurrent runs allowed for a job - :type concurrent_run_limit: Optional[int] :param script_args: etl script arguments and AWS Glue arguments (templated) - :type script_args: dict :param retry_limit: The maximum number of times to retry this job if it fails - :type retry_limit: Optional[int] :param num_of_dpus: Number of AWS Glue DPUs to allocate to this Job. - :type num_of_dpus: int :param region_name: aws region name (example: us-east-1) - :type region_name: str :param s3_bucket: S3 bucket where logs and local etl script will be uploaded - :type s3_bucket: Optional[str] :param iam_role_name: AWS IAM Role for Glue Job Execution - :type iam_role_name: Optional[str] :param create_job_kwargs: Extra arguments for Glue Job Creation - :type create_job_kwargs: Optional[dict] :param run_job_kwargs: Extra arguments for Glue Job Run - :type run_job_kwargs: Optional[dict] :param wait_for_completion: Whether or not to wait for job run completion.
(default: True) - :type wait_for_completion: bool """ template_fields: Sequence[str] = ('script_args',) diff --git a/airflow/providers/amazon/aws/operators/glue_crawler.py b/airflow/providers/amazon/aws/operators/glue_crawler.py index b35e3d034231d..0bd27aaeaf0b6 100644 --- a/airflow/providers/amazon/aws/operators/glue_crawler.py +++ b/airflow/providers/amazon/aws/operators/glue_crawler.py @@ -39,11 +39,8 @@ class GlueCrawlerOperator(BaseOperator): schema, format and data types of data stores within the AWS cloud. :param config: Configurations for the AWS Glue crawler - :type config: dict :param aws_conn_id: aws connection to use - :type aws_conn_id: Optional[str] :param poll_interval: Time (in seconds) to wait between two consecutive calls to check crawler status - :type poll_interval: Optional[int] """ ui_color = '#ededed' diff --git a/airflow/providers/amazon/aws/operators/redshift_cluster.py b/airflow/providers/amazon/aws/operators/redshift_cluster.py index 816f27efac280..ec4a88f498c56 100644 --- a/airflow/providers/amazon/aws/operators/redshift_cluster.py +++ b/airflow/providers/amazon/aws/operators/redshift_cluster.py @@ -32,9 +32,7 @@ class RedshiftResumeClusterOperator(BaseOperator): :ref:`howto/operator:RedshiftResumeClusterOperator` :param cluster_identifier: id of the AWS Redshift Cluster - :type cluster_identifier: str :param aws_conn_id: aws connection to use - :type aws_conn_id: str """ template_fields: Sequence[str] = ("cluster_identifier",) @@ -73,9 +71,7 @@ class RedshiftPauseClusterOperator(BaseOperator): :ref:`howto/operator:RedshiftPauseClusterOperator` :param cluster_identifier: id of the AWS Redshift Cluster - :type cluster_identifier: str :param aws_conn_id: aws connection to use - :type aws_conn_id: str """ template_fields: Sequence[str] = ("cluster_identifier",) diff --git a/airflow/providers/amazon/aws/operators/redshift_sql.py b/airflow/providers/amazon/aws/operators/redshift_sql.py index a3cd3f98a37fb..20f0ff0acfbdd 100644 --- a/airflow/providers/amazon/aws/operators/redshift_sql.py +++ b/airflow/providers/amazon/aws/operators/redshift_sql.py @@ -32,17 +32,14 @@ class RedshiftSQLOperator(BaseOperator): For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:RedshiftSQLOperator` - :param sql: the sql code to be executed - :type sql: Can receive a str representing a sql statement, - or an iterable of str (sql statements) + :param sql: the SQL code to be executed as a single string, or + a list of str (sql statements), or a reference to a template file. + Template references are recognized by str ending in '.sql' :param redshift_conn_id: reference to :ref:`Amazon Redshift connection id` - :type redshift_conn_id: str :param parameters: (optional) the parameters to render the SQL query with. - :type parameters: dict or iterable :param autocommit: if True, each command is automatically committed. (default value: False) - :type autocommit: bool """ template_fields: Sequence[str] = ('sql',) diff --git a/airflow/providers/amazon/aws/operators/s3.py b/airflow/providers/amazon/aws/operators/s3.py index 95f4b54c95343..4ba03cb419cc1 100644 --- a/airflow/providers/amazon/aws/operators/s3.py +++ b/airflow/providers/amazon/aws/operators/s3.py @@ -43,15 +43,12 @@ class S3CreateBucketOperator(BaseOperator): :ref:`howto/operator:S3CreateBucketOperator` :param bucket_name: This is the bucket name you want to create - :type bucket_name: str :param aws_conn_id: The Airflow connection used for AWS credentials.
If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: Optional[str] :param region_name: AWS region_name. If not specified, fetched from connection. - :type region_name: Optional[str] """ template_fields: Sequence[str] = ("bucket_name",) @@ -88,15 +85,12 @@ class S3DeleteBucketOperator(BaseOperator): :ref:`howto/operator:S3DeleteBucketOperator` :param bucket_name: This is the bucket name you want to delete - :type bucket_name: str :param force_delete: Forcibly delete all objects in the bucket before deleting the bucket - :type force_delete: bool :param aws_conn_id: The Airflow connection used for AWS credentials. If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: Optional[str] """ template_fields: Sequence[str] = ("bucket_name",) @@ -131,13 +125,11 @@ class S3GetBucketTaggingOperator(BaseOperator): :ref:`howto/operator:S3GetBucketTaggingOperator` :param bucket_name: This is the bucket name you want to reference - :type bucket_name: str :param aws_conn_id: The Airflow connection used for AWS credentials. If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: Optional[str] """ template_fields: Sequence[str] = ("bucket_name",) @@ -167,20 +159,16 @@ class S3PutBucketTaggingOperator(BaseOperator): :ref:`howto/operator:S3PutBucketTaggingOperator` :param bucket_name: The name of the bucket to add tags to. - :type bucket_name: str :param key: The key portion of the key/value pair for a tag to be added. If a key is provided, a value must be provided as well. - :type key: str :param value: The value portion of the key/value pair for a tag to be added. If a value is provided, a key must be provided as well. :param tag_set: A List of key/value pairs. - :type tag_set: List[Dict[str, str]] :param aws_conn_id: The Airflow connection used for AWS credentials. If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then the default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: Optional[str] """ template_fields: Sequence[str] = ("bucket_name",) @@ -224,13 +212,11 @@ class S3DeleteBucketTaggingOperator(BaseOperator): :ref:`howto/operator:S3DeleteBucketTaggingOperator` :param bucket_name: This is the name of the bucket to delete tags from. - :type bucket_name: str :param aws_conn_id: The Airflow connection used for AWS credentials. If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: Optional[str] """ template_fields: Sequence[str] = ("bucket_name",) @@ -263,23 +249,17 @@ class S3CopyObjectOperator(BaseOperator): It can be either a full s3:// style url or a relative path from the root level. When it's specified as a full s3:// url, please omit source_bucket_name.
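Taken together, the bucket-level operators above compose naturally in a DAG. A minimal sketch; the bucket name, region, and DAG id here are hypothetical placeholders:

```python
from datetime import datetime

from airflow import DAG
from airflow.providers.amazon.aws.operators.s3 import (
    S3CreateBucketOperator,
    S3PutBucketTaggingOperator,
)

with DAG(dag_id="s3_bucket_example", start_date=datetime(2022, 1, 1), schedule_interval=None) as dag:
    create_bucket = S3CreateBucketOperator(
        task_id="create_bucket",
        bucket_name="my-example-bucket",  # hypothetical
        region_name="us-east-1",
    )
    tag_bucket = S3PutBucketTaggingOperator(
        task_id="tag_bucket",
        bucket_name="my-example-bucket",
        # Either a single key/value pair or a full tag_set may be given.
        key="env",
        value="staging",
    )
    create_bucket >> tag_bucket
```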
- :type source_bucket_key: str :param dest_bucket_key: The key of the object to copy to. (templated) The convention to specify `dest_bucket_key` is the same as `source_bucket_key`. - :type dest_bucket_key: str :param source_bucket_name: Name of the S3 bucket where the source object is in. (templated) It should be omitted when `source_bucket_key` is provided as a full s3:// url. - :type source_bucket_name: str :param dest_bucket_name: Name of the S3 bucket where the object is copied to. (templated) It should be omitted when `dest_bucket_key` is provided as a full s3:// url. - :type dest_bucket_name: str :param source_version_id: Version ID of the source object (OPTIONAL) - :type source_version_id: str :param aws_conn_id: Connection id of the S3 connection to use - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. @@ -291,10 +271,8 @@ class S3CopyObjectOperator(BaseOperator): - path/to/cert/bundle.pem: A filename of the CA cert bundle to use. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type verify: bool or str :param acl_policy: String specifying the canned ACL policy for the file being uploaded to the S3 bucket. - :type acl_policy: str """ template_fields: Sequence[str] = ( @@ -348,7 +326,6 @@ class S3DeleteObjectsOperator(BaseOperator): Users may specify up to 1000 keys to delete. :param bucket: Name of the bucket in which you are going to delete object(s). (templated) - :type bucket: str :param keys: The key(s) to delete from the S3 bucket. (templated) When ``keys`` is a string, it's supposed to be the key name of the @@ -358,12 +335,9 @@ class S3DeleteObjectsOperator(BaseOperator): keys to delete. You may specify up to 1000 keys. - :type keys: str or list :param prefix: Prefix of objects to delete. (templated) All objects matching this prefix in the bucket will be deleted. - :type prefix: str :param aws_conn_id: Connection id of the S3 connection to use - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. @@ -375,7 +349,6 @@ class S3DeleteObjectsOperator(BaseOperator): - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to use. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type verify: bool or str """ template_fields: Sequence[str] = ('keys', 'bucket', 'prefix') @@ -427,17 +400,11 @@ class S3FileTransformOperator(BaseOperator): omit the transformation script if S3 Select expression is specified. :param source_s3_key: The key to be retrieved from S3. (templated) - :type source_s3_key: str :param dest_s3_key: The key to be written from S3. (templated) - :type dest_s3_key: str :param transform_script: location of the executable transformation script - :type transform_script: str :param select_expression: S3 Select expression - :type select_expression: str :param script_args: arguments for transformation script (templated) - :type script_args: sequence of str :param source_aws_conn_id: source s3 connection - :type source_aws_conn_id: str :param source_verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. You can provide the following values: @@ -450,14 +417,10 @@ class S3FileTransformOperator(BaseOperator): CA cert bundle than the one used by botocore. This is also applicable to ``dest_verify``.
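As the `source_bucket_key`/`dest_bucket_key` convention above implies, the same copy can be written two equivalent ways. A sketch with hypothetical bucket and key names:

```python
from airflow.providers.amazon.aws.operators.s3 import S3CopyObjectOperator

# Relative keys plus explicit bucket names...
copy_relative = S3CopyObjectOperator(
    task_id="copy_relative",
    source_bucket_name="src-bucket",
    source_bucket_key="data/in.csv",
    dest_bucket_name="dst-bucket",
    dest_bucket_key="data/out.csv",
)

# ...or full s3:// urls, in which case the bucket names must be omitted.
copy_full_url = S3CopyObjectOperator(
    task_id="copy_full_url",
    source_bucket_key="s3://src-bucket/data/in.csv",
    dest_bucket_key="s3://dst-bucket/data/out.csv",
)
```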
- :type source_verify: bool or str :param dest_aws_conn_id: destination s3 connection - :type dest_aws_conn_id: str :param dest_verify: Whether or not to verify SSL certificates for S3 connection. See: ``source_verify`` - :type dest_verify: bool or str :param replace: Replace dest S3 key if it already exists - :type replace: bool """ template_fields: Sequence[str] = ('source_s3_key', 'dest_s3_key', 'script_args') @@ -553,14 +516,10 @@ class S3ListOperator(BaseOperator): used by `xcom` in the downstream task. :param bucket: The S3 bucket where to find the objects. (templated) - :type bucket: str :param prefix: Prefix string to filter the objects whose names begin with such prefix. (templated) - :type prefix: str :param delimiter: the delimiter that marks the key hierarchy. (templated) - :type delimiter: str :param aws_conn_id: The connection ID to use when connecting to S3 storage. - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. You can provide the following values: @@ -571,7 +530,6 @@ class S3ListOperator(BaseOperator): - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to use. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type verify: bool or str **Example**: @@ -629,14 +587,10 @@ class S3ListPrefixesOperator(BaseOperator): can be used by `xcom` in the downstream task. :param bucket: The S3 bucket where to find the subfolders. (templated) - :type bucket: str :param prefix: Prefix string to filter the subfolders whose names begin with such prefix. (templated) - :type prefix: str :param delimiter: the delimiter that marks the subfolder hierarchy. (templated) - :type delimiter: str :param aws_conn_id: The connection ID to use when connecting to S3 storage. - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. You can provide the following values: @@ -647,7 +601,6 @@ class S3ListPrefixesOperator(BaseOperator): - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to use. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type verify: bool or str **Example**: diff --git a/airflow/providers/amazon/aws/operators/sagemaker.py b/airflow/providers/amazon/aws/operators/sagemaker.py index a41f7a2002863..d65020e617e2c 100644 --- a/airflow/providers/amazon/aws/operators/sagemaker.py +++ b/airflow/providers/amazon/aws/operators/sagemaker.py @@ -39,9 +39,7 @@ class SageMakerBaseOperator(BaseOperator): """This is the base operator for all SageMaker operators. :param config: The configuration necessary to start a training job (templated) - :type config: dict :param aws_conn_id: The AWS connection ID to use. - :type aws_conn_id: str """ template_fields: Sequence[str] = ('config',) @@ -114,24 +112,17 @@ class SageMakerProcessingOperator(SageMakerBaseOperator): :param config: The configuration necessary to start a processing job (templated). For details of the configuration parameter see :py:meth:`SageMaker.Client.create_processing_job` - :type config: dict :param aws_conn_id: The AWS connection ID to use. - :type aws_conn_id: str :param wait_for_completion: Whether the operator should wait until the processing job finishes.
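The `prefix`/`delimiter` pair on `S3ListOperator` behaves like a directory listing, and the result lands in XCom for downstream tasks. A minimal sketch with a hypothetical bucket and prefix:

```python
from airflow.providers.amazon.aws.operators.s3 import S3ListOperator

# Lists keys such as 'logs/2022-01-01/a.log' directly under the prefix;
# the returned list of keys is pushed to XCom for downstream tasks.
list_logs = S3ListOperator(
    task_id="list_logs",
    bucket="my-example-bucket",  # hypothetical
    prefix="logs/2022-01-01/",
    delimiter="/",
)
```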
- :type wait_for_completion: bool :param print_log: whether the operator should print the cloudwatch log during processing - :type print_log: bool :param check_interval: if wait is set to True, this is the time interval, in seconds, at which the operator will check the status of the processing job - :type check_interval: int :param max_ingestion_time: If wait is set to True, the operation fails if the processing job doesn't finish within max_ingestion_time seconds. If you set this parameter to None, the operation does not time out. - :type max_ingestion_time: int :param action_if_job_exists: Behaviour if the job name already exists. Possible options are "increment" (default) and "fail". - :type action_if_job_exists: str """ def __init__( @@ -201,9 +192,7 @@ class SageMakerEndpointConfigOperator(SageMakerBaseOperator): :param config: The configuration necessary to create an endpoint config. For details of the configuration parameter see :py:meth:`SageMaker.Client.create_endpoint_config` - :type config: dict :param aws_conn_id: The AWS connection ID to use. - :type aws_conn_id: str """ integer_fields = [['ProductionVariants', 'InitialInstanceCount']] @@ -253,19 +242,13 @@ class SageMakerEndpointOperator(SageMakerBaseOperator): For details of the configuration parameter of endpoint_configuration see :py:meth:`SageMaker.Client.create_endpoint` - :type config: dict :param aws_conn_id: The AWS connection ID to use. - :type aws_conn_id: str :param wait_for_completion: Whether the operator should wait until the endpoint creation finishes. - :type wait_for_completion: bool :param check_interval: If wait is set to True, this is the time interval, in seconds, that this operation waits before polling the status of the endpoint creation. - :type check_interval: int :param max_ingestion_time: If wait is set to True, this operation fails if the endpoint creation doesn't finish within max_ingestion_time seconds. If you set this parameter to None it never times out. - :type max_ingestion_time: int :param operation: Whether to create an endpoint or update an endpoint. Must be either 'create' or 'update'. - :type operation: str """ def __init__( @@ -371,18 +354,13 @@ class SageMakerTransformOperator(SageMakerBaseOperator): For details of the configuration parameter of model_config, See: :py:meth:`SageMaker.Client.create_model` - :type config: dict :param aws_conn_id: The AWS connection ID to use. - :type aws_conn_id: str :param wait_for_completion: Set to True to wait until the transform job finishes. - :type wait_for_completion: bool :param check_interval: If wait is set to True, the time interval, in seconds, that this operation waits to check the status of the transform job. - :type check_interval: int :param max_ingestion_time: If wait is set to True, the operation fails if the transform job doesn't finish within max_ingestion_time seconds. If you set this parameter to None, the operation does not time out. - :type max_ingestion_time: int """ def __init__( @@ -452,18 +430,13 @@ class SageMakerTuningOperator(SageMakerBaseOperator): For details of the configuration parameter see :py:meth:`SageMaker.Client.create_hyper_parameter_tuning_job` - :type config: dict :param aws_conn_id: The AWS connection ID to use. - :type aws_conn_id: str :param wait_for_completion: Set to True to wait until the tuning job finishes. - :type wait_for_completion: bool :param check_interval: If wait is set to True, the time interval, in seconds, that this operation waits to check the status of the tuning job.
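The `check_interval`/`max_ingestion_time` pairing documented for these SageMaker operators amounts to a bounded poll loop. A plain-Python sketch of the documented semantics, not the provider's actual implementation:

```python
import time
from typing import Callable, Optional


def wait_for_job(get_status: Callable[[], str], check_interval: int,
                 max_ingestion_time: Optional[int]) -> str:
    """Poll get_status() every check_interval seconds until a terminal state;
    fail once max_ingestion_time seconds have elapsed (None disables the timeout)."""
    waited = 0
    while True:
        status = get_status()
        if status in ("Completed", "Failed", "Stopped"):
            return status
        time.sleep(check_interval)
        waited += check_interval
        if max_ingestion_time is not None and waited >= max_ingestion_time:
            raise TimeoutError(f"job still '{status}' after {waited}s")
```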
- :type check_interval: int :param max_ingestion_time: If wait is set to True, the operation fails if the tuning job doesn't finish within max_ingestion_time seconds. If you set this parameter to None, the operation does not time out. - :type max_ingestion_time: int """ integer_fields = [ @@ -521,9 +494,7 @@ class SageMakerModelOperator(SageMakerBaseOperator): :param config: The configuration necessary to create a model. For details of the configuration parameter see :py:meth:`SageMaker.Client.create_model` - :type config: dict :param aws_conn_id: The AWS connection ID to use. - :type aws_conn_id: str """ def __init__(self, *, config, **kwargs): @@ -554,24 +525,17 @@ class SageMakerTrainingOperator(SageMakerBaseOperator): :param config: The configuration necessary to start a training job (templated). For details of the configuration parameter see :py:meth:`SageMaker.Client.create_training_job` - :type config: dict :param aws_conn_id: The AWS connection ID to use. - :type aws_conn_id: str :param wait_for_completion: Whether the operator should wait until the training job finishes. - :type wait_for_completion: bool :param print_log: whether the operator should print the cloudwatch log during training - :type print_log: bool :param check_interval: if wait is set to True, this is the time interval, in seconds, at which the operator will check the status of the training job - :type check_interval: int :param max_ingestion_time: If wait is set to True, the operation fails if the training job doesn't finish within max_ingestion_time seconds. If you set this parameter to None, the operation does not time out. - :type max_ingestion_time: int :param check_if_job_exists: If set to True, then the operator will check whether a training job already exists for the name in the config. - :type check_if_job_exists: bool :param action_if_job_exists: Behaviour if the job name already exists. Possible options are "increment" (default) and "fail". This is only relevant if check_if diff --git a/airflow/providers/amazon/aws/operators/sns.py b/airflow/providers/amazon/aws/operators/sns.py index 8cd28d0206938..48a436b020745 100644 --- a/airflow/providers/amazon/aws/operators/sns.py +++ b/airflow/providers/amazon/aws/operators/sns.py @@ -31,16 +31,11 @@ class SnsPublishOperator(BaseOperator): Publish a message to Amazon SNS.
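To make the `check_if_job_exists`/`action_if_job_exists` behaviour concrete, here is a minimal training-operator sketch. The `config` dict follows `SageMaker.Client.create_training_job`; every value shown is a placeholder:

```python
from airflow.providers.amazon.aws.operators.sagemaker import SageMakerTrainingOperator

train = SageMakerTrainingOperator(
    task_id="train_model",
    config={
        "TrainingJobName": "my-training-job",  # placeholder
        "RoleArn": "arn:aws:iam::123456789012:role/sagemaker-role",  # placeholder
        # ... AlgorithmSpecification, InputDataConfig, OutputDataConfig,
        # ResourceConfig, StoppingCondition ...
    },
    wait_for_completion=True,
    check_interval=30,
    check_if_job_exists=True,
    action_if_job_exists="increment",  # append a suffix instead of failing on a name clash
)
```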
:param aws_conn_id: aws connection to use - :type aws_conn_id: str :param target_arn: either a TopicArn or an EndpointArn - :type target_arn: str :param message: the default message you want to send (templated) - :type message: str :param subject: the message subject you want to send (templated) - :type subject: str :param message_attributes: the message attributes you want to send as a flat dict (data type will be determined automatically) - :type message_attributes: dict """ template_fields: Sequence[str] = ('message', 'subject', 'message_attributes') diff --git a/airflow/providers/amazon/aws/operators/sqs.py b/airflow/providers/amazon/aws/operators/sqs.py index 595dd1b729b04..6bc2f8da09704 100644 --- a/airflow/providers/amazon/aws/operators/sqs.py +++ b/airflow/providers/amazon/aws/operators/sqs.py @@ -35,16 +35,11 @@ class SqsPublishOperator(BaseOperator): :ref:`howto/operator:SqsPublishOperator` :param sqs_queue: The SQS queue url (templated) - :type sqs_queue: str :param message_content: The message content (templated) - :type message_content: str :param message_attributes: additional attributes for the message (default: None) For details of the attributes parameter see :py:meth:`botocore.client.SQS.send_message` - :type message_attributes: dict :param delay_seconds: message delay (templated) (default: 1 second) - :type delay_seconds: int :param aws_conn_id: AWS connection id (default: aws_default) - :type aws_conn_id: str """ template_fields: Sequence[str] = ('sqs_queue', 'message_content', 'delay_seconds', 'message_attributes') @@ -73,7 +68,6 @@ def execute(self, context: 'Context'): Publish the message to the SQS queue :param context: the context object - :type context: dict :return: dict with information about the message sent For details of the returned dict see :py:meth:`botocore.client.SQS.send_message` :rtype: dict diff --git a/airflow/providers/amazon/aws/operators/step_function.py b/airflow/providers/amazon/aws/operators/step_function.py index b30653e8a812e..b800ea90d34b8 100644 --- a/airflow/providers/amazon/aws/operators/step_function.py +++ b/airflow/providers/amazon/aws/operators/step_function.py @@ -37,15 +37,10 @@ class StepFunctionStartExecutionOperator(BaseOperator): :class:`~airflow.models.BaseOperator` :param state_machine_arn: ARN of the Step Function State Machine - :type state_machine_arn: str :param name: The name of the execution. - :type name: Optional[str] :param state_machine_input: JSON data input to pass to the State Machine - :type state_machine_input: Union[Dict[str, any], str, None] :param aws_conn_id: aws connection to use - :type aws_conn_id: str :param do_xcom_push: if True, execution_arn is pushed to XCom with key execution_arn.
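The `message_attributes` shape on `SqsPublishOperator` is the boto3 `send_message` convention (a flat dict of typed values). A minimal sketch with a hypothetical queue URL:

```python
from airflow.providers.amazon.aws.operators.sqs import SqsPublishOperator

publish = SqsPublishOperator(
    task_id="publish_event",
    sqs_queue="https://sqs.us-east-1.amazonaws.com/123456789012/my-queue",  # hypothetical
    message_content="job finished",
    message_attributes={
        # boto3-style typed attribute values
        "source": {"DataType": "String", "StringValue": "airflow"},
    },
    delay_seconds=0,
)
```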
- :type do_xcom_push: bool """ template_fields: Sequence[str] = ('state_machine_arn', 'name', 'input') @@ -92,9 +87,7 @@ class StepFunctionGetExecutionOutputOperator(BaseOperator): :class:`~airflow.models.BaseOperator` :param execution_arn: ARN of the Step Function State Machine Execution - :type execution_arn: str :param aws_conn_id: aws connection to use, defaults to 'aws_default' - :type aws_conn_id: str """ template_fields: Sequence[str] = ('execution_arn',) diff --git a/airflow/providers/amazon/aws/secrets/secrets_manager.py b/airflow/providers/amazon/aws/secrets/secrets_manager.py index e947673d21acf..262abac781d0f 100644 --- a/airflow/providers/amazon/aws/secrets/secrets_manager.py +++ b/airflow/providers/amazon/aws/secrets/secrets_manager.py @@ -80,29 +80,22 @@ class SecretsManagerBackend(BaseSecretsBackend, LoggingMixin): :param connections_prefix: Specifies the prefix of the secret to read to get Connections. If set to None (null value in the configuration), requests for connections will not be sent to AWS Secrets Manager. If you don't want a connections_prefix, set it as an empty string - :type connections_prefix: str :param variables_prefix: Specifies the prefix of the secret to read to get Variables. If set to None (null value in the configuration), requests for variables will not be sent to AWS Secrets Manager. If you don't want a variables_prefix, set it as an empty string - :type variables_prefix: str :param config_prefix: Specifies the prefix of the secret to read to get Configurations. If set to None (null value in the configuration), requests for configurations will not be sent to AWS Secrets Manager. If you don't want a config_prefix, set it as an empty string - :type config_prefix: str :param profile_name: The name of a profile to use. If not given, then the default profile is used. - :type profile_name: str :param sep: separator used to concatenate secret_prefix and secret_id. Default: "/" - :type sep: str :param full_url_mode: if True, the secrets must be stored as one conn URI in just one field per secret. If False (set it as false in backend_kwargs), you can store the secret using different fields (password, user...). - :type full_url_mode: bool :param extra_conn_words: used only when full_url_mode is set to False and the secrets are stored in different fields of Secrets Manager. You can add more words for each connection part beyond the default ones. The extra words to be searched should be passed as a dict of lists, each list corresponding to a connection part. The optional keys of the dict must be: user, password, host, schema, conn_type.
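To make the `full_url_mode`/`extra_conn_words` contract concrete: with `full_url_mode=False` a connection secret is stored as separate fields rather than one URI. A hypothetical layout, shown as plain Python dicts:

```python
# Secret stored at e.g. 'airflow/connections/my_postgres'
# (connections_prefix + sep + conn_id); all values are illustrative.
secret_fields = {
    "conn_type": "postgres",
    "user": "analytics",
    "password": "s3cret",
    "host": "db.example.com",
    "port": "5432",
}

# backend_kwargs for the secrets backend; 'login' is an extra word that
# should also be accepted for the 'user' connection part (illustrative only).
backend_kwargs = {
    "connections_prefix": "airflow/connections",
    "full_url_mode": False,
    "extra_conn_words": {"user": ["login"]},
}
```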
- :type extra_conn_words: dict """ def __init__( @@ -185,7 +178,6 @@ def get_conn_uri(self, conn_id: str): Get Connection Value :param conn_id: connection id - :type conn_id: str """ if self.connections_prefix is None: return None @@ -233,9 +225,7 @@ def _get_secret(self, path_prefix, secret_id: str) -> Optional[str]: """ Get secret value from Secrets Manager :param path_prefix: Prefix for the Path to get Secret - :type path_prefix: str :param secret_id: Secret Key - :type secret_id: str """ if path_prefix: secrets_path = self.build_path(path_prefix, secret_id, self.sep) diff --git a/airflow/providers/amazon/aws/secrets/systems_manager.py b/airflow/providers/amazon/aws/secrets/systems_manager.py index b9567badb7d22..85b06330f0d0a 100644 --- a/airflow/providers/amazon/aws/secrets/systems_manager.py +++ b/airflow/providers/amazon/aws/secrets/systems_manager.py @@ -49,15 +49,11 @@ class SystemsManagerParameterStoreBackend(BaseSecretsBackend, LoggingMixin): :param connections_prefix: Specifies the prefix of the secret to read to get Connections. If set to None (null), requests for connections will not be sent to AWS SSM Parameter Store. - :type connections_prefix: str :param variables_prefix: Specifies the prefix of the secret to read to get Variables. If set to None (null), requests for variables will not be sent to AWS SSM Parameter Store. - :type variables_prefix: str :param config_prefix: Specifies the prefix of the secret to read to get Configurations. If set to None (null), requests for configurations will not be sent to AWS SSM Parameter Store. - :type config_prefix: str :param profile_name: The name of a profile to use. If not given, then the default profile is used. - :type profile_name: str """ def __init__( @@ -95,7 +91,6 @@ def get_conn_uri(self, conn_id: str) -> Optional[str]: Get param value :param conn_id: connection id - :type conn_id: str """ if self.connections_prefix is None: return None @@ -131,9 +126,7 @@ def _get_secret(self, path_prefix: str, secret_id: str) -> Optional[str]: Get secret value from Parameter Store. :param path_prefix: Prefix for the Path to get Secret - :type path_prefix: str :param secret_id: Secret Key - :type secret_id: str """ ssm_path = self.build_path(path_prefix, secret_id) try: diff --git a/airflow/providers/amazon/aws/sensors/athena.py b/airflow/providers/amazon/aws/sensors/athena.py index dae4be4d875ff..64e9279b4b225 100644 --- a/airflow/providers/amazon/aws/sensors/athena.py +++ b/airflow/providers/amazon/aws/sensors/athena.py @@ -37,15 +37,11 @@ class AthenaSensor(BaseSensorOperator): If the query fails, the task will fail. :param query_execution_id: query_execution_id to check the state of - :type query_execution_id: str :param max_retries: Number of times to poll for query state before returning the current state, defaults to None - :type max_retries: int :param aws_conn_id: aws connection to use, defaults to 'aws_default' - :type aws_conn_id: str :param sleep_time: Time in seconds to wait between two consecutive calls to check query status on athena, defaults to 10 - :type sleep_time: int """ INTERMEDIATE_STATES = ( diff --git a/airflow/providers/amazon/aws/sensors/batch.py b/airflow/providers/amazon/aws/sensors/batch.py index 52692b9f3e1c1..a6ca51ab22fb1 100644 --- a/airflow/providers/amazon/aws/sensors/batch.py +++ b/airflow/providers/amazon/aws/sensors/batch.py @@ -31,9 +31,7 @@ class BatchSensor(BaseSensorOperator): If the job fails, the task will fail.
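For the `AthenaSensor` just described, the execution id typically comes from an upstream Athena task via XCom. A minimal sketch; the upstream task id is hypothetical:

```python
from airflow.providers.amazon.aws.sensors.athena import AthenaSensor

wait_for_query = AthenaSensor(
    task_id="wait_for_query",
    # Pull the execution id pushed by a hypothetical upstream 'run_query' task.
    query_execution_id="{{ ti.xcom_pull(task_ids='run_query') }}",
    sleep_time=10,
    max_retries=60,  # give up after roughly ten minutes of polling
)
```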
:param job_id: Batch job_id to check the state for - :type job_id: str :param aws_conn_id: aws connection to use, defaults to 'aws_default' - :type aws_conn_id: str """ template_fields: Sequence[str] = ('job_id',) diff --git a/airflow/providers/amazon/aws/sensors/cloud_formation.py b/airflow/providers/amazon/aws/sensors/cloud_formation.py index 88f9caff53dc8..bf78b3bcfdd07 100644 --- a/airflow/providers/amazon/aws/sensors/cloud_formation.py +++ b/airflow/providers/amazon/aws/sensors/cloud_formation.py @@ -36,12 +36,9 @@ class CloudFormationCreateStackSensor(BaseSensorOperator): Waits for a stack to be created successfully on AWS CloudFormation. :param stack_name: The name of the stack to wait for (templated) - :type stack_name: str :param aws_conn_id: ID of the Airflow connection where credentials and extra configuration are stored - :type aws_conn_id: str :param poke_interval: Time in seconds that the job should wait between each try - :type poke_interval: int """ template_fields: Sequence[str] = ('stack_name',) @@ -72,12 +69,9 @@ class CloudFormationDeleteStackSensor(BaseSensorOperator): Waits for a stack to be deleted successfully on AWS CloudFormation. :param stack_name: The name of the stack to wait for (templated) - :type stack_name: str :param aws_conn_id: ID of the Airflow connection where credentials and extra configuration are stored - :type aws_conn_id: str :param poke_interval: Time in seconds that the job should wait between each try - :type poke_interval: int """ template_fields: Sequence[str] = ('stack_name',) diff --git a/airflow/providers/amazon/aws/sensors/dms.py b/airflow/providers/amazon/aws/sensors/dms.py index 37601d46015f7..26e6b7148fdcd 100644 --- a/airflow/providers/amazon/aws/sensors/dms.py +++ b/airflow/providers/amazon/aws/sensors/dms.py @@ -33,15 +33,11 @@ class DmsTaskBaseSensor(BaseSensorOperator): Subclasses should set ``target_statuses`` and ``termination_statuses`` fields. :param replication_task_arn: AWS DMS replication task ARN - :type replication_task_arn: str :param aws_conn_id: aws connection to use - :type aws_conn_id: str :param target_statuses: the target statuses, sensor waits until the task reaches any of these states - :type target_states: list[str] :param termination_statuses: the termination statuses, sensor fails when the task reaches any of these states - :type termination_statuses: list[str] """ template_fields: Sequence[str] = ('replication_task_arn',) @@ -99,7 +95,6 @@ class DmsTaskCompletedSensor(DmsTaskBaseSensor): :ref:`howto/sensor:DmsTaskCompletedSensor` :param replication_task_arn: AWS DMS replication task ARN - :type replication_task_arn: str """ template_fields: Sequence[str] = ('replication_task_arn',) diff --git a/airflow/providers/amazon/aws/sensors/ec2.py b/airflow/providers/amazon/aws/sensors/ec2.py index 5f84677477496..f955f713f1ab4 100644 --- a/airflow/providers/amazon/aws/sensors/ec2.py +++ b/airflow/providers/amazon/aws/sensors/ec2.py @@ -32,11 +32,8 @@ class EC2InstanceStateSensor(BaseSensorOperator): state of the instance becomes equal to the target state.
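A typical pairing for the CloudFormation sensors above is an operator that creates the stack followed by the sensor that waits on it. A minimal sketch with a hypothetical stack name:

```python
from airflow.providers.amazon.aws.sensors.cloud_formation import (
    CloudFormationCreateStackSensor,
)

# Waits until the stack reaches CREATE_COMPLETE, polling every 30 seconds.
wait_for_stack = CloudFormationCreateStackSensor(
    task_id="wait_for_stack",
    stack_name="my-stack",  # hypothetical
    poke_interval=30,
)
```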
:param target_state: target state of instance - :type target_state: str :param instance_id: id of the AWS EC2 instance - :type instance_id: str :param region_name: (optional) aws region name associated with the client - :type region_name: Optional[str] """ template_fields: Sequence[str] = ("target_state", "instance_id", "region_name") diff --git a/airflow/providers/amazon/aws/sensors/eks.py b/airflow/providers/amazon/aws/sensors/eks.py index ab19db7679a57..7f639b684103f 100644 --- a/airflow/providers/amazon/aws/sensors/eks.py +++ b/airflow/providers/amazon/aws/sensors/eks.py @@ -61,18 +61,14 @@ class EksClusterStateSensor(BaseSensorOperator): Check the state of an Amazon EKS Cluster until it reaches the target state or another terminal state. :param cluster_name: The name of the Cluster to watch. (templated) - :type cluster_name: str :param target_state: Target state of the Cluster. (templated) - :type target_state: ClusterStates :param region: Which AWS region the connection should use. (templated) If this is None or empty then the default boto3 behaviour is used. - :type region: str :param aws_conn_id: The Airflow connection used for AWS credentials. (templated) If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then the default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: str """ template_fields: Sequence[str] = ("cluster_name", "target_state", "aws_conn_id", "region") @@ -121,20 +117,15 @@ class EksFargateProfileStateSensor(BaseSensorOperator): Check the state of an AWS Fargate profile until it reaches the target state or another terminal state. :param cluster_name: The name of the Cluster which the AWS Fargate profile is attached to. (templated) - :type cluster_name: str :param fargate_profile_name: The name of the Fargate profile to watch. (templated) - :type fargate_profile_name: str :param target_state: Target state of the Fargate profile. (templated) - :type target_state: FargateProfileStates :param region: Which AWS region the connection should use. (templated) If this is None or empty then the default boto3 behaviour is used. - :type region: str :param aws_conn_id: The Airflow connection used for AWS credentials. (templated) If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then the default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: str """ template_fields: Sequence[str] = ( @@ -193,20 +184,15 @@ class EksNodegroupStateSensor(BaseSensorOperator): Check the state of an EKS managed node group until it reaches the target state or another terminal state. :param cluster_name: The name of the Cluster which the Nodegroup is attached to. (templated) - :type cluster_name: str :param nodegroup_name: The name of the Nodegroup to watch. (templated) - :type nodegroup_name: str :param target_state: Target state of the Nodegroup. (templated) - :type target_state: NodegroupStates :param region: Which AWS region the connection should use. (templated) If this is None or empty then the default boto3 behaviour is used. - :type region: str :param aws_conn_id: The Airflow connection used for AWS credentials. (templated) If this is None or empty then the default boto3 behaviour is used. 
If running Airflow in a distributed manner and aws_conn_id is None or empty, then the default boto3 configuration would be used (and must be maintained on each worker node). - :type aws_conn_id: str """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/amazon/aws/sensors/emr.py b/airflow/providers/amazon/aws/sensors/emr.py index a0b08abac0491..2bcb85d4972a1 100644 --- a/airflow/providers/amazon/aws/sensors/emr.py +++ b/airflow/providers/amazon/aws/sensors/emr.py @@ -44,7 +44,6 @@ class EmrBaseSensor(BaseSensorOperator): Subclasses should set ``target_states`` and ``failed_states`` fields. :param aws_conn_id: aws connection to use - :type aws_conn_id: str """ ui_color = '#66c3ff' @@ -101,7 +100,6 @@ def state_from_response(response: Dict[str, Any]) -> str: Get state from response dictionary. :param response: response from AWS API - :type response: dict[str, Any] :return: state :rtype: str """ @@ -113,7 +111,6 @@ def failure_message_from_response(response: Dict[str, Any]) -> Optional[str]: Get failure message from response dictionary. :param response: response from AWS API - :type response: dict[str, Any] :return: failure message :rtype: Optional[str] """ @@ -126,15 +123,11 @@ class EmrContainerSensor(BaseSensorOperator): If the job run fails, the task will fail. :param job_id: job_id to check the state of - :type job_id: str :param max_retries: Number of times to poll for query state before returning the current state, defaults to None - :type max_retries: int :param aws_conn_id: aws connection to use, defaults to 'aws_default' - :type aws_conn_id: str :param poll_interval: Time in seconds to wait between two consecutive calls to check query status on EMR, defaults to 10 - :type poll_interval: int """ INTERMEDIATE_STATES = ( @@ -197,13 +190,10 @@ class EmrJobFlowSensor(EmrBaseSensor): until the job flow is ready (after 'STARTING' and 'BOOTSTRAPPING' states) :param job_flow_id: job_flow_id to check the state of - :type job_flow_id: str :param target_states: the target states, sensor waits until job flow reaches any of these states - :type target_states: list[str] :param failed_states: the failure states, sensor fails when job flow reaches any of these states - :type failed_states: list[str] """ template_fields: Sequence[str] = ('job_flow_id', 'target_states', 'failed_states') @@ -243,7 +233,6 @@ def state_from_response(response: Dict[str, Any]) -> str: Get state from response dictionary. :param response: response from AWS API - :type response: dict[str, Any] :return: current state of the cluster :rtype: str """ @@ -255,7 +244,6 @@ def failure_message_from_response(response: Dict[str, Any]) -> Optional[str]: Get failure message from response dictionary. :param response: response from AWS API - :type response: dict[str, Any] :return: failure message :rtype: Optional[str] """ @@ -276,15 +264,11 @@ class EmrStepSensor(EmrBaseSensor): With the default target states, the sensor waits for the step to be completed.
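The `target_states` knob on `EmrJobFlowSensor` is what lets a DAG wait for a cluster to become usable rather than fully terminated. A minimal sketch; the upstream task id is hypothetical:

```python
from airflow.providers.amazon.aws.sensors.emr import EmrJobFlowSensor

# Succeed as soon as the cluster can accept work, instead of the default
# behaviour of waiting for termination.
wait_for_cluster = EmrJobFlowSensor(
    task_id="wait_for_cluster",
    job_flow_id="{{ ti.xcom_pull(task_ids='create_job_flow', key='return_value') }}",
    target_states=["RUNNING", "WAITING"],
)
```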
:param job_flow_id: job_flow_id which contains the step to check the state of - :type job_flow_id: str :param step_id: step to check the state of - :type step_id: str :param target_states: the target states, sensor waits until step reaches any of these states - :type target_states: list[str] :param failed_states: the failure states, sensor fails when step reaches any of these states - :type failed_states: list[str] """ template_fields: Sequence[str] = ('job_flow_id', 'step_id', 'target_states', 'failed_states') @@ -326,7 +310,6 @@ def state_from_response(response: Dict[str, Any]) -> str: Get state from response dictionary. :param response: response from AWS API - :type response: dict[str, Any] :return: execution state of the cluster step :rtype: str """ @@ -338,7 +321,6 @@ def failure_message_from_response(response: Dict[str, Any]) -> Optional[str]: Get failure message from response dictionary. :param response: response from AWS API - :type response: dict[str, Any] :return: failure message :rtype: Optional[str] """ diff --git a/airflow/providers/amazon/aws/sensors/glacier.py b/airflow/providers/amazon/aws/sensors/glacier.py index 956316ca1d9d6..dec97f7293a79 100644 --- a/airflow/providers/amazon/aws/sensors/glacier.py +++ b/airflow/providers/amazon/aws/sensors/glacier.py @@ -42,14 +42,10 @@ class GlacierJobOperationSensor(BaseSensorOperator): :ref:`howto/operator:GlacierJobOperationSensor` :param aws_conn_id: The reference to the AWS connection details - :type aws_conn_id: str :param vault_name: name of Glacier vault on which job is executed - :type vault_name: str :param job_id: the job ID that was returned by retrieve_inventory() - :type job_id: str :param poke_interval: Time in seconds that the job should wait in between each try - :type poke_interval: float :param mode: How the sensor operates. Options are: ``{ poke | reschedule }``, default is ``poke``. When set to ``poke`` the sensor is taking up a worker slot for its @@ -62,7 +58,6 @@ class GlacierJobOperationSensor(BaseSensorOperator): this mode if the time before the criteria is met is expected to be quite long. The poke interval should be more than one minute to prevent too much load on the scheduler. - :type mode: str """ template_fields: Sequence[str] = ("vault_name", "job_id") diff --git a/airflow/providers/amazon/aws/sensors/glue.py b/airflow/providers/amazon/aws/sensors/glue.py index 9372077861871..149c77cf52a16 100644 --- a/airflow/providers/amazon/aws/sensors/glue.py +++ b/airflow/providers/amazon/aws/sensors/glue.py @@ -32,9 +32,7 @@ class GlueJobSensor(BaseSensorOperator): 'FAILED', 'STOPPED', 'SUCCEEDED' :param job_name: The AWS Glue Job unique name - :type job_name: str :param run_id: The AWS Glue current running job identifier - :type run_id: str """ template_fields: Sequence[str] = ('job_name', 'run_id') diff --git a/airflow/providers/amazon/aws/sensors/glue_catalog_partition.py b/airflow/providers/amazon/aws/sensors/glue_catalog_partition.py index b39dba5b4eab1..c49277f34342f 100644 --- a/airflow/providers/amazon/aws/sensors/glue_catalog_partition.py +++ b/airflow/providers/amazon/aws/sensors/glue_catalog_partition.py @@ -31,25 +31,19 @@ class GlueCatalogPartitionSensor(BaseSensorOperator): :param table_name: The name of the table to wait for, supports the dot notation (my_database.my_table) - :type table_name: str :param expression: The partition clause to wait for.
This is passed as is to the AWS Glue Catalog API's get_partitions function, and supports SQL like notation as in ``ds='2015-01-01' AND type='value'`` and comparison operators as in ``"ds>=2015-01-01"``. See https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html #aws-glue-api-catalog-partitions-GetPartitions - :type expression: str :param aws_conn_id: ID of the Airflow connection where credentials and extra configuration are stored - :type aws_conn_id: str :param region_name: Optional aws region name (example: us-east-1). Uses region from connection if not specified. - :type region_name: str :param database_name: The name of the catalog database where the partitions reside. - :type database_name: str :param poke_interval: Time in seconds that the job should wait in between each try - :type poke_interval: int """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/amazon/aws/sensors/glue_crawler.py b/airflow/providers/amazon/aws/sensors/glue_crawler.py index d534bef379114..10ff0a074f40d 100644 --- a/airflow/providers/amazon/aws/sensors/glue_crawler.py +++ b/airflow/providers/amazon/aws/sensors/glue_crawler.py @@ -32,9 +32,7 @@ class GlueCrawlerSensor(BaseSensorOperator): 'FAILED', 'CANCELLED', 'SUCCEEDED' :param crawler_name: The AWS Glue crawler unique name - :type crawler_name: str :param aws_conn_id: aws connection to use, defaults to 'aws_default' - :type aws_conn_id: str """ def __init__(self, *, crawler_name: str, aws_conn_id: str = 'aws_default', **kwargs) -> None: diff --git a/airflow/providers/amazon/aws/sensors/redshift_cluster.py b/airflow/providers/amazon/aws/sensors/redshift_cluster.py index 2abe9d4a40dee..8a9206ea34e0c 100644 --- a/airflow/providers/amazon/aws/sensors/redshift_cluster.py +++ b/airflow/providers/amazon/aws/sensors/redshift_cluster.py @@ -28,9 +28,7 @@ class RedshiftClusterSensor(BaseSensorOperator): Waits for a Redshift cluster to reach a specific status. :param cluster_identifier: The identifier for the cluster being pinged. - :type cluster_identifier: str :param target_status: The cluster status desired. - :type target_status: str """ template_fields: Sequence[str] = ('cluster_identifier', 'target_status') diff --git a/airflow/providers/amazon/aws/sensors/s3.py b/airflow/providers/amazon/aws/sensors/s3.py index b31b9cdceba0d..8e9e55aa52b98 100644 --- a/airflow/providers/amazon/aws/sensors/s3.py +++ b/airflow/providers/amazon/aws/sensors/s3.py @@ -47,15 +47,11 @@ class S3KeySensor(BaseSensorOperator): :param bucket_key: The key being waited on. Supports full s3:// style url or relative path from root level. When it's specified as a full s3:// url, please leave bucket_name as `None`. - :type bucket_key: str :param bucket_name: Name of the S3 bucket. Only needed when ``bucket_key`` is not provided as a full s3:// url. - :type bucket_name: str :param wildcard_match: whether the bucket_key should be interpreted as a Unix wildcard pattern - :type wildcard_match: bool :param aws_conn_id: a reference to the s3 connection - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. You can provide the following values: @@ -66,7 +62,6 @@ class S3KeySensor(BaseSensorOperator): - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to use. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore.
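The `bucket_key` convention on `S3KeySensor` mirrors the copy operator: a full s3:// url means `bucket_name` stays unset, and `wildcard_match` turns the key into a Unix glob. A minimal sketch with a hypothetical bucket:

```python
from airflow.providers.amazon.aws.sensors.s3 import S3KeySensor

# Wait for any CSV to land under 'incoming/'; bucket_name is omitted because
# bucket_key is a full s3:// url.
wait_for_key = S3KeySensor(
    task_id="wait_for_key",
    bucket_key="s3://my-example-bucket/incoming/*.csv",  # hypothetical
    wildcard_match=True,
)
```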
- :type verify: bool or str """ template_fields: Sequence[str] = ('bucket_key', 'bucket_name') @@ -131,15 +126,11 @@ class S3KeySizeSensor(S3KeySensor): :param bucket_key: The key being waited on. Supports full s3:// style url or relative path from root level. When it's specified as a full s3:// url, please leave bucket_name as `None`. - :type bucket_key: str :param bucket_name: Name of the S3 bucket. Only needed when ``bucket_key`` is not provided as a full s3:// url. - :type bucket_name: str :param wildcard_match: whether the bucket_key should be interpreted as a Unix wildcard pattern - :type wildcard_match: bool :param aws_conn_id: a reference to the s3 connection - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. You can provide the following values: @@ -150,8 +141,6 @@ class S3KeySizeSensor(S3KeySensor): - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to use. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type verify: bool or str - :type check_fn: Optional[Callable[..., bool]] :param check_fn: Function that receives the list of the S3 objects, and returns the boolean: - ``True``: a certain criteria is met - ``False``: the criteria isn't met **Example**: Wait for any S3 object size more than 1 megabyte :: def check_fn(self, data: List) -> bool: return any(f.get('Size', 0) > 1048576 for f in data if isinstance(f, dict)) - :type check_fn: Optional[Callable[..., bool]] """ def __init__( @@ -208,9 +196,7 @@ def check_fn(self, data: List, object_min_size: Optional[Union[int, float]] = 0) """Default function for checking that S3 Objects have size more than 0 :param data: List of the objects in S3 bucket. - :type data: list :param object_min_size: Checks if the object sizes are greater than this value. - :type object_min_size: int """ return all(f.get('Size', 0) > object_min_size for f in data if isinstance(f, dict)) @@ -225,11 +211,8 @@ class S3KeysUnchangedSensor(BaseSensorOperator): be lost between rescheduled invocations. :param bucket_name: Name of the S3 bucket - :type bucket_name: str :param prefix: The prefix being waited on. Relative path from bucket root level. - :type prefix: str :param aws_conn_id: a reference to the s3 connection - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. You can provide the following values: @@ -240,21 +223,16 @@ class S3KeysUnchangedSensor(BaseSensorOperator): - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to use. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type verify: Optional[Union[bool, str]] :param inactivity_period: The total seconds of inactivity to designate keys unchanged. Note, this mechanism is not real time and this operator may not return until a poke_interval after this period has passed with no additional objects sensed. - :type inactivity_period: float :param min_objects: The minimum number of objects needed for keys unchanged sensor to be considered valid. - :type min_objects: int :param previous_objects: The set of object ids found during the last poke. - :type previous_objects: Optional[Set[str]] :param allow_delete: Should this sensor consider objects being deleted between pokes valid behavior. If true a warning message will be logged when this happens. If false an error will be raised.
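The `S3KeysUnchangedSensor` parameters above describe a settle-time pattern: succeed only once the prefix has been quiet for a while. A minimal sketch with hypothetical names:

```python
from airflow.providers.amazon.aws.sensors.s3 import S3KeysUnchangedSensor

# Succeed once no new objects have appeared under the prefix for 10 minutes.
uploads_settled = S3KeysUnchangedSensor(
    task_id="uploads_settled",
    bucket_name="my-example-bucket",  # hypothetical
    prefix="incoming/",
    inactivity_period=600,
    min_objects=1,
    allow_delete=False,  # deletions between pokes raise instead of warn
)
```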
- :type allow_delete: bool """ template_fields: Sequence[str] = ('bucket_name', 'prefix') @@ -299,7 +277,6 @@ def is_keys_unchanged(self, current_objects: Set[str]) -> bool: has passed and updates the state of the sensor accordingly. :param current_objects: set of object ids in bucket during last poke. - :type current_objects: set[str] """ current_num_objects = len(current_objects) if current_objects > self.previous_objects: @@ -372,14 +349,10 @@ class S3PrefixSensor(BaseSensorOperator): are NOT special characters in the Python regex engine. :param bucket_name: Name of the S3 bucket - :type bucket_name: str :param prefix: The prefix being waited on. Relative path from bucket root level. - :type prefix: str or list of str :param delimiter: The delimiter intended to show hierarchy. Defaults to '/'. - :type delimiter: str :param aws_conn_id: a reference to the s3 connection - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. You can provide the following values: @@ -390,7 +363,6 @@ class S3PrefixSensor(BaseSensorOperator): - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type verify: bool or str """ template_fields: Sequence[str] = ('prefix', 'bucket_name') diff --git a/airflow/providers/amazon/aws/sensors/sagemaker.py b/airflow/providers/amazon/aws/sensors/sagemaker.py index b136e942ae7bc..054b139cc2eb9 100644 --- a/airflow/providers/amazon/aws/sensors/sagemaker.py +++ b/airflow/providers/amazon/aws/sensors/sagemaker.py @@ -91,7 +91,6 @@ class SageMakerEndpointSensor(SageMakerBaseSensor): :param job_name: job_name of the endpoint instance to check the state of - :type job_name: str """ template_fields: Sequence[str] = ('endpoint_name',) @@ -128,7 +127,6 @@ class SageMakerTransformSensor(SageMakerBaseSensor): :param job_name: job_name of the transform job instance to check the state of - :type job_name: str """ template_fields: Sequence[str] = ('job_name',) @@ -200,9 +198,7 @@ class SageMakerTrainingSensor(SageMakerBaseSensor): :param job_name: name of the SageMaker training job to check the state of - :type job_name: str :param print_log: if the operator should print the cloudwatch log - :type print_log: bool """ template_fields: Sequence[str] = ('job_name',) diff --git a/airflow/providers/amazon/aws/sensors/sqs.py b/airflow/providers/amazon/aws/sensors/sqs.py index 127fa55c9d3c3..0d71ba7c93596 100644 --- a/airflow/providers/amazon/aws/sensors/sqs.py +++ b/airflow/providers/amazon/aws/sensors/sqs.py @@ -38,30 +38,22 @@ class SqsSensor(BaseSensorOperator): is pushed through XCom with the key ``messages``. :param aws_conn_id: AWS connection id - :type aws_conn_id: str :param sqs_queue: The SQS queue url (templated) - :type sqs_queue: str :param max_messages: The maximum number of messages to retrieve for each poke (templated) - :type max_messages: int :param wait_time_seconds: The time in seconds to wait for receiving messages (default: 1 second) - :type wait_time_seconds: int :param visibility_timeout: Visibility timeout, a period of time during which Amazon SQS prevents other consumers from receiving and processing the message. - :type visibility_timeout: Optional[Int] :param message_filtering: Specified how received messages should be filtered. 
Supported options are: `None` (no filtering, default), `'literal'` (message Body literal match) or `'jsonpath'` (message Body filtered using a JSONPath expression). You may add further methods by overriding the relevant class methods. - :type message_filtering: Optional[Literal["literal", "jsonpath"]] :param message_filtering_match_values: Optional value/s for the message filter to match on. For example, with literal matching, if a message body matches any of the specified values then it is included. For JSONPath matching, the result of the JSONPath expression is used and may match any of the specified values. - :type message_filtering_match_values: Any :param message_filtering_config: Additional configuration to pass to the message filter. For example with JSONPath filtering you can pass a JSONPath expression string here, such as `'foo[*].baz'`. Messages with a Body which does not match are ignored. - :type message_filtering_config: Any """ template_fields: Sequence[str] = ('sqs_queue', 'max_messages', 'message_filtering_config') @@ -106,7 +98,6 @@ def poke(self, context: 'Context'): Check for message on subscribed queue and write to xcom the message with key ``messages`` :param context: the context object - :type context: dict :return: ``True`` if message is available or ``False`` """ sqs_conn = self.get_hook().get_conn() diff --git a/airflow/providers/amazon/aws/sensors/step_function.py b/airflow/providers/amazon/aws/sensors/step_function.py index bbad9da3f5c30..3c170c072762f 100644 --- a/airflow/providers/amazon/aws/sensors/step_function.py +++ b/airflow/providers/amazon/aws/sensors/step_function.py @@ -36,9 +36,7 @@ class StepFunctionExecutionSensor(BaseSensorOperator): of the State Machine's output to `output` :param execution_arn: execution_arn to check the state of - :type execution_arn: str :param aws_conn_id: aws connection to use, defaults to 'aws_default' - :type aws_conn_id: str """ INTERMEDIATE_STATES = ('RUNNING',) diff --git a/airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py b/airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py index 21a3db01418d3..051705321c885 100644 --- a/airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py @@ -88,23 +88,16 @@ class DynamoDBToS3Operator(BaseOperator): ) :param dynamodb_table_name: Dynamodb table to replicate data from - :type dynamodb_table_name: str :param s3_bucket_name: S3 bucket to replicate data to - :type s3_bucket_name: str :param file_size: Flush file to s3 if file size >= file_size - :type file_size: int :param dynamodb_scan_kwargs: kwargs pass to # noqa: E501 - :type dynamodb_scan_kwargs: Optional[Dict[str, Any]] :param s3_key_prefix: Prefix of s3 object key - :type s3_key_prefix: Optional[str] :param process_func: How we transforms a dynamodb item to bytes. By default we dump the json - :type process_func: Callable[[Dict[str, Any]], bytes] :param aws_conn_id: The Airflow connection used for AWS credentials. If this is None or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then default boto3 configuration would be used (and must be maintained on each worker node). 
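The ``message_filtering`` machinery of ``SqsSensor`` above is easiest to see with JSONPath; a minimal sketch (queue URL and field names hypothetical)::

    from airflow.providers.amazon.aws.sensors.sqs import SqsSensor

    wait_for_order_events = SqsSensor(
        task_id="wait_for_order_events",
        sqs_queue="https://sqs.us-east-1.amazonaws.com/123456789012/orders",
        max_messages=5,
        message_filtering="jsonpath",
        message_filtering_config="events[*].type",  # JSONPath applied to each message body
        message_filtering_match_values=["order_created"],
    )

A body such as ``{"events": [{"type": "order_created"}]}`` passes the filter; messages whose bodies do not match are ignored.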
- :type aws_conn_id: str """ def __init__( diff --git a/airflow/providers/amazon/aws/transfers/exasol_to_s3.py b/airflow/providers/amazon/aws/transfers/exasol_to_s3.py index b9c1d4fcf81b8..0f2fb7a99ea10 100644 --- a/airflow/providers/amazon/aws/transfers/exasol_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/exasol_to_s3.py @@ -33,29 +33,20 @@ class ExasolToS3Operator(BaseOperator): Export data from Exasol database to AWS S3 bucket. :param query_or_table: the sql statement to be executed or table name to export - :type query_or_table: str :param key: S3 key that will point to the file - :type key: str :param bucket_name: Name of the bucket in which to store the file - :type bucket_name: str :param replace: A flag to decide whether or not to overwrite the key if it already exists. If replace is False and the key exists, an error will be raised. - :type replace: bool :param encrypt: If True, the file will be encrypted on the server-side by S3 and will be stored in an encrypted form while at rest in S3. - :type encrypt: bool :param gzip: If True, the file will be compressed locally - :type gzip: bool :param acl_policy: String specifying the canned ACL policy for the file being uploaded to the S3 bucket. - :type acl_policy: str :param query_params: Query parameters passed to underlying ``export_to_file`` method of :class:`~pyexasol.connection.ExaConnection`. - :type query_params: dict :param export_params: Extra parameters passed to underlying ``export_to_file`` method of :class:`~pyexasol.connection.ExaConnection`. - :type export_params: dict """ template_fields: Sequence[str] = ('query_or_table', 'key', 'bucket_name', 'query_params', 'export_params') diff --git a/airflow/providers/amazon/aws/transfers/ftp_to_s3.py b/airflow/providers/amazon/aws/transfers/ftp_to_s3.py index c52d3e9609910..fae7d5454a5bd 100644 --- a/airflow/providers/amazon/aws/transfers/ftp_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/ftp_to_s3.py @@ -33,38 +33,27 @@ class FTPToS3Operator(BaseOperator): :param ftp_path: The ftp remote path. For one file it is mandatory to include the file as well. For multiple files, it is the route where the files will be found. - :type ftp_path: str :param s3_bucket: The targeted s3 bucket in which to upload the file(s). - :type s3_bucket: str :param s3_key: The targeted s3 key. For one file it must include the file path. For several, it must end with "/". - :type s3_key: str :param ftp_filenames: Only used if you want to move multiple files. You can pass a list with exact filenames present in the ftp path, or a prefix that all files must meet. It can also be the string '*' for moving all the files within the ftp path. - :type ftp_filenames: Union(str, list) :param s3_filenames: Only used if you want to move multiple files and name them different from the originals from the ftp. It can be a list of filenames or file prefix (that will replace the ftp prefix). - :type s3_filenames: Union(str, list) :param ftp_conn_id: The ftp connection id. The name or identifier for establishing a connection to the FTP server. - :type ftp_conn_id: str :param aws_conn_id: The s3 connection id. The name or identifier for establishing a connection to S3. - :type aws_conn_id: str :param replace: A flag to decide whether or not to overwrite the key if it already exists. If replace is False and the key exists, an error will be raised. - :type replace: bool :param encrypt: If True, the file will be encrypted on the server-side by S3 and will be stored in an encrypted form while at rest in S3. 
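``DynamoDBToS3Operator`` documented above dumps each item as JSON by default; ``process_func`` swaps in a custom serialization. A minimal sketch, assuming the items are JSON-serializable (table and bucket names hypothetical)::

    import json
    from typing import Any, Dict

    from airflow.providers.amazon.aws.transfers.dynamodb_to_s3 import DynamoDBToS3Operator

    def to_ndjson(item: Dict[str, Any]) -> bytes:
        # One JSON document per line (newline-delimited JSON).
        return (json.dumps(item) + "\n").encode("utf-8")

    backup_orders = DynamoDBToS3Operator(
        task_id="backup_orders",
        dynamodb_table_name="orders",
        s3_bucket_name="my-backup-bucket",
        s3_key_prefix="dynamodb/orders/",
        file_size=20 * 1024 * 1024,  # flush a file to S3 once it reaches ~20 MB
        process_func=to_ndjson,
    )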
- :type encrypt: bool :param gzip: If True, the file will be compressed locally - :type gzip: bool :param acl_policy: String specifying the canned ACL policy for the file being uploaded to the S3 bucket. - :type acl_policy: str """ template_fields: Sequence[str] = ('ftp_path', 's3_bucket', 's3_key', 'ftp_filenames', 's3_filenames') diff --git a/airflow/providers/amazon/aws/transfers/gcs_to_s3.py b/airflow/providers/amazon/aws/transfers/gcs_to_s3.py index fafd77747432c..575371a642f3f 100644 --- a/airflow/providers/amazon/aws/transfers/gcs_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/gcs_to_s3.py @@ -32,27 +32,19 @@ class GCSToS3Operator(BaseOperator): Synchronizes a Google Cloud Storage bucket with an S3 bucket. :param bucket: The Google Cloud Storage bucket to find the objects. (templated) - :type bucket: str :param prefix: Prefix string which filters objects whose name begin with this prefix. (templated) - :type prefix: str :param delimiter: The delimiter by which you want to filter the objects. (templated) For e.g to lists the CSV files from in a directory in GCS you would use delimiter='.csv'. - :type delimiter: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param google_cloud_storage_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type google_cloud_storage_conn_id: str :param delegate_to: Google account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param dest_aws_conn_id: The destination S3 connection - :type dest_aws_conn_id: str :param dest_s3_key: The base S3 key to be used to store the files. (templated) - :type dest_s3_key: str :param dest_verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. You can provide the following values: @@ -64,7 +56,6 @@ class GCSToS3Operator(BaseOperator): You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type dest_verify: bool or str :param replace: Whether or not to verify the existence of the files in the destination bucket. By default is set to False @@ -72,7 +63,6 @@ class GCSToS3Operator(BaseOperator): the destination bucket. If set to False, will upload only the files that are in the origin but not in the destination bucket. - :type replace: bool :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -81,10 +71,8 @@ class GCSToS3Operator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
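The multi-file modes of ``FTPToS3Operator`` above hinge on ``ftp_filenames``; a minimal sketch using a filename prefix (paths and connection ids hypothetical)::

    from airflow.providers.amazon.aws.transfers.ftp_to_s3 import FTPToS3Operator

    pull_reports = FTPToS3Operator(
        task_id="pull_reports",
        ftp_path="/outgoing/reports/",  # a directory, since several files are moved
        ftp_filenames="report_",        # prefix every candidate file must match
        s3_bucket="my-bucket",
        s3_key="landing/reports/",      # must end with "/" when moving several files
        ftp_conn_id="ftp_default",
        aws_conn_id="aws_default",
    )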
- :type google_impersonation_chain: Union[str, Sequence[str]] :param s3_acl_policy: Optional The string to specify the canned ACL policy for the object to be uploaded in S3 - :type s3_acl_policy: str """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/amazon/aws/transfers/glacier_to_gcs.py b/airflow/providers/amazon/aws/transfers/glacier_to_gcs.py index 48ab9587332f7..07d3410ee7334 100644 --- a/airflow/providers/amazon/aws/transfers/glacier_to_gcs.py +++ b/airflow/providers/amazon/aws/transfers/glacier_to_gcs.py @@ -39,24 +39,16 @@ class GlacierToGCSOperator(BaseOperator): :ref:`howto/operator:GlacierToGCSOperator` :param aws_conn_id: The reference to the AWS connection details - :type aws_conn_id: str :param gcp_conn_id: The reference to the GCP connection details - :type gcp_conn_id: str :param vault_name: the Glacier vault on which job is executed - :type vault_name: string :param bucket_name: the Google Cloud Storage bucket where the data will be transferred - :type bucket_name: str :param object_name: the name of the object to check in the Google cloud storage bucket. - :type object_name: str :param gzip: option to compress local file or file data for upload - :type gzip: bool :param chunk_size: size of chunk in bytes the that will downloaded from Glacier vault - :type chunk_size: int :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -65,7 +57,6 @@ class GlacierToGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ("vault_name", "bucket_name", "object_name") diff --git a/airflow/providers/amazon/aws/transfers/google_api_to_s3.py b/airflow/providers/amazon/aws/transfers/google_api_to_s3.py index af979ee0765c7..dd794c348b5bf 100644 --- a/airflow/providers/amazon/aws/transfers/google_api_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/google_api_to_s3.py @@ -44,46 +44,32 @@ class GoogleApiToS3Operator(BaseOperator): with the Google Cloud Platform. :param google_api_service_name: The specific API service that is being requested. - :type google_api_service_name: str :param google_api_service_version: The version of the API that is being requested. - :type google_api_service_version: str :param google_api_endpoint_path: The client libraries path to the api call's executing method. For example: 'analyticsreporting.reports.batchGet' .. note:: See https://developers.google.com/apis-explorer for more information on which methods are available. - :type google_api_endpoint_path: str :param google_api_endpoint_params: The params to control the corresponding endpoint result. - :type google_api_endpoint_params: dict :param s3_destination_key: The url where to put the data retrieved from the endpoint in S3. - :type s3_destination_key: str :param google_api_response_via_xcom: Can be set to expose the google api response to xcom. 
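For ``GCSToS3Operator`` documented above, ``replace`` controls whether the sync is incremental; a minimal sketch (bucket names hypothetical)::

    from airflow.providers.amazon.aws.transfers.gcs_to_s3 import GCSToS3Operator

    sync_exports = GCSToS3Operator(
        task_id="sync_exports",
        bucket="my-gcs-bucket",
        prefix="exports/",
        delimiter=".csv",               # only objects ending in .csv
        dest_aws_conn_id="aws_default",
        dest_s3_key="s3://my-s3-bucket/exports/",
        replace=False,                  # upload only objects missing from the destination
    )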
- :type google_api_response_via_xcom: str :param google_api_endpoint_params_via_xcom: If set to a value this value will be used as a key for pulling from xcom and updating the google api endpoint params. - :type google_api_endpoint_params_via_xcom: str :param google_api_endpoint_params_via_xcom_task_ids: Task ids to filter xcom by. - :type google_api_endpoint_params_via_xcom_task_ids: str or list of str :param google_api_pagination: If set to True Pagination will be enabled for this request to retrieve all data. .. note:: This means the response will be a list of responses. - :type google_api_pagination: bool :param google_api_num_retries: Define the number of retries for the google api requests being made if it fails. - :type google_api_num_retries: int :param s3_overwrite: Specifies whether the s3 file will be overwritten if exists. - :type s3_overwrite: bool :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: Google account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param aws_conn_id: The connection id specifying the authentication information for the S3 Bucket. - :type aws_conn_id: str :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -92,7 +78,6 @@ class GoogleApiToS3Operator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -145,7 +130,6 @@ def execute(self, context: 'Context') -> None: Transfers Google APIs json data to S3. :param context: The context that is being provided when executing. - :type context: dict """ self.log.info('Transferring data from %s to s3', self.google_api_service_name) diff --git a/airflow/providers/amazon/aws/transfers/hive_to_dynamodb.py b/airflow/providers/amazon/aws/transfers/hive_to_dynamodb.py index 3953571988a2f..6407c2716c806 100644 --- a/airflow/providers/amazon/aws/transfers/hive_to_dynamodb.py +++ b/airflow/providers/amazon/aws/transfers/hive_to_dynamodb.py @@ -36,26 +36,16 @@ class HiveToDynamoDBOperator(BaseOperator): be used for smallish amount of data. :param sql: SQL query to execute against the hive database. (templated) - :type sql: str :param table_name: target DynamoDB table - :type table_name: str :param table_keys: partition key and sort key - :type table_keys: list :param pre_process: implement pre-processing of source data - :type pre_process: function :param pre_process_args: list of pre_process function arguments - :type pre_process_args: list :param pre_process_kwargs: dict of pre_process function arguments - :type pre_process_kwargs: dict :param region_name: aws region name (example: us-east-1) - :type region_name: str :param schema: hive database schema - :type schema: str :param hiveserver2_conn_id: Reference to the :ref: `Hive Server2 thrift service connection id `. 
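``GoogleApiToS3Operator`` above addresses an API method by its client-library path; a minimal sketch against the Sheets API (spreadsheet id and destination hypothetical)::

    from airflow.providers.amazon.aws.transfers.google_api_to_s3 import GoogleApiToS3Operator

    sheet_to_s3 = GoogleApiToS3Operator(
        task_id="sheet_to_s3",
        google_api_service_name="sheets",
        google_api_service_version="v4",
        google_api_endpoint_path="sheets.spreadsheets.values.get",
        google_api_endpoint_params={"spreadsheetId": "my-sheet-id", "range": "Sheet1"},
        s3_destination_key="s3://my-bucket/google_api/sheet.json",
    )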
- :type hiveserver2_conn_id: str :param aws_conn_id: aws connection - :type aws_conn_id: str """ template_fields: Sequence[str] = ('sql',) diff --git a/airflow/providers/amazon/aws/transfers/imap_attachment_to_s3.py b/airflow/providers/amazon/aws/transfers/imap_attachment_to_s3.py index 2e0800edc13e8..b3fa33f001d13 100644 --- a/airflow/providers/amazon/aws/transfers/imap_attachment_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/imap_attachment_to_s3.py @@ -35,22 +35,14 @@ class ImapAttachmentToS3Operator(BaseOperator): :ref:`howto/operator:ImapAttachmentToS3Operator` :param imap_attachment_name: The file name of the mail attachment that you want to transfer. - :type imap_attachment_name: str :param s3_key: The destination file name in the s3 bucket for the attachment. - :type s3_key: str :param imap_check_regex: If set checks the `imap_attachment_name` for a regular expression. - :type imap_check_regex: bool :param imap_mail_folder: The folder on the mail server to look for the attachment. - :type imap_mail_folder: str :param imap_mail_filter: If set other than 'All' only specific mails will be checked. See :py:meth:`imaplib.IMAP4.search` for details. - :type imap_mail_filter: str :param s3_overwrite: If set overwrites the s3 key if already exists. - :type s3_overwrite: bool :param imap_conn_id: The reference to the connection details of the mail server. - :type imap_conn_id: str :param s3_conn_id: The reference to the s3 connection details. - :type s3_conn_id: str """ template_fields: Sequence[str] = ('imap_attachment_name', 's3_key', 'imap_mail_filter') @@ -83,7 +75,6 @@ def execute(self, context: 'Context') -> None: This function executes the transfer from the email server (via imap) into s3. :param context: The context while executing. - :type context: dict """ self.log.info( 'Transferring mail attachment %s from mail server via imap to s3 key %s...', diff --git a/airflow/providers/amazon/aws/transfers/local_to_s3.py b/airflow/providers/amazon/aws/transfers/local_to_s3.py index dac8decb6e904..704198b05d1ff 100644 --- a/airflow/providers/amazon/aws/transfers/local_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/local_to_s3.py @@ -30,19 +30,15 @@ class LocalFilesystemToS3Operator(BaseOperator): :param filename: Path to the local file. Path can be either absolute (e.g. /path/to/file.ext) or relative (e.g. ../../foo/*/*.csv). (templated) - :type filename: str :param dest_key: The key of the object to copy to. (templated) It can be either full s3:// style url or relative path from root level. When it's specified as a full s3:// url, including dest_bucket results in a TypeError. - :type dest_key: str :param dest_bucket: Name of the S3 bucket to where the object is copied. (templated) Inclusion when `dest_key` is provided as a full s3:// url results in a TypeError. - :type dest_bucket: str :param aws_conn_id: Connection id of the S3 connection to use - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. @@ -54,19 +50,14 @@ class LocalFilesystemToS3Operator(BaseOperator): - path/to/cert/bundle.pem: A filename of the CA cert bundle to uses. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type verify: bool or str :param replace: A flag to decide whether or not to overwrite the key if it already exists. If replace is False and the key exists, an error will be raised. 
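``LocalFilesystemToS3Operator`` above accepts ``dest_key`` in either form, but a full s3:// URL and ``dest_bucket`` are mutually exclusive; a minimal sketch (paths hypothetical)::

    from airflow.providers.amazon.aws.transfers.local_to_s3 import LocalFilesystemToS3Operator

    upload_report = LocalFilesystemToS3Operator(
        task_id="upload_report",
        filename="/tmp/reports/daily.csv",
        dest_key="s3://my-bucket/reports/daily.csv",  # full URL, so dest_bucket stays unset
        replace=True,
    )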
- :type replace: bool :param encrypt: If True, the file will be encrypted on the server-side by S3 and will be stored in an encrypted form while at rest in S3. - :type encrypt: bool :param gzip: If True, the file will be compressed locally - :type gzip: bool :param acl_policy: String specifying the canned ACL policy for the file being uploaded to the S3 bucket. - :type acl_policy: str """ template_fields: Sequence[str] = ('filename', 'dest_key', 'dest_bucket') diff --git a/airflow/providers/amazon/aws/transfers/mongo_to_s3.py b/airflow/providers/amazon/aws/transfers/mongo_to_s3.py index 3dd60a9d44012..218d35c892b96 100644 --- a/airflow/providers/amazon/aws/transfers/mongo_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/mongo_to_s3.py @@ -38,30 +38,19 @@ class MongoToS3Operator(BaseOperator): """Operator meant to move data from mongo via pymongo to s3 via boto. :param mongo_conn_id: reference to a specific mongo connection - :type mongo_conn_id: str :param aws_conn_id: reference to a specific S3 connection - :type aws_conn_id: str :param mongo_collection: reference to a specific collection in your mongo db - :type mongo_collection: str :param mongo_query: query to execute. A list including a dict of the query - :type mongo_query: Union[list, dict] :param mongo_projection: optional parameter to filter the returned fields by the query. It can be a list of fields names to include or a dictionary for excluding fields (e.g ``projection={"_id": 0}`` ) - :type mongo_projection: Union[list, dict] :param s3_bucket: reference to a specific S3 bucket to store the data - :type s3_bucket: str :param s3_key: in which S3 key the file will be stored - :type s3_key: str :param mongo_db: reference to a specific mongo database - :type mongo_db: str :param replace: whether or not to replace the file in S3 if it previously existed - :type replace: bool :param allow_disk_use: enables writing to temporary files in the case you are handling large dataset. This only takes effect when `mongo_query` is a list - running an aggregate pipeline - :type allow_disk_use: bool :param compression: type of compression to use for output file in S3. Currently only gzip is supported. - :type compression: str """ template_fields: Sequence[str] = ('s3_bucket', 's3_key', 'mongo_query', 'mongo_collection') diff --git a/airflow/providers/amazon/aws/transfers/mysql_to_s3.py b/airflow/providers/amazon/aws/transfers/mysql_to_s3.py index 72e28d720c256..c0cc659a47797 100644 --- a/airflow/providers/amazon/aws/transfers/mysql_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/mysql_to_s3.py @@ -55,17 +55,11 @@ class MySQLToS3Operator(BaseOperator): :param query: the sql query to be executed. If you want to execute a file, place the absolute path of it, ending with .sql extension. (templated) - :type query: str :param s3_bucket: bucket where the data will be stored. (templated) - :type s3_bucket: str :param s3_key: desired key for the file. It includes the name of the file. (templated) - :type s3_key: str :param replace: whether or not to replace the file in S3 if it previously existed - :type replace: bool :param mysql_conn_id: Reference to :ref:`mysql connection id `. - :type mysql_conn_id: str :param aws_conn_id: reference to a specific S3 connection - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. 
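For ``MongoToS3Operator`` documented above, passing ``mongo_query`` as a list switches to an aggregate pipeline, which is also the only mode where ``allow_disk_use`` applies; a minimal sketch (collection, bucket, and key hypothetical)::

    from airflow.providers.amazon.aws.transfers.mongo_to_s3 import MongoToS3Operator

    export_shipped_orders = MongoToS3Operator(
        task_id="export_shipped_orders",
        mongo_collection="orders",
        mongo_query=[{"$match": {"status": "shipped"}}],  # a list runs as an aggregate pipeline
        allow_disk_use=True,  # only takes effect for aggregate pipelines
        s3_bucket="my-bucket",
        s3_key="mongo/orders.json",
        replace=True,
    )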
You can provide the following values: @@ -75,18 +69,12 @@ class MySQLToS3Operator(BaseOperator): - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type verify: bool or str :param pd_csv_kwargs: arguments to include in pd.to_csv (header, index, columns...) - :type pd_csv_kwargs: dict :param index: whether to have the index or not in the dataframe - :type index: str :param header: whether to include header or not into the S3 file - :type header: bool :param file_format: the destination file format, only string 'csv' or 'parquet' is accepted. - :type file_format: str :param pd_kwargs: arguments to include in ``DataFrame.to_parquet()`` or ``DataFrame.to_csv()``. This is preferred than ``pd_csv_kwargs``. - :type pd_kwargs: dict """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/amazon/aws/transfers/redshift_to_s3.py b/airflow/providers/amazon/aws/transfers/redshift_to_s3.py index afb5738a1206b..de14a1c0c86a4 100644 --- a/airflow/providers/amazon/aws/transfers/redshift_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/redshift_to_s3.py @@ -32,25 +32,18 @@ class RedshiftToS3Operator(BaseOperator): Executes an UNLOAD command to s3 as a CSV with headers :param s3_bucket: reference to a specific S3 bucket - :type s3_bucket: str :param s3_key: reference to a specific S3 key. If ``table_as_file_name`` is set to False, this param must include the desired file name - :type s3_key: str :param schema: reference to a specific schema in redshift database Applicable when ``table`` param provided. - :type schema: str :param table: reference to a specific table in redshift database Used when ``select_query`` param not provided. - :type table: str :param select_query: custom select query to fetch data from redshift database - :type select_query: str :param redshift_conn_id: reference to a specific redshift database - :type redshift_conn_id: str :param aws_conn_id: reference to a specific S3 connection If the AWS connection contains 'aws_iam_role' in ``extras`` the operator will use AWS STS credentials with a token https://docs.aws.amazon.com/redshift/latest/dg/copy-parameters-authorization.html#copy-credentials - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. You can provide the following values: @@ -61,19 +54,13 @@ class RedshiftToS3Operator(BaseOperator): - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type verify: bool or str :param unload_options: reference to a list of UNLOAD options - :type unload_options: list :param autocommit: If set to True it will automatically commit the UNLOAD statement. Otherwise it will be committed right before the redshift connection gets closed. - :type autocommit: bool :param include_header: If set to True the s3 file contains the header columns. - :type include_header: bool :param parameters: (optional) the parameters to render the SQL query with. - :type parameters: dict or iterable :param table_as_file_name: If set to True, the s3 file will be named as the table. Applicable when ``table`` param provided. 
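``MySQLToS3Operator`` above prefers ``pd_kwargs`` over the older ``pd_csv_kwargs``; a minimal sketch writing Parquet (query and bucket hypothetical)::

    from airflow.providers.amazon.aws.transfers.mysql_to_s3 import MySQLToS3Operator

    orders_to_s3 = MySQLToS3Operator(
        task_id="orders_to_s3",
        query="SELECT * FROM orders WHERE created_at >= '2022-01-01'",
        s3_bucket="my-bucket",
        s3_key="exports/orders.parquet",
        file_format="parquet",
        pd_kwargs={"compression": "snappy"},  # forwarded to DataFrame.to_parquet()
    )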
- :type table_as_file_name: bool """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/amazon/aws/transfers/s3_to_ftp.py b/airflow/providers/amazon/aws/transfers/s3_to_ftp.py index e6c55fadcda8a..219d853848dd9 100644 --- a/airflow/providers/amazon/aws/transfers/s3_to_ftp.py +++ b/airflow/providers/amazon/aws/transfers/s3_to_ftp.py @@ -33,18 +33,13 @@ class S3ToFTPOperator(BaseOperator): :param s3_bucket: The targeted s3 bucket. This is the S3 bucket from where the file is downloaded. - :type s3_bucket: str :param s3_key: The targeted s3 key. This is the specified file path for downloading the file from S3. - :type s3_key: str :param ftp_path: The ftp remote path. This is the specified file path for uploading file to the FTP server. - :type ftp_path: str :param aws_conn_id: reference to a specific AWS connection - :type aws_conn_id: str :param ftp_conn_id: The ftp connection id. The name or identifier for establishing a connection to the FTP server. - :type ftp_conn_id: str """ template_fields: Sequence[str] = ('s3_bucket', 's3_key', 'ftp_path') diff --git a/airflow/providers/amazon/aws/transfers/s3_to_redshift.py b/airflow/providers/amazon/aws/transfers/s3_to_redshift.py index ee65848eaee54..6b54e8902a6cf 100644 --- a/airflow/providers/amazon/aws/transfers/s3_to_redshift.py +++ b/airflow/providers/amazon/aws/transfers/s3_to_redshift.py @@ -40,20 +40,14 @@ class S3ToRedshiftOperator(BaseOperator): :ref:`howto/operator:S3ToRedshiftOperator` :param schema: reference to a specific schema in redshift database - :type schema: str :param table: reference to a specific table in redshift database - :type table: str :param s3_bucket: reference to a specific S3 bucket - :type s3_bucket: str :param s3_key: reference to a specific S3 key - :type s3_key: str :param redshift_conn_id: reference to a specific redshift database - :type redshift_conn_id: str :param aws_conn_id: reference to a specific S3 connection If the AWS connection contains 'aws_iam_role' in ``extras`` the operator will use AWS STS credentials with a token https://docs.aws.amazon.com/redshift/latest/dg/copy-parameters-authorization.html#copy-credentials - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. You can provide the following values: @@ -64,15 +58,10 @@ class S3ToRedshiftOperator(BaseOperator): - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type verify: bool or str :param column_list: list of column names to load - :type column_list: List[str] :param copy_options: reference to a list of COPY options - :type copy_options: list :param method: Action to be performed on execution. Available ``APPEND``, ``UPSERT`` and ``REPLACE``. - :type method: str :param upsert_keys: List of fields to use as key on upsert action - :type upsert_keys: List[str] """ template_fields: Sequence[str] = ('s3_bucket', 's3_key', 'schema', 'table', 'column_list', 'copy_options') diff --git a/airflow/providers/amazon/aws/transfers/s3_to_sftp.py b/airflow/providers/amazon/aws/transfers/s3_to_sftp.py index fbfe586a0f645..233503020a73e 100644 --- a/airflow/providers/amazon/aws/transfers/s3_to_sftp.py +++ b/airflow/providers/amazon/aws/transfers/s3_to_sftp.py @@ -38,19 +38,14 @@ class S3ToSFTPOperator(BaseOperator): :param sftp_conn_id: The sftp connection id. 
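``S3ToRedshiftOperator`` documented above maps ``method`` onto the COPY behaviour; a minimal sketch of an upsert load (schema, table, and key names hypothetical)::

    from airflow.providers.amazon.aws.transfers.s3_to_redshift import S3ToRedshiftOperator

    load_orders = S3ToRedshiftOperator(
        task_id="load_orders",
        schema="analytics",
        table="orders",
        s3_bucket="my-bucket",
        s3_key="exports/orders/",
        copy_options=["FORMAT AS PARQUET"],
        method="UPSERT",
        upsert_keys=["order_id"],  # rows matching these keys are replaced
    )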
The name or identifier for establishing a connection to the SFTP server. - :type sftp_conn_id: str :param sftp_path: The sftp remote path. This is the specified file path for uploading file to the SFTP server. - :type sftp_path: str :param s3_conn_id: The s3 connection id. The name or identifier for establishing a connection to S3 - :type s3_conn_id: str :param s3_bucket: The targeted s3 bucket. This is the S3 bucket from where the file is downloaded. - :type s3_bucket: str :param s3_key: The targeted s3 key. This is the specified file path for downloading the file from S3. - :type s3_key: str """ template_fields: Sequence[str] = ('s3_key', 'sftp_path') diff --git a/airflow/providers/amazon/aws/transfers/salesforce_to_s3.py b/airflow/providers/amazon/aws/transfers/salesforce_to_s3.py index 4ba363056c4b2..b754bf9564287 100644 --- a/airflow/providers/amazon/aws/transfers/salesforce_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/salesforce_to_s3.py @@ -36,40 +36,26 @@ class SalesforceToS3Operator(BaseOperator): :ref:`howto/operator:SalesforceToS3Operator` :param salesforce_query: The query to send to Salesforce. - :type salesforce_query: str :param s3_bucket_name: The bucket name to upload to. - :type s3_bucket_name: str :param s3_key: The object name to set when uploading the file. - :type s3_key: str :param salesforce_conn_id: The name of the connection that has the parameters needed to connect to Salesforce. - :type salesforce_conn_id: str :param export_format: Desired format of files to be exported. - :type export_format: str :param query_params: Additional optional arguments to be passed to the HTTP request querying Salesforce. - :type query_params: dict :param include_deleted: True if the query should include deleted records. - :type include_deleted: bool :param coerce_to_timestamp: True if you want all datetime fields to be converted into Unix timestamps. False if you want them to be left in the same format as they were in Salesforce. Leaving the value as False will result in datetimes being strings. Default: False - :type coerce_to_timestamp: bool :param record_time_added: True if you want to add a Unix timestamp field to the resulting data that marks when the data was fetched from Salesforce. Default: False - :type record_time_added: bool :param aws_conn_id: The name of the connection that has the parameters we need to connect to S3. - :type aws_conn_id: str :param replace: A flag to decide whether or not to overwrite the S3 key if it already exists. If set to False and the key exists an error will be raised. - :type replace: bool :param encrypt: If True, the file will be encrypted on the server-side by S3 and will be stored in an encrypted form while at rest in S3. - :type encrypt: bool :param gzip: If True, the file will be compressed locally. - :type gzip: bool :param acl_policy: String specifying the canned ACL policy for the file being uploaded to the S3 bucket. - :type acl_policy: str """ template_fields: Sequence[str] = ("salesforce_query", "s3_bucket_name", "s3_key") diff --git a/airflow/providers/amazon/aws/transfers/sftp_to_s3.py b/airflow/providers/amazon/aws/transfers/sftp_to_s3.py index f69e2e487e069..71376e3179c0a 100644 --- a/airflow/providers/amazon/aws/transfers/sftp_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/sftp_to_s3.py @@ -38,22 +38,16 @@ class SFTPToS3Operator(BaseOperator): :param sftp_conn_id: The sftp connection id. The name or identifier for establishing a connection to the SFTP server. 
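``SalesforceToS3Operator`` documented above runs a SOQL query and stages the result in S3; a minimal sketch (query and destination hypothetical)::

    from airflow.providers.amazon.aws.transfers.salesforce_to_s3 import SalesforceToS3Operator

    export_contacts = SalesforceToS3Operator(
        task_id="export_contacts",
        salesforce_query="SELECT Id, Name, Email FROM Contact",
        s3_bucket_name="my-bucket",
        s3_key="salesforce/contacts.csv",
        export_format="csv",
        coerce_to_timestamp=True,  # datetime fields become Unix timestamps
        replace=True,
    )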
- :type sftp_conn_id: str :param sftp_path: The sftp remote path. This is the specified file path for downloading the file from the SFTP server. - :type sftp_path: str :param s3_conn_id: The s3 connection id. The name or identifier for establishing a connection to S3 - :type s3_conn_id: str :param s3_bucket: The targeted s3 bucket. This is the S3 bucket to where the file is uploaded. - :type s3_bucket: str :param s3_key: The targeted s3 key. This is the specified path for uploading the file to S3. - :type s3_key: str :param use_temp_file: If True, copies file first to local, if False streams file from SFTP to S3. - :type use_temp_file: bool """ template_fields: Sequence[str] = ('s3_key', 'sftp_path') diff --git a/airflow/providers/apache/beam/hooks/beam.py b/airflow/providers/apache/beam/hooks/beam.py index 4bd27ebc4fdf4..7970734f0ad1c 100644 --- a/airflow/providers/apache/beam/hooks/beam.py +++ b/airflow/providers/apache/beam/hooks/beam.py @@ -56,7 +56,6 @@ def beam_options_to_args(options: dict) -> List[str]: apache_beam/options/pipeline_options.py#L230-L251 :param options: Dictionary with options - :type options: dict :return: List of arguments :rtype: List[str] """ @@ -79,10 +78,8 @@ class BeamCommandRunner(LoggingMixin): Class responsible for running pipeline command in subprocess :param cmd: Parts of the command to be run in subprocess - :type cmd: List[str] :param process_line_callback: Optional callback which can be used to process stdout and stderr to detect job id - :type process_line_callback: Optional[Callable[[str], None]] """ def __init__( @@ -158,7 +155,6 @@ class BeamHook(BaseHook): keyword arguments rather than positional. :param runner: Runner type - :type runner: str """ def __init__( @@ -199,28 +195,22 @@ def start_python_pipeline( Starts Apache Beam python pipeline. :param variables: Variables passed to the pipeline. - :type variables: Dict :param py_options: Additional options. - :type py_options: List[str] :param py_interpreter: Python version of the Apache Beam pipeline. If None, this defaults to the python3. To track python versions supported by beam and related issues check: https://issues.apache.org/jira/browse/BEAM-1251 - :type py_interpreter: str :param py_requirements: Additional python package(s) to install. If a value is passed to this parameter, a new virtual environment has been created with additional packages installed. You could also install the apache-beam package if it is not installed on your system or you want to use a different version. - :type py_requirements: List[str] :param py_system_site_packages: Whether to include system_site_packages in your virtualenv. See virtualenv documentation for more information. This option is only relevant if the ``py_requirements`` parameter is not None. - :type py_system_site_packages: bool :param on_new_job_id_callback: Callback called when the job ID is known. - :type on_new_job_id_callback: callable """ if "labels" in variables: variables["labels"] = [f"{key}={value}" for key, value in variables["labels"].items()] @@ -273,11 +263,8 @@ def start_java_pipeline( Starts Apache Beam Java pipeline. :param variables: Variables passed to the job. - :type variables: dict :param jar: Name of the jar for the pipeline - :type job_class: str :param job_class: Name of the java class for the pipeline. 
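``beam_options_to_args`` above flattens an options dict into CLI arguments: true booleans become bare flags and lists are repeated per value. Roughly (option names hypothetical)::

    from airflow.providers.apache.beam.hooks.beam import beam_options_to_args

    args = beam_options_to_args(
        {
            "job_name": "my-pipeline",
            "streaming": True,                   # True -> a bare '--streaming' flag
            "labels": ["env=dev", "team=data"],  # lists repeat the option per value
        }
    )
    # Expected shape:
    # ['--job_name=my-pipeline', '--streaming', '--labels=env=dev', '--labels=team=data']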
- :type job_class: str """ if "labels" in variables: variables["labels"] = json.dumps(variables["labels"], separators=(",", ":")) diff --git a/airflow/providers/apache/beam/operators/beam.py b/airflow/providers/apache/beam/operators/beam.py index 8d83b5b890533..5877dab7b2651 100644 --- a/airflow/providers/apache/beam/operators/beam.py +++ b/airflow/providers/apache/beam/operators/beam.py @@ -117,17 +117,13 @@ class BeamRunPythonPipelineOperator(BaseOperator, BeamDataflowMixin): :param py_file: Reference to the python Apache Beam pipeline file.py, e.g., /some/local/file/path/to/your/python/pipeline/file. (templated) - :type py_file: str :param runner: Runner on which pipeline will be run. By default "DirectRunner" is being used. Other possible options: DataflowRunner, SparkRunner, FlinkRunner. See: :class:`~providers.apache.beam.hooks.beam.BeamRunnerType` See: https://beam.apache.org/documentation/runners/capability-matrix/ - :type runner: str :param py_options: Additional python options, e.g., ["-m", "-v"]. - :type py_options: list[str] :param default_pipeline_options: Map of default pipeline options. - :type default_pipeline_options: dict :param pipeline_options: Map of pipeline options.The key must be a dictionary. The value can contain different types: @@ -140,33 +136,27 @@ class BeamRunPythonPipelineOperator(BaseOperator, BeamDataflowMixin): * Other value types will be replaced with the Python textual representation. When defining labels (``labels`` option), you can also provide a dictionary. - :type pipeline_options: dict :param py_interpreter: Python version of the beam pipeline. If None, this defaults to the python3. To track python versions supported by beam and related issues check: https://issues.apache.org/jira/browse/BEAM-1251 - :type py_interpreter: str :param py_requirements: Additional python package(s) to install. If a value is passed to this parameter, a new virtual environment has been created with additional packages installed. You could also install the apache_beam package if it is not installed on your system or you want to use a different version. - :type py_requirements: List[str] :param py_system_site_packages: Whether to include system_site_packages in your virtualenv. See virtualenv documentation for more information. This option is only relevant if the ``py_requirements`` parameter is not None. :param gcp_conn_id: Optional. The connection ID to use connecting to Google Cloud Storage if python file is on GCS. - :type gcp_conn_id: str :param delegate_to: Optional. The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param dataflow_config: Dataflow configuration, used when runner type is set to DataflowRunner - :type dataflow_config: Union[dict, providers.google.cloud.operators.dataflow.DataflowConfiguration] """ template_fields: Sequence[str] = ( @@ -317,16 +307,12 @@ class BeamRunJavaPipelineOperator(BaseOperator, BeamDataflowMixin): Use ``pipeline_options`` to pass on pipeline_options to your job. :param jar: The reference to a self executing Apache Beam jar (templated). - :type jar: str :param runner: Runner on which pipeline will be run. By default "DirectRunner" is being used. See: https://beam.apache.org/documentation/runners/capability-matrix/ - :type runner: str :param job_class: The name of the Apache Beam pipeline class to be executed, it is often not the main class configured in the pipeline jar file. 
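The ``py_requirements`` and ``pipeline_options`` knobs of ``BeamRunPythonPipelineOperator`` above come together in a minimal sketch (file path and version pin hypothetical)::

    from airflow.providers.apache.beam.operators.beam import BeamRunPythonPipelineOperator

    run_wordcount = BeamRunPythonPipelineOperator(
        task_id="run_wordcount",
        py_file="/opt/pipelines/wordcount.py",
        runner="DirectRunner",
        pipeline_options={"output": "/tmp/wordcount/results"},
        py_requirements=["apache-beam==2.35.0"],  # installed into a fresh virtualenv
        py_system_site_packages=False,
    )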
- :type job_class: str :param default_pipeline_options: Map of default job pipeline_options. - :type default_pipeline_options: dict :param pipeline_options: Map of job specific pipeline_options.The key must be a dictionary. The value can contain different types: @@ -339,15 +325,11 @@ class BeamRunJavaPipelineOperator(BaseOperator, BeamDataflowMixin): * Other value types will be replaced with the Python textual representation. When defining labels (``labels`` option), you can also provide a dictionary. - :type pipeline_options: dict :param gcp_conn_id: The connection ID to use connecting to Google Cloud Storage if jar is on GCS - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param dataflow_config: Dataflow configuration, used when runner type is set to DataflowRunner - :type dataflow_config: Union[dict, providers.google.cloud.operators.dataflow.DataflowConfiguration] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/apache/cassandra/hooks/cassandra.py b/airflow/providers/apache/cassandra/hooks/cassandra.py index 461c857db40be..3d250741d2fc8 100644 --- a/airflow/providers/apache/cassandra/hooks/cassandra.py +++ b/airflow/providers/apache/cassandra/hooks/cassandra.py @@ -146,9 +146,7 @@ def get_lb_policy(policy_name: str, policy_args: Dict[str, Any]) -> Policy: Creates load balancing policy. :param policy_name: Name of the policy to use. - :type policy_name: str :param policy_args: Parameters for the policy. - :type policy_args: Dict """ if policy_name == 'DCAwareRoundRobinPolicy': local_dc = policy_args.get('local_dc', '') @@ -184,7 +182,6 @@ def table_exists(self, table: str) -> bool: :param table: Target Cassandra table. Use dot notation to target a specific keyspace. - :type table: str """ keyspace = self.keyspace if '.' in table: @@ -198,9 +195,7 @@ def record_exists(self, table: str, keys: Dict[str, str]) -> bool: :param table: Target Cassandra table. Use dot notation to target a specific keyspace. - :type table: str :param keys: The keys and their values to check the existence. - :type keys: dict """ keyspace = self.keyspace if '.' in table: diff --git a/airflow/providers/apache/cassandra/sensors/record.py b/airflow/providers/apache/cassandra/sensors/record.py index 08855eb73383d..f0a407297adfd 100644 --- a/airflow/providers/apache/cassandra/sensors/record.py +++ b/airflow/providers/apache/cassandra/sensors/record.py @@ -48,12 +48,9 @@ class CassandraRecordSensor(BaseSensorOperator): :param table: Target Cassandra table. Use dot notation to target a specific keyspace. - :type table: str :param keys: The keys and their values to be monitored - :type keys: dict :param cassandra_conn_id: The connection ID to use when connecting to Cassandra cluster - :type cassandra_conn_id: str """ template_fields: Sequence[str] = ('table', 'keys') diff --git a/airflow/providers/apache/cassandra/sensors/table.py b/airflow/providers/apache/cassandra/sensors/table.py index 638d0521c95bb..2f5e6681cb4b9 100644 --- a/airflow/providers/apache/cassandra/sensors/table.py +++ b/airflow/providers/apache/cassandra/sensors/table.py @@ -48,10 +48,8 @@ class CassandraTableSensor(BaseSensorOperator): :param table: Target Cassandra table. Use dot notation to target a specific keyspace. 
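``CassandraRecordSensor`` above uses dot notation to address a keyspace; a minimal sketch (keyspace, table, and keys hypothetical)::

    from airflow.providers.apache.cassandra.sensors.record import CassandraRecordSensor

    wait_for_row = CassandraRecordSensor(
        task_id="wait_for_row",
        table="my_keyspace.orders",  # dot notation selects the keyspace
        keys={"order_id": "42", "status": "shipped"},
        cassandra_conn_id="cassandra_default",
    )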
- :type table: str :param cassandra_conn_id: The connection ID to use when connecting to Cassandra cluster - :type cassandra_conn_id: str """ template_fields: Sequence[str] = ('table',) diff --git a/airflow/providers/apache/drill/operators/drill.py b/airflow/providers/apache/drill/operators/drill.py index 9c3981d53009e..791ed546c34fe 100644 --- a/airflow/providers/apache/drill/operators/drill.py +++ b/airflow/providers/apache/drill/operators/drill.py @@ -34,15 +34,12 @@ class DrillOperator(BaseOperator): For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:DrillOperator` - :param sql: the SQL code to be executed. (templated) - :type sql: Can receive a str representing a sql statement, + :param sql: the SQL code to be executed as a single string, or a list of str (sql statements), or a reference to a template file. Template references are recognized by str ending in '.sql' :param drill_conn_id: id of the connection config for the target Drill environment - :type drill_conn_id: str :param parameters: (optional) the parameters to render the SQL query with. - :type parameters: dict or iterable """ template_fields: Sequence[str] = ('sql',) diff --git a/airflow/providers/apache/druid/hooks/druid.py b/airflow/providers/apache/druid/hooks/druid.py index 1016de6f515f0..671c914be604f 100644 --- a/airflow/providers/apache/druid/hooks/druid.py +++ b/airflow/providers/apache/druid/hooks/druid.py @@ -36,13 +36,10 @@ class DruidHook(BaseHook): :param druid_ingest_conn_id: The connection id to the Druid overlord machine which accepts index jobs - :type druid_ingest_conn_id: str :param timeout: The interval between polling the Druid job for the status of the ingestion job. Must be greater than or equal to 1 - :type timeout: int :param max_ingestion_time: The maximum ingestion time before assuming the job failed - :type max_ingestion_time: int """ def __init__( diff --git a/airflow/providers/apache/druid/operators/druid.py b/airflow/providers/apache/druid/operators/druid.py index 333c239770e04..9fd8b3595af5c 100644 --- a/airflow/providers/apache/druid/operators/druid.py +++ b/airflow/providers/apache/druid/operators/druid.py @@ -30,15 +30,11 @@ class DruidOperator(BaseOperator): Allows to submit a task directly to druid :param json_index_file: The filepath to the druid index specification - :type json_index_file: str :param druid_ingest_conn_id: The connection id of the Druid overlord which accepts index jobs - :type druid_ingest_conn_id: str :param timeout: The interval (in seconds) between polling the Druid job for the status of the ingestion job. Must be greater than or equal to 1 - :type timeout: int :param max_ingestion_time: The maximum ingestion time before assuming the job failed - :type max_ingestion_time: int """ template_fields: Sequence[str] = ('json_index_file',) diff --git a/airflow/providers/apache/druid/transfers/hive_to_druid.py b/airflow/providers/apache/druid/transfers/hive_to_druid.py index e728b4a30681b..03bb829e890fa 100644 --- a/airflow/providers/apache/druid/transfers/hive_to_druid.py +++ b/airflow/providers/apache/druid/transfers/hive_to_druid.py @@ -38,45 +38,30 @@ class HiveToDruidOperator(BaseOperator): be used for smallish amount of data.[/del] :param sql: SQL query to execute against the Druid database. 
(templated) - :type sql: str :param druid_datasource: the datasource you want to ingest into in druid - :type druid_datasource: str :param ts_dim: the timestamp dimension - :type ts_dim: str :param metric_spec: the metrics you want to define for your data - :type metric_spec: list :param hive_cli_conn_id: the hive connection id - :type hive_cli_conn_id: str :param druid_ingest_conn_id: the druid ingest connection id - :type druid_ingest_conn_id: str :param metastore_conn_id: the metastore connection id - :type metastore_conn_id: str :param hadoop_dependency_coordinates: list of coordinates to squeeze int the ingest json - :type hadoop_dependency_coordinates: list[str] :param intervals: list of time intervals that defines segments, this is passed as is to the json object. (templated) - :type intervals: list :param num_shards: Directly specify the number of shards to create. - :type num_shards: float :param target_partition_size: Target number of rows to include in a partition, - :type target_partition_size: int :param query_granularity: The minimum granularity to be able to query results at and the granularity of the data inside the segment. E.g. a value of "minute" will mean that data is aggregated at minutely granularity. That is, if there are collisions in the tuple (minute(timestamp), dimensions), then it will aggregate values together using the aggregators instead of storing individual rows. A granularity of 'NONE' means millisecond granularity. - :type query_granularity: str :param segment_granularity: The granularity to create time chunks at. Multiple segments can be created per time chunk. For example, with 'DAY' segmentGranularity, the events of the same day fall into the same time chunk which can be optionally further partitioned into multiple segments based on other configurations and input size. - :type segment_granularity: str :param hive_tblproperties: additional properties for tblproperties in hive for the staging table - :type hive_tblproperties: dict :param job_properties: additional properties for job - :type job_properties: dict """ template_fields: Sequence[str] = ('sql', 'intervals') @@ -171,9 +156,7 @@ def construct_ingest_query(self, static_path: str, columns: List[str]) -> Dict[s Builds an ingest query for an HDFS TSV load. :param static_path: The path on hdfs where the data is - :type static_path: str :param columns: List of all the columns that are available - :type columns: list """ # backward compatibility for num_shards, # but target_partition_size is the default setting diff --git a/airflow/providers/apache/hdfs/hooks/hdfs.py b/airflow/providers/apache/hdfs/hooks/hdfs.py index 034490d3db000..6d1ce3d010125 100644 --- a/airflow/providers/apache/hdfs/hooks/hdfs.py +++ b/airflow/providers/apache/hdfs/hooks/hdfs.py @@ -39,11 +39,8 @@ class HDFSHook(BaseHook): Interact with HDFS. This class is a wrapper around the snakebite library. :param hdfs_conn_id: Connection id to fetch connection info - :type hdfs_conn_id: str :param proxy_user: effective user for HDFS operations - :type proxy_user: str :param autoconfig: use snakebite's automatically configured client - :type autoconfig: bool """ conn_name_attr = 'hdfs_conn_id' diff --git a/airflow/providers/apache/hdfs/hooks/webhdfs.py b/airflow/providers/apache/hdfs/hooks/webhdfs.py index a2b28ba59d4a4..a32206ba9bef8 100644 --- a/airflow/providers/apache/hdfs/hooks/webhdfs.py +++ b/airflow/providers/apache/hdfs/hooks/webhdfs.py @@ -47,9 +47,7 @@ class WebHDFSHook(BaseHook): Interact with HDFS. 
This class is a wrapper around the hdfscli library. :param webhdfs_conn_id: The connection id for the webhdfs client to connect to. - :type webhdfs_conn_id: str :param proxy_user: The user used to authenticate. - :type proxy_user: str """ def __init__(self, webhdfs_conn_id: str = 'webhdfs_default', proxy_user: Optional[str] = None): @@ -112,7 +110,6 @@ def check_for_path(self, hdfs_path: str) -> bool: Check for the existence of a path in HDFS by querying FileStatus. :param hdfs_path: The path to check. - :type hdfs_path: str :return: True if the path exists and False if not. :rtype: bool """ @@ -131,15 +128,11 @@ def load_file( If it's a folder, all the files inside of it will be uploaded. .. note:: This implies that folders empty of files will not be created remotely. - :type source: str :param destination: PTarget HDFS path. If it already exists and is a directory, files will be uploaded inside. - :type destination: str :param overwrite: Overwrite any existing file or directory. - :type overwrite: bool :param parallelism: Number of threads to use for parallelization. A value of `0` (or negative) uses as many threads as there are files. - :type parallelism: int :param kwargs: Keyword arguments forwarded to :meth:`hdfs.client.Client.upload`. """ conn = self.get_conn() diff --git a/airflow/providers/apache/hdfs/sensors/hdfs.py b/airflow/providers/apache/hdfs/sensors/hdfs.py index a130bb14a7d26..a445bb688e722 100644 --- a/airflow/providers/apache/hdfs/sensors/hdfs.py +++ b/airflow/providers/apache/hdfs/sensors/hdfs.py @@ -35,15 +35,10 @@ class HdfsSensor(BaseSensorOperator): Waits for a file or folder to land in HDFS :param filepath: The route to a stored file. - :type filepath: str :param hdfs_conn_id: The Airflow connection used for HDFS credentials. - :type hdfs_conn_id: str :param ignored_ext: This is the list of ignored extensions. - :type ignored_ext: Optional[List[str]] :param ignore_copying: Shall we ignore? - :type ignore_copying: Optional[bool] :param file_size: This is the size of the file. - :type file_size: Optional[int] .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -98,11 +93,8 @@ def filter_for_ignored_ext( Will filter if instructed to do so the result to remove matching criteria :param result: list of dicts returned by Snakebite ls - :type result: list[dict] :param ignored_ext: list of ignored extensions - :type ignored_ext: list :param ignore_copying: shall we ignore ? - :type ignore_copying: bool :return: list of dicts which were not removed :rtype: list[dict] """ diff --git a/airflow/providers/apache/hive/hooks/hive.py b/airflow/providers/apache/hive/hooks/hive.py index ea61853681485..6f76e236dce97 100644 --- a/airflow/providers/apache/hive/hooks/hive.py +++ b/airflow/providers/apache/hive/hooks/hive.py @@ -70,15 +70,11 @@ class HiveCliHook(BaseHook): :param hive_cli_conn_id: Reference to the :ref:`Hive CLI connection id `. - :type hive_cli_conn_id: str :param mapred_queue: queue used by the Hadoop Scheduler (Capacity or Fair) - :type mapred_queue: str :param mapred_queue_priority: priority within the job queue. Possible settings include: VERY_HIGH, HIGH, NORMAL, LOW, VERY_LOW - :type mapred_queue_priority: str :param mapred_job_name: This name will appear in the jobtracker. This can make monitoring easier. - :type mapred_job_name: str """ conn_name_attr = 'hive_cli_conn_id' @@ -166,7 +162,6 @@ def _prepare_hiveconf(d: Dict[Any, Any]) -> List[Any]: from a dictionary of key value pairs. 
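``WebHDFSHook.load_file`` documented above uploads whole directories too; a minimal sketch, assuming a configured ``webhdfs_default`` connection (paths hypothetical)::

    from airflow.providers.apache.hdfs.hooks.webhdfs import WebHDFSHook

    hook = WebHDFSHook(webhdfs_conn_id="webhdfs_default")
    # Uploading a directory sends every file inside it; empty folders
    # are not created remotely.
    hook.load_file(
        source="/tmp/exports",
        destination="/data/exports",
        overwrite=True,
        parallelism=0,  # 0 (or negative) uses one thread per file
    )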
:param d: dict of hive configuration key/value pairs - :type d: dict >>> hh = HiveCliHook() >>> hive_conf = {"hive.exec.dynamic.partition": "true", @@ -192,16 +187,12 @@ def run_cli( in HiveConf. :param hql: an hql (hive query language) statement to run with hive cli - :type hql: str :param schema: Name of hive schema (database) to use - :type schema: str :param verbose: Provides additional logging. Defaults to True. - :type verbose: bool :param hive_conf: if specified, these key value pairs will be passed to hive as ``-hiveconf "key"="value"``. Note that they will be passed after the ``hive_cli_params`` and thus will override whatever values are specified in the database. - :type hive_conf: dict >>> hh = HiveCliHook() >>> result = hh.run_cli("USE airflow;") @@ -323,19 +314,13 @@ def load_df( not be sanitized. :param df: DataFrame to load into a Hive table - :type df: pandas.DataFrame :param table: target Hive table, use dot notation to target a specific database - :type table: str :param field_dict: mapping from column name to hive data type. Note that it must be OrderedDict so as to keep columns' order. - :type field_dict: collections.OrderedDict :param delimiter: field delimiter in the file - :type delimiter: str :param encoding: string encoding to use when writing the DataFrame to a file - :type encoding: str :param pandas_kwargs: passed to DataFrame.to_csv - :type pandas_kwargs: dict :param kwargs: passed to self.load_file """ @@ -404,28 +389,19 @@ def load_file( final destination using a ``HiveOperator``. :param filepath: local filepath of the file to load - :type filepath: str :param table: target Hive table, use dot notation to target a specific database - :type table: str :param delimiter: field delimiter in the file - :type delimiter: str :param field_dict: A dictionary of the field names in the file as keys and their Hive types as values. Note that it must be OrderedDict so as to keep columns' order. - :type field_dict: collections.OrderedDict :param create: whether to create the table if it doesn't exist - :type create: bool :param overwrite: whether to overwrite the data in table or partition - :type overwrite: bool :param partition: target partition as a dict of partition columns and values - :type partition: dict :param recreate: whether to drop and recreate the table at every execution - :type recreate: bool :param tblproperties: TBLPROPERTIES of the hive table being created - :type tblproperties: dict """ hql = '' if recreate: @@ -478,7 +454,6 @@ class HiveMetastoreHook(BaseHook): :param metastore_conn_id: reference to the :ref: `metastore thrift service connection id `.
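For context on the `run_cli` hunk above, a minimal sketch of calling it with `hive_conf`; the statement and schema are illustrative assumptions, while the parameter names come from the docstring in this diff:

```python
from airflow.providers.apache.hive.hooks.hive import HiveCliHook

hook = HiveCliHook(hive_cli_conn_id='hive_cli_default')
# hive_conf entries are passed as -hiveconf "key"="value" after hive_cli_params,
# so they override values configured on the connection.
out = hook.run_cli(
    hql="SELECT COUNT(*) FROM my_db.my_table;",  # illustrative statement
    schema='default',
    hive_conf={'hive.exec.dynamic.partition': 'true'},
)
```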
- :type metastore_conn_id: str """ # java short max val @@ -570,12 +545,9 @@ def check_for_partition(self, schema: str, table: str, partition: str) -> bool: Checks whether a partition exists :param schema: Name of hive schema (database) @table belongs to - :type schema: str :param table: Name of hive table @partition belongs to - :type table: str :param partition: Expression that matches the partitions to check for (e.g. `a = 'b' AND c = 'd'`) - :type partition: str :rtype: bool >>> hh = HiveMetastoreHook() @@ -593,11 +565,8 @@ def check_for_named_partition(self, schema: str, table: str, partition_name: str Checks whether a partition with a given name exists :param schema: Name of hive schema (database) @table belongs to - :type schema: str :param table: Name of hive table @partition belongs to - :type table: str :param partition_name: Name of the partitions to check for (e.g. `a=b/c=d`) - :type partition_name: str :rtype: bool >>> hh = HiveMetastoreHook() @@ -682,14 +651,11 @@ def _get_max_partition_from_part_specs( filter out partitions. :param part_specs: list of partition specs. - :type part_specs: list :param partition_key: partition key name. - :type partition_key: str :param filter_map: partition_key:partition_value map used for partition filtering, e.g. {'key1': 'value1', 'key2': 'value2'}. Only partitions matching all partition_key:partition_value pairs will be considered as candidates of max partition. - :type filter_map: map :return: Max partition or None if part_specs is empty. :rtype: basestring """ @@ -733,13 +699,9 @@ def max_partition( filter out partitions. :param schema: schema name. - :type schema: str :param table_name: table name. - :type table_name: str :param field: partition key to get max partition from. - :type field: str :param filter_map: partition_key:partition_value map used for partition filtering. - :type filter_map: map >>> hh = HiveMetastoreHook() >>> filter_map = {'ds': '2015-01-01'} @@ -789,14 +751,10 @@ def drop_partitions(self, table_name, part_vals, delete_data=False, db='default' Drop partitions from the given table matching the part_vals input :param table_name: table name. - :type table_name: str :param part_vals: list of partition specs. - :type part_vals: list :param delete_data: Whether the underlying data should also be deleted when dropping the partitions. - :type delete_data: bool :param db: Name of hive schema (database) @table belongs to - :type db: str >>> hh = HiveMetastoreHook() >>> hh.drop_partitions(db='airflow', table_name='static_babynames', @@ -827,9 +785,7 @@ class HiveServer2Hook(DbApiHook): :param hiveserver2_conn_id: Reference to the :ref: `Hive Server2 thrift service connection id `. - :type hiveserver2_conn_id: str :param schema: Hive database name. - :type schema: Optional[str] """ conn_name_attr = 'hiveserver2_conn_id' @@ -943,13 +899,9 @@ def get_results( Get results of the provided hql in target schema. :param hql: hql to be executed. - :type hql: str or list :param schema: target schema, defaults to 'default'. - :type schema: str :param fetch_size: max size of result to fetch. - :type fetch_size: int :param hive_conf: hive_conf to execute along with the hql. - :type hive_conf: dict :return: results of hql execution, dict with data (list of results) and header :rtype: dict """ @@ -973,21 +925,13 @@ def to_csv( Execute hql in target schema and write results to a csv file. :param hql: hql to be executed. - :type hql: str or list :param csv_filepath: filepath of csv to write results into.
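A minimal sketch of the `get_results` call documented in the hunk above; the connection id is the hook's documented default and the query is an illustrative assumption:

```python
from airflow.providers.apache.hive.hooks.hive import HiveServer2Hook

hook = HiveServer2Hook(hiveserver2_conn_id='hiveserver2_default')
# Returns a dict with 'data' (list of result rows) and 'header'.
results = hook.get_results(hql="SELECT 1", schema='default')
for row in results['data']:
    print(row)
```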
- :type csv_filepath: str :param schema: target schema, defaults to 'default'. - :type schema: str :param delimiter: delimiter of the csv file, defaults to ','. - :type delimiter: str :param lineterminator: lineterminator of the csv file. - :type lineterminator: str :param output_header: header of the csv file, defaults to True. - :type output_header: bool :param fetch_size: number of result rows to write into the csv file, defaults to 1000. - :type fetch_size: int :param hive_conf: hive_conf to execute along with the hql. - :type hive_conf: dict """ results_iter = self._get_results(hql, schema, fetch_size=fetch_size, hive_conf=hive_conf) @@ -1023,11 +967,8 @@ def get_records( Get a set of records from a Hive query. :param hql: hql to be executed. - :type hql: str or list :param schema: target schema, defaults to 'default'. - :type schema: str :param hive_conf: hive_conf to execute along with the hql. - :type hive_conf: dict :return: result of hive execution :rtype: list @@ -1049,13 +990,9 @@ def get_pandas_df( # type: ignore Get a pandas dataframe from a Hive query :param hql: hql to be executed. - :type hql: str or list :param schema: target schema, defaults to 'default'. - :type schema: str :param hive_conf: hive_conf to execute along with the hql. - :type hive_conf: dict :param kwargs: (optional) passed into pandas.DataFrame constructor - :type kwargs: dict :return: result of hive execution :rtype: DataFrame diff --git a/airflow/providers/apache/hive/operators/hive.py b/airflow/providers/apache/hive/operators/hive.py index 4672091d9e724..fd6339cbd5338 100644 --- a/airflow/providers/apache/hive/operators/hive.py +++ b/airflow/providers/apache/hive/operators/hive.py @@ -36,33 +36,24 @@ class HiveOperator(BaseOperator): :param hql: the hql to be executed. Note that you may also use a relative path from the dag file of a (template) hive script. (templated) - :type hql: str :param hive_cli_conn_id: Reference to the :ref:`Hive CLI connection id `. (templated) - :type hive_cli_conn_id: str :param hiveconfs: if defined, these key value pairs will be passed to hive as ``-hiveconf "key"="value"`` - :type hiveconfs: dict :param hiveconf_jinja_translate: when True, hiveconf-type templating ${var} gets translated into jinja-type templating {{ var }} and ${hiveconf:var} gets translated into jinja-type templating {{ var }}. Note that you may want to use this along with the ``DAG(user_defined_macros=myargs)`` parameter. View the DAG object documentation for more details. - :type hiveconf_jinja_translate: bool :param script_begin_tag: If defined, the operator will get rid of the part of the script before the first occurrence of `script_begin_tag` - :type script_begin_tag: str :param run_as_owner: Run HQL code as a DAG's owner. - :type run_as_owner: bool :param mapred_queue: queue used by the Hadoop CapacityScheduler. (templated) - :type mapred_queue: str :param mapred_queue_priority: priority within CapacityScheduler queue. Possible settings include: VERY_HIGH, HIGH, NORMAL, LOW, VERY_LOW - :type mapred_queue_priority: str :param mapred_job_name: This name will appear in the jobtracker. This can make monitoring easier.
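For the `HiveOperator` hunk above, a minimal usage sketch inside a DAG definition; the task id, queue, and HQL are illustrative assumptions:

```python
from airflow.providers.apache.hive.operators.hive import HiveOperator

# All values below are made up for illustration.
count_rows = HiveOperator(
    task_id='count_rows',
    hql='SELECT COUNT(*) FROM my_db.my_table;',
    hive_cli_conn_id='hive_cli_default',
    mapred_queue='default',
    mapred_job_name='airflow-count-rows',  # appears in the jobtracker
)
```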
- :type mapred_job_name: str """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/apache/hive/operators/hive_stats.py b/airflow/providers/apache/hive/operators/hive_stats.py index 601d46b887073..56c0f304af050 100644 --- a/airflow/providers/apache/hive/operators/hive_stats.py +++ b/airflow/providers/apache/hive/operators/hive_stats.py @@ -45,23 +45,17 @@ class HiveStatsCollectionOperator(BaseOperator): :param metastore_conn_id: Reference to the :ref:`Hive Metastore connection id `. - :type metastore_conn_id: str :param table: the source table, in the format ``database.table_name``. (templated) - :type table: str :param partition: the source partition. (templated) - :type partition: dict of {col:value} :param extra_exprs: dict of expressions to run against the table where keys are metric names and values are Presto-compatible expressions - :type extra_exprs: dict :param excluded_columns: list of columns to exclude, consider excluding blobs, large json columns, ... - :type excluded_columns: list :param assignment_func: a function that receives a column name and a type, and returns a dict of metric names and Presto expressions. If None is returned, the global defaults are applied. If an empty dictionary is returned, no stats are computed for that column. - :type assignment_func: function """ template_fields: Sequence[str] = ('table', 'partition', 'ds', 'dttm') diff --git a/airflow/providers/apache/hive/sensors/hive_partition.py b/airflow/providers/apache/hive/sensors/hive_partition.py index 4113f54e3abb6..f03dcb18f52e1 100644 --- a/airflow/providers/apache/hive/sensors/hive_partition.py +++ b/airflow/providers/apache/hive/sensors/hive_partition.py @@ -34,15 +34,12 @@ class HivePartitionSensor(BaseSensorOperator): :param table: The name of the table to wait for, supports the dot notation (my_database.my_table) - :type table: str :param partition: The partition clause to wait for. This is passed as is to the metastore Thrift client ``get_partitions_by_filter`` method, and supports SQL-like notation as in ``ds='2015-01-01' AND type='value'`` and comparison operators as in ``"ds>=2015-01-01"`` - :type partition: str :param metastore_conn_id: reference to the :ref: `metastore thrift service connection id ` - :type metastore_conn_id: str """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/apache/hive/sensors/metastore_partition.py b/airflow/providers/apache/hive/sensors/metastore_partition.py index 779db4bcc95b3..ea6c1525a1d57 100644 --- a/airflow/providers/apache/hive/sensors/metastore_partition.py +++ b/airflow/providers/apache/hive/sensors/metastore_partition.py @@ -32,16 +32,12 @@ class MetastorePartitionSensor(SqlSensor): way that wouldn't leverage the indexes. :param schema: the schema - :type schema: str :param table: the table - :type table: str :param partition_name: the partition name, as defined in the PARTITIONS table of the Metastore. Order of the fields does matter.
Examples: ``ds=2016-01-01`` or ``ds=2016-01-01/sub=foo`` for a sub-partitioned table - :type partition_name: str :param mysql_conn_id: a reference to the MySQL conn_id for the metastore - :type mysql_conn_id: str """ template_fields: Sequence[str] = ('partition_name', 'table', 'schema') diff --git a/airflow/providers/apache/hive/sensors/named_hive_partition.py b/airflow/providers/apache/hive/sensors/named_hive_partition.py index 55755db8dac7c..9535bcdab0219 100644 --- a/airflow/providers/apache/hive/sensors/named_hive_partition.py +++ b/airflow/providers/apache/hive/sensors/named_hive_partition.py @@ -34,10 +34,8 @@ class NamedHivePartitionSensor(BaseSensorOperator): Thrift client ``get_partitions_by_name`` method. Note that you cannot use logical or comparison operators as in HivePartitionSensor. - :type partition_names: list[str] :param metastore_conn_id: Reference to the :ref:`metastore thrift service connection id `. - :type metastore_conn_id: str """ template_fields: Sequence[str] = ('partition_names',) diff --git a/airflow/providers/apache/hive/transfers/hive_to_mysql.py b/airflow/providers/apache/hive/transfers/hive_to_mysql.py index 1789ec5171635..954c541fbbc4e 100644 --- a/airflow/providers/apache/hive/transfers/hive_to_mysql.py +++ b/airflow/providers/apache/hive/transfers/hive_to_mysql.py @@ -36,31 +36,23 @@ class HiveToMySqlOperator(BaseOperator): be used for smallish amounts of data. :param sql: SQL query to execute against Hive server. (templated) - :type sql: str :param mysql_table: target MySQL table, use dot notation to target a specific database. (templated) - :type mysql_table: str :param mysql_conn_id: destination mysql connection - :type mysql_conn_id: str :param metastore_conn_id: Reference to the :ref:`metastore thrift service connection id `. - :type metastore_conn_id: str :param mysql_preoperator: sql statement to run against mysql prior to import, typically used to truncate or delete the existing data in place before the import, allowing the task to be idempotent (running the task twice won't double-load data). (templated) - :type mysql_preoperator: str :param mysql_postoperator: sql statement to run against mysql after the import, typically used to move data from staging to production and issue cleanup commands. (templated) - :type mysql_postoperator: str :param bulk_load: flag to use bulk_load option. This loads mysql directly from a tab-delimited text file using the LOAD DATA LOCAL INFILE command. This option requires an extra connection parameter for the destination MySQL connection: {'local_infile': true}. - :type bulk_load: bool :param hive_conf: optional Hive configuration passed along with the SQL query - :type hive_conf: dict """ template_fields: Sequence[str] = ('sql', 'mysql_table', 'mysql_preoperator', 'mysql_postoperator') diff --git a/airflow/providers/apache/hive/transfers/hive_to_samba.py b/airflow/providers/apache/hive/transfers/hive_to_samba.py index 41bed4f759a94..03337dc4d61e3 100644 --- a/airflow/providers/apache/hive/transfers/hive_to_samba.py +++ b/airflow/providers/apache/hive/transfers/hive_to_samba.py @@ -36,14 +36,10 @@ class HiveToSambaOperator(BaseOperator): results of the query as a csv to a Samba location. :param hql: the hql to be exported. (templated) - :type hql: str :param destination_filepath: the Samba file path to which the results will be pushed - :type destination_filepath: str :param samba_conn_id: reference to the samba destination - :type samba_conn_id: str :param hiveserver2_conn_id: Reference to the :ref: `Hive Server2 thrift service connection id `.
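A minimal sketch of the `HiveToMySqlOperator` documented in the hunk above; the table names, query, and preoperator are illustrative assumptions:

```python
from airflow.providers.apache.hive.transfers.hive_to_mysql import HiveToMySqlOperator

# Truncating in the preoperator keeps the task idempotent; names are made up.
hive_to_mysql = HiveToMySqlOperator(
    task_id='hive_to_mysql',
    sql='SELECT name, cnt FROM my_db.aggregates',
    mysql_table='reporting.aggregates',
    mysql_conn_id='mysql_default',
    mysql_preoperator='TRUNCATE TABLE reporting.aggregates;',
)
```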
- :type hiveserver2_conn_id: str """ template_fields: Sequence[str] = ('hql', 'destination_filepath') diff --git a/airflow/providers/apache/hive/transfers/mssql_to_hive.py b/airflow/providers/apache/hive/transfers/mssql_to_hive.py index db0125c9bab50..e6691107266f9 100644 --- a/airflow/providers/apache/hive/transfers/mssql_to_hive.py +++ b/airflow/providers/apache/hive/transfers/mssql_to_hive.py @@ -50,26 +50,17 @@ class MsSqlToHiveOperator(BaseOperator): :param sql: SQL query to execute against the Microsoft SQL Server database. (templated) - :type sql: str :param hive_table: target Hive table, use dot notation to target a specific database. (templated) - :type hive_table: str :param create: whether to create the table if it doesn't exist - :type create: bool :param recreate: whether to drop and recreate the table at every execution - :type recreate: bool :param partition: target partition as a dict of partition columns and values. (templated) - :type partition: dict :param delimiter: field delimiter in the file - :type delimiter: str :param mssql_conn_id: source Microsoft SQL Server connection - :type mssql_conn_id: str :param hive_cli_conn_id: Reference to the :ref:`Hive CLI connection id `. - :type hive_cli_conn_id: str :param tblproperties: TBLPROPERTIES of the hive table being created - :type tblproperties: dict """ template_fields: Sequence[str] = ('sql', 'partition', 'hive_table') diff --git a/airflow/providers/apache/hive/transfers/mysql_to_hive.py b/airflow/providers/apache/hive/transfers/mysql_to_hive.py index b2fd51ec6222a..acdb9f1fa9ba2 100644 --- a/airflow/providers/apache/hive/transfers/mysql_to_hive.py +++ b/airflow/providers/apache/hive/transfers/mysql_to_hive.py @@ -48,36 +48,24 @@ class MySqlToHiveOperator(BaseOperator): final destination using a ``HiveOperator``. :param sql: SQL query to execute against the MySQL database. (templated) - :type sql: str :param hive_table: target Hive table, use dot notation to target a specific database. (templated) - :type hive_table: str :param create: whether to create the table if it doesn't exist - :type create: bool :param recreate: whether to drop and recreate the table at every execution - :type recreate: bool :param partition: target partition as a dict of partition columns and values. (templated) - :type partition: dict :param delimiter: field delimiter in the file - :type delimiter: str :param quoting: controls when quotes should be generated by the csv writer. It can take on any of the csv.QUOTE_* constants. - :type quoting: str :param quotechar: one-character string used to quote fields containing special characters. - :type quotechar: str :param escapechar: one-character string used by csv writer to escape the delimiter or quotechar. - :type escapechar: str :param mysql_conn_id: source mysql connection - :type mysql_conn_id: str :param hive_cli_conn_id: Reference to the :ref:`Hive CLI connection id `. - :type hive_cli_conn_id: str :param tblproperties: TBLPROPERTIES of the hive table being created - :type tblproperties: dict """ template_fields: Sequence[str] = ('sql', 'partition', 'hive_table') diff --git a/airflow/providers/apache/hive/transfers/s3_to_hive.py b/airflow/providers/apache/hive/transfers/s3_to_hive.py index fe281847bf4fc..cc189303e0584 100644 --- a/airflow/providers/apache/hive/transfers/s3_to_hive.py +++ b/airflow/providers/apache/hive/transfers/s3_to_hive.py @@ -51,34 +51,23 @@ class S3ToHiveOperator(BaseOperator): final destination using a ``HiveOperator``. :param s3_key: The key to be retrieved from S3.
(templated) - :type s3_key: str :param field_dict: A dictionary of the field names in the file as keys and their Hive types as values - :type field_dict: dict :param hive_table: target Hive table, use dot notation to target a specific database. (templated) - :type hive_table: str :param delimiter: field delimiter in the file - :type delimiter: str :param create: whether to create the table if it doesn't exist - :type create: bool :param recreate: whether to drop and recreate the table at every execution - :type recreate: bool :param partition: target partition as a dict of partition columns and values. (templated) - :type partition: dict :param headers: whether the file contains column names on the first line - :type headers: bool :param check_headers: whether the column names on the first line should be checked against the keys of field_dict - :type check_headers: bool :param wildcard_match: whether the s3_key should be interpreted as a Unix wildcard pattern - :type wildcard_match: bool :param aws_conn_id: source s3 connection - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for the S3 connection. By default SSL certificates are verified. You can provide the following values: @@ -89,17 +78,12 @@ class S3ToHiveOperator(BaseOperator): - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to use. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type verify: bool or str :param hive_cli_conn_id: Reference to the :ref:`Hive CLI connection id `. - :type hive_cli_conn_id: str :param input_compressed: Boolean to determine if file decompression is required to process headers - :type input_compressed: bool :param tblproperties: TBLPROPERTIES of the hive table being created - :type tblproperties: dict :param select_expression: S3 Select expression - :type select_expression: str """ template_fields: Sequence[str] = ('s3_key', 'partition', 'hive_table') diff --git a/airflow/providers/apache/hive/transfers/vertica_to_hive.py b/airflow/providers/apache/hive/transfers/vertica_to_hive.py index 7f26368eafc03..d381aa610ecae 100644 --- a/airflow/providers/apache/hive/transfers/vertica_to_hive.py +++ b/airflow/providers/apache/hive/transfers/vertica_to_hive.py @@ -48,24 +48,16 @@ class VerticaToHiveOperator(BaseOperator): final destination using a ``HiveOperator``. :param sql: SQL query to execute against the Vertica database. (templated) - :type sql: str :param hive_table: target Hive table, use dot notation to target a specific database. (templated) - :type hive_table: str :param create: whether to create the table if it doesn't exist - :type create: bool :param recreate: whether to drop and recreate the table at every execution - :type recreate: bool :param partition: target partition as a dict of partition columns and values. (templated) - :type partition: dict :param delimiter: field delimiter in the file - :type delimiter: str :param vertica_conn_id: source Vertica connection - :type vertica_conn_id: str :param hive_cli_conn_id: Reference to the :ref:`Hive CLI connection id `.
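A minimal sketch of the `S3ToHiveOperator` from the hunk above; the key, field names, and table are illustrative assumptions, while the parameter names come from the docstring in this diff:

```python
from airflow.providers.apache.hive.transfers.s3_to_hive import S3ToHiveOperator

# Field names and types should mirror the file's columns; all names are made up.
s3_to_hive = S3ToHiveOperator(
    task_id='s3_to_hive',
    s3_key='incoming/events/{{ ds }}.csv',
    field_dict={'event_id': 'BIGINT', 'payload': 'STRING'},
    hive_table='staging.events',
    partition={'ds': '{{ ds }}'},
    headers=True,
    check_headers=True,
    aws_conn_id='aws_default',
    hive_cli_conn_id='hive_cli_default',
)
```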
- :type hive_cli_conn_id: str """ template_fields: Sequence[str] = ('sql', 'partition', 'hive_table') diff --git a/airflow/providers/apache/kylin/hooks/kylin.py b/airflow/providers/apache/kylin/hooks/kylin.py index 7ee87b59dc42c..032b15c7e5bbf 100644 --- a/airflow/providers/apache/kylin/hooks/kylin.py +++ b/airflow/providers/apache/kylin/hooks/kylin.py @@ -29,11 +29,8 @@ class KylinHook(BaseHook): Interact with Kylin to run CubeSource commands and get job status. :param kylin_conn_id: The connection id as configured in Airflow administration. - :type kylin_conn_id: str :param project: project name - :type project: Optional[str] :param dsn: dsn of the Kylin connection - :type dsn: Optional[str] """ def __init__( diff --git a/airflow/providers/apache/kylin/operators/kylin_cube.py b/airflow/providers/apache/kylin/operators/kylin_cube.py index 993c8667e0bff..5fe91ee831934 100644 --- a/airflow/providers/apache/kylin/operators/kylin_cube.py +++ b/airflow/providers/apache/kylin/operators/kylin_cube.py @@ -39,14 +39,10 @@ class KylinCubeOperator(BaseOperator): `Apache Kylin `_ :param kylin_conn_id: The connection id as configured in Airflow administration. - :type kylin_conn_id: str :param project: kylin project name, this param will overwrite the project in kylin_conn_id. - :type project: str :param cube: kylin cube name - :type cube: str :param dsn: (dsn url of the kylin connection, which will overwrite kylin_conn_id; for example: kylin://ADMIN:KYLIN@sandbox/learn_kylin?timeout=60&is_debug=1) - :type dsn: str :param command: (kylin commands include 'build', 'merge', 'refresh', 'delete', 'build_streaming', 'merge_streaming', 'refresh_streaming', 'disable', 'enable', 'purge', 'clone', 'drop'. @@ -64,28 +60,18 @@ class KylinCubeOperator(BaseOperator): purge - purge cube clone - clone cube, the new cube name is {cube_name}_clone drop - drop cube) - :type command: str :param start_time: build segment start time - :type start_time: Optional[str] :param end_time: build segment end time - :type end_time: Optional[str] :param offset_start: streaming build segment start time - :type offset_start: Optional[str] :param offset_end: streaming build segment end time - :type offset_end: Optional[str] :param segment_name: segment name - :type segment_name: str :param is_track_job: (whether to track job status. If True, the task will track the job until its status is in ("FINISHED", "ERROR", "DISCARDED", "KILLED", "SUICIDAL", "STOPPED") or the timeout is reached) - :type is_track_job: bool :param interval: interval in seconds at which to track job status; default value is 60s - :type interval: int :param timeout: timeout in seconds; default value is 1 day (60 * 60 * 24 s) - :type timeout: int :param eager_error_status: (job error statuses; if the job status is in this list, the task fails. default value is tuple(["ERROR", "DISCARDED", "KILLED", "SUICIDAL", "STOPPED"])) - :type eager_error_status: tuple """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/apache/livy/hooks/livy.py b/airflow/providers/apache/livy/hooks/livy.py index e05ed1679245d..865218c51e7b8 100644 --- a/airflow/providers/apache/livy/hooks/livy.py +++ b/airflow/providers/apache/livy/hooks/livy.py @@ -48,11 +48,8 @@ class LivyHook(HttpHook, LoggingMixin): Hook for Apache Livy through the REST API. :param livy_conn_id: reference to a pre-defined Livy Connection. - :type livy_conn_id: str :param extra_options: A dictionary of options passed to Livy. - :type extra_options: Dict[str, Any] :param extra_headers: A dictionary of headers passed to the HTTP request to livy. - :type extra_headers: Dict[str, Any] ..
seealso:: For more details refer to the Apache Livy API reference: @@ -88,7 +85,6 @@ def get_conn(self, headers: Optional[Dict[str, Any]] = None) -> Any: Returns http session for use with requests :param headers: additional headers to be passed through as a dictionary - :type headers: dict :return: requests session :rtype: requests.Session """ @@ -108,13 +104,9 @@ def run_method( Wrapper around HttpHook that allows changing the HTTP method on the same hook :param method: http method - :type method: str :param endpoint: endpoint - :type endpoint: str :param data: request payload - :type data: dict :param headers: headers - :type headers: dict :return: http response :rtype: requests.Response """ @@ -170,7 +162,6 @@ def get_batch(self, session_id: Union[int, str]) -> Any: Fetch info about the specified batch :param session_id: identifier of the batch session - :type session_id: int :return: response body :rtype: dict """ @@ -194,7 +185,6 @@ def get_batch_state(self, session_id: Union[int, str]) -> BatchState: Fetch the state of the specified batch :param session_id: identifier of the batch session - :type session_id: Union[int, str] :return: batch state :rtype: BatchState """ @@ -221,7 +211,6 @@ def delete_batch(self, session_id: Union[int, str]) -> Any: Delete the specified batch :param session_id: identifier of the batch session - :type session_id: int :return: response body :rtype: dict """ @@ -244,11 +233,8 @@ def get_batch_logs(self, session_id: Union[int, str], log_start_position, log_ba """ Gets the session logs for a specified batch. :param session_id: identifier of the batch session - :type session_id: int :param log_start_position: Position from where to pull the logs - :type log_start_position: int :param log_batch_size: Number of lines to pull in one batch - :type log_batch_size: int :return: response body :rtype: dict @@ -271,7 +257,6 @@ def dump_batch_logs(self, session_id: Union[int, str]) -> Any: Dumps the session logs for a specified batch :param session_id: identifier of the batch session - :type session_id: int :return: response body :rtype: dict """ @@ -295,7 +280,6 @@ def _validate_session_id(session_id: Union[int, str]) -> None: Validate that the session id is an int :param session_id: session id - :type session_id: Union[int, str] """ try: int(session_id) @@ -308,7 +292,6 @@ def _parse_post_response(response: Dict[Any, Any]) -> Any: Parse batch response for batch id :param response: response body - :type response: dict :return: session id :rtype: int """ @@ -320,7 +303,6 @@ def _parse_request_response(response: Dict[Any, Any], parameter) -> Any: Parse batch response for the value of a given parameter :param response: response body - :type response: dict :return: value of parameter :rtype: Union[int, list] """ @@ -350,37 +332,21 @@ def build_post_batch_body( For more information about the format refer to .. seealso:: https://livy.apache.org/docs/latest/rest-api.html :param file: Path of the file containing the application to execute (required). - :type file: str :param proxy_user: User to impersonate when running the job. - :type proxy_user: str :param class_name: Application Java/Spark main class. - :type class_name: str :param args: Command line arguments for the application. - :type args: Sequence[Union[str, int, float]] :param jars: jars to be used in this session. - :type jars: Sequence[str] :param py_files: Python files to be used in this session. - :type py_files: Sequence[str] :param files: files to be used in this session.
- :type files: Sequence[str] :param driver_memory: Amount of memory to use for the driver process. - :type driver_memory: str :param driver_cores: Number of cores to use for the driver process. - :type driver_cores: Union[str, int] :param executor_memory: Amount of memory to use per executor process. - :type executor_memory: str :param executor_cores: Number of cores to use for each executor. - :type executor_cores: Union[int, str] :param num_executors: Number of executors to launch for this session. - :type num_executors: Union[str, int] :param archives: Archives to be used in this session. - :type archives: Sequence[str] :param queue: The name of the YARN queue to which the application is submitted. - :type queue: str :param name: The name of this session. - :type name: str :param conf: Spark configuration properties. - :type conf: dict :return: request body :rtype: dict """ @@ -425,7 +391,6 @@ def _validate_size_format(size: str) -> bool: Validate size format. :param size: size value - :type size: str :return: true if valid format :rtype: bool """ @@ -439,7 +404,6 @@ def _validate_list_of_stringables(vals: Sequence[Union[str, int, float]]) -> boo Check that the values in the provided list can be converted to strings. :param vals: list to validate - :type vals: Sequence[Union[str, int, float]] :return: true if valid :rtype: bool """ @@ -457,7 +421,6 @@ def _validate_extra_conf(conf: Dict[Any, Any]) -> bool: Check that configuration values are either strings or ints. :param conf: configuration variable - :type conf: dict :return: true if valid :rtype: bool """ diff --git a/airflow/providers/apache/livy/operators/livy.py b/airflow/providers/apache/livy/operators/livy.py index 46c59cd51d0f5..3b0a2bb93277c 100644 --- a/airflow/providers/apache/livy/operators/livy.py +++ b/airflow/providers/apache/livy/operators/livy.py @@ -33,46 +33,26 @@ class LivyOperator(BaseOperator): application to the underlying cluster. :param file: path of the file containing the application to execute (required). - :type file: str :param class_name: name of the application Java/Spark main class. - :type class_name: str :param args: application command line arguments. - :type args: list :param jars: jars to be used in this session. - :type jars: list :param py_files: python files to be used in this session. - :type py_files: list :param files: files to be used in this session. - :type files: list :param driver_memory: amount of memory to use for the driver process. - :type driver_memory: str :param driver_cores: number of cores to use for the driver process. - :type driver_cores: str, int :param executor_memory: amount of memory to use per executor process. - :type executor_memory: str :param executor_cores: number of cores to use for each executor. - :type executor_cores: str, int :param num_executors: number of executors to launch for this session. - :type num_executors: str, int :param archives: archives to be used in this session. - :type archives: list :param queue: name of the YARN queue to which the application is submitted. - :type queue: str :param name: name of this session. - :type name: str :param conf: Spark configuration properties. - :type conf: dict :param proxy_user: user to impersonate when running the job. - :type proxy_user: str :param livy_conn_id: reference to a pre-defined Livy Connection. - :type livy_conn_id: str :param polling_interval: time in seconds between polling for job completion.
Don't poll for values <= 0. - :type polling_interval: int :param extra_options: A dictionary of options, where key is string and value depends on the option that's being modified. - :type extra_options: Dict[str, Any] :param extra_headers: A dictionary of headers passed to the HTTP request to livy. - :type extra_headers: Dict[str, Any] """ template_fields: Sequence[str] = ('spark_params',) @@ -160,7 +140,6 @@ def poll_for_termination(self, batch_id: Union[int, str]) -> None: Poll Livy for batch termination. :param batch_id: id of the batch session to monitor. - :type batch_id: int """ hook = self.get_hook() state = hook.get_batch_state(batch_id) diff --git a/airflow/providers/apache/livy/sensors/livy.py b/airflow/providers/apache/livy/sensors/livy.py index e6f53f2c5d798..4c3419f2af4b2 100644 --- a/airflow/providers/apache/livy/sensors/livy.py +++ b/airflow/providers/apache/livy/sensors/livy.py @@ -30,10 +30,7 @@ class LivySensor(BaseSensorOperator): Monitor a Livy session for termination. :param livy_conn_id: reference to a pre-defined Livy connection - :type livy_conn_id: str :param batch_id: identifier of the monitored batch - :type batch_id: Union[int, str] - :type extra_options: A dictionary of options, + :param extra_options: A dictionary of options, where key is string and value depends on the option that's being modified. """ diff --git a/airflow/providers/apache/pig/operators/pig.py b/airflow/providers/apache/pig/operators/pig.py index 834c8adb109d4..5a285530dc637 100644 --- a/airflow/providers/apache/pig/operators/pig.py +++ b/airflow/providers/apache/pig/operators/pig.py @@ -30,17 +30,13 @@ class PigOperator(BaseOperator): Executes a pig script. :param pig: the pig latin script to be executed. (templated) - :type pig: str :param pig_cli_conn_id: reference to the Pig CLI connection - :type pig_cli_conn_id: str :param pigparams_jinja_translate: when True, pig params-type templating ${var} gets translated into jinja-type templating {{ var }}. Note that you may want to use this along with the ``DAG(user_defined_macros=myargs)`` parameter. View the DAG object documentation for more details. - :type pigparams_jinja_translate: bool :param pig_opts: pig options, such as: -x tez, -useHCatalog, ... - :type pig_opts: str """ template_fields: Sequence[str] = ('pig',) diff --git a/airflow/providers/apache/pinot/hooks/pinot.py b/airflow/providers/apache/pinot/hooks/pinot.py index f48fd1b68f248..55ddce0bcc0cc 100644 --- a/airflow/providers/apache/pinot/hooks/pinot.py +++ b/airflow/providers/apache/pinot/hooks/pinot.py @@ -45,13 +45,10 @@ class PinotAdminHook(BaseHook): following PR: https://github.com/apache/incubator-pinot/pull/4110 :param conn_id: The name of the connection to use. - :type conn_id: str :param cmd_path: The filepath to the pinot-admin.sh executable - :type cmd_path: str :param pinot_admin_system_exit: If true, the result is evaluated based on the status code. Otherwise, the result is evaluated as a failure if "Error" or "Exception" is in the output message.
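Stepping back to the Livy hunks above, a minimal usage sketch of `LivyOperator`; the jar path and main class are illustrative assumptions, while the parameter names come from the docstring in this diff:

```python
from airflow.providers.apache.livy.operators.livy import LivyOperator

# A polling_interval > 0 makes the operator wait for batch termination.
submit_job = LivyOperator(
    task_id='submit_job',
    file='hdfs://namenode/apps/my-spark-job.jar',  # illustrative path
    class_name='com.example.SparkJob',             # illustrative class
    livy_conn_id='livy_default',
    polling_interval=30,
)
```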
- :type pinot_admin_system_exit: bool """ def __init__( @@ -78,9 +75,7 @@ def add_schema(self, schema_file: str, with_exec: bool = True) -> Any: Add a Pinot schema by running the AddSchema command :param schema_file: Pinot schema file - :type schema_file: str :param with_exec: whether to execute the command immediately - :type with_exec: bool """ cmd = ["AddSchema"] cmd += ["-controllerHost", self.host] @@ -95,9 +90,7 @@ def add_table(self, file_path: str, with_exec: bool = True) -> Any: Add a Pinot table by running the AddTable command :param file_path: Pinot table configuration file - :type file_path: str :param with_exec: whether to execute the command immediately - :type with_exec: bool """ cmd = ["AddTable"] cmd += ["-controllerHost", self.host] @@ -208,9 +201,7 @@ def run_cli(self, cmd: List[str], verbose: bool = True) -> str: Run command with pinot-admin.sh :param cmd: the command to be run by the pinot-admin.sh script, as a list of arguments - :type cmd: list :param verbose: whether to log the command being run - :type verbose: bool """ command = [self.cmd_path] command.extend(cmd) @@ -292,9 +283,7 @@ def get_records(self, sql: str, parameters: Optional[Union[Dict[str, Any], Itera :param sql: the sql statement to be executed (str) or a list of sql statements to execute - :type sql: str :param parameters: The parameters to render the SQL query with. - :type parameters: dict or iterable """ with self.get_conn() as cur: cur.execute(sql) @@ -306,9 +295,7 @@ def get_first(self, sql: str, parameters: Optional[Union[Dict[str, Any], Iterabl :param sql: the sql statement to be executed (str) or a list of sql statements to execute - :type sql: str or list :param parameters: The parameters to render the SQL query with. - :type parameters: dict or iterable """ with self.get_conn() as cur: cur.execute(sql) diff --git a/airflow/providers/apache/spark/hooks/spark_jdbc.py b/airflow/providers/apache/spark/hooks/spark_jdbc.py index d5dfcdcddb8b8..df9d715be0dcc 100644 --- a/airflow/providers/apache/spark/hooks/spark_jdbc.py +++ b/airflow/providers/apache/spark/hooks/spark_jdbc.py @@ -29,80 +29,53 @@ class SparkJDBCHook(SparkSubmitHook): transfers to/from JDBC-based databases with Apache Spark. :param spark_app_name: Name of the job (default airflow-spark-jdbc) - :type spark_app_name: str :param spark_conn_id: The :ref:`spark connection id ` as configured in Airflow administration - :type spark_conn_id: str :param spark_conf: Any additional Spark configuration properties - :type spark_conf: dict :param spark_py_files: Additional python files used (.zip, .egg, or .py) - :type spark_py_files: str :param spark_files: Additional files to upload to the container running the job - :type spark_files: str :param spark_jars: Additional jars to upload and add to the driver and executor classpath - :type spark_jars: str :param num_executors: number of executors to run. This should be set so as to manage the number of connections made with the JDBC database - :type num_executors: int :param executor_cores: Number of cores per executor - :type executor_cores: int :param executor_memory: Memory per executor (e.g. 1000M, 2G) - :type executor_memory: str :param driver_memory: Memory allocated to the driver (e.g. 1000M, 2G) - :type driver_memory: str :param verbose: Whether to pass the verbose flag to spark-submit for debugging - :type verbose: bool :param keytab: Full path to the file that contains the keytab - :type keytab: str :param principal: The name of the kerberos principal used for keytab - :type principal: str :param cmd_type: Which way the data should flow.
2 possible values: spark_to_jdbc: data written by spark from metastore to jdbc jdbc_to_spark: data written by spark from jdbc to metastore - :type cmd_type: str :param jdbc_table: The name of the JDBC table - :type jdbc_table: str :param jdbc_conn_id: Connection id used for connection to JDBC database - :type jdbc_conn_id: str :param jdbc_driver: Name of the JDBC driver to use for the JDBC connection. This driver (usually a jar) should be passed in the 'jars' parameter - :type jdbc_driver: str :param metastore_table: The name of the metastore table. - :type metastore_table: str :param jdbc_truncate: (spark_to_jdbc only) Whether or not Spark should truncate or drop and recreate the JDBC table. This only takes effect if 'save_mode' is set to Overwrite. Also, if the schema is different, Spark cannot truncate, and will drop the table and recreate it - :type jdbc_truncate: bool :param save_mode: The Spark save-mode to use (e.g. overwrite, append, etc.) - :type save_mode: str :param save_format: (jdbc_to_spark-only) The Spark save-format to use (e.g. parquet) - :type save_format: str :param batch_size: (spark_to_jdbc only) The size of the batch to insert per round trip to the JDBC database. Defaults to 1000 - :type batch_size: int :param fetch_size: (jdbc_to_spark only) The size of the batch to fetch per round trip from the JDBC database. Default depends on the JDBC driver - :type fetch_size: int :param num_partitions: The maximum number of partitions that can be used by Spark simultaneously, both for spark_to_jdbc and jdbc_to_spark operations. This will also cap the number of JDBC connections that can be opened - :type num_partitions: int :param partition_column: (jdbc_to_spark-only) A numeric column to be used to partition the metastore table by. If specified, you must also specify: num_partitions, lower_bound, upper_bound - :type partition_column: str :param lower_bound: (jdbc_to_spark-only) Lower bound of the range of the numeric partition column to fetch. If specified, you must also specify: num_partitions, partition_column, upper_bound - :type lower_bound: int :param upper_bound: (jdbc_to_spark-only) Upper bound of the range of the numeric partition column to fetch. If specified, you must also specify: num_partitions, partition_column, lower_bound - :type upper_bound: int :param create_table_column_types: (spark_to_jdbc-only) The database column data types to use instead of the defaults, when creating the table. Data type information should be specified in diff --git a/airflow/providers/apache/spark/hooks/spark_sql.py b/airflow/providers/apache/spark/hooks/spark_sql.py index 837517da5e29c..2621bf9a4099b 100644 --- a/airflow/providers/apache/spark/hooks/spark_sql.py +++ b/airflow/providers/apache/spark/hooks/spark_sql.py @@ -32,33 +32,21 @@ class SparkSqlHook(BaseHook): "spark-sql" binary is in the PATH. :param sql: The SQL query to execute - :type sql: str :param conf: arbitrary Spark configuration property, in ``PROP=VALUE`` format - :type conf: str (format: PROP=VALUE) :param conn_id: connection_id string - :type conn_id: str :param total_executor_cores: (Standalone & Mesos only) Total cores for all executors (Default: all the available cores on the worker) - :type total_executor_cores: int :param executor_cores: (Standalone & YARN only) Number of cores per executor (Default: 2) - :type executor_cores: int :param executor_memory: Memory per executor (e.g.
1000M, 2G) (Default: 1G) - :type executor_memory: str :param keytab: Full path to the file that contains the keytab - :type keytab: str :param master: spark://host:port, mesos://host:port, yarn, or local (Default: The ``host`` and ``port`` set in the Connection, or ``"yarn"``) - :type master: str :param name: Name of the job. - :type name: str :param num_executors: Number of executors to launch - :type num_executors: int :param verbose: Whether to pass the verbose flag to spark-sql - :type verbose: bool :param yarn_queue: The YARN queue to submit to (Default: The ``queue`` value set in the Connection, or ``"default"``) - :type yarn_queue: str """ conn_name_attr = 'conn_id' @@ -127,7 +115,6 @@ def _prepare_command(self, cmd: Union[str, List[str]]) -> List[str]: as default. :param cmd: command to append to the spark-sql command - :type cmd: str or list[str] :return: full command to be executed """ connection_cmd = ["spark-sql"] @@ -177,9 +164,7 @@ def run_query(self, cmd: str = "", **kwargs: Any) -> None: Remote Popen (actually executes the spark-sql query) :param cmd: command to append to the spark-sql command - :type cmd: str or list[str] :param kwargs: extra arguments to Popen (see subprocess.Popen) - :type kwargs: dict """ spark_sql_cmd = self._prepare_command(cmd) diff --git a/airflow/providers/apache/spark/hooks/spark_submit.py b/airflow/providers/apache/spark/hooks/spark_submit.py index 677ea7be8cd68..95a65c853df00 100644 --- a/airflow/providers/apache/spark/hooks/spark_submit.py +++ b/airflow/providers/apache/spark/hooks/spark_submit.py @@ -41,67 +41,43 @@ class SparkSubmitHook(BaseHook, LoggingMixin): supplied. :param conf: Arbitrary Spark configuration properties - :type conf: dict :param spark_conn_id: The :ref:`spark connection id ` as configured in Airflow administration. When an invalid connection_id is supplied, it will default to yarn. - :type spark_conn_id: str :param files: Upload additional files to the executor running the job, separated by a comma. Files will be placed in the working directory of each executor. For example, serialized objects. - :type files: str :param py_files: Additional python files used by the job, can be .zip, .egg or .py. - :type py_files: str :param archives: Archives that spark should unzip (and possibly tag with #ALIAS) into the application working directory. :param driver_class_path: Additional, driver-specific, classpath settings. - :type driver_class_path: str :param jars: Submit additional jars to upload and place them in executor classpath. - :type jars: str :param java_class: the main class of the Java application - :type java_class: str :param packages: Comma-separated list of maven coordinates of jars to include on the driver and executor classpaths - :type packages: str :param exclude_packages: Comma-separated list of maven coordinates of jars to exclude while resolving the dependencies provided in 'packages' - :type exclude_packages: str :param repositories: Comma-separated list of additional remote repositories to search for the maven coordinates given with 'packages' - :type repositories: str :param total_executor_cores: (Standalone & Mesos only) Total cores for all executors (Default: all the available cores on the worker) - :type total_executor_cores: int :param executor_cores: (Standalone, YARN and Kubernetes only) Number of cores per executor (Default: 2) - :type executor_cores: int :param executor_memory: Memory per executor (e.g.
1000M, 2G) (Default: 1G) - :type executor_memory: str :param driver_memory: Memory allocated to the driver (e.g. 1000M, 2G) (Default: 1G) - :type driver_memory: str :param keytab: Full path to the file that contains the keytab - :type keytab: str :param principal: The name of the kerberos principal used for keytab - :type principal: str :param proxy_user: User to impersonate when submitting the application - :type proxy_user: str :param name: Name of the job (default airflow-spark) - :type name: str :param num_executors: Number of executors to launch - :type num_executors: int :param status_poll_interval: Seconds to wait between polls of driver status in cluster mode (Default: 1) - :type status_poll_interval: int :param application_args: Arguments for the application being submitted - :type application_args: list :param env_vars: Environment variables for spark-submit. It supports yarn and k8s mode too. - :type env_vars: dict :param verbose: Whether to pass the verbose flag to spark-submit process for debugging - :type verbose: bool :param spark_binary: The command to use for spark submit. Some distros may use spark2-submit. - :type spark_binary: str """ conn_name_attr = 'conn_id' @@ -279,7 +255,6 @@ def _build_spark_submit_command(self, application: str) -> List[str]: Construct the spark-submit command to execute. :param application: command to append to the spark-submit command - :type application: str :return: full command to be executed """ connection_cmd = self._get_spark_binary_path() @@ -411,7 +386,6 @@ def submit(self, application: str = "", **kwargs: Any) -> None: Remote Popen to execute the spark-submit job :param application: Submitted application, jar or py file - :type application: str :param kwargs: extra arguments to Popen (see subprocess.Popen) """ spark_submit_cmd = self._build_spark_submit_command(application) diff --git a/airflow/providers/apache/spark/operators/spark_jdbc.py b/airflow/providers/apache/spark/operators/spark_jdbc.py index 3b938caaab339..87f244be50899 100644 --- a/airflow/providers/apache/spark/operators/spark_jdbc.py +++ b/airflow/providers/apache/spark/operators/spark_jdbc.py @@ -37,80 +37,53 @@ class SparkJDBCOperator(SparkSubmitOperator): :ref:`howto/operator:SparkJDBCOperator` :param spark_app_name: Name of the job (default airflow-spark-jdbc) - :type spark_app_name: str :param spark_conn_id: The :ref:`spark connection id ` as configured in Airflow administration - :type spark_conn_id: str :param spark_conf: Any additional Spark configuration properties - :type spark_conf: dict :param spark_py_files: Additional python files used (.zip, .egg, or .py) - :type spark_py_files: str :param spark_files: Additional files to upload to the container running the job - :type spark_files: str :param spark_jars: Additional jars to upload and add to the driver and executor classpath - :type spark_jars: str :param num_executors: number of executors to run. This should be set so as to manage the number of connections made with the JDBC database - :type num_executors: int :param executor_cores: Number of cores per executor - :type executor_cores: int :param executor_memory: Memory per executor (e.g. 1000M, 2G) - :type executor_memory: str :param driver_memory: Memory allocated to the driver (e.g.
1000M, 2G) - :type driver_memory: str :param verbose: Whether to pass the verbose flag to spark-submit for debugging - :type verbose: bool :param keytab: Full path to the file that contains the keytab - :type keytab: str :param principal: The name of the kerberos principal used for keytab - :type principal: str :param cmd_type: Which way the data should flow. 2 possible values: spark_to_jdbc: data written by spark from metastore to jdbc jdbc_to_spark: data written by spark from jdbc to metastore - :type cmd_type: str :param jdbc_table: The name of the JDBC table - :type jdbc_table: str :param jdbc_conn_id: Connection id used for connection to JDBC database - :type jdbc_conn_id: str :param jdbc_driver: Name of the JDBC driver to use for the JDBC connection. This driver (usually a jar) should be passed in the 'jars' parameter - :type jdbc_driver: str :param metastore_table: The name of the metastore table. - :type metastore_table: str :param jdbc_truncate: (spark_to_jdbc only) Whether or not Spark should truncate or drop and recreate the JDBC table. This only takes effect if 'save_mode' is set to Overwrite. Also, if the schema is different, Spark cannot truncate, and will drop the table and recreate it - :type jdbc_truncate: bool :param save_mode: The Spark save-mode to use (e.g. overwrite, append, etc.) - :type save_mode: str :param save_format: (jdbc_to_spark-only) The Spark save-format to use (e.g. parquet) - :type save_format: str :param batch_size: (spark_to_jdbc only) The size of the batch to insert per round trip to the JDBC database. Defaults to 1000 - :type batch_size: int :param fetch_size: (jdbc_to_spark only) The size of the batch to fetch per round trip from the JDBC database. Default depends on the JDBC driver - :type fetch_size: int :param num_partitions: The maximum number of partitions that can be used by Spark simultaneously, both for spark_to_jdbc and jdbc_to_spark operations. This will also cap the number of JDBC connections that can be opened - :type num_partitions: int :param partition_column: (jdbc_to_spark-only) A numeric column to be used to partition the metastore table by. If specified, you must also specify: num_partitions, lower_bound, upper_bound - :type partition_column: str :param lower_bound: (jdbc_to_spark-only) Lower bound of the range of the numeric partition column to fetch. If specified, you must also specify: num_partitions, partition_column, upper_bound - :type lower_bound: int :param upper_bound: (jdbc_to_spark-only) Upper bound of the range of the numeric partition column to fetch. If specified, you must also specify: num_partitions, partition_column, lower_bound - :type upper_bound: int :param create_table_column_types: (spark_to_jdbc-only) The database column data types to use instead of the defaults, when creating the table. Data type information should be specified in diff --git a/airflow/providers/apache/spark/operators/spark_sql.py b/airflow/providers/apache/spark/operators/spark_sql.py index f9a00fcc2bf98..7fb1df93d2fdc 100644 --- a/airflow/providers/apache/spark/operators/spark_sql.py +++ b/airflow/providers/apache/spark/operators/spark_sql.py @@ -34,33 +34,21 @@ class SparkSqlOperator(BaseOperator): :ref:`howto/operator:SparkSqlOperator` :param sql: The SQL query to execute.
(templated) - :type sql: str :param conf: arbitrary Spark configuration property, in ``PROP=VALUE`` format - :type conf: str (format: PROP=VALUE) :param conn_id: connection_id string - :type conn_id: str :param total_executor_cores: (Standalone & Mesos only) Total cores for all executors (Default: all the available cores on the worker) - :type total_executor_cores: int :param executor_cores: (Standalone & YARN only) Number of cores per executor (Default: 2) - :type executor_cores: int :param executor_memory: Memory per executor (e.g. 1000M, 2G) (Default: 1G) - :type executor_memory: str :param keytab: Full path to the file that contains the keytab - :type keytab: str :param master: spark://host:port, mesos://host:port, yarn, or local (Default: The ``host`` and ``port`` set in the Connection, or ``"yarn"``) - :type master: str :param name: Name of the job - :type name: str :param num_executors: Number of executors to launch - :type num_executors: int :param verbose: Whether to pass the verbose flag to spark-sql - :type verbose: bool :param yarn_queue: The YARN queue to submit to (Default: The ``queue`` value set in the Connection, or ``"default"``) - :type yarn_queue: str """ template_fields: Sequence[str] = ('_sql',) diff --git a/airflow/providers/apache/spark/operators/spark_submit.py b/airflow/providers/apache/spark/operators/spark_submit.py index c418825d17ab4..db1114cf201dc 100644 --- a/airflow/providers/apache/spark/operators/spark_submit.py +++ b/airflow/providers/apache/spark/operators/spark_submit.py @@ -37,64 +37,39 @@ class SparkSubmitOperator(BaseOperator): :ref:`howto/operator:SparkSubmitOperator` :param application: The application that is submitted as a job, either jar or py file. (templated) - :type application: str :param conf: Arbitrary Spark configuration properties (templated) - :type conf: dict :param spark_conn_id: The :ref:`spark connection id ` as configured in Airflow administration. When an invalid connection_id is supplied, it will default to yarn. - :type spark_conn_id: str :param files: Upload additional files to the executor running the job, separated by a comma. Files will be placed in the working directory of each executor. For example, serialized objects. (templated) - :type files: str :param py_files: Additional python files used by the job, can be .zip, .egg or .py. (templated) - :type py_files: str :param jars: Submit additional jars to upload and place them in executor classpath. (templated) - :type jars: str :param driver_class_path: Additional, driver-specific, classpath settings. (templated) - :type driver_class_path: str :param java_class: the main class of the Java application - :type java_class: str :param packages: Comma-separated list of maven coordinates of jars to include on the driver and executor classpaths. (templated) - :type packages: str :param exclude_packages: Comma-separated list of maven coordinates of jars to exclude while resolving the dependencies provided in 'packages' (templated) - :type exclude_packages: str :param repositories: Comma-separated list of additional remote repositories to search for the maven coordinates given with 'packages' - :type repositories: str :param total_executor_cores: (Standalone & Mesos only) Total cores for all executors (Default: all the available cores on the worker) - :type total_executor_cores: int :param executor_cores: (Standalone & YARN only) Number of cores per executor (Default: 2) - :type executor_cores: int :param executor_memory: Memory per executor (e.g.
1000M, 2G) (Default: 1G) - :type executor_memory: str :param driver_memory: Memory allocated to the driver (e.g. 1000M, 2G) (Default: 1G) - :type driver_memory: str :param keytab: Full path to the file that contains the keytab (templated) - :type keytab: str :param principal: The name of the kerberos principal used for keytab (templated) - :type principal: str :param proxy_user: User to impersonate when submitting the application (templated) - :type proxy_user: str :param name: Name of the job (default airflow-spark). (templated) - :type name: str :param num_executors: Number of executors to launch - :type num_executors: int :param status_poll_interval: Seconds to wait between polls of driver status in cluster mode (Default: 1) - :type status_poll_interval: int :param application_args: Arguments for the application being submitted (templated) - :type application_args: list :param env_vars: Environment variables for spark-submit. It supports yarn and k8s mode too. (templated) - :type env_vars: dict :param verbose: Whether to pass the verbose flag to spark-submit process for debugging - :type verbose: bool :param spark_binary: The command to use for spark submit. Some distros may use spark2-submit. - :type spark_binary: str """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/apache/sqoop/hooks/sqoop.py b/airflow/providers/apache/sqoop/hooks/sqoop.py index 8a6b5a55a93e9..65ed7500cb5ae 100644 --- a/airflow/providers/apache/sqoop/hooks/sqoop.py +++ b/airflow/providers/apache/sqoop/hooks/sqoop.py @@ -43,13 +43,9 @@ class SqoopHook(BaseHook): * ``password_file``: Path to file containing the password. :param conn_id: Reference to the sqoop connection. - :type conn_id: str :param verbose: Set sqoop to verbose. - :type verbose: bool :param num_mappers: Number of map tasks to import in parallel. - :type num_mappers: int :param properties: Properties to set via the -D argument - :type properties: dict """ conn_name_attr = 'conn_id' diff --git a/airflow/providers/asana/operators/asana_tasks.py b/airflow/providers/asana/operators/asana_tasks.py index d440ec8b0d314..66d88291a3903 100644 --- a/airflow/providers/asana/operators/asana_tasks.py +++ b/airflow/providers/asana/operators/asana_tasks.py @@ -35,14 +35,11 @@ class AsanaCreateTaskOperator(BaseOperator): :ref:`howto/operator:AsanaCreateTaskOperator` :param conn_id: The Asana connection to use. - :type conn_id: str :param name: Name of the Asana task. - :type name: str :param task_parameters: Any of the optional task creation parameters. See https://developers.asana.com/docs/create-a-task for a complete list. You must specify at least one of 'workspace', 'parent', or 'projects' either here or in the connection. - :type task_parameters: dict """ def __init__( @@ -77,12 +74,9 @@ class AsanaUpdateTaskOperator(BaseOperator): :ref:`howto/operator:AsanaUpdateTaskOperator` :param conn_id: The Asana connection to use. - :type conn_id: str :param asana_task_gid: Asana task ID to update - :type asana_task_gid: str :param task_parameters: Any task parameters that should be updated. See https://developers.asana.com/docs/update-a-task for a complete list. - :type task_update_parameters: dict """ def __init__( @@ -114,9 +108,7 @@ class AsanaDeleteTaskOperator(BaseOperator): :ref:`howto/operator:AsanaDeleteTaskOperator` :param conn_id: The Asana connection to use. - :type conn_id: str :param asana_task_gid: Asana Task ID to delete. 
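Looking back at the `SparkSubmitOperator` hunk above, a minimal usage sketch; the application path and arguments are illustrative assumptions, and the connection keyword follows the hook's documented `conn_name_attr = 'conn_id'`:

```python
from airflow.providers.apache.spark.operators.spark_submit import SparkSubmitOperator

# Application path, args and sizing below are made up for illustration.
spark_task = SparkSubmitOperator(
    task_id='spark_task',
    application='/opt/jobs/etl.py',
    conn_id='spark_default',
    executor_cores=2,
    executor_memory='2G',
    num_executors=4,
    application_args=['--date', '{{ ds }}'],
    verbose=False,
)
```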
- :type asana_task_gid: str """ def __init__( @@ -147,11 +139,9 @@ class AsanaFindTaskOperator(BaseOperator): :ref:`howto/operator:AsanaFindTaskOperator` :param conn_id: The Asana connection to use. - :type conn_id: str :param search_parameters: The parameters used to find relevant tasks. You must specify at least one of `project`, `section`, `tag`, `user_task_list`, or both `assignee` and `workspace` either here or in the connection. - :type search_parameters: dict """ def __init__( diff --git a/airflow/providers/celery/sensors/celery_queue.py b/airflow/providers/celery/sensors/celery_queue.py index 57b9a1197c0eb..5a7674ae6bc09 100644 --- a/airflow/providers/celery/sensors/celery_queue.py +++ b/airflow/providers/celery/sensors/celery_queue.py @@ -33,9 +33,7 @@ class CeleryQueueSensor(BaseSensorOperator): or ``active`` states. :param celery_queue: The name of the Celery queue to wait for. - :type celery_queue: str :param target_task_id: Task id for checking - :type target_task_id: str """ def __init__(self, *, celery_queue: str, target_task_id: Optional[str] = None, **kwargs) -> None: @@ -51,7 +49,6 @@ def _check_task_id(self, context: 'Context') -> bool: Celery result has finished execution. :param context: Airflow's execution context - :type context: dict :return: True if task has been executed, otherwise False :rtype: bool """ diff --git a/airflow/providers/cloudant/hooks/cloudant.py b/airflow/providers/cloudant/hooks/cloudant.py index d40948703751e..3c02a945fd1d3 100644 --- a/airflow/providers/cloudant/hooks/cloudant.py +++ b/airflow/providers/cloudant/hooks/cloudant.py @@ -31,7 +31,6 @@ class CloudantHook(BaseHook): .. seealso:: the latest documentation `here `_. :param cloudant_conn_id: The connection id to authenticate and get a session object from cloudant. - :type cloudant_conn_id: str """ conn_name_attr = 'cloudant_conn_id' diff --git a/airflow/providers/cncf/kubernetes/backcompat/pod.py b/airflow/providers/cncf/kubernetes/backcompat/pod.py index 30a7128b399e8..4633dbd239c2b 100644 --- a/airflow/providers/cncf/kubernetes/backcompat/pod.py +++ b/airflow/providers/cncf/kubernetes/backcompat/pod.py @@ -34,19 +34,12 @@ class Resources: """ :param request_memory: requested memory - :type request_memory: str :param request_cpu: requested CPU number - :type request_cpu: float | str :param request_ephemeral_storage: requested ephemeral storage - :type request_ephemeral_storage: str :param limit_memory: limit for memory usage - :type limit_memory: str :param limit_cpu: Limit for CPU used - :type limit_cpu: float | str :param limit_gpu: Limits for GPU used - :type limit_gpu: int :param limit_ephemeral_storage: Limit for ephemeral storage - :type limit_ephemeral_storage: float | str """ def __init__( diff --git a/airflow/providers/cncf/kubernetes/backcompat/volume.py b/airflow/providers/cncf/kubernetes/backcompat/volume.py index e5b4d004ed0ca..c51ce8a551e38 100644 --- a/airflow/providers/cncf/kubernetes/backcompat/volume.py +++ b/airflow/providers/cncf/kubernetes/backcompat/volume.py @@ -35,10 +35,8 @@ def __init__(self, name, configs): and Persistent Volumes :param name: the name of the volume mount - :type name: str :param configs: dictionary of any features needed for volume. We purposely keep this vague since there are multiple volume types with changing configs.
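As a sketch of the CeleryQueueSensor documented above (the queue name and target task ID are illustrative, not taken from this changeset):

.. code-block:: python

    from airflow.providers.celery.sensors.celery_queue import CeleryQueueSensor

    wait_for_queue = CeleryQueueSensor(
        task_id="wait_for_queue",
        celery_queue="default",  # queue that must drain before the sensor succeeds
        target_task_id="some_celery_task",  # optional: check a single Celery task instead
    )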
- :type configs: dict """ self.name = name self.configs = configs diff --git a/airflow/providers/cncf/kubernetes/backcompat/volume_mount.py b/airflow/providers/cncf/kubernetes/backcompat/volume_mount.py index b77ab47cd8a3a..f9faed9d04a97 100644 --- a/airflow/providers/cncf/kubernetes/backcompat/volume_mount.py +++ b/airflow/providers/cncf/kubernetes/backcompat/volume_mount.py @@ -38,13 +38,9 @@ def __init__(self, name, mount_path, sub_path, read_only): running container. :param name: the name of the volume mount - :type name: str :param mount_path: - :type mount_path: str :param sub_path: subpath within the volume mount - :type sub_path: Optional[str] :param read_only: whether to access pod with read-only mode - :type read_only: bool """ self.name = name self.mount_path = mount_path diff --git a/airflow/providers/cncf/kubernetes/hooks/kubernetes.py b/airflow/providers/cncf/kubernetes/hooks/kubernetes.py index 84d143b557390..8025c2378105c 100644 --- a/airflow/providers/cncf/kubernetes/hooks/kubernetes.py +++ b/airflow/providers/cncf/kubernetes/hooks/kubernetes.py @@ -61,7 +61,6 @@ class KubernetesHook(BaseHook): :param conn_id: The :ref:`kubernetes connection ` to Kubernetes cluster. - :type conn_id: str """ conn_name_attr = 'kubernetes_conn_id' @@ -195,15 +194,10 @@ def create_custom_object( Creates custom resource definition object in Kubernetes :param group: api group - :type group: str :param version: api version - :type version: str :param plural: api plural - :type plural: str :param body: crd object definition - :type body: Union[str, dict] :param namespace: kubernetes namespace - :type namespace: str """ api = client.CustomObjectsApi(self.api_client) if namespace is None: @@ -226,15 +220,10 @@ def get_custom_object( Get custom resource definition object from Kubernetes :param group: api group - :type group: str :param version: api version - :type version: str :param plural: api plural - :type plural: str :param name: crd object name - :type name: str :param namespace: kubernetes namespace - :type namespace: str """ api = client.CustomObjectsApi(self.api_client) if namespace is None: @@ -266,10 +255,8 @@ def get_pod_log_stream( Retrieves a log stream for a container in a kubernetes pod. :param pod_name: pod name - :type pod_name: str :param container: container name :param namespace: kubernetes namespace - :type namespace: str """ api = client.CoreV1Api(self.api_client) watcher = watch.Watch() @@ -293,10 +280,8 @@ def get_pod_logs( Retrieves a container's log from the specified pod. :param pod_name: pod name - :type pod_name: str :param container: container name :param namespace: kubernetes namespace - :type namespace: str """ api = client.CoreV1Api(self.api_client) return api.read_namespaced_pod_log( diff --git a/airflow/providers/cncf/kubernetes/operators/kubernetes_pod.py b/airflow/providers/cncf/kubernetes/operators/kubernetes_pod.py index 1ae72fb31acb3..cf35c668f18c4 100644 --- a/airflow/providers/cncf/kubernetes/operators/kubernetes_pod.py +++ b/airflow/providers/cncf/kubernetes/operators/kubernetes_pod.py @@ -80,101 +80,63 @@ class KubernetesPodOperator(BaseOperator): simplifies the authorization process. :param namespace: the namespace to run within kubernetes. - :type namespace: str :param image: Docker image you wish to launch. Defaults to hub.docker.com, but fully qualified URLs will point to custom repositories.
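A hedged sketch of KubernetesHook.create_custom_object as documented above; the group, version, plural, and body values are illustrative, and the default kubernetes connection is assumed:

.. code-block:: python

    from airflow.providers.cncf.kubernetes.hooks.kubernetes import KubernetesHook

    hook = KubernetesHook(conn_id="kubernetes_default")
    hook.create_custom_object(
        group="sparkoperator.k8s.io",  # hypothetical CRD coordinates
        version="v1beta2",
        plural="sparkapplications",
        body={"apiVersion": "sparkoperator.k8s.io/v1beta2", "kind": "SparkApplication"},
        namespace="default",
    )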
(templated) - :type image: str :param name: name of the pod in which the task will run, will be used (plus a random suffix if random_name_suffix is True) to generate a pod id (DNS-1123 subdomain, containing only [a-z0-9.-]). - :type name: str :param random_name_suffix: if True, will generate a random suffix. - :type random_name_suffix: bool :param cmds: entrypoint of the container. (templated) The docker image's entrypoint is used if this is not provided. - :type cmds: list[str] :param arguments: arguments of the entrypoint. (templated) The docker image's CMD is used if this is not provided. - :type arguments: list[str] :param ports: ports for launched pod. - :type ports: list[k8s.V1ContainerPort] :param volume_mounts: volumeMounts for launched pod. - :type volume_mounts: list[k8s.V1VolumeMount] :param volumes: volumes for launched pod. Includes ConfigMaps and PersistentVolumes. - :type volumes: list[k8s.V1Volume] :param env_vars: Environment variables initialized in the container. (templated) - :type env_vars: list[k8s.V1EnvVar] :param secrets: Kubernetes secrets to inject in the container. They can be exposed as environment vars or files in a volume. - :type secrets: list[airflow.kubernetes.secret.Secret] :param in_cluster: run kubernetes client with in_cluster configuration. - :type in_cluster: bool :param cluster_context: context that points to kubernetes cluster. Ignored when in_cluster is True. If None, current-context is used. - :type cluster_context: str :param reattach_on_restart: if the scheduler dies while the pod is running, reattach and monitor - :type reattach_on_restart: bool :param labels: labels to apply to the Pod. (templated) - :type labels: dict :param startup_timeout_seconds: timeout in seconds to startup the pod. - :type startup_timeout_seconds: int :param get_logs: get the stdout of the container as logs of the tasks. - :type get_logs: bool :param image_pull_policy: Specify a policy to cache or always pull an image. - :type image_pull_policy: str :param annotations: non-identifying metadata you can attach to the Pod. Can be a large range of data, and can include characters that are not permitted by labels. - :type annotations: dict :param resources: A dict containing resources requests and limits. Possible keys are request_memory, request_cpu, limit_memory, limit_cpu, and limit_gpu, which will be used to generate airflow.kubernetes.pod.Resources. See also kubernetes.io/docs/concepts/configuration/manage-compute-resources-container - :type resources: k8s.V1ResourceRequirements :param affinity: A dict containing a group of affinity scheduling rules. - :type affinity: k8s.V1Affinity :param config_file: The path to the Kubernetes config file. (templated) If not specified, default value is ``~/.kube/config`` - :type config_file: str :param node_selector: A dict containing a group of scheduling rules. - :type node_selector: dict :param image_pull_secrets: Any image pull secrets to be given to the pod. If more than one secret is required, provide a comma separated list: secret_a,secret_b - :type image_pull_secrets: List[k8s.V1LocalObjectReference] :param service_account_name: Name of the service account - :type service_account_name: str :param is_delete_operator_pod: What to do when the pod reaches its final state, or the execution is interrupted. If True (default), delete the pod; if False, leave the pod. - :type is_delete_operator_pod: bool :param hostnetwork: If True enable host networking on the pod.
- :type hostnetwork: bool :param tolerations: A list of kubernetes tolerations. - :type tolerations: List[k8s.V1Toleration] :param security_context: security options the pod should run with (PodSecurityContext). - :type security_context: dict :param dnspolicy: dnspolicy for the pod. - :type dnspolicy: str :param schedulername: Specify a schedulername for the pod - :type schedulername: str :param full_pod_spec: The complete podSpec - :type full_pod_spec: kubernetes.client.models.V1Pod :param init_containers: init container for the launched Pod - :type init_containers: list[kubernetes.client.models.V1Container] :param log_events_on_failure: Log the pod's events if a failure occurs - :type log_events_on_failure: bool :param do_xcom_push: If True, the content of the file /airflow/xcom/return.json in the container will also be pushed to an XCom when the container completes. - :type do_xcom_push: bool :param pod_template_file: path to pod template file (templated) - :type pod_template_file: str :param priority_class_name: priority class name for the launched Pod - :type priority_class_name: str :param termination_grace_period: Termination grace period if task killed in UI, defaults to kubernetes default - :type termination_grace_period: int """ diff --git a/airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py b/airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py index bdf0cc860f154..10296871efc60 100644 --- a/airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py +++ b/airflow/providers/cncf/kubernetes/operators/spark_kubernetes.py @@ -34,16 +34,11 @@ class SparkKubernetesOperator(BaseOperator): :param application_file: Defines Kubernetes 'custom_resource_definition' of 'sparkApplication' as either a path to a '.json' file or a JSON string. - :type application_file: str :param namespace: kubernetes namespace to put sparkApplication - :type namespace: str :param kubernetes_conn_id: The :ref:`kubernetes connection id ` to the Kubernetes cluster. - :type kubernetes_conn_id: str :param api_group: kubernetes api group of sparkApplication - :type api_group: str :param api_version: kubernetes api version of sparkApplication - :type api_version: str """ template_fields: Sequence[str] = ('application_file', 'namespace') diff --git a/airflow/providers/cncf/kubernetes/sensors/spark_kubernetes.py b/airflow/providers/cncf/kubernetes/sensors/spark_kubernetes.py index 8b2423491933a..15ac40bcdb90a 100644 --- a/airflow/providers/cncf/kubernetes/sensors/spark_kubernetes.py +++ b/airflow/providers/cncf/kubernetes/sensors/spark_kubernetes.py @@ -36,18 +36,12 @@ class SparkKubernetesSensor(BaseSensorOperator): https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/v1beta2-1.1.0-2.4.5/docs/api-docs.md#sparkapplication :param application_name: spark Application resource name - :type application_name: str :param namespace: the kubernetes namespace where the sparkApplication resides - :type namespace: str :param kubernetes_conn_id: The :ref:`kubernetes connection` to Kubernetes cluster.
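A minimal, illustrative sketch of the KubernetesPodOperator documented above, restricted to parameters from its docstring; the namespace, image, and commands are placeholders:

.. code-block:: python

    from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator

    run_pod = KubernetesPodOperator(
        task_id="run_pod",
        namespace="default",
        image="python:3.9-slim",  # hypothetical image
        cmds=["python", "-c"],
        arguments=["print('hello from the pod')"],
        name="airflow-test-pod",
        get_logs=True,
        is_delete_operator_pod=True,
    )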
- :type kubernetes_conn_id: str :param attach_log: determines whether logs for driver pod should be appended to the sensor log - :type attach_log: bool :param api_group: kubernetes api group of sparkApplication - :type api_group: str :param api_version: kubernetes api version of sparkApplication - :type api_version: str """ template_fields: Sequence[str] = ("application_name", "namespace") diff --git a/airflow/providers/cncf/kubernetes/utils/pod_manager.py b/airflow/providers/cncf/kubernetes/utils/pod_manager.py index 96fd03a1677a2..17c2225631a48 100644 --- a/airflow/providers/cncf/kubernetes/utils/pod_manager.py +++ b/airflow/providers/cncf/kubernetes/utils/pod_manager.py @@ -246,7 +246,6 @@ def parse_log_line(self, line: str) -> Tuple[Optional[DateTime], str]: Parse a K8s log line and return the timestamp and the log message :param line: k8s log line - :type line: str :return: timestamp and log message :rtype: Tuple[Optional[DateTime], str] """ diff --git a/airflow/providers/databricks/hooks/databricks.py b/airflow/providers/databricks/hooks/databricks.py index 7b80e87019349..fd3dd99acfbbb 100644 --- a/airflow/providers/databricks/hooks/databricks.py +++ b/airflow/providers/databricks/hooks/databricks.py @@ -117,16 +117,12 @@ class DatabricksHook(BaseHook): Interact with Databricks. :param databricks_conn_id: Reference to the :ref:`Databricks connection `. - :type databricks_conn_id: str :param timeout_seconds: The amount of time in seconds the requests library will wait before timing-out. - :type timeout_seconds: int :param retry_limit: The number of times to retry the connection in case of service outages. - :type retry_limit: int :param retry_delay: The number of seconds to wait between retries (it might be a floating point number). - :type retry_delay: float """ conn_name_attr = 'databricks_conn_id' @@ -267,7 +263,6 @@ def _is_aad_token_valid(aad_token: dict) -> bool: """ Utility function to check that the AAD token hasn't expired yet :param aad_token: dict with properties of AAD token - :type aad_token: dict :return: true if token is valid, false otherwise :rtype: bool """ @@ -301,9 +296,7 @@ def _do_api_call(self, endpoint_info, json): Utility function to perform an API call with retries :param endpoint_info: Tuple of method and endpoint - :type endpoint_info: tuple[string, string] :param json: Parameters for this API call. - :type json: dict :return: If the api call returns an OK status code, this function returns the response in JSON. Otherwise, we throw an AirflowException. @@ -390,7 +383,6 @@ def run_now(self, json: dict) -> int: Utility function to call the ``api/2.0/jobs/run-now`` endpoint. :param json: The data used in the body of the request to the ``run-now`` endpoint. - :type json: dict :return: the run_id as an int :rtype: int """ @@ -402,7 +394,6 @@ def submit_run(self, json: dict) -> int: Utility function to call the ``api/2.0/jobs/runs/submit`` endpoint. :param json: The data used in the body of the request to the ``submit`` endpoint. - :type json: dict :return: the run_id as an int :rtype: int """ @@ -425,7 +416,6 @@ def get_job_id(self, run_id: int) -> int: Retrieves job_id from run_id. :param run_id: id of the run - :type run_id: int :return: Job id for given Databricks run """ json = {'run_id': run_id} @@ -532,7 +522,6 @@ def install(self, json: dict) -> None: Utility function to call the ``2.0/libraries/install`` endpoint.
:param json: json dictionary containing cluster_id and an array of library - :type json: dict """ self._do_api_call(INSTALL_LIBS_ENDPOINT, json) @@ -543,7 +532,6 @@ def uninstall(self, json: dict) -> None: Utility function to call the ``2.0/libraries/uninstall`` endpoint. :param json: json dictionary containing cluster_id and an array of library - :type json: dict """ self._do_api_call(UNINSTALL_LIBS_ENDPOINT, json) diff --git a/airflow/providers/databricks/operators/databricks.py b/airflow/providers/databricks/operators/databricks.py index 8a0d76a6c1961..3e3d64adc36ce 100644 --- a/airflow/providers/databricks/operators/databricks.py +++ b/airflow/providers/databricks/operators/databricks.py @@ -171,7 +171,6 @@ class DatabricksSubmitRunOperator(BaseOperator): .. seealso:: For more information about templating see :ref:`concepts:jinja-templating`. https://docs.databricks.com/api/latest/jobs.html#runs-submit - :type json: dict :param spark_jar_task: The main class and parameters for the JAR task. Note that the actual JAR is specified in the ``libraries``. *EITHER* ``spark_jar_task`` *OR* ``notebook_task`` *OR* ``spark_python_task`` @@ -180,7 +179,6 @@ class DatabricksSubmitRunOperator(BaseOperator): .. seealso:: https://docs.databricks.com/api/latest/jobs.html#jobssparkjartask - :type spark_jar_task: dict :param notebook_task: The notebook path and parameters for the notebook task. *EITHER* ``spark_jar_task`` *OR* ``notebook_task`` *OR* ``spark_python_task`` *OR* ``spark_submit_task`` *OR* ``pipeline_task`` should be specified. @@ -188,7 +186,6 @@ class DatabricksSubmitRunOperator(BaseOperator): .. seealso:: https://docs.databricks.com/api/latest/jobs.html#jobsnotebooktask - :type notebook_task: dict :param spark_python_task: The python file path and parameters to run the python file with. *EITHER* ``spark_jar_task`` *OR* ``notebook_task`` *OR* ``spark_python_task`` *OR* ``spark_submit_task`` *OR* ``pipeline_task`` should be specified. @@ -196,7 +193,6 @@ class DatabricksSubmitRunOperator(BaseOperator): .. seealso:: https://docs.databricks.com/api/latest/jobs.html#jobssparkpythontask - :type spark_python_task: dict :param spark_submit_task: Parameters needed to run a spark-submit command. *EITHER* ``spark_jar_task`` *OR* ``notebook_task`` *OR* ``spark_python_task`` *OR* ``spark_submit_task`` *OR* ``pipeline_task`` should be specified. @@ -204,7 +200,6 @@ class DatabricksSubmitRunOperator(BaseOperator): .. seealso:: https://docs.databricks.com/api/latest/jobs.html#jobssparksubmittask - :type spark_submit_task: dict :param pipeline_task: Parameters needed to execute a Delta Live Tables pipeline task. The provided dictionary must contain at least ``pipeline_id`` field! *EITHER* ``spark_jar_task`` *OR* ``notebook_task`` *OR* ``spark_python_task`` @@ -213,7 +208,6 @@ class DatabricksSubmitRunOperator(BaseOperator): .. seealso:: https://docs.databricks.com/dev-tools/api/2.0/jobs.html#jobspipelinetask - :type pipeline_task: dict :param new_cluster: Specs for a new cluster on which this task will be run. *EITHER* ``new_cluster`` *OR* ``existing_cluster_id`` should be specified (except when ``pipeline_task`` is used). @@ -221,43 +215,33 @@ class DatabricksSubmitRunOperator(BaseOperator): .. seealso:: https://docs.databricks.com/api/latest/jobs.html#jobsclusterspecnewcluster - :type new_cluster: dict :param existing_cluster_id: ID for existing cluster on which to run this task. *EITHER* ``new_cluster`` *OR* ``existing_cluster_id`` should be specified (except when ``pipeline_task`` is used). 
This field will be templated. - :type existing_cluster_id: str :param libraries: Libraries which this run will use. This field will be templated. .. seealso:: https://docs.databricks.com/api/latest/libraries.html#managedlibrarieslibrary - :type libraries: list of dicts :param run_name: The run name used for this task. By default this will be set to the Airflow ``task_id``. This ``task_id`` is a required parameter of the superclass ``BaseOperator``. This field will be templated. - :type run_name: str :param timeout_seconds: The timeout for this run. By default a value of 0 is used which means to have no timeout. This field will be templated. - :type timeout_seconds: int32 :param databricks_conn_id: Reference to the :ref:`Databricks connection `. By default and in the common case this will be ``databricks_default``. To use token based authentication, provide the key ``token`` in the extra field for the connection and create the key ``host`` and leave the ``host`` field empty. - :type databricks_conn_id: str :param polling_period_seconds: Controls the rate at which we poll for the result of this run. By default the operator will poll every 30 seconds. - :type polling_period_seconds: int :param databricks_retry_limit: Number of times to retry if the Databricks backend is unreachable. Its value must be greater than or equal to 1. - :type databricks_retry_limit: int :param databricks_retry_delay: Number of seconds to wait between retries (it might be a floating point number). - :type databricks_retry_delay: float :param do_xcom_push: Whether we should push run_id and run_page_url to xcom. - :type do_xcom_push: bool """ # Used in airflow.models.BaseOperator @@ -424,7 +408,6 @@ class DatabricksRunNowOperator(BaseOperator): .. seealso:: https://docs.databricks.com/api/latest/jobs.html#run-now - :type job_id: str :param json: A JSON object containing API parameters which will be passed directly to the ``api/2.0/jobs/run-now`` endpoint. The other named parameters (i.e. ``notebook_params``, ``spark_submit_params``..) to this operator will @@ -435,7 +418,6 @@ class DatabricksRunNowOperator(BaseOperator): .. seealso:: For more information about templating see :ref:`concepts:jinja-templating`. https://docs.databricks.com/api/latest/jobs.html#run-now - :type json: dict :param notebook_params: A dict from keys to values for jobs with notebook task, e.g. "notebook_params": {"name": "john doe", "age": "35"}. The map is passed to the notebook and will be accessible through the @@ -449,7 +431,6 @@ class DatabricksRunNowOperator(BaseOperator): .. seealso:: https://docs.databricks.com/user-guide/notebooks/widgets.html - :type notebook_params: dict :param python_params: A list of parameters for jobs with python tasks, e.g. "python_params": ["john doe", "35"]. The parameters will be passed to python file as command line parameters. @@ -461,7 +442,6 @@ class DatabricksRunNowOperator(BaseOperator): .. seealso:: https://docs.databricks.com/api/latest/jobs.html#run-now - :type python_params: list[str] :param jar_params: A list of parameters for jobs with JAR tasks, e.g. "jar_params": ["john doe", "35"]. The parameters will be passed to JAR file as command line parameters. @@ -473,7 +453,6 @@ class DatabricksRunNowOperator(BaseOperator): .. seealso:: https://docs.databricks.com/api/latest/jobs.html#run-now - :type jar_params: list[str] :param spark_submit_params: A list of parameters for jobs with spark submit task, e.g. "spark_submit_params": ["--class", "org.apache.spark.examples.SparkPi"].
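A minimal sketch of the DatabricksSubmitRunOperator documented above, using only parameters from its docstring; the cluster spec and notebook path are hypothetical:

.. code-block:: python

    from airflow.providers.databricks.operators.databricks import DatabricksSubmitRunOperator

    notebook_run = DatabricksSubmitRunOperator(
        task_id="notebook_run",
        new_cluster={"spark_version": "9.1.x-scala2.12", "num_workers": 2},  # hypothetical spec
        notebook_task={"notebook_path": "/Users/someone@example.com/my-notebook"},
        polling_period_seconds=30,
    )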
The parameters will be passed to spark-submit script as command line parameters. @@ -484,24 +463,18 @@ class DatabricksRunNowOperator(BaseOperator): .. seealso:: https://docs.databricks.com/api/latest/jobs.html#run-now - :type spark_submit_params: list[str] :param timeout_seconds: The timeout for this run. By default a value of 0 is used which means to have no timeout. This field will be templated. - :type timeout_seconds: int32 :param databricks_conn_id: Reference to the :ref:`Databricks connection `. By default and in the common case this will be ``databricks_default``. To use token based authentication, provide the key ``token`` in the extra field for the connection and create the key ``host`` and leave the ``host`` field empty. - :type databricks_conn_id: str :param polling_period_seconds: Controls the rate at which we poll for the result of this run. By default the operator will poll every 30 seconds. - :type polling_period_seconds: int :param databricks_retry_limit: Number of times to retry if the Databricks backend is unreachable. Its value must be greater than or equal to 1. - :type databricks_retry_limit: int :param do_xcom_push: Whether we should push run_id and run_page_url to xcom. - :type do_xcom_push: bool """ # Used in airflow.models.BaseOperator diff --git a/airflow/providers/datadog/hooks/datadog.py b/airflow/providers/datadog/hooks/datadog.py index 538a7757832b2..dabe9111f9611 100644 --- a/airflow/providers/datadog/hooks/datadog.py +++ b/airflow/providers/datadog/hooks/datadog.py @@ -36,7 +36,6 @@ class DatadogHook(BaseHook, LoggingMixin): Airflow runs. :param datadog_conn_id: The connection to datadog, containing metadata for api keys. - :type datadog_conn_id: str """ def __init__(self, datadog_conn_id: str = 'datadog_default') -> None: @@ -74,15 +73,10 @@ def send_metric( Sends a single datapoint metric to DataDog :param metric_name: The name of the metric - :type metric_name: str :param datapoint: A single integer or float related to the metric - :type datapoint: int or float :param tags: A list of tags associated with the metric - :type tags: list :param type_: Type of your metric: gauge, rate, or count - :type type_: str :param interval: If the type of the metric is rate or count, define the corresponding interval - :type interval: int """ response = api.Metric.send( metric=metric_name, points=datapoint, host=self.host, tags=tags, type=type_, interval=interval @@ -97,11 +91,8 @@ def query_metric(self, query: str, from_seconds_ago: int, to_seconds_ago: int) - function applied to it and returns the results. :param query: The datadog query to execute (see datadog docs) - :type query: str :param from_seconds_ago: How many seconds ago to start querying for. - :type from_seconds_ago: int :param to_seconds_ago: Up to how many seconds ago to query for. - :type to_seconds_ago: int """ now = int(time.time()) @@ -129,27 +120,18 @@ def post_event( alerting itself. :param title: The title of the event - :type title: str :param text: The body of the event (more information) - :type text: str :param aggregation_key: Key that can be used to aggregate this event in a stream - :type aggregation_key: str :param alert_type: The alert type for the event, one of ["error", "warning", "info", "success"] - :type alert_type: str :param date_happened: POSIX timestamp of the event; defaults to now - :type date_happened: int :param handle: User to post the event as; defaults to owner of the application key used to submit. :param priority: Priority to post the event as.
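For the DatabricksRunNowOperator documented above, a minimal sketch restricted to its documented parameters; the job ID and notebook parameters are hypothetical:

.. code-block:: python

    from airflow.providers.databricks.operators.databricks import DatabricksRunNowOperator

    run_job = DatabricksRunNowOperator(
        task_id="run_job",
        job_id=42,  # hypothetical job ID
        notebook_params={"name": "john doe", "age": "35"},
    )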
("normal" or "low", defaults to "normal") - :type priority: str :param related_event_id: Post event as a child of the given event - :type related_event_id: id :param tags: List of tags to apply to the event - :type tags: list[str] :param device_name: device_name to post the event with - :type device_name: list """ response = api.Event.create( title=title, diff --git a/airflow/providers/datadog/sensors/datadog.py b/airflow/providers/datadog/sensors/datadog.py index c882b8a9c97b2..7dbcec80676d6 100644 --- a/airflow/providers/datadog/sensors/datadog.py +++ b/airflow/providers/datadog/sensors/datadog.py @@ -36,18 +36,12 @@ class DatadogSensor(BaseSensorOperator): Airflow runs. :param datadog_conn_id: The connection to datadog, containing metadata for api keys. - :type datadog_conn_id: str :param from_seconds_ago: POSIX timestamp start (default 3600). - :type from_seconds_ago: int :param up_to_seconds_from_now: POSIX timestamp end (default 0). - :type up_to_seconds_from_now: int :param priority: Priority of your events, either low or normal. - :type priority: Optional[str] :param sources: A comma separated list indicating what tags, if any, should be used to filter the list of monitors by scope - :type sources: Optional[str] :param tags: Get datadog events from specific sources. - :type tags: Optional[List[str]] :param response_check: A check against the ‘requests’ response object. The callable takes the response object as the first positional argument and optionally any number of keyword arguments available in the context dictionary. It should return True for diff --git a/airflow/providers/dingding/example_dags/example_dingding.py b/airflow/providers/dingding/example_dags/example_dingding.py index cddd7b82ee86f..ae60aa829c6e1 100644 --- a/airflow/providers/dingding/example_dags/example_dingding.py +++ b/airflow/providers/dingding/example_dags/example_dingding.py @@ -30,7 +30,6 @@ def failure_callback(context): The function that will be executed on failure. :param context: The context of the executed task. - :type context: dict """ message = ( 'AIRFLOW TASK FAILURE TIPS:\n' diff --git a/airflow/providers/dingding/hooks/dingding.py b/airflow/providers/dingding/hooks/dingding.py index 02437f2933156..c21c00ea0e2b2 100644 --- a/airflow/providers/dingding/hooks/dingding.py +++ b/airflow/providers/dingding/hooks/dingding.py @@ -36,16 +36,11 @@ class DingdingHook(HttpHook): `Dingding custom bot `_ :param dingding_conn_id: The name of the Dingding connection to use - :type dingding_conn_id: str :param message_type: Message type you want to send to Dingding, support five type so far including text, link, markdown, actionCard, feedCard - :type message_type: str :param message: The message send to Dingding chat group - :type message: str or dict :param at_mobiles: Remind specific users with this message - :type at_mobiles: list[str] :param at_all: Remind all people in group or not. 
If True, will overwrite ``at_mobiles`` - :type at_all: bool """ conn_name_attr = 'dingding_conn_id' @@ -101,7 +96,6 @@ def get_conn(self, headers: Optional[dict] = None) -> Session: don't need generic params :param headers: additional headers to be passed through as a dictionary - :type headers: dict """ conn = self.get_connection(self.http_conn_id) self.base_url = conn.host if conn.host else 'https://oapi.dingtalk.com' diff --git a/airflow/providers/dingding/operators/dingding.py b/airflow/providers/dingding/operators/dingding.py index c5a6110e44f0e..23e4d144c146f 100644 --- a/airflow/providers/dingding/operators/dingding.py +++ b/airflow/providers/dingding/operators/dingding.py @@ -34,16 +34,11 @@ class DingdingOperator(BaseOperator): `Dingding custom bot `_ :param dingding_conn_id: The name of the Dingding connection to use - :type dingding_conn_id: str :param message_type: Message type you want to send to Dingding, supports five types so far including text, link, markdown, actionCard, feedCard - :type message_type: str :param message: The message to send to the Dingding chat group - :type message: str or dict :param at_mobiles: Remind specific users with this message - :type at_mobiles: list[str] :param at_all: Remind all people in group or not. If True, will overwrite ``at_mobiles`` - :type at_all: bool """ template_fields: Sequence[str] = ('message',) diff --git a/airflow/providers/discord/hooks/discord_webhook.py b/airflow/providers/discord/hooks/discord_webhook.py index c8956e4f3e819..8f1931a170a59 100644 --- a/airflow/providers/discord/hooks/discord_webhook.py +++ b/airflow/providers/discord/hooks/discord_webhook.py @@ -37,21 +37,14 @@ class DiscordWebhookHook(HttpHook): :param http_conn_id: Http connection ID with host as "https://discord.com/api/" and default webhook endpoint in the extra field in the form of {"webhook_endpoint": "webhooks/{webhook.id}/{webhook.token}"} - :type http_conn_id: str :param webhook_endpoint: Discord webhook endpoint in the form of "webhooks/{webhook.id}/{webhook.token}" - :type webhook_endpoint: str :param message: The message you want to send to your Discord channel (max 2000 characters) - :type message: str :param username: Override the default username of the webhook - :type username: str :param avatar_url: Override the default avatar of the webhook - :type avatar_url: str :param tts: Is a text-to-speech message - :type tts: bool :param proxy: Proxy to use to make the Discord webhook call - :type proxy: str """ conn_name_attr = 'http_conn_id' diff --git a/airflow/providers/discord/operators/discord_webhook.py b/airflow/providers/discord/operators/discord_webhook.py index a4ef6bdcf0036..a852aa8ecfb0d 100644 --- a/airflow/providers/discord/operators/discord_webhook.py +++ b/airflow/providers/discord/operators/discord_webhook.py @@ -39,21 +39,14 @@ class DiscordWebhookOperator(SimpleHttpOperator): :param http_conn_id: Http connection ID with host as "https://discord.com/api/" and default webhook endpoint in the extra field in the form of {"webhook_endpoint": "webhooks/{webhook.id}/{webhook.token}"} - :type http_conn_id: str :param webhook_endpoint: Discord webhook endpoint in the form of "webhooks/{webhook.id}/{webhook.token}" - :type webhook_endpoint: str :param message: The message you want to send to your Discord channel (max 2000 characters). (templated) - :type message: str :param username: Override the default username of the webhook.
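A minimal sketch of the DingdingOperator documented above, using only its documented parameters; the connection ID and message are illustrative:

.. code-block:: python

    from airflow.providers.dingding.operators.dingding import DingdingOperator

    notify = DingdingOperator(
        task_id="notify",
        dingding_conn_id="dingding_default",
        message_type="text",
        message="Job finished",  # hypothetical message
        at_all=True,
    )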
(templated) - :type username: str :param avatar_url: Override the default avatar of the webhook - :type avatar_url: str :param tts: Is a text-to-speech message - :type tts: bool :param proxy: Proxy to use to make the Discord webhook call - :type proxy: str """ template_fields: Sequence[str] = ('username', 'message') diff --git a/airflow/providers/docker/decorators/docker.py b/airflow/providers/docker/decorators/docker.py index 3e80a2c254c5f..ea5c6958b949d 100644 --- a/airflow/providers/docker/decorators/docker.py +++ b/airflow/providers/docker/decorators/docker.py @@ -53,17 +53,13 @@ class _DockerDecoratedOperator(DecoratedOperator, DockerOperator): Wraps a Python callable and captures args/kwargs when called for execution. :param python_callable: A reference to an object that is callable - :type python_callable: python callable :param op_kwargs: a dictionary of keyword arguments that will get unpacked in your function (templated) - :type op_kwargs: dict :param op_args: a list of positional arguments that will get unpacked when calling your callable (templated) - :type op_args: list :param multiple_outputs: if set, function return value will be unrolled to multiple XCom values. Dict will unroll to xcom values with keys as keys. Defaults to False. - :type multiple_outputs: bool """ template_fields: Sequence[str] = ('op_args', 'op_kwargs') @@ -140,12 +136,10 @@ def docker_task( Also accepts any argument that DockerOperator accepts via ``kwargs``. Can be reused in a single DAG. :param python_callable: Function to decorate - :type python_callable: Optional[Callable] :param multiple_outputs: if set, function return value will be unrolled to multiple XCom values. List/Tuples will unroll to xcom values with index as key. Dict will unroll to xcom values with keys as XCom keys. Defaults to False. - :type multiple_outputs: bool """ return task_decorator_factory( python_callable=python_callable, diff --git a/airflow/providers/docker/hooks/docker.py b/airflow/providers/docker/hooks/docker.py index 76db2d84505d2..7ac2122d9e8fe 100644 --- a/airflow/providers/docker/hooks/docker.py +++ b/airflow/providers/docker/hooks/docker.py @@ -31,7 +31,6 @@ class DockerHook(BaseHook, LoggingMixin): :param docker_conn_id: The :ref:`Docker connection id ` where credentials and extra configuration are stored - :type docker_conn_id: str """ conn_name_attr = 'docker_conn_id' diff --git a/airflow/providers/docker/operators/docker.py b/airflow/providers/docker/operators/docker.py index 7fe7456fffcdb..2126e16411164 100644 --- a/airflow/providers/docker/operators/docker.py +++ b/airflow/providers/docker/operators/docker.py @@ -62,97 +62,63 @@ class DockerOperator(BaseOperator): :param image: Docker image from which to create the container. If image tag is omitted, "latest" will be used. (templated) - :type image: str :param api_version: Remote API version. Set to ``auto`` to automatically detect the server's version. - :type api_version: str :param command: Command to be run in the container. (templated) - :type command: str or list :param container_name: Name of the container. Optional (templated) - :type container_name: str or None :param cpus: Number of CPUs to assign to the container. This value gets multiplied with 1024. See https://docs.docker.com/engine/reference/run/#cpu-share-constraint - :type cpus: float :param docker_url: URL of the host running the docker daemon. Default is unix://var/run/docker.sock - :type docker_url: str :param environment: Environment variables to set in the container.
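For the DiscordWebhookOperator documented above, a hedged sketch using only its documented parameters; the connection ID is hypothetical and the endpoint placeholder must be replaced by a real webhook ID and token:

.. code-block:: python

    from airflow.providers.discord.operators.discord_webhook import DiscordWebhookOperator

    discord_msg = DiscordWebhookOperator(
        task_id="discord_msg",
        http_conn_id="discord_webhook_default",  # hypothetical connection ID
        webhook_endpoint="webhooks/{webhook.id}/{webhook.token}",  # substitute real values
        message="Pipeline succeeded",
        username="airflow-bot",
    )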
(templated) - :type environment: dict :param private_environment: Private environment variables to set in the container. These are not templated, and hidden from the website. - :type private_environment: dict :param force_pull: Pull the docker image on every run. Default is False. - :type force_pull: bool :param mem_limit: Maximum amount of memory the container can use. Either a float value, which represents the limit in bytes, or a string like ``128m`` or ``1g``. - :type mem_limit: float or str :param host_tmp_dir: Specify the location of the temporary directory on the host which will be mapped to tmp_dir. If not provided defaults to using the standard system temp directory. - :type host_tmp_dir: str :param network_mode: Network mode for the container. - :type network_mode: str :param tls_ca_cert: Path to a PEM-encoded certificate authority to secure the docker connection. - :type tls_ca_cert: str :param tls_client_cert: Path to the PEM-encoded certificate used to authenticate docker client. - :type tls_client_cert: str :param tls_client_key: Path to the PEM-encoded key used to authenticate docker client. - :type tls_client_key: str :param tls_hostname: Hostname to match against the docker server certificate or False to disable the check. - :type tls_hostname: str or bool :param tls_ssl_version: Version of SSL to use when communicating with docker daemon. - :type tls_ssl_version: str :param mount_tmp_dir: Specify whether the temporary directory should be bind-mounted from the host to the container. Defaults to True - :type mount_tmp_dir: bool :param tmp_dir: Mount point inside the container to a temporary directory created on the host by the operator. The path is also made available via the environment variable ``AIRFLOW_TMP_DIR`` inside the container. - :type tmp_dir: str :param user: Default user inside the docker container. - :type user: int or str :param mounts: List of volumes to mount into the container. Each item should be a :py:class:`docker.types.Mount` instance. - :type mounts: list[docker.types.Mount] :param entrypoint: Overwrite the default ENTRYPOINT of the image - :type entrypoint: str or list :param working_dir: Working directory to set on the container (equivalent to the -w switch of the docker client) - :type working_dir: str :param xcom_all: Push all the stdout or just the last line. The default is False (last line). - :type xcom_all: bool :param docker_conn_id: The :ref:`Docker connection id ` - :type docker_conn_id: str :param dns: Docker custom DNS servers - :type dns: list[str] :param dns_search: Docker custom DNS search domain - :type dns_search: list[str] :param auto_remove: Auto-removal of the container on daemon side when the container's process exits. The default is False. - :type auto_remove: bool :param shm_size: Size of ``/dev/shm`` in bytes. The size must be greater than 0. If omitted uses system default. - :type shm_size: int :param tty: Allocate pseudo-TTY to the container. This needs to be set to see logs of the Docker container. - :type tty: bool :param privileged: Give extended privileges to this container. - :type privileged: bool :param cap_add: Include container capabilities - :type cap_add: list[str] :param retrieve_output: Should this docker image consistently attempt to pull an output file from the container before manually shutting down the image.
Useful for cases where users want a pickle serialized output that is not posted to logs - :type retrieve_output: bool :param retrieve_output_path: path for output file that will be retrieved and passed to xcom - :type retrieve_output_path: Optional[str] """ template_fields: Sequence[str] = ('image', 'command', 'environment', 'container_name') @@ -407,7 +373,6 @@ def format_command(command: Union[str, List[str]]) -> Union[List[str], str]: Retrieve command(s). If the command string starts with ``[``, it returns the command list. :param command: Docker command or entrypoint - :type command: str | List[str] :return: the command (or commands) :rtype: str | List[str] diff --git a/airflow/providers/docker/operators/docker_swarm.py b/airflow/providers/docker/operators/docker_swarm.py index e733122634856..2f0b3e44f0c62 100644 --- a/airflow/providers/docker/operators/docker_swarm.py +++ b/airflow/providers/docker/operators/docker_swarm.py @@ -45,72 +45,49 @@ class DockerSwarmOperator(DockerOperator): :param image: Docker image from which to create the container. If image tag is omitted, "latest" will be used. - :type image: str :param api_version: Remote API version. Set to ``auto`` to automatically detect the server's version. - :type api_version: str :param auto_remove: Auto-removal of the container on daemon side when the container's process exits. The default is False. - :type auto_remove: bool :param command: Command to be run in the container. (templated) - :type command: str or list :param docker_url: URL of the host running the docker daemon. Default is unix://var/run/docker.sock - :type docker_url: str :param environment: Environment variables to set in the container. (templated) - :type environment: dict :param force_pull: Pull the docker image on every run. Default is False. - :type force_pull: bool :param mem_limit: Maximum amount of memory the container can use. Either a float value, which represents the limit in bytes, or a string like ``128m`` or ``1g``. - :type mem_limit: float or str :param tls_ca_cert: Path to a PEM-encoded certificate authority to secure the docker connection. - :type tls_ca_cert: str :param tls_client_cert: Path to the PEM-encoded certificate used to authenticate docker client. - :type tls_client_cert: str :param tls_client_key: Path to the PEM-encoded key used to authenticate docker client. - :type tls_client_key: str :param tls_hostname: Hostname to match against the docker server certificate or False to disable the check. - :type tls_hostname: str or bool :param tls_ssl_version: Version of SSL to use when communicating with docker daemon. - :type tls_ssl_version: str :param tmp_dir: Mount point inside the container to a temporary directory created on the host by the operator. The path is also made available via the environment variable ``AIRFLOW_TMP_DIR`` inside the container. - :type tmp_dir: str :param user: Default user inside the docker container. - :type user: int or str :param docker_conn_id: The :ref:`Docker connection id ` - :type docker_conn_id: str :param tty: Allocate pseudo-TTY to the container of this service. This needs to be set to see logs of the Docker container / service. - :type tty: bool :param enable_logging: Show the application's logs in operator's logs. Supported only if the Docker engine is using json-file or journald logging drivers. The `tty` parameter should be set to use this with Python applications. - :type enable_logging: bool :param configs: List of docker configs to be exposed to the containers of the swarm service.
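A minimal sketch of the DockerOperator documented above, restricted to its documented parameters; the image and command are placeholders:

.. code-block:: python

    from airflow.providers.docker.operators.docker import DockerOperator

    docker_run = DockerOperator(
        task_id="docker_run",
        image="python:3.9-slim",  # hypothetical image
        command="echo hello",
        docker_url="unix://var/run/docker.sock",
        auto_remove=True,
        mount_tmp_dir=False,
    )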
The configs are ConfigReference objects as per the docker api [https://docker-py.readthedocs.io/en/stable/services.html#docker.models.services.ServiceCollection.create]_ - :type configs: List[docker.types.ConfigReference] :param secrets: List of docker secrets to be exposed to the containers of the swarm service. The secrets are SecretReference objects as per the docker create_service api. [https://docker-py.readthedocs.io/en/stable/services.html#docker.models.services.ServiceCollection.create]_ - :type secrets: List[docker.types.SecretReference] :param mode: Indicate whether a service should be deployed as a replicated or global service, and associated parameters - :type mode: docker.types.ServiceMode :param networks: List of network names or IDs or NetworkAttachmentConfig to attach the service to. - :type networks: List[Union[str, NetworkAttachmentConfig]] :param placement: Placement instructions for the scheduler. If a list is passed instead, it is assumed to be a list of constraints as part of a Placement object. - :type placement: Union[types.Placement, List[types.Placement]] """ def __init__( diff --git a/airflow/providers/elasticsearch/hooks/elasticsearch.py b/airflow/providers/elasticsearch/hooks/elasticsearch.py index 9898775527bcd..b48511670ffe5 100644 --- a/airflow/providers/elasticsearch/hooks/elasticsearch.py +++ b/airflow/providers/elasticsearch/hooks/elasticsearch.py @@ -32,7 +32,6 @@ class ElasticsearchHook(DbApiHook): :param elasticsearch_conn_id: The :ref:`ElasticSearch connection id ` used for Elasticsearch credentials. - :type elasticsearch_conn_id: str """ conn_name_attr = 'elasticsearch_conn_id' diff --git a/airflow/providers/elasticsearch/log/es_task_handler.py b/airflow/providers/elasticsearch/log/es_task_handler.py index c2b041e038363..bb6b1f648baf6 100644 --- a/airflow/providers/elasticsearch/log/es_task_handler.py +++ b/airflow/providers/elasticsearch/log/es_task_handler.py @@ -232,11 +232,8 @@ def es_read(self, log_id: str, offset: str, metadata: dict) -> list: Returns '' if no log is found or there was an error. :param log_id: the log_id of the log to read. - :type log_id: str :param offset: the offset to start reading the log from. - :type offset: str :param metadata: log metadata, used for streaming log download. - :type metadata: dict """ # Offset is the unique key for sorting logs given log_id. search = Search(using=self.client).query('match_phrase', log_id=log_id).sort(self.offset_field) @@ -349,7 +346,6 @@ def get_external_log_url(self, task_instance: TaskInstance, try_number: int) -> :param task_instance: task instance object :type: task_instance: TaskInstance :param try_number: task instance try_number to read logs from. - :type try_number: Optional[int] :return: URL to the external log collection service :rtype: str """ diff --git a/airflow/providers/exasol/hooks/exasol.py b/airflow/providers/exasol/hooks/exasol.py index 55ea55316c0b5..c2f8e13d99810 100644 --- a/airflow/providers/exasol/hooks/exasol.py +++ b/airflow/providers/exasol/hooks/exasol.py @@ -72,11 +72,8 @@ def get_pandas_df( :param sql: the sql statement to be executed (str) or a list of sql statements to execute - :type sql: str or list :param parameters: The parameters to render the SQL query with.
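As a hedged sketch of the DockerSwarmOperator documented above (image, command, and replica count are illustrative):

.. code-block:: python

    from docker.types import ServiceMode

    from airflow.providers.docker.operators.docker_swarm import DockerSwarmOperator

    swarm_run = DockerSwarmOperator(
        task_id="swarm_run",
        image="python:3.9-slim",  # hypothetical image
        command="echo hello",
        enable_logging=True,
        mode=ServiceMode("replicated", replicas=2),
    )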
- :type parameters: dict or iterable :param kwargs: (optional) passed into pyexasol.ExaConnection.export_to_pandas method - :type kwargs: dict """ with closing(self.get_conn()) as conn: df = conn.export_to_pandas(sql, query_params=parameters, **kwargs) @@ -90,9 +87,7 @@ def get_records( :param sql: the sql statement to be executed (str) or a list of sql statements to execute - :type sql: str or list :param parameters: The parameters to render the SQL query with. - :type parameters: dict or iterable """ with closing(self.get_conn()) as conn: with closing(conn.execute(sql, parameters)) as cur: @@ -104,9 +99,7 @@ def get_first(self, sql: Union[str, list], parameters: Optional[dict] = None) -> :param sql: the sql statement to be executed (str) or a list of sql statements to execute - :type sql: str or list :param parameters: The parameters to render the SQL query with. - :type parameters: dict or iterable """ with closing(self.get_conn()) as conn: with closing(conn.execute(sql, parameters)) as cur: @@ -123,15 +116,11 @@ def export_to_file( Exports data to a file. :param filename: Path to the file to which the data has to be exported - :type filename: str :param query_or_table: the sql statement to be executed or table name to export - :type query_or_table: str :param query_params: Query parameters passed to underlying ``export_to_file`` method of :class:`~pyexasol.connection.ExaConnection`. - :type query_params: dict :param export_params: Extra parameters passed to underlying ``export_to_file`` method of :class:`~pyexasol.connection.ExaConnection`. - :type export_params: dict """ self.log.info("Getting data from exasol") with closing(self.get_conn()) as conn: @@ -153,14 +142,10 @@ def run( :param sql: the sql statement to be executed (str) or a list of sql statements to execute - :type sql: str or list :param autocommit: What to set the connection's autocommit setting to before executing the query. - :type autocommit: bool :param parameters: The parameters to render the SQL query with. - :type parameters: dict or iterable :param handler: The result handler which is called with the result of each statement. - :type handler: callable """ if isinstance(sql, str): sql = [sql] @@ -183,9 +168,7 @@ def set_autocommit(self, conn, autocommit: bool) -> None: Sets the autocommit flag on the connection :param conn: Connection to set autocommit setting to. - :type conn: connection object :param autocommit: The autocommit setting to set. - :type autocommit: bool """ if not self.supports_autocommit and autocommit: self.log.warning( @@ -202,7 +185,6 @@ def get_autocommit(self, conn) -> bool: does not support autocommit. :param conn: Connection to get autocommit setting from. - :type conn: connection object :return: connection autocommit setting. :rtype: bool """ @@ -218,9 +200,7 @@ def _serialize_cell(cell, conn=None) -> object: hence we return cell without any conversion. :param cell: The cell to insert into the table - :type cell: object :param conn: The database connection - :type conn: connection object :return: The cell :rtype: object """ diff --git a/airflow/providers/exasol/operators/exasol.py b/airflow/providers/exasol/operators/exasol.py index d75cfb6264016..b70d3245d549c 100644 --- a/airflow/providers/exasol/operators/exasol.py +++ b/airflow/providers/exasol/operators/exasol.py @@ -28,19 +28,14 @@ class ExasolOperator(BaseOperator): """ Executes sql code in a specific Exasol database - :param sql: the sql code to be executed. 
(templated) - :type sql: Can receive a str representing a sql statement, - a list of str (sql statements), or reference to a template file. - Template reference are recognized by str ending in '.sql' + :param sql: the SQL code to be executed as a single string, or + a list of str (sql statements), or a reference to a template file. + Template references are recognized by str ending in '.sql' :param exasol_conn_id: reference to a specific Exasol database - :type exasol_conn_id: string :param autocommit: if True, each command is automatically committed. (default value: False) - :type autocommit: bool :param parameters: (optional) the parameters to render the SQL query with. - :type parameters: dict :param schema: (optional) name of the schema which overwrites the one defined in the connection - :type schema: string """ template_fields: Sequence[str] = ('sql',) diff --git a/airflow/providers/facebook/ads/hooks/ads.py b/airflow/providers/facebook/ads/hooks/ads.py index 7ad56e7dc747e..96f2db9737351 100644 --- a/airflow/providers/facebook/ads/hooks/ads.py +++ b/airflow/providers/facebook/ads/hooks/ads.py @@ -54,10 +54,8 @@ class FacebookAdsReportingHook(BaseHook): https://developers.facebook.com/docs/marketing-apis/ :param facebook_conn_id: Airflow Facebook Ads connection ID - :type facebook_conn_id: str :param api_version: The version of Facebook API. Defaults to None. If it is None, it will use the Facebook business SDK default version. - :type api_version: Optional[str] """ @@ -122,12 +120,9 @@ def bulk_facebook_report( :param fields: List of fields that are obtained from Facebook. Found in AdsInsights.Field class. https://developers.facebook.com/docs/marketing-api/insights/parameters/v6.0 - :type fields: List[str] :param params: Parameters that determine the query for Facebook https://developers.facebook.com/docs/marketing-api/insights/parameters/v6.0 - :type params: Dict[str, Any] :param sleep_time: Time to sleep when async call is happening - :type sleep_time: int :return: Facebook Ads API response, converted to Facebook Ads Row objects regarding given Account ID type @@ -166,17 +161,12 @@ def _facebook_report( :param account_id: Facebook Account ID that holds ads information https://developers.facebook.com/docs/marketing-api/reference/ads-insights/ - :type account_id: str :param api: FacebookAdsApi created in the hook - :type api: FacebookAdsApi :param fields: List of fields that are obtained from Facebook. Found in AdsInsights.Field class. https://developers.facebook.com/docs/marketing-api/insights/parameters/v6.0 - :type fields: List[str] :param params: Parameters that determine the query for Facebook https://developers.facebook.com/docs/marketing-api/insights/parameters/v6.0 - :type params: Dict[str, Any] :param sleep_time: Time to sleep when async call is happening - :type sleep_time: int """ ad_account = AdAccount(account_id, api=api) _async = ad_account.get_insights(params=params, fields=fields, is_async=True) diff --git a/airflow/providers/ftp/hooks/ftp.py b/airflow/providers/ftp/hooks/ftp.py index 51fef5899e8d7..c2e0d9fed0609 100644 --- a/airflow/providers/ftp/hooks/ftp.py +++ b/airflow/providers/ftp/hooks/ftp.py @@ -35,7 +35,6 @@ class FTPHook(BaseHook): :param ftp_conn_id: The :ref:`ftp connection id ` reference. - :type ftp_conn_id: str """ conn_name_attr = 'ftp_conn_id' @@ -80,7 +79,6 @@ def describe_directory(self, path: str) -> dict: on the remote system (where the MLSD command is supported).
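For the ExasolOperator documented above, a minimal sketch using only its documented parameters; the connection ID and query are illustrative:

.. code-block:: python

    from airflow.providers.exasol.operators.exasol import ExasolOperator

    count_rows = ExasolOperator(
        task_id="count_rows",
        exasol_conn_id="exasol_default",
        sql="SELECT COUNT(*) FROM my_schema.my_table",  # or a list of statements / a *.sql template
        autocommit=True,
    )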
:param path: full path to the remote directory - :type path: str """ conn = self.get_conn() conn.cwd(path) @@ -92,7 +90,6 @@ def list_directory(self, path: str) -> List[str]: Returns a list of files on the remote system. :param path: full path to the remote directory to list - :type path: str """ conn = self.get_conn() conn.cwd(path) @@ -105,7 +102,6 @@ def create_directory(self, path: str) -> None: Creates a directory on the remote system. :param path: full path to the remote directory to create - :type path: str """ conn = self.get_conn() conn.mkd(path) @@ -115,7 +111,6 @@ def delete_directory(self, path: str) -> None: Deletes a directory on the remote system. :param path: full path to the remote directory to delete - :type path: str """ conn = self.get_conn() conn.rmd(path) @@ -129,17 +124,14 @@ def retrieve_file(self, remote_full_path, local_full_path_or_buffer, callback=No be written to the buffer but not closed. :param remote_full_path: full path to the remote file - :type remote_full_path: str :param local_full_path_or_buffer: full path to the local file or a file-like buffer - :type local_full_path_or_buffer: str or file-like buffer :param callback: callback which is called each time a block of data is read. if you do not use a callback, these blocks will be written to the file or buffer passed in. if you do pass in a callback, note that writing to a file or buffer will need to be handled inside the callback. [default: output_handle.write()] - :type callback: callable .. code-block:: python @@ -205,10 +197,8 @@ def store_file(self, remote_full_path: str, local_full_path_or_buffer: Any) -> N be read from the buffer but not closed. :param remote_full_path: full path to the remote file - :type remote_full_path: str :param local_full_path_or_buffer: full path to the local file or a file-like buffer - :type local_full_path_or_buffer: str or file-like buffer """ conn = self.get_conn() @@ -231,7 +221,6 @@ def delete_file(self, path: str) -> None: Removes a file on the FTP Server. :param path: full path to the remote file - :type path: str """ conn = self.get_conn() conn.delete(path) @@ -251,7 +240,6 @@ def get_mod_time(self, path: str) -> datetime.datetime: Returns a datetime object representing the last time the file was modified :param path: remote file path - :type path: str """ conn = self.get_conn() ftp_mdtm = conn.sendcmd('MDTM ' + path) @@ -267,7 +255,6 @@ def get_size(self, path: str) -> Optional[int]: Returns the size of a file (in bytes) :param path: remote file path - :type path: str """ conn = self.get_conn() size = conn.size(path) diff --git a/airflow/providers/ftp/sensors/ftp.py b/airflow/providers/ftp/sensors/ftp.py index 7e30fd0663f99..faa9c5c315af1 100644 --- a/airflow/providers/ftp/sensors/ftp.py +++ b/airflow/providers/ftp/sensors/ftp.py @@ -31,13 +31,10 @@ class FTPSensor(BaseSensorOperator): Waits for a file or directory to be present on FTP. :param path: Remote file or directory path - :type path: str :param fail_on_transient_errors: Fail on all errors, including 4xx transient errors. Default True. - :type fail_on_transient_errors: bool :param ftp_conn_id: The :ref:`ftp connection id ` reference to run the sensor against. 
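As a sketch of the FTPHook methods documented above (the connection ID and remote paths are hypothetical):

.. code-block:: python

    from airflow.providers.ftp.hooks.ftp import FTPHook

    hook = FTPHook(ftp_conn_id="ftp_default")
    for name in hook.list_directory("/incoming"):  # hypothetical remote directory
        hook.retrieve_file(f"/incoming/{name}", f"/tmp/{name}")  # download each file
        hook.delete_file(f"/incoming/{name}")  # then remove it from the server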
- :type ftp_conn_id: str """ template_fields: Sequence[str] = ('path',) diff --git a/airflow/providers/google/ads/hooks/ads.py b/airflow/providers/google/ads/hooks/ads.py index 20c6de0594ced..40a4e5e484dbb 100644 --- a/airflow/providers/google/ads/hooks/ads.py +++ b/airflow/providers/google/ads/hooks/ads.py @@ -68,11 +68,8 @@ class GoogleAdsHook(BaseHook): https://developers.google.com/google-ads/api/docs/start :param gcp_conn_id: The connection ID with the service account details. - :type gcp_conn_id: str :param google_ads_conn_id: The connection ID with the details of Google Ads config.yaml file. - :type google_ads_conn_id: str :param api_version: The Google Ads API version to use. - :type api_version: str :return: list of Google Ads Row object(s) :rtype: list[GoogleAdsRow] @@ -108,11 +105,8 @@ def search( more like conventional python object (using proto-plus-python). :param client_ids: Google Ads client ID(s) to query the API for. - :type client_ids: List[str] :param query: Google Ads Query Language query. - :type query: str :param page_size: Number of results to return per page. Max 10000. - :type page_size: int :return: Google Ads API response, converted to Google Ads Row objects :rtype: list[GoogleAdsRow] """ @@ -129,11 +123,8 @@ def search_proto_plus( message instances that behave more like conventional python objects. :param client_ids: Google Ads client ID(s) to query the API for. - :type client_ids: List[str] :param query: Google Ads Query Language query. - :type query: str :param page_size: Number of results to return per page. Max 10000. - :type page_size: int :return: Google Ads API response, converted to Google Ads Row objects :rtype: list[GoogleAdsRow] """ @@ -225,11 +216,8 @@ def _search( Pulls data from the Google Ads API :param client_ids: Google Ads client ID(s) to query the API for. - :type client_ids: List[str] :param query: Google Ads Query Language query. - :type query: str :param page_size: Number of results to return per page. Max 10000. - :type page_size: int :return: Google Ads API response, converted to Google Ads Row objects :rtype: list[GoogleAdsRow] @@ -255,7 +243,6 @@ def _extract_rows(self, iterators: List[GRPCIterator]) -> List[GoogleAdsRow]: Convert Google Page Iterator (GRPCIterator) objects to Google Ads Rows :param iterators: List of Google Page Iterator (GRPCIterator) objects - :type iterators: generator[GRPCIterator, None, None] :return: API response for all clients in the form of Google Ads Row object(s) :rtype: list[GoogleAdsRow] diff --git a/airflow/providers/google/ads/operators/ads.py b/airflow/providers/google/ads/operators/ads.py index 7634ce96ae951..702359e36488d 100644 --- a/airflow/providers/google/ads/operators/ads.py +++ b/airflow/providers/google/ads/operators/ads.py @@ -46,15 +46,10 @@ class GoogleAdsListAccountsOperator(BaseOperator): :ref:`howto/operator:GoogleAdsListAccountsOperator` :param bucket: The GCS bucket to upload to - :type bucket: str :param object_name: GCS path to save the csv file. Must be the full file path (ex. 
`path/to/file.csv`) - :type object_name: str :param gcp_conn_id: Airflow Google Cloud connection ID - :type gcp_conn_id: str :param google_ads_conn_id: Airflow Google Ads connection ID - :type google_ads_conn_id: str :param gzip: Option to compress local file or file data for upload - :type gzip: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -63,9 +58,7 @@ class GoogleAdsListAccountsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :param api_version: Optional Google Ads API version to use. - :type api_version: Optional[str] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/ads/transfers/ads_to_gcs.py b/airflow/providers/google/ads/transfers/ads_to_gcs.py index 915a7d5ee02b4..ffce93940c0d8 100644 --- a/airflow/providers/google/ads/transfers/ads_to_gcs.py +++ b/airflow/providers/google/ads/transfers/ads_to_gcs.py @@ -43,23 +43,14 @@ class GoogleAdsToGcsOperator(BaseOperator): :ref:`howto/operator:GoogleAdsToGcsOperator` :param client_ids: Google Ads client IDs to query - :type client_ids: List[str] :param query: Google Ads Query Language API query - :type query: str :param attributes: List of Google Ads Row attributes to extract - :type attributes: List[str] :param bucket: The GCS bucket to upload to - :type bucket: str :param obj: GCS path to save the object. Must be the full file path (ex. `path/to/file.txt`) - :type obj: str :param gcp_conn_id: Airflow Google Cloud connection ID - :type gcp_conn_id: str :param google_ads_conn_id: Airflow Google Ads connection ID - :type google_ads_conn_id: str :param page_size: The number of results per API page request. Max 10,000 - :type page_size: int :param gzip: Option to compress local file or file data for upload - :type gzip: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -68,9 +59,7 @@ class GoogleAdsToGcsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :param api_version: Optional Google Ads API version to use. 
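Putting the transfer parameters together, a hedged sketch with placeholder client ID, bucket, and GAQL query::

    from airflow.providers.google.ads.transfers.ads_to_gcs import GoogleAdsToGcsOperator

    query = """
        SELECT segments.date, campaign.id, metrics.clicks
        FROM campaign
        WHERE segments.date DURING LAST_7_DAYS
    """
    ads_to_gcs = GoogleAdsToGcsOperator(
        task_id="ads_to_gcs",
        client_ids=["1234567890"],  # placeholder Google Ads client ID
        query=query,
        attributes=["campaign.id", "metrics.clicks"],
        bucket="my-ads-bucket",  # placeholder GCS bucket
        obj="google-ads/report.csv",
        page_size=10000,
    )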
- :type api_version: Optional[str] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/_internal_client/secret_manager_client.py b/airflow/providers/google/cloud/_internal_client/secret_manager_client.py index 7f331d4992ea8..b5c8584928583 100644 --- a/airflow/providers/google/cloud/_internal_client/secret_manager_client.py +++ b/airflow/providers/google/cloud/_internal_client/secret_manager_client.py @@ -44,7 +44,6 @@ class _SecretManagerClient(LoggingMixin): :param credentials: Credentials used to authenticate to GCP - :type credentials: google.auth.credentials.Credentials """ def __init__( @@ -59,7 +58,6 @@ def is_valid_secret_name(secret_name: str) -> bool: """ Returns true if the secret name is valid. :param secret_name: name of the secret - :type secret_name: str :return: """ return bool(re.match(SECRET_ID_PATTERN, secret_name)) @@ -77,11 +75,8 @@ def get_secret(self, secret_id: str, project_id: str, secret_version: str = 'lat Get secret value from the Secret Manager. :param secret_id: Secret Key - :type secret_id: str :param project_id: Project id to use - :type project_id: str :param secret_version: version of the secret (default is 'latest') - :type secret_version: str """ name = self.client.secret_version_path(project_id, secret_id, secret_version) try: diff --git a/airflow/providers/google/cloud/hooks/automl.py b/airflow/providers/google/cloud/hooks/automl.py index 3e2bb21b01dfb..db8504758c843 100644 --- a/airflow/providers/google/cloud/hooks/automl.py +++ b/airflow/providers/google/cloud/hooks/automl.py @@ -16,7 +16,13 @@ # specific language governing permissions and limitations # under the License. # -"""This module contains a Google AutoML hook.""" +""" +This module contains a Google AutoML hook. + +.. spelling:: + + PredictResponse +""" import sys from typing import Dict, Optional, Sequence, Tuple, Union @@ -115,20 +121,14 @@ def create_model( :param model: The model_id to create. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.automl_v1beta1.types.Model` - :type model: Union[dict, google.cloud.automl_v1beta1.types.Model] :param project_id: ID of the Google Cloud project where model will be created if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: `google.cloud.automl_v1beta1.types._OperationFuture` instance """ @@ -160,31 +160,22 @@ def batch_predict( Instead, a long running operation object is returned. :param model_id: Name of the model_id requested to serve the batch prediction. - :type model_id: str :param input_config: Required. The input configuration for batch prediction. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.automl_v1beta1.types.BatchPredictInputConfig` - :type input_config: Union[dict, google.cloud.automl_v1beta1.types.BatchPredictInputConfig] :param output_config: Required. The Configuration specifying where output predictions should be written. 
If a dict is provided, it must be of the same form as the protobuf message `google.cloud.automl_v1beta1.types.BatchPredictOutputConfig` - :type output_config: Union[dict, google.cloud.automl_v1beta1.types.BatchPredictOutputConfig] :param params: Additional domain-specific parameters for the predictions, any string must be up to 25000 characters long. - :type params: Optional[Dict[str, str]] :param project_id: ID of the Google Cloud project where model is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: `google.cloud.automl_v1beta1.types._OperationFuture` instance """ @@ -220,26 +211,18 @@ def predict( returned in the response. :param model_id: Name of the model_id requested to serve the prediction. - :type model_id: str :param payload: Required. Payload to perform a prediction on. The payload must match the problem type that the model_id was trained to solve. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.automl_v1beta1.types.ExamplePayload` - :type payload: Union[dict, google.cloud.automl_v1beta1.types.ExamplePayload] :param params: Additional domain-specific parameters, any string must be up to 25000 characters long. - :type params: Optional[Dict[str, str]] :param project_id: ID of the Google Cloud project where model is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: `google.cloud.automl_v1beta1.types.PredictResponse` instance """ @@ -268,20 +251,14 @@ def create_dataset( :param dataset: The dataset to create. If a dict is provided, it must be of the same form as the protobuf message Dataset. - :type dataset: Union[dict, Dataset] :param project_id: ID of the Google Cloud project where dataset is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: `google.cloud.automl_v1beta1.types.Dataset` instance. 
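A sketch of the `predict` call described above, with placeholder project and model IDs; the tables-style payload dict is an illustrative `ExamplePayload`::

    from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook

    hook = CloudAutoMLHook()
    response = hook.predict(
        model_id="TBL123456789",  # placeholder model ID
        location="us-central1",
        payload={"row": {"values": ["value1", "value2"]}},  # illustrative payload shape
        project_id="my-project",  # placeholder project
    )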
""" @@ -310,23 +287,16 @@ def import_data( Imports data into a dataset. For Tables this method can only be called on an empty Dataset. :param dataset_id: Name of the AutoML dataset. - :type dataset_id: str :param input_config: The desired input location and its domain specific semantics, if any. If a dict is provided, it must be of the same form as the protobuf message InputConfig. - :type input_config: Union[dict, InputConfig] :param project_id: ID of the Google Cloud project where dataset is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: `google.cloud.automl_v1beta1.types._OperationFuture` instance """ @@ -358,33 +328,23 @@ def list_column_specs( Lists column specs in a table spec. :param dataset_id: Name of the AutoML dataset. - :type dataset_id: str :param table_spec_id: table_spec_id for path builder. - :type table_spec_id: str :param field_mask: Mask specifying which fields to read. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.automl_v1beta1.types.FieldMask` - :type field_mask: Union[dict, google.cloud.automl_v1beta1.types.FieldMask] :param filter_: Filter expression, see go/filtering. - :type filter_: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. - :type page_size: int :param project_id: ID of the Google Cloud project where dataset is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: `google.cloud.automl_v1beta1.types.ColumnSpec` instance. """ @@ -417,20 +377,14 @@ def get_model( Gets a AutoML model. :param model_id: Name of the model. - :type model_id: str :param project_id: ID of the Google Cloud project where model is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. 
- :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: `google.cloud.automl_v1beta1.types.Model` instance. """ @@ -458,20 +412,14 @@ def delete_model( Deletes a AutoML model. :param model_id: Name of the model. - :type model_id: str :param project_id: ID of the Google Cloud project where model is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: `google.cloud.automl_v1beta1.types._OperationFuture` instance. """ @@ -498,18 +446,13 @@ def update_dataset( :param dataset: The dataset which replaces the resource on the server. If a dict is provided, it must be of the same form as the protobuf message Dataset. - :type dataset: Union[dict, Dataset] :param update_mask: The update mask applies to the resource. If a dict is provided, it must be of the same form as the protobuf message FieldMask. - :type update_mask: Union[dict, FieldMask] :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: `google.cloud.automl_v1beta1.types.Dataset` instance.. """ @@ -542,24 +485,17 @@ def deploy_model( domains manage deployment automatically. :param model_id: Name of the model requested to serve the prediction. - :type model_id: str :param image_detection_metadata: Model deployment metadata specific to Image Object Detection. If a dict is provided, it must be of the same form as the protobuf message ImageObjectDetectionModelDeploymentMetadata - :type image_detection_metadata: Union[ImageObjectDetectionModelDeploymentMetadata, dict] :param project_id: ID of the Google Cloud project where model will be created if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: `google.cloud.automl_v1beta1.types._OperationFuture` instance. """ @@ -591,28 +527,20 @@ def list_table_specs( Lists table specs in a dataset_id. :param dataset_id: Name of the dataset. - :type dataset_id: str :param filter_: Filter expression, see go/filtering. 
- :type filter_: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. - :type page_size: int :param project_id: ID of the Google Cloud project where dataset is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: A `google.gax.PageIterator` instance. By default, this is an iterable of `google.cloud.automl_v1beta1.types.TableSpec` instances. @@ -643,17 +571,12 @@ def list_datasets( :param project_id: ID of the Google Cloud project where dataset is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: A `google.gax.PageIterator` instance. By default, this is an iterable of `google.cloud.automl_v1beta1.types.Dataset` instances. @@ -684,20 +607,14 @@ def delete_dataset( Deletes a dataset and all of its contents. :param dataset_id: ID of dataset to be deleted. - :type dataset_id: str :param project_id: ID of the Google Cloud project where dataset is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: `google.cloud.automl_v1beta1.types._OperationFuture` instance """ diff --git a/airflow/providers/google/cloud/hooks/bigquery.py b/airflow/providers/google/cloud/hooks/bigquery.py index bb9501e3687de..c2a22d86ec2f5 100644 --- a/airflow/providers/google/cloud/hooks/bigquery.py +++ b/airflow/providers/google/cloud/hooks/bigquery.py @@ -68,22 +68,14 @@ class BigQueryHook(GoogleBaseHook, DbApiHook): Interact with BigQuery. This hook uses the Google Cloud connection. :param gcp_conn_id: The Airflow connection used for GCP credentials. - :type gcp_conn_id: Optional[str] :param delegate_to: This performs a task on one host with reference to other hosts. 
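The listing calls above return pagers that can be iterated directly; a minimal sketch with placeholder project and location::

    from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook

    hook = CloudAutoMLHook()
    for dataset in hook.list_datasets(location="us-central1", project_id="my-project"):
        print(dataset.name)  # each item is an automl_v1beta1 Dataset message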
- :type delegate_to: Optional[str] :param use_legacy_sql: This specifies whether to use legacy SQL dialect. - :type use_legacy_sql: bool :param location: The location of the BigQuery resource. - :type location: Optional[str] :param bigquery_conn_id: The Airflow connection used for BigQuery credentials. - :type bigquery_conn_id: Optional[str] :param api_resource_configs: This contains params configuration applied for Google BigQuery jobs. - :type api_resource_configs: Optional[Dict] :param impersonation_chain: This is the optional service account to impersonate using short term credentials. - :type impersonation_chain: Optional[Union[str, Sequence[str]]] :param labels: The BigQuery resource label. - :type labels: Optional[Dict] """ conn_name_attr = 'gcp_conn_id' @@ -148,9 +140,7 @@ def get_client(self, project_id: Optional[str] = None, location: Optional[str] = Returns authenticated BigQuery Client. :param project_id: Project ID for the project which the client acts on behalf of. - :type project_id: str :param location: Default location for jobs / datasets / tables. - :type location: str :return: """ return Client( @@ -217,15 +207,11 @@ def get_pandas_df( https://github.com/pydata/pandas/issues/6900 :param sql: The BigQuery SQL to execute. - :type sql: str :param parameters: The parameters to render the SQL query with (not used, leave to override superclass method) - :type parameters: mapping or iterable :param dialect: Dialect of BigQuery SQL – legacy SQL or standard SQL defaults to use `self.use_legacy_sql` if not specified - :type dialect: str in {'legacy', 'standard'} :param kwargs: (optional) passed into pandas_gbq.read_gbq method - :type kwargs: dict """ if dialect is None: dialect = 'legacy' if self.use_legacy_sql else 'standard' @@ -244,12 +230,9 @@ def table_exists(self, dataset_id: str, table_id: str, project_id: str) -> bool: :param project_id: The Google cloud project in which to look for the table. The connection supplied to the hook must provide access to the specified project. - :type project_id: str :param dataset_id: The name of the dataset in which to look for the table. - :type dataset_id: str :param table_id: The name of the table to check the existence of. - :type table_id: str """ table_reference = TableReference(DatasetReference(project_id, dataset_id), table_id) try: @@ -268,14 +251,10 @@ def table_partition_exists( :param project_id: The Google cloud project in which to look for the table. The connection supplied to the hook must provide access to the specified project. - :type project_id: str :param dataset_id: The name of the dataset in which to look for the table. - :type dataset_id: str :param table_id: The name of the table to check the existence of. - :type table_id: str :param partition_id: The name of the partition to check the existence of. - :type partition_id: str """ table_reference = TableReference(DatasetReference(project_id, dataset_id), table_id) try: @@ -307,22 +286,15 @@ def create_empty_table( To create a view, which is defined by a SQL query, parse a dictionary to 'view' kwarg :param project_id: The project to create the table into. - :type project_id: str :param dataset_id: The dataset to create the table into. - :type dataset_id: str :param table_id: The Name of the table to be created. - :type table_id: str :param table_resource: Table resource as described in documentation: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table If provided all other parameters are ignored. 
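A minimal sketch of the existence check and the pandas helper described above, with placeholder project, dataset, and table names::

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook(use_legacy_sql=False)
    if hook.table_exists(project_id="my-project", dataset_id="my_dataset", table_id="events"):
        df = hook.get_pandas_df("SELECT COUNT(*) AS n FROM `my-project.my_dataset.events`")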
- :type table_resource: Dict[str, Any] :param schema_fields: If set, the schema field list as defined here: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema - :type schema_fields: list :param labels: a dictionary containing labels for the table, passed to BigQuery - :type labels: dict :param retry: Optional. How to retry the RPC. - :type retry: google.api_core.retry.Retry **Example**: :: @@ -334,16 +306,13 @@ def create_empty_table( .. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning - :type time_partitioning: dict :param cluster_fields: [Optional] The fields used for clustering. BigQuery supports clustering for both partitioned and non-partitioned tables. https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clustering.fields - :type cluster_fields: list :param view: [Optional] A dictionary containing definition for the view. If set, it will create a view instead of a table: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition - :type view: dict **Example**: :: @@ -353,18 +322,14 @@ def create_empty_table( } :param materialized_view: [Optional] The materialized view definition. - :type materialized_view: dict :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). **Example**: :: encryption_configuration = { "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" } - :type encryption_configuration: dict :param num_retries: Maximum number of retries in case of connection problems. - :type num_retries: int :param exists_ok: If ``True``, ignore "already exists" errors when creating the table. - :type exists_ok: bool :return: Created table """ if num_retries: @@ -423,17 +388,12 @@ def create_empty_dataset( :param project_id: The name of the project where we want to create an empty dataset. Not needed if projectId is provided in dataset_reference. - :type project_id: str :param dataset_id: The id of the dataset. Not needed if datasetId is provided in dataset_reference. - :type dataset_id: str :param location: (Optional) The geographic location where the dataset should reside. There is no default value but the dataset will be created in US if nothing is provided. - :type location: str :param dataset_reference: Dataset reference that could be provided with request body. More info: https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource - :type dataset_reference: dict :param exists_ok: If ``True``, ignore "already exists" errors when creating the dataset. - :type exists_ok: bool """ dataset_reference = dataset_reference or {"datasetReference": {}} @@ -483,14 +443,10 @@ def get_dataset_tables( https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list :param dataset_id: the dataset ID of the requested dataset. - :type dataset_id: str :param project_id: (Optional) the project of the requested dataset. If None, self.project_id will be used. - :type project_id: str :param max_results: (Optional) the maximum number of tables to return. - :type max_results: int :param retry: How to retry the RPC. - :type retry: google.api_core.retry.Retry :return: List of tables associated with the dataset. """ self.log.info('Start getting tables list from dataset: %s.%s', project_id, dataset_id) @@ -514,14 +470,10 @@ def delete_dataset( Delete a BigQuery dataset in your project. :param project_id: The name of the project where we have the dataset.
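The dataset and table creation helpers compose naturally; a sketch with placeholder names, reusing the schema example from the docstring above::

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook()
    hook.create_empty_dataset(dataset_id="staging", project_id="my-project", location="EU")
    hook.create_empty_table(
        project_id="my-project",  # placeholder project
        dataset_id="staging",
        table_id="events",
        schema_fields=[
            {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
            {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
        ],
        exists_ok=True,
    )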
- :type project_id: str :param dataset_id: The dataset to be deleted. - :type dataset_id: str :param delete_contents: If True, delete all the tables in the dataset. If False and the dataset contains tables, the request will fail. - :type delete_contents: bool :param retry: How to retry the RPC. - :type retry: google.api_core.retry.Retry """ self.log.info('Deleting from project: %s Dataset:%s', project_id, dataset_id) self.get_client(project_id=project_id).delete_dataset( @@ -568,68 +520,50 @@ def create_external_table( table name to create external table. If ``<project>`` is not included, project will be the project defined in the connection json. - :type external_project_dataset_table: str :param schema_fields: The schema field list as defined here: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource - :type schema_fields: list :param source_uris: The source Google Cloud Storage URI (e.g. gs://some-bucket/some-file.txt). A single wildcard per-object name can be used. - :type source_uris: list :param source_format: File format to export. - :type source_format: str :param autodetect: Try to detect schema and format options automatically. Any option specified explicitly will be honored. - :type autodetect: bool :param compression: [Optional] The compression type of the data source. Possible values include GZIP and NONE. The default value is NONE. This setting is ignored for Google Cloud Bigtable, Google Cloud Datastore backups and Avro formats. - :type compression: str :param ignore_unknown_values: [Optional] Indicates if BigQuery should allow extra values that are not represented in the table schema. If true, the extra values are ignored. If false, records with extra columns are treated as bad records, and if there are too many bad records, an invalid error is returned in the job result. - :type ignore_unknown_values: bool :param max_bad_records: The maximum number of bad records that BigQuery can ignore when running the job. - :type max_bad_records: int :param skip_leading_rows: Number of rows to skip when loading from a CSV. - :type skip_leading_rows: int :param field_delimiter: The delimiter to use when loading from a CSV. - :type field_delimiter: str :param quote_character: The value that is used to quote data sections in a CSV file. - :type quote_character: str :param allow_quoted_newlines: Whether to allow quoted newlines (true) or not (false). - :type allow_quoted_newlines: bool :param allow_jagged_rows: Accept rows that are missing trailing optional columns. The missing values are treated as nulls. If false, records with missing trailing columns are treated as bad records, and if there are too many bad records, an invalid error is returned in the job result. Only applicable when source_format is CSV. - :type allow_jagged_rows: bool :param encoding: The character encoding of the data. See: .. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding - :type encoding: str :param src_fmt_configs: configure optional fields specific to the source format - :type src_fmt_configs: dict :param labels: A dictionary containing labels for the BigQuery table. - :type labels: dict :param description: A string containing the description for the BigQuery table. - :type description: str :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys).
**Example**: :: encryption_configuration = { "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" } - :type encryption_configuration: dict """ warnings.warn( "This method is deprecated. Please use `BigQueryHook.create_empty_table` method with " @@ -726,19 +660,14 @@ def update_table( no modifications to the table occurred since the read. :param project_id: The project to create the table into. - :type project_id: str :param dataset_id: The dataset to create the table into. - :type dataset_id: str :param table_id: The Name of the table to be created. - :type table_id: str :param table_resource: Table resource as described in documentation: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table The table has to contain ``tableReference`` or ``project_id``, ``dataset_id`` and ``table_id`` have to be provided. - :type table_resource: Dict[str, Any] :param fields: The fields of ``table`` to change, spelled as the Table properties (e.g. "friendly_name"). - :type fields: List[str] """ fields = fields or list(table_resource.keys()) table_resource = self._resolve_table_reference( @@ -775,23 +704,15 @@ def patch_table( Reference: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/patch :param dataset_id: The dataset containing the table to be patched. - :type dataset_id: str :param table_id: The Name of the table to be patched. - :type table_id: str :param project_id: The project containing the table to be patched. - :type project_id: str :param description: [Optional] A user-friendly description of this table. - :type description: str :param expiration_time: [Optional] The time when this table expires, in milliseconds since the epoch. - :type expiration_time: int :param external_data_configuration: [Optional] A dictionary containing properties of a table stored outside of BigQuery. - :type external_data_configuration: dict :param friendly_name: [Optional] A descriptive name for this table. - :type friendly_name: str :param labels: [Optional] A dictionary containing labels associated with this table. - :type labels: dict :param schema: [Optional] If set, the schema field list as defined here: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema The supported schema modifications and unsupported schema modification are listed here: @@ -801,10 +722,8 @@ def patch_table( schema=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"}, {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}] - :type schema: list :param time_partitioning: [Optional] A dictionary containing time-based partitioning definition for the table. - :type time_partitioning: dict :param view: [Optional] A dictionary containing definition for the view. If set, it will patch a view instead of a table: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition @@ -815,17 +734,14 @@ def patch_table( "useLegacySql": False } - :type view: dict :param require_partition_filter: [Optional] If true, queries over the this table require a partition filter. If false, queries over the table - :type require_partition_filter: bool :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). 
**Example**: :: encryption_configuration = { "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" } - :type encryption_configuration: dict """ warnings.warn( @@ -883,13 +799,9 @@ def insert_all( https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll :param project_id: The name of the project where we have the table - :type project_id: str :param dataset_id: The name of the dataset where we have the table - :type dataset_id: str :param table_id: The name of the table - :type table_id: str :param rows: the rows to insert - :type rows: list **Example or rows**: rows=[{"json": {"a_key": "a_value_0"}}, {"json": {"a_key": "a_value_1"}}] @@ -897,15 +809,12 @@ def insert_all( :param ignore_unknown_values: [Optional] Accept rows that contain values that do not match the schema. The unknown values are ignored. The default value is false, which treats unknown values as errors. - :type ignore_unknown_values: bool :param skip_invalid_rows: [Optional] Insert all valid rows of a request, even if invalid rows exist. The default value is false, which causes the entire request to fail if any invalid rows exist. - :type skip_invalid_rows: bool :param fail_on_error: [Optional] Force the task to fail if any errors occur. The default value is false, which indicates the task should not fail even if any insertion errors occur. - :type fail_on_error: bool """ self.log.info('Inserting %s row(s) into table %s:%s.%s', len(rows), project_id, dataset_id, table_id) @@ -952,15 +861,10 @@ def update_dataset( :param dataset_resource: Dataset resource that will be provided in request body. https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource - :type dataset_resource: dict :param dataset_id: The id of the dataset. - :type dataset_id: str :param fields: The properties of ``dataset`` to change (e.g. "friendly_name"). - :type fields: Sequence[str] :param project_id: The Google Cloud Project ID - :type project_id: str :param retry: How to retry the RPC. - :type retry: google.api_core.retry.Retry """ dataset_resource["datasetReference"] = dataset_resource.get("datasetReference", {}) @@ -988,13 +892,10 @@ def patch_dataset( https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/patch :param dataset_id: The BigQuery Dataset ID - :type dataset_id: str :param dataset_resource: Dataset resource that will be provided in request body. https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource - :type dataset_resource: dict :param project_id: The Google Cloud Project ID - :type project_id: str :rtype: dataset https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource """ @@ -1037,14 +938,10 @@ def get_dataset_tables_list( https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list :param dataset_id: The BigQuery Dataset ID - :type dataset_id: str :param project_id: The Google Cloud Project ID - :type project_id: str :param table_prefix: Tables must begin by this prefix to be returned (case sensitive) - :type table_prefix: str :param max_results: The maximum number of results to return in a single response page. Leverage the page tokens to iterate through the entire collection. - :type max_results: int :return: List of tables associated with the dataset """ warnings.warn("This method is deprecated. 
Please use ``get_dataset_tables``.", DeprecationWarning) @@ -1079,7 +976,6 @@ def get_datasets_list( https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list :param project_id: Google Cloud Project for which you try to get all datasets - :type project_id: str :param include_all: True if results include hidden datasets. Defaults to False. :param filter_: An expression for filtering the results by label. For syntax, see https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#filter. @@ -1092,7 +988,6 @@ def get_datasets_list( ``next_page_token`` of the :class:`~google.api_core.page_iterator.HTTPIterator`. :param page_token: str :param retry: How to retry the RPC. - :type retry: google.api_core.retry.Retry """ datasets = self.get_client(project_id=project_id).list_datasets( project=project_id, @@ -1113,9 +1008,7 @@ def get_dataset(self, dataset_id: str, project_id: Optional[str] = None) -> Data Fetch the dataset referenced by dataset_id. :param dataset_id: The BigQuery Dataset ID - :type dataset_id: str :param project_id: The Google Cloud Project ID - :type project_id: str :return: dataset_resource .. seealso:: @@ -1144,17 +1037,12 @@ def run_grant_dataset_view_access( This method is not atomic. Running it may clobber a simultaneous update. :param source_dataset: the source dataset - :type source_dataset: str :param view_dataset: the dataset that the view is in - :type view_dataset: str :param view_table: the table of the view - :type view_table: str :param project_id: the project of the source dataset. If None, self.project_id will be used. - :type project_id: str :param view_project: the project that the view is in. If None, self.project_id will be used. - :type view_project: str :return: the datasets resource of the source dataset. """ if source_project: @@ -1207,10 +1095,8 @@ def run_table_upsert( atomic operation. :param dataset_id: the dataset to upsert the table into. - :type dataset_id: str :param table_resource: a table resource. see https://cloud.google.com/bigquery/docs/reference/v2/tables#resource - :type table_resource: dict :param project_id: the project to upsert the table into. If None, project will be self.project_id. :return: @@ -1240,10 +1126,8 @@ def run_table_delete(self, deletion_dataset_table: str, ignore_if_missing: bool :param deletion_dataset_table: A dotted ``(.|:).`` that indicates which table will be deleted. - :type deletion_dataset_table: str :param ignore_if_missing: if True, then return success even if the requested table does not exist. - :type ignore_if_missing: bool :return: """ warnings.warn("This method is deprecated. Please use `delete_table`.", DeprecationWarning) @@ -1262,12 +1146,9 @@ def delete_table( :param table_id: A dotted ``(.|:).
`` that indicates which table will be deleted. - :type table_id: str :param not_found_ok: if True, then return success even if the requested table does not exist. - :type not_found_ok: bool :param project_id: the project used to perform the request - :type project_id: str """ self.get_client(project_id=project_id).delete_table( table=Table.from_string(table_id), @@ -1397,11 +1278,8 @@ def update_table_schema( :param include_policy_tags: If set to True policy tags will be included in the update request which requires special permissions even if unchanged see https://cloud.google.com/bigquery/docs/column-level-security#roles - :type include_policy_tags: bool :param dataset_id: the dataset ID of the requested table to be updated - :type dataset_id: str :param table_id: the table ID of the table to be updated - :type table_id: str :param schema_fields_updates: a partial schema resource. see https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableSchema @@ -1416,9 +1294,7 @@ def update_table_schema( ]}, ] - :type schema_fields_updates: List[dict] :param project_id: The name of the project where we want to update the table. - :type project_id: str """ def _build_new_schema( @@ -1485,13 +1361,9 @@ def poll_job_complete( Check if the job has completed. :param job_id: id of the job. - :type job_id: str :param project_id: Google Cloud Project where the job is running - :type project_id: str :param location: location where the job is running - :type location: str :param retry: How to retry the RPC. - :type retry: google.api_core.retry.Retry :rtype: bool """ location = location or self.location @@ -1520,11 +1392,8 @@ def cancel_job( Cancels a job and waits for the cancellation to complete :param job_id: id of the job. - :type job_id: str :param project_id: Google Cloud Project where the job is running - :type project_id: str :param location: location where the job is running - :type location: str """ location = location or self.location @@ -1569,11 +1438,8 @@ def get_job( :param job_id: The ID of the job. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or dashes (-). The maximum length is 1,024 characters. If not provided, a uuid will be generated. - :type job_id: str :param project_id: Google Cloud Project where the job is running - :type project_id: str :param location: location where the job is running - :type location: str """ client = self.get_client(project_id=project_id, location=location) job = client.get_job(job_id=job_id, project=project_id, location=location) @@ -1606,15 +1472,11 @@ def insert_job( BigQuery's configuration field in the job object. See https://cloud.google.com/bigquery/docs/reference/v2/jobs for details. - :type configuration: Dict[str, Any] :param job_id: The ID of the job. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or dashes (-). The maximum length is 1,024 characters. If not provided, a uuid will be generated. - :type job_id: str :param project_id: Google Cloud Project where the job is running - :type project_id: str :param location: location where the job is running - :type location: str """ location = location or self.location job_id = job_id or self._custom_job_id(configuration) @@ -1703,77 +1565,55 @@ def run_load( project defined in the connection json. If a partition is specified the operator will automatically append the data, create a new partition or create a new DAY partitioned table.
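`insert_job` is the non-deprecated entry point that the warnings throughout this file point to; a minimal query-job sketch with a placeholder project::

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook(use_legacy_sql=False)
    job = hook.insert_job(
        configuration={"query": {"query": "SELECT 1", "useLegacySql": False}},
        project_id="my-project",  # placeholder project
        location="EU",
    )
    print(job.job_id)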
- :type destination_project_dataset_table: str :param schema_fields: The schema field list as defined here: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load Required if autodetect=False; optional if autodetect=True. - :type schema_fields: list :param autodetect: Attempt to autodetect the schema for CSV and JSON source files. - :type autodetect: bool :param source_uris: The source Google Cloud Storage URI (e.g. gs://some-bucket/some-file.txt). A single wild per-object name can be used. - :type source_uris: list :param source_format: File format to export. - :type source_format: str :param create_disposition: The create disposition if the table doesn't exist. - :type create_disposition: str :param skip_leading_rows: Number of rows to skip when loading from a CSV. - :type skip_leading_rows: int :param write_disposition: The write disposition if the table already exists. - :type write_disposition: str :param field_delimiter: The delimiter to use when loading from a CSV. - :type field_delimiter: str :param max_bad_records: The maximum number of bad records that BigQuery can ignore when running the job. - :type max_bad_records: int :param quote_character: The value that is used to quote data sections in a CSV file. - :type quote_character: str :param ignore_unknown_values: [Optional] Indicates if BigQuery should allow extra values that are not represented in the table schema. If true, the extra values are ignored. If false, records with extra columns are treated as bad records, and if there are too many bad records, an invalid error is returned in the job result. - :type ignore_unknown_values: bool :param allow_quoted_newlines: Whether to allow quoted newlines (true) or not (false). - :type allow_quoted_newlines: bool :param allow_jagged_rows: Accept rows that are missing trailing optional columns. The missing values are treated as nulls. If false, records with missing trailing columns are treated as bad records, and if there are too many bad records, an invalid error is returned in the job result. Only applicable when source_format is CSV. - :type allow_jagged_rows: bool :param encoding: The character encoding of the data. .. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding - :type encoding: str :param schema_update_options: Allows the schema of the destination table to be updated as a side effect of the load job. - :type schema_update_options: Union[list, tuple, set] :param src_fmt_configs: configure optional fields specific to the source format - :type src_fmt_configs: dict :param time_partitioning: configure optional time partitioning fields i.e. partition by field, type and expiration as per API specifications. - :type time_partitioning: dict :param cluster_fields: Request that the result of this load be stored sorted by one or more columns. BigQuery supports clustering for both partitioned and non-partitioned tables. The order of columns given determines the sort order. - :type cluster_fields: list[str] :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). **Example**: :: encryption_configuration = { "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" } - :type encryption_configuration: dict :param labels: A dictionary containing labels for the BiqQuery table. - :type labels: dict :param description: A string containing the description for the BigQuery table. 
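Since `run_load` is deprecated, the same load can be expressed as an `insert_job` configuration; the field names follow the BigQuery jobs API, while table and bucket names are placeholders::

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook()
    load_configuration = {
        "load": {
            "destinationTable": {
                "projectId": "my-project",
                "datasetId": "staging",
                "tableId": "events",
            },
            "sourceUris": ["gs://my-bucket/data/part-*.csv"],
            "sourceFormat": "CSV",
            "skipLeadingRows": 1,
            "writeDisposition": "WRITE_TRUNCATE",
        }
    }
    hook.insert_job(configuration=load_configuration)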
- :type description: str """ warnings.warn( "This method is deprecated. Please use `BigQueryHook.insert_job` method.", DeprecationWarning @@ -1948,24 +1788,18 @@ def run_copy( multiple source tables. If ```` is not included, project will be the project defined in the connection json. - :type source_project_dataset_tables: list|string :param destination_project_dataset_table: The destination BigQuery table. Format is: ``(project:|project.).
`` - :type destination_project_dataset_table: str :param write_disposition: The write disposition if the table already exists. - :type write_disposition: str :param create_disposition: The create disposition if the table doesn't exist. - :type create_disposition: str :param labels: a dictionary containing labels for the job/query, passed to BigQuery - :type labels: dict :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). **Example**: :: encryption_configuration = { "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" } - :type encryption_configuration: dict """ warnings.warn( "This method is deprecated. Please use `BigQueryHook.insert_job` method.", DeprecationWarning @@ -2036,23 +1870,16 @@ def run_extract( :param source_project_dataset_table: The dotted ``<dataset>.
<table>`` BigQuery table to use as the source data. - :type source_project_dataset_table: str :param destination_cloud_storage_uris: The destination Google Cloud Storage URI (e.g. gs://some-bucket/some-file.txt). Follows convention defined here: https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple - :type destination_cloud_storage_uris: list :param compression: Type of compression to use. - :type compression: str :param export_format: File format to export. - :type export_format: str :param field_delimiter: The delimiter to use when extracting to a CSV. - :type field_delimiter: str :param print_header: Whether to print a header for a CSV file extract. - :type print_header: bool :param labels: a dictionary containing labels for the job/query, passed to BigQuery - :type labels: dict """ warnings.warn( "This method is deprecated. Please use `BigQueryHook.insert_job` method.", DeprecationWarning @@ -2124,75 +1951,56 @@ def run_query( For more details about these parameters. :param sql: The BigQuery SQL to execute. - :type sql: str :param destination_dataset_table: The dotted ``<dataset>.<table>
`` BigQuery table to save the query results. - :type destination_dataset_table: str :param write_disposition: What to do if the table already exists in BigQuery. - :type write_disposition: str :param allow_large_results: Whether to allow large results. - :type allow_large_results: bool :param flatten_results: If true and query uses legacy SQL dialect, flattens all nested and repeated fields in the query results. ``allowLargeResults`` must be true if this is set to false. For standard SQL queries, this flag is ignored and results are never flattened. - :type flatten_results: bool :param udf_config: The User Defined Function configuration for the query. See https://cloud.google.com/bigquery/user-defined-functions for details. - :type udf_config: list :param use_legacy_sql: Whether to use legacy SQL (true) or standard SQL (false). If `None`, defaults to `self.use_legacy_sql`. - :type use_legacy_sql: bool :param api_resource_configs: a dictionary that contain params 'configuration' applied for Google BigQuery Jobs API: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs for example, {'query': {'useQueryCache': False}}. You could use it if you need to provide some params that are not supported by the BigQueryHook like args. - :type api_resource_configs: dict :param maximum_billing_tier: Positive integer that serves as a multiplier of the basic price. - :type maximum_billing_tier: int :param maximum_bytes_billed: Limits the bytes billed for this job. Queries that will have bytes billed beyond this limit will fail (without incurring a charge). If unspecified, this will be set to your project default. - :type maximum_bytes_billed: float :param create_disposition: Specifies whether the job is allowed to create new tables. - :type create_disposition: str :param query_params: a list of dictionary containing query parameter types and values, passed to BigQuery - :type query_params: list :param labels: a dictionary containing labels for the job/query, passed to BigQuery - :type labels: dict :param schema_update_options: Allows the schema of the destination table to be updated as a side effect of the query job. - :type schema_update_options: Union[list, tuple, set] :param priority: Specifies a priority for the query. Possible values include INTERACTIVE and BATCH. The default value is INTERACTIVE. - :type priority: str :param time_partitioning: configure optional time partitioning fields i.e. partition by field, type and expiration as per API specifications. - :type time_partitioning: dict :param cluster_fields: Request that the result of this query be stored sorted by one or more columns. BigQuery supports clustering for both partitioned and non-partitioned tables. The order of columns given determines the sort order. - :type cluster_fields: list[str] :param location: The geographic location of the job. Required except for US and EU. See details at https://cloud.google.com/bigquery/docs/locations#specifying_your_location - :type location: str :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). **Example**: :: encryption_configuration = { "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" } - :type encryption_configuration: dict """ warnings.warn( "This method is deprecated. Please use `BigQueryHook.insert_job` method.", DeprecationWarning @@ -2778,9 +2586,7 @@ def execute(self, operation: str, parameters: Optional[dict] = None) -> None: Executes a BigQuery query, and returns the job ID. 
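The cursor's `execute` binds `parameters` with pyformat-style (`%(name)s`) substitution; a sketch with placeholder table and column names::

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook(use_legacy_sql=False)
    cursor = hook.get_conn().cursor()
    cursor.execute(
        "SELECT name FROM `my-project.my_dataset.users` WHERE age > %(min_age)s",
        {"min_age": 21},  # substituted into the query before submission
    )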
:param operation: The query to execute. - :type operation: str :param parameters: Parameters to substitute into the query. - :type parameters: dict """ sql = _bind_parameters(operation, parameters) if parameters else operation self.flush_results() @@ -2791,10 +2597,8 @@ def executemany(self, operation: str, seq_of_parameters: list) -> None: Execute a BigQuery query multiple times with different parameters. :param operation: The query to execute. - :type operation: str :param seq_of_parameters: List of dictionary parameters to substitute into the query. - :type seq_of_parameters: list """ for parameters in seq_of_parameters: self.execute(operation, parameters) @@ -3057,13 +2861,9 @@ def _validate_src_fmt_configs( Adds the backward compatibility config to the src_fmt_configs. :param source_format: File format to export. - :type source_format: str :param src_fmt_configs: Configure optional fields specific to the source format. - :type src_fmt_configs: dict :param valid_configs: Valid configuration specific to the source format - :type valid_configs: List[str] :param backward_compatibility_configs: The top-level params for backward-compatibility - :type backward_compatibility_configs: dict """ if backward_compatibility_configs is None: backward_compatibility_configs = {} diff --git a/airflow/providers/google/cloud/hooks/bigquery_dts.py b/airflow/providers/google/cloud/hooks/bigquery_dts.py index a8ff0ffaf8155..b0ba5af717731 100644 --- a/airflow/providers/google/cloud/hooks/bigquery_dts.py +++ b/airflow/providers/google/cloud/hooks/bigquery_dts.py @@ -70,7 +70,6 @@ def _disable_auto_scheduling(config: Union[dict, TransferConfig]) -> TransferCon call start_manual_transfer_runs. :param config: Data transfer configuration to create. - :type config: Union[dict, google.cloud.bigquery_datatransfer_v1.types.TransferConfig] """ config = TransferConfig.to_dict(config) if isinstance(config, TransferConfig) else config new_config = copy(config) @@ -114,23 +113,17 @@ def create_transfer_config( Creates a new data transfer configuration. :param transfer_config: Data transfer configuration to create. - :type transfer_config: Union[dict, google.cloud.bigquery_datatransfer_v1.types.TransferConfig] :param project_id: The BigQuery project id where the transfer configuration should be created. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param authorization_code: authorization code to use with this transfer configuration. This is required if new credentials are needed. - :type authorization_code: Optional[str] :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: A ``google.cloud.bigquery_datatransfer_v1.types.TransferConfig`` instance. """ client = self.get_conn() @@ -162,20 +155,15 @@ def delete_transfer_config( Deletes transfer configuration. :param transfer_config_id: Id of transfer config to be used. - :type transfer_config_id: str :param project_id: The BigQuery project id where the transfer configuration should be created. 
If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: None """ client = self.get_conn() @@ -206,29 +194,22 @@ def start_manual_transfer_runs( (exclusive), or for a specific run_time. :param transfer_config_id: ID of transfer config to be used. - :type transfer_config_id: str :param requested_time_range: Time range for the transfer runs that should be started. If a dict is provided, it must be of the same form as the protobuf message `~google.cloud.bigquery_datatransfer_v1.types.TimeRange` - :type requested_time_range: Union[dict, ~google.cloud.bigquery_datatransfer_v1.types.TimeRange] :param requested_run_time: Specific run_time for a transfer run to be started. The requested_run_time must not be in the future. If a dict is provided, it must be of the same form as the protobuf message `~google.cloud.bigquery_datatransfer_v1.types.Timestamp` - :type requested_run_time: Union[dict, ~google.cloud.bigquery_datatransfer_v1.types.Timestamp] :param project_id: The BigQuery project id where the transfer configuration should be created. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: A ``google.cloud.bigquery_datatransfer_v1.types.StartManualTransferRunsResponse`` instance. """ client = self.get_conn() @@ -262,22 +243,16 @@ def get_transfer_run( Returns information about the particular transfer run. :param run_id: ID of the transfer run. - :type run_id: str :param transfer_config_id: ID of transfer config to be used. - :type transfer_config_id: str :param project_id: The BigQuery project id where the transfer configuration should be created. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :return: A ``google.cloud.bigquery_datatransfer_v1.types.TransferRun`` instance.
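For orientation, a hedged sketch of chaining the two calls documented above; the hook class name is taken from this provider module, while config and project IDs and the run-name shape are illustrative assumptions: ::

    from airflow.providers.google.cloud.hooks.bigquery_dts import BiqQueryDataTransferServiceHook

    hook = BiqQueryDataTransferServiceHook(gcp_conn_id="google_cloud_default")
    # Kick off a manual run for an existing transfer config.
    response = hook.start_manual_transfer_runs(
        transfer_config_id="my-transfer-config",
        project_id="my-project",
    )
    # Assuming each returned run is named .../transferConfigs/<config>/runs/<run_id>,
    # poll the first one with get_transfer_run.
    run_id = response.runs[0].name.split("/")[-1]
    run = hook.get_transfer_run(
        run_id=run_id,
        transfer_config_id="my-transfer-config",
        project_id="my-project",
    )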
""" client = self.get_conn() diff --git a/airflow/providers/google/cloud/hooks/bigtable.py b/airflow/providers/google/cloud/hooks/bigtable.py index 148e5e42b0e9a..3086baf72e7a7 100644 --- a/airflow/providers/google/cloud/hooks/bigtable.py +++ b/airflow/providers/google/cloud/hooks/bigtable.py @@ -68,11 +68,9 @@ def get_instance(self, instance_id: str, project_id: str) -> Instance: Otherwise, returns None. :param instance_id: The ID of the Cloud Bigtable instance. - :type instance_id: str :param project_id: Optional, Google Cloud project ID where the BigTable exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str """ instance = self._get_client(project_id=project_id).instance(instance_id) if not instance.exists(): @@ -89,9 +87,7 @@ def delete_instance(self, instance_id: str, project_id: str) -> None: :param project_id: Optional, Google Cloud project ID where the BigTable exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param instance_id: The ID of the Cloud Bigtable instance. - :type instance_id: str """ instance = self.get_instance(instance_id=instance_id, project_id=project_id) if instance: @@ -121,39 +117,26 @@ def create_instance( """ Creates new instance. - :type instance_id: str :param instance_id: The ID for the new instance. - :type main_cluster_id: str :param main_cluster_id: The ID for main cluster for the new instance. - :type main_cluster_zone: str :param main_cluster_zone: The zone for main cluster. See https://cloud.google.com/bigtable/docs/locations for more details. - :type project_id: str :param project_id: Optional, Google Cloud project ID where the BigTable exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type replica_clusters: List[Dict[str, str]] :param replica_clusters: (optional) A list of replica clusters for the new instance. Each cluster dictionary contains an id and a zone. Example: [{"id": "replica-1", "zone": "us-west1-a"}] - :type replica_cluster_id: str :param replica_cluster_id: (deprecated) The ID for replica cluster for the new instance. - :type replica_cluster_zone: str :param replica_cluster_zone: (deprecated) The zone for replica cluster. - :type instance_type: enums.Instance.Type :param instance_type: (optional) The type of the instance. - :type instance_display_name: str :param instance_display_name: (optional) Human-readable name of the instance. Defaults to ``instance_id``. - :type instance_labels: dict :param instance_labels: (optional) Dictionary of labels to associate with the instance. - :type cluster_nodes: int :param cluster_nodes: (optional) Number of nodes for cluster. - :type cluster_storage_type: enums.StorageType :param cluster_storage_type: (optional) The type of storage. - :type timeout: int :param timeout: (optional) timeout (in seconds) for instance creation. If None is not specified, Operator will wait indefinitely. """ @@ -216,20 +199,14 @@ def update_instance( """ Update an existing instance. - :type instance_id: str :param instance_id: The ID for the existing instance. - :type project_id: str :param project_id: Optional, Google Cloud project ID where the BigTable exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type instance_display_name: str :param instance_display_name: (optional) Human-readable name of the instance. 
- :type instance_type: enums.Instance.Type or enum.IntEnum :param instance_type: (optional) The type of the instance. - :type instance_labels: dict :param instance_labels: (optional) Dictionary of labels to associate with the instance. - :type timeout: int :param timeout: (optional) timeout (in seconds) for instance update. If not specified, the operator will wait indefinitely. """ @@ -259,14 +236,10 @@ def create_table( Creates the specified Cloud Bigtable table. Raises ``google.api_core.exceptions.AlreadyExists`` if the table exists. - :type instance: Instance :param instance: The Cloud Bigtable instance that owns the table. - :type table_id: str :param table_id: The ID of the table to create in Cloud Bigtable. - :type initial_split_keys: list :param initial_split_keys: (Optional) A list of row keys in bytes to use to initially split the table. - :type column_families: dict :param column_families: (Optional) A map of columns to create. The key is the column_id str, and the value is a :class:`google.cloud.bigtable.column_family.GarbageCollectionRule`. @@ -284,11 +257,8 @@ def delete_table(self, instance_id: str, table_id: str, project_id: str) -> None Deletes the specified table in Cloud Bigtable. Raises google.api_core.exceptions.NotFound if the table does not exist. - :type instance_id: str :param instance_id: The ID of the Cloud Bigtable instance. - :type table_id: str :param table_id: The ID of the table in Cloud Bigtable. - :type project_id: str :param project_id: Optional, Google Cloud project ID where the BigTable exists. If set to None or missing, the default project_id from the Google Cloud connection is used. @@ -302,11 +272,8 @@ def update_cluster(instance: Instance, cluster_id: str, nodes: int) -> None: Updates the number of nodes in the specified Cloud Bigtable cluster. Raises google.api_core.exceptions.NotFound if the cluster does not exist. - :type instance: Instance :param instance: The Cloud Bigtable instance that owns the cluster. - :type cluster_id: str :param cluster_id: The ID of the cluster. - :type nodes: int :param nodes: The desired number of nodes. """ cluster = Cluster(cluster_id, instance) @@ -318,9 +285,7 @@ def get_column_families_for_table(instance: Instance, table_id: str) -> Dict[str """ Fetches Column Families for the specified table in Cloud Bigtable. - :type instance: Instance :param instance: The Cloud Bigtable instance that owns the table. - :type table_id: str :param table_id: The ID of the table in Cloud Bigtable to fetch Column Families from. """ @@ -333,9 +298,7 @@ def get_cluster_states_for_table(instance: Instance, table_id: str) -> Dict[str, Fetches Cluster States for the specified table in Cloud Bigtable. Raises google.api_core.exceptions.NotFound if the table does not exist. - :type instance: Instance :param instance: The Cloud Bigtable instance that owns the table. - :type table_id: str :param table_id: The ID of the table in Cloud Bigtable to fetch Cluster States from. """ diff --git a/airflow/providers/google/cloud/hooks/cloud_build.py b/airflow/providers/google/cloud/hooks/cloud_build.py index 6c738900873cf..c521a32e2e3b0 100644 --- a/airflow/providers/google/cloud/hooks/cloud_build.py +++ b/airflow/providers/google/cloud/hooks/cloud_build.py @@ -37,11 +37,9 @@ class CloudBuildHook(GoogleBaseHook): Hook for the Google Cloud Build Service. :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any.
For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -50,7 +48,6 @@ class CloudBuildHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -70,7 +67,6 @@ def _get_build_id_from_operation(self, operation: Operation) -> str: :param operation: The operation from which to extract the Cloud Build ID - :type operation: google.api_core.operation.Operation :return: Cloud Build ID :rtype: str @@ -104,18 +100,13 @@ def cancel_build( Cancels a build in progress. :param id_: The ID of the build. - :type id_: str :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :rtype: `google.cloud.devtools.cloudbuild_v1.types.Build` """ @@ -148,20 +139,14 @@ def create_build( :param build: The build resource to create. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.devtools.cloudbuild_v1.types.Build` - :type build: Union[dict, `google.cloud.devtools.cloudbuild_v1.types.Build`] :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param wait: Optional, wait for operation to finish. - :type wait: Optional[bool] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :rtype: `google.cloud.devtools.cloudbuild_v1.types.Build` """ @@ -201,18 +186,13 @@ def create_build_trigger( :param trigger: The BuildTrigger to create. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.devtools.cloudbuild_v1.types.BuildTrigger` - :type trigger: Union[dict, `google.cloud.devtools.cloudbuild_v1.types.BuildTrigger`] :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param retry: Optional, a retry object used to retry requests.
If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :rtype: `google.cloud.devtools.cloudbuild_v1.types.BuildTrigger` """ @@ -244,18 +224,13 @@ def delete_build_trigger( Deletes a BuildTrigger by its project ID and trigger ID. :param trigger_id: The ID of the BuildTrigger to delete. - :type trigger_id: str :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] """ client = self.get_conn() @@ -283,18 +258,13 @@ def get_build( Returns information about a previously requested build. :param id_: The ID of the build. - :type id_: str :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :rtype: `google.cloud.devtools.cloudbuild_v1.types.Build` """ @@ -326,18 +296,13 @@ def get_build_trigger( Returns information about a BuildTrigger. :param trigger_id: The ID of the BuildTrigger to get. - :type trigger_id: str :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :rtype: `google.cloud.devtools.cloudbuild_v1.types.BuildTrigger` """ @@ -372,21 +337,14 @@ def list_build_triggers( :param project_id: Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: str :param location: The location of the project. 
- :type location: string :param page_size: Optional, number of results to return in the list. - :type page_size: Optional[int] :param page_token: Optional, token to provide to skip to a particular spot in the list. - :type page_token: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :rtype: `google.cloud.devtools.cloudbuild_v1.types.BuildTrigger` """ @@ -429,23 +387,15 @@ def list_builds( :param project_id: Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: The location of the project. - :type location: string :param page_size: Optional, number of results to return in the list. - :type page_size: Optional[int] :param page_token: Optional, token to provide to skip to a particular spot in the list. - :type page_token: Optional[str] :param filter_: Optional, the raw filter text to constrain the results. - :type filter_: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :rtype: List[`google.cloud.devtools.cloudbuild_v1.types.Build`] """ @@ -487,20 +437,14 @@ def retry_build( using the original build request, which may or may not result in an identical build. :param id_: Build ID of the original build. - :type id_: str :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: str :param wait: Optional, wait for operation to finish. - :type wait: Optional[bool] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :rtype: `google.cloud.devtools.cloudbuild_v1.types.Build` """ @@ -541,23 +485,16 @@ def run_build_trigger( Runs a BuildTrigger at a particular source revision. :param trigger_id: The ID of the trigger. - :type trigger_id: str :param source: Source to build against this trigger. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.devtools.cloudbuild_v1.types.RepoSource` - :type source: Union[dict, `google.cloud.devtools.cloudbuild_v1.types.RepoSource`] :param project_id: Optional, Google Cloud Project project_id where the function belongs. 
If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: str :param wait: Optional, wait for operation to finish. - :type wait: Optional[bool] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :rtype: `google.cloud.devtools.cloudbuild_v1.types.Build` """ @@ -597,21 +534,15 @@ def update_build_trigger( Updates a BuildTrigger by its project ID and trigger ID. :param trigger_id: The ID of the trigger. - :type trigger_id: str :param trigger: The BuildTrigger to update. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.devtools.cloudbuild_v1.types.BuildTrigger` - :type trigger: Union[dict, `google.cloud.devtools.cloudbuild_v1.types.BuildTrigger`] :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :rtype: `google.cloud.devtools.cloudbuild_v1.types.BuildTrigger` """ diff --git a/airflow/providers/google/cloud/hooks/cloud_memorystore.py b/airflow/providers/google/cloud/hooks/cloud_memorystore.py index 1e21245d8669b..cafa5bf94e395 100644 --- a/airflow/providers/google/cloud/hooks/cloud_memorystore.py +++ b/airflow/providers/google/cloud/hooks/cloud_memorystore.py @@ -15,7 +15,16 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Hooks for Cloud Memorystore service""" +""" +Hooks for Cloud Memorystore service. + +.. spelling:: + + DataProtectionMode + FieldMask + pb + memcache +""" from typing import Dict, Optional, Sequence, Tuple, Union from google.api_core import path_template @@ -45,11 +54,9 @@ class CloudMemorystoreHook(GoogleBaseHook): keyword arguments rather than positional. :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request.
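Stepping back to the cloud_build.py hunks above, a hedged sketch of how those hook methods might chain together; the sourceless build body and the project ID are illustrative assumptions, not taken from this diff: ::

    from airflow.providers.google.cloud.hooks.cloud_build import CloudBuildHook

    hook = CloudBuildHook(gcp_conn_id="google_cloud_default")
    # A single-step build with no source; create_build blocks until the build
    # finishes because wait=True.
    build = hook.create_build(
        build={"steps": [{"name": "ubuntu", "args": ["echo", "hello"]}]},
        project_id="my-project",
        wait=True,
    )
    # get_build returns information about a previously requested build.
    final_state = hook.get_build(id_=build.id, project_id="my-project")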
@@ -58,7 +65,6 @@ class CloudMemorystoreHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -90,11 +96,8 @@ def _append_label(instance: Instance, key: str, val: str) -> Instance: :param instance: The instance proto to which the airflow-version resource label is appended - :type instance: google.cloud.container_v1.types.Cluster :param key: The label key - :type key: str :param val: The label value - :type val: str :return: The instance proto updated with the new label """ val = val.replace(".", "-").replace("+", "-") @@ -119,7 +122,6 @@ def create_instance( `__. :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance_id: Required. The logical name of the Redis instance in the customer project with the following restrictions: @@ -128,23 +130,17 @@ def create_instance( - Must be between 1-40 characters. - Must end with a number or a letter. - Must be unique within the customer project / location - :type instance_id: str :param instance: Required. A Redis [Instance] resource If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.redis_v1.types.Instance` - :type instance: Union[Dict, google.cloud.redis_v1.types.Instance] :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() if isinstance(instance, dict): @@ -192,20 +188,14 @@ def delete_instance( Deletes a specific Redis instance. Instance stops serving and data is deleted. :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Redis instance in the customer project. - :type instance: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = f"projects/{project_id}/locations/{location}/instances/{instance}" @@ -247,25 +237,18 @@ def export_instance( Redis will continue serving during this operation. :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Redis instance in the customer project.
- :type instance: str :param output_config: Required. Specify data to be exported. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.redis_v1.types.OutputConfig` - :type output_config: Union[Dict, google.cloud.redis_v1.types.OutputConfig] :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = f"projects/{project_id}/locations/{location}/instances/{instance}" @@ -295,24 +278,17 @@ def failover_instance( Memorystore for Redis instance. :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Redis instance in the customer project. - :type instance: str :param data_protection_mode: Optional. Available data protection modes that the user can choose. If it's unspecified, data protection mode will be LIMITED_DATA_LOSS by default. - :type data_protection_mode: google.cloud.redis_v1.gapic.enums.FailoverInstanceRequest .DataProtectionMode :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = f"projects/{project_id}/locations/{location}/instances/{instance}" @@ -341,20 +317,14 @@ def get_instance( Gets the details of a specific Redis instance. :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Redis instance in the customer project. - :type instance: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = f"projects/{project_id}/locations/{location}/instances/{instance}" @@ -385,25 +355,18 @@ def import_instance( When complete, the instance will contain only data from the imported file. 
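Before the parameter details, a hedged sketch of the export/import pair described here; the bucket, instance, and project names are assumptions for the sketch: ::

    from airflow.providers.google.cloud.hooks.cloud_memorystore import CloudMemorystoreHook

    hook = CloudMemorystoreHook(gcp_conn_id="google_cloud_default")
    # Export the RDB snapshot to Cloud Storage; Redis keeps serving meanwhile.
    hook.export_instance(
        location="europe-west1",
        instance="my-redis",
        output_config={"gcs_destination": {"uri": "gs://my-bucket/redis-dump.rdb"}},
        project_id="my-project",
    )
    # Re-import the same snapshot; when complete, the instance contains only
    # data from the imported file.
    hook.import_instance(
        location="europe-west1",
        instance="my-redis",
        input_config={"gcs_source": {"uri": "gs://my-bucket/redis-dump.rdb"}},
        project_id="my-project",
    )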
:param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Redis instance in the customer project. - :type instance: str :param input_config: Required. Specify data to be imported. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.redis_v1.types.InputConfig` - :type input_config: Union[Dict, google.cloud.redis_v1.types.InputConfig] :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = f"projects/{project_id}/locations/{location}/instances/{instance}" @@ -435,22 +398,16 @@ def list_instances( If it is specified as ``-`` (wildcard), then all regions available to the project are queried, and the results are aggregated. - :type location: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per-resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. - :type page_size: int :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() parent = f"projects/{project_id}/locations/{location}" @@ -488,27 +445,19 @@ def update_instance( If a dict is provided, it must be of the same form as the protobuf message :class:`~google.protobuf.field_mask_pb2.FieldMask` - :type update_mask: Union[Dict, google.protobuf.field_mask_pb2.FieldMask] :param instance: Required. Update description. Only fields specified in ``update_mask`` are updated. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.redis_v1.types.Instance` - :type instance: Union[Dict, google.cloud.redis_v1.types.Instance] :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance_id: The logical name of the Redis instance in the customer project. - :type instance_id: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried.
- :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() @@ -540,11 +489,9 @@ class CloudMemorystoreMemcachedHook(GoogleBaseHook): keyword arguments rather than positional. :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -553,7 +500,6 @@ class CloudMemorystoreMemcachedHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -587,11 +533,8 @@ def _append_label(instance: cloud_memcache.Instance, key: str, val: str) -> clou :param instance: The instance proto to which the airflow-version resource label is appended - :type instance: google.cloud.memcache_v1beta2.types.cloud_memcache.Instance :param key: The label key - :type key: str :param val: The label value - :type val: str :return: The instance proto updated with the new label """ val = val.replace(".", "-").replace("+", "-") @@ -614,26 +557,18 @@ def apply_parameters( Updates the current set of parameters on the specified nodes of the Memcached instance. :param node_ids: Nodes to which we should apply the instance-level parameter group. - :type node_ids: Sequence[str] :param apply_all: Whether to apply instance-level parameter group to all nodes. If set to true, will explicitly restrict users from specifying any nodes, and apply parameter group updates to all nodes within the instance. - :type apply_all: bool :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance_id: The logical name of the Memcached instance in the customer project. - :type instance_id: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() metadata = metadata or () @@ -669,7 +604,6 @@ def create_instance( `__. :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance_id: Required.
The logical name of the Memcached instance in the customer project with the following restrictions: @@ -678,23 +612,17 @@ def create_instance( - Must be between 1-40 characters. - Must end with a number or a letter. - Must be unique within the customer project / location - :type instance_id: str :param instance: Required. A Memcached [Instance] resource If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.memcache_v1beta2.types.cloud_memcache.Instance` - :type instance: Union[Dict, google.cloud.memcache_v1beta2.types.cloud_memcache.Instance] :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() metadata = metadata or () @@ -749,20 +677,14 @@ def delete_instance( Deletes a specific Memcached instance. Instance stops serving and data is deleted. :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Memcached instance in the customer project. - :type instance: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() metadata = metadata or () @@ -802,20 +724,14 @@ def get_instance( Gets the details of a specific Memcached instance. :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Memcached instance in the customer project. - :type instance: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() metadata = metadata or () @@ -841,18 +757,13 @@ def list_instances( If it is specified as ``-`` (wildcard), then all regions available to the project are queried, and the results are aggregated. 
- :type location: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() metadata = metadata or () @@ -890,28 +801,20 @@ def update_instance( If a dict is provided, it must be of the same form as the protobuf message :class:`~google.protobuf.field_mask_pb2.FieldMask` - :type update_mask: Union[Dict, google.protobuf.field_mask_pb2.FieldMask] :param instance: Required. Update description. Only fields specified in ``update_mask`` are updated. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.memcache_v1beta2.types.cloud_memcache.Instance` - :type instance: Union[Dict, google.cloud.memcache_v1beta2.types.cloud_memcache.Instance] :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance_id: The logical name of the Memcached instance in the customer project. - :type instance_id: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete.
Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() metadata = metadata or () diff --git a/airflow/providers/google/cloud/hooks/cloud_sql.py b/airflow/providers/google/cloud/hooks/cloud_sql.py index 1b676639048c4..788b1c23459dd 100644 --- a/airflow/providers/google/cloud/hooks/cloud_sql.py +++ b/airflow/providers/google/cloud/hooks/cloud_sql.py @@ -76,14 +76,10 @@ class CloudSQLHook(GoogleBaseHook): keyword arguments rather than positional. :param api_version: The version of the API. - :type api_version: str :param gcp_conn_id: The Airflow connection used for GCP credentials. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. - :type delegate_to: Optional[str] :param impersonation_chain: Optional service account to impersonate using short-term credentials. - :type impersonation_chain: Optional[str] """ conn_name_attr = 'gcp_conn_id' @@ -124,10 +120,8 @@ def get_instance(self, instance: str, project_id: str) -> dict: Retrieves a resource containing information about a Cloud SQL instance. :param instance: Database instance ID. This does not include the project ID. - :type instance: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: A Cloud SQL instance resource. :rtype: dict """ @@ -146,10 +140,8 @@ def create_instance(self, body: Dict, project_id: str) -> None: :param body: Body required by the Cloud SQL insert API, as described in https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert#request-body. - :type body: dict :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ response = ( @@ -172,12 +164,9 @@ def patch_instance(self, body: dict, instance: str, project_id: str) -> None: :param body: Body required by the Cloud SQL patch API, as described in https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body. - :type body: dict :param instance: Cloud SQL instance ID. This does not include the project ID. - :type instance: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ response = ( @@ -197,9 +186,7 @@ def delete_instance(self, instance: str, project_id: str) -> None: :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param instance: Cloud SQL instance ID. This does not include the project ID. - :type instance: str :return: None """ response = ( @@ -217,12 +204,9 @@ def get_database(self, instance: str, database: str, project_id: str) -> dict: Retrieves a database resource from a Cloud SQL instance. :param instance: Database instance ID. This does not include the project ID. - :type instance: str :param database: Name of the database in the instance. - :type database: str :param project_id: Project ID of the project that contains the instance.
If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: A Cloud SQL database resource, as described in https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases#resource. :rtype: dict @@ -241,13 +225,10 @@ def create_database(self, instance: str, body: Dict, project_id: str) -> None: Creates a new database inside a Cloud SQL instance. :param instance: Database instance ID. This does not include the project ID. - :type instance: str :param body: The request body, as described in https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body. - :type body: dict :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ response = ( @@ -275,15 +256,11 @@ def patch_database( See https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch. :param instance: Database instance ID. This does not include the project ID. - :type instance: str :param database: Name of the database to be updated in the instance. - :type database: str :param body: The request body, as described in https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body. - :type body: dict :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ response = ( @@ -302,12 +279,9 @@ def delete_database(self, instance: str, database: str, project_id: str) -> None Deletes a database from a Cloud SQL instance. :param instance: Database instance ID. This does not include the project ID. - :type instance: str :param database: Name of the database to be deleted in the instance. - :type database: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ response = ( @@ -328,13 +302,10 @@ def export_instance(self, instance: str, body: Dict, project_id: str) -> None: :param instance: Database instance ID of the Cloud SQL instance. This does not include the project ID. - :type instance: str :param body: The request body, as described in https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/export#request-body - :type body: dict :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ response = ( @@ -354,13 +325,10 @@ def import_instance(self, instance: str, body: Dict, project_id: str) -> None: :param instance: Database instance ID. This does not include the project ID. - :type instance: str :param body: The request body, as described in https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/import#request-body - :type body: dict :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ try: @@ -381,9 +349,7 @@ def _wait_for_operation_to_complete(self, project_id: str, operation_name: str) asynchronous call. :param project_id: Project ID of the project that contains the instance.
- :type project_id: str :param operation_name: Name of the operation. - :type operation_name: str :return: None """ service = self.get_conn() @@ -427,27 +393,21 @@ class CloudSqlProxyRunner(LoggingMixin): :param path_prefix: Unique path prefix where proxy will be downloaded and directories created for unix sockets. - :type path_prefix: str :param instance_specification: Specification of the instance to connect the proxy to. It should be specified in the form that is described in https://cloud.google.com/sql/docs/mysql/sql-proxy#multiple-instances in the -instances parameter (typically in the form of ``<project>:<region>:<instance>`` for UNIX socket connections and in the form of ``<project>:<region>:<instance>=tcp:<port>`` for TCP connections). - :type instance_specification: str :param gcp_conn_id: ID of the Google Cloud connection to use for authentication - :type gcp_conn_id: str :param project_id: Optional ID of the Google Cloud project to connect to; it overrides the default project id taken from the Google Cloud connection. - :type project_id: str :param sql_proxy_version: Specific version of SQL proxy to download (for example 'v1.13'). By default, the latest version is downloaded. - :type sql_proxy_version: str :param sql_proxy_binary_path: If specified, the proxy is run from this path rather than from a dynamically generated one. This means that if the binary is not present at that path, it will be downloaded there. - :type sql_proxy_binary_path: str """ def __init__( @@ -719,13 +679,10 @@ class CloudSQLDatabaseHook(BaseHook): * **sslrootcert** - Path to server's certificate to authenticate when SSL is used. :param gcp_cloudsql_conn_id: URL of the connection - :type gcp_cloudsql_conn_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud for cloud-sql-proxy authentication. - :type gcp_conn_id: str :param default_gcp_project_id: Default project id used if project_id not specified in the connection URL - :type default_gcp_project_id: str """ conn_name_attr = 'gcp_cloudsql_conn_id' diff --git a/airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py b/airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py index 0d9dba43c6d4e..8cd67ca1c7f8a 100644 --- a/airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +++ b/airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py @@ -159,7 +159,6 @@ def create_transfer_job(self, body: dict) -> dict: :param body: (Required) A request body, as described in https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/patch#request-body - :type body: dict :return: transfer job. See: https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs#TransferJob @@ -205,11 +204,9 @@ def get_transfer_job(self, job_name: str, project_id: str) -> dict: Transfer Service. :param job_name: (Required) Name of the job to be fetched - :type job_name: str :param project_id: (Optional) the ID of the project that owns the Transfer Job. If set to None or missing, the default project_id from the Google Cloud connection is used.
- :type project_id: str :return: Transfer Job :rtype: dict """ @@ -227,7 +224,6 @@ def list_transfer_job(self, request_filter: Optional[dict] = None, **kwargs) -> :param request_filter: (Required) A request filter, as described in https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/list#body.QUERY_PARAMETERS.filter - :type request_filter: dict :return: List of Transfer Jobs :rtype: list[dict] """ @@ -261,11 +257,9 @@ def enable_transfer_job(self, job_name: str, project_id: str) -> dict: New transfers will be performed based on the schedule. :param job_name: (Required) Name of the job to be updated - :type job_name: str :param project_id: (Optional) the ID of the project that owns the Transfer Job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: If successful, TransferJob. :rtype: dict """ @@ -288,10 +282,8 @@ def update_transfer_job(self, job_name: str, body: dict) -> dict: Updates a transfer job that runs periodically. :param job_name: (Required) Name of the job to be updated - :type job_name: str :param body: A request body, as described in https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/patch#request-body - :type body: dict :return: If successful, TransferJob. :rtype: dict """ @@ -312,11 +304,9 @@ def delete_transfer_job(self, job_name: str, project_id: str) -> None: 30 days after soft delete. :param job_name: (Required) Name of the job to be deleted - :type job_name: str :param project_id: (Optional) the ID of the project that owns the Transfer Job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :rtype: None """ ( @@ -338,7 +328,6 @@ def cancel_transfer_operation(self, operation_name: str) -> None: Cancels a transfer operation in Google Storage Transfer Service. :param operation_name: Name of the transfer operation. - :type operation_name: str :rtype: None """ self.get_conn().transferOperations().cancel(name=operation_name).execute(num_retries=self.num_retries) @@ -348,7 +337,6 @@ def get_transfer_operation(self, operation_name: str) -> dict: Gets a transfer operation in Google Storage Transfer Service. :param operation_name: (Required) Name of the transfer operation. - :type operation_name: str :return: transfer operation See: https://cloud.google.com/storage-transfer/docs/reference/rest/v1/Operation @@ -373,7 +361,6 @@ def list_transfer_operations(self, request_filter: Optional[dict] = None, **kwar in the connection See: :doc:`/connections/gcp` - :type request_filter: dict :return: transfer operation :rtype: list[dict] """ @@ -414,7 +401,6 @@ def pause_transfer_operation(self, operation_name: str) -> None: Pauses a transfer operation in Google Storage Transfer Service. :param operation_name: (Required) Name of the transfer operation. - :type operation_name: str :rtype: None """ self.get_conn().transferOperations().pause(name=operation_name).execute(num_retries=self.num_retries) @@ -424,7 +410,6 @@ def resume_transfer_operation(self, operation_name: str) -> None: Resumes a transfer operation in Google Storage Transfer Service. :param operation_name: (Required) Name of the transfer operation.
- :type operation_name: str :rtype: None """ self.get_conn().transferOperations().resume(name=operation_name).execute(num_retries=self.num_retries) @@ -441,14 +426,11 @@ def wait_for_transfer_job( :param job: Transfer job See: https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs#TransferJob - :type job: dict :param expected_statuses: State that is expected See: https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferOperations#Status - :type expected_statuses: set[str] :param timeout: Time in which the operation must end in seconds. If not specified, defaults to 60 seconds. - :type timeout: Optional[Union[float, timedelta]] :rtype: None """ expected_statuses = ( @@ -493,11 +475,9 @@ def operations_contain_expected_statuses( throw :class:`airflow.exceptions.AirflowException`. :param operations: (Required) List of transfer operations to check. - :type operations: list[dict] :param expected_statuses: (Required) status that is expected See: https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferOperations#Status - :type expected_statuses: set[str] :return: If there is an operation with the expected state in the operation list, returns true, :raises: airflow.exceptions.AirflowException If it encounters operations diff --git a/airflow/providers/google/cloud/hooks/compute.py b/airflow/providers/google/cloud/hooks/compute.py index 3f5d56ef4c253..86d5e808e41bc 100644 --- a/airflow/providers/google/cloud/hooks/compute.py +++ b/airflow/providers/google/cloud/hooks/compute.py @@ -79,13 +79,10 @@ def start_instance(self, zone: str, resource_id: str, project_id: str) -> None: Must be called with keyword arguments rather than positional. :param zone: Google Cloud zone where the instance exists - :type zone: str :param resource_id: Name of the Compute Engine instance resource - :type resource_id: str :param project_id: Optional, Google Cloud project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ response = ( @@ -107,13 +104,10 @@ def stop_instance(self, zone: str, resource_id: str, project_id: str) -> None: Must be called with keyword arguments rather than positional. :param zone: Google Cloud zone where the instance exists - :type zone: str :param resource_id: Name of the Compute Engine instance resource - :type resource_id: str :param project_id: Optional, Google Cloud project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ response = ( @@ -135,17 +129,13 @@ def set_machine_type(self, zone: str, resource_id: str, body: dict, project_id: Must be called with keyword arguments rather than positional. :param zone: Google Cloud zone where the instance exists. - :type zone: str :param resource_id: Name of the Compute Engine instance resource - :type resource_id: str :param body: Body required by the Compute Engine setMachineType API, as described in https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType - :type body: dict :param project_id: Optional, Google Cloud project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. 
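Since the transfer-service hunks above leave wait_for_transfer_job and operations_contain_expected_statuses semantically untouched, a hedged sketch of the usual create-then-wait sequence may help; the job body below is a placeholder whose fields must follow the transferJobs REST schema linked in the docstrings, and GcpTransferOperationStatus is assumed importable from the same module as in this provider version:

    from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
        CloudDataTransferServiceHook,
        GcpTransferOperationStatus,
    )

    hook = CloudDataTransferServiceHook(gcp_conn_id="google_cloud_default")
    # Illustrative skeleton only; transferSpec, schedule, etc. are elided here.
    transfer_job_body = {"description": "example", "status": "ENABLED", "projectId": "my-project"}
    job = hook.create_transfer_job(body=transfer_job_body)
    # Block until the job's latest operation reaches SUCCESS, or time out after 300 s.
    hook.wait_for_transfer_job(job, expected_statuses={GcpTransferOperationStatus.SUCCESS}, timeout=300)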
- :type project_id: str :return: None """ response = self._execute_set_machine_type(zone, resource_id, body, project_id) @@ -171,11 +161,9 @@ def get_instance_template(self, resource_id: str, project_id: str) -> dict: Must be called with keyword arguments rather than positional. :param resource_id: Name of the instance template - :type resource_id: str :param project_id: Optional, Google Cloud project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: Instance template representation as object according to https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates :rtype: dict @@ -201,16 +189,13 @@ def insert_instance_template( :param body: Instance template representation as object according to https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates - :type body: dict :param request_id: Optional, unique request_id that you might add to achieve full idempotence (for example when client call times out repeating the request with the same request id will not create a new instance template again) It should be in UUID format as defined in RFC 4122 - :type request_id: str :param project_id: Optional, Google Cloud project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ response = ( @@ -237,13 +222,10 @@ def get_instance_group_manager( Must be called with keyword arguments rather than positional. :param zone: Google Cloud zone where the Instance Group Manager exists - :type zone: str :param resource_id: Name of the Instance Group Manager - :type resource_id: str :param project_id: Optional, Google Cloud project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: Instance group manager representation as object according to https://cloud.google.com/compute/docs/reference/rest/beta/instanceGroupManagers :rtype: dict @@ -270,22 +252,17 @@ def patch_instance_group_manager( Must be called with keyword arguments rather than positional. :param zone: Google Cloud zone where the Instance Group Manager exists - :type zone: str :param resource_id: Name of the Instance Group Manager - :type resource_id: str :param body: Instance Group Manager representation as json-merge-patch object according to https://cloud.google.com/compute/docs/reference/rest/beta/instanceTemplates/patch - :type body: dict :param request_id: Optional, unique request_id that you might add to achieve full idempotence (for example when client call times out repeating the request with the same request id will not create a new instance template again). It should be in UUID format as defined in RFC 4122 - :type request_id: str :param project_id: Optional, Google Cloud project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ response = ( @@ -313,9 +290,7 @@ def _wait_for_operation_to_complete( Waits for the named operation to complete - checks status of the async call. 
:param operation_name: name of the operation - :type operation_name: str :param zone: optional region of the request (might be None for global operations) - :type zone: str :return: None """ service = self.get_conn() @@ -368,13 +343,10 @@ def get_instance_info(self, zone: str, resource_id: str, project_id: str) -> Dic Gets instance information. :param zone: Google Cloud zone where the instance exists - :type zone: str :param resource_id: Name of the Compute Engine instance resource - :type resource_id: str :param project_id: Optional, Google Cloud project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str """ instance_info = ( self.get_conn() @@ -392,15 +364,11 @@ def get_instance_address( Return network address associated with the instance. :param zone: Google Cloud zone where the instance exists - :type zone: str :param resource_id: Name of the Compute Engine instance resource - :type resource_id: str :param project_id: Optional, Google Cloud project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param use_internal_ip: If true, return private IP address. - :type use_internal_ip: bool """ instance_info = self.get_instance_info(project_id=project_id, resource_id=resource_id, zone=zone) if use_internal_ip: @@ -419,15 +387,11 @@ def set_instance_metadata( Set instance metadata. :param zone: Google Cloud zone where the instance exists - :type zone: str :param resource_id: Name of the Compute Engine instance resource - :type resource_id: str :param metadata: The new instance metadata. - :type metadata: Dict :param project_id: Optional, Google Cloud project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str """ response = ( self.get_conn() diff --git a/airflow/providers/google/cloud/hooks/compute_ssh.py b/airflow/providers/google/cloud/hooks/compute_ssh.py index cf5135fff5850..9a930d5fa36d2 100644 --- a/airflow/providers/google/cloud/hooks/compute_ssh.py +++ b/airflow/providers/google/cloud/hooks/compute_ssh.py @@ -67,33 +67,21 @@ class ComputeEngineSSHHook(SSHHook): Hook to connect to a remote instance in Compute Engine :param instance_name: The name of the Compute Engine instance - :type instance_name: str :param zone: The zone of the Compute Engine instance - :type zone: str :param user: The name of the user on which the login attempt will be made - :type user: str :param project_id: The project ID of the remote instance - :type project_id: str :param gcp_conn_id: The connection id to use when fetching connection info - :type gcp_conn_id: str :param hostname: The hostname of the target instance. If it is not passed, it will be detected automatically. - :type hostname: str :param use_iap_tunnel: Whether to connect through IAP tunnel - :type use_iap_tunnel: bool :param use_internal_ip: Whether to connect using internal IP - :type use_internal_ip: bool :param use_oslogin: Whether to manage keys using OsLogin API. If false, keys are managed using instance metadata - :type use_oslogin: bool :param expire_time: The maximum amount of time in seconds before the private key expires - :type expire_time: int :param gcp_conn_id: The connection id to use when fetching connection information - :type gcp_conn_id: str :param delegate_to: The account to impersonate, if any.
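The compute.py changes above are likewise docstring-only. As a reference point, a minimal sketch of the keyword-only calling convention those docstrings describe; the zone, instance name, and machine type values are made up:

    from airflow.providers.google.cloud.hooks.compute import ComputeEngineHook

    hook = ComputeEngineHook(gcp_conn_id="google_cloud_default")
    # All three methods must be called with keyword arguments, per the docstrings above.
    hook.stop_instance(zone="europe-west1-b", resource_id="example-vm", project_id="my-project")
    hook.set_machine_type(
        zone="europe-west1-b",
        resource_id="example-vm",
        body={"machineType": "zones/europe-west1-b/machineTypes/n1-standard-2"},
        project_id="my-project",
    )
    hook.start_instance(zone="europe-west1-b", resource_id="example-vm", project_id="my-project")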
For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str """ conn_name_attr = 'gcp_conn_id' diff --git a/airflow/providers/google/cloud/hooks/datacatalog.py b/airflow/providers/google/cloud/hooks/datacatalog.py index 8ab601b3965a7..484d5f4e541e3 100644 --- a/airflow/providers/google/cloud/hooks/datacatalog.py +++ b/airflow/providers/google/cloud/hooks/datacatalog.py @@ -40,11 +40,9 @@ class CloudDataCatalogHook(GoogleBaseHook): Hook for Google Cloud Data Catalog Service. :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -53,7 +51,6 @@ class CloudDataCatalogHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -95,26 +92,18 @@ def create_entry( Currently only entries of 'FILESET' type can be created. :param location: Required. The location of the entry to create. - :type location: str :param entry_group: Required. Entry group ID under which the entry is created. - :type entry_group: str :param entry_id: Required. The id of the entry to create. - :type entry_id: str :param entry: Required. The entry to create. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.Entry` - :type entry: Union[Dict, google.cloud.datacatalog_v1beta1.types.Entry] :param project_id: The ID of the Google Cloud project that owns the entry. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If set to ``None`` or missing, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() parent = f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}" @@ -143,27 +132,20 @@ def create_entry_group( Creates an EntryGroup. :param location: Required. The location of the entry group to create. - :type location: str :param entry_group_id: Required. The id of the entry group to create. The id must begin with a letter or underscore, contain only English letters, numbers and underscores, and be at most 64 characters. - :type entry_group_id: str :param entry_group: The entry group to create. Defaults to an empty entry group. 
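For the compute_ssh.py docstring above, a sketch of how the hook is typically wired into an SSHOperator; the task id, instance, and zone are illustrative:

    from airflow.providers.google.cloud.hooks.compute_ssh import ComputeEngineSSHHook
    from airflow.providers.ssh.operators.ssh import SSHOperator

    run_on_vm = SSHOperator(
        task_id="run_on_vm",
        ssh_hook=ComputeEngineSSHHook(
            instance_name="example-vm",
            zone="europe-west1-b",
            project_id="my-project",
            use_iap_tunnel=True,   # tunnel through IAP instead of a public IP
            use_oslogin=False,     # manage keys via instance metadata
        ),
        command="echo hello",
    )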
If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.EntryGroup` - :type entry_group: Union[Dict, google.cloud.datacatalog_v1beta1.types.EntryGroup] :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() parent = f"projects/{project_id}/locations/{location}" @@ -196,28 +178,19 @@ def create_tag( Creates a tag on an entry. :param location: Required. The location of the tag to create. - :type location: str :param entry_group: Required. Entry group ID under which the tag is created. - :type entry_group: str :param entry: Required. Entry ID under which the tag is created. - :type entry: str :param tag: Required. The tag to create. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.Tag` - :type tag: Union[Dict, google.cloud.datacatalog_v1beta1.types.Tag] :param template_id: Required. Template ID used to create tag - :type template_id: Optional[str] :param project_id: The ID of the Google Cloud project that owns the tag. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() if template_id: @@ -268,25 +241,18 @@ def create_tag_template( Creates a tag template. :param location: Required. The location of the tag template to create. - :type location: str :param tag_template_id: Required. The id of the tag template to create. - :type tag_template_id: str :param tag_template: Required. The tag template to create. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplate` - :type tag_template: Union[Dict, google.cloud.datacatalog_v1beta1.types.TagTemplate] :param project_id: The ID of the Google Cloud project that owns the tag template. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt.
- :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() parent = f"projects/{project_id}/locations/{location}" @@ -334,30 +300,22 @@ def create_tag_template_field( Creates a field in a tag template. :param location: Required. The location of the tag template field to create. - :type location: str :param tag_template: Required. The id of the tag template to create. - :type tag_template: str :param tag_template_field_id: Required. The ID of the tag template field to create. Field ids can contain letters (both uppercase and lowercase), numbers (0-9), underscores (\_) and dashes (-). Field IDs must be at least 1 character long and at most 128 characters long. Field IDs must also be unique within their template. - :type tag_template_field_id: str :param tag_template_field: Required. The tag template field to create. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplateField` - :type tag_template_field: Union[Dict, google.cloud.datacatalog_v1beta1.types.TagTemplateField] :param project_id: The ID of the Google Cloud project that owns the tag template field. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() parent = f"projects/{project_id}/locations/{location}/tagTemplates/{tag_template}" @@ -394,22 +352,15 @@ def delete_entry( Deletes an existing entry. :param location: Required. The location of the entry to delete. - :type location: str :param entry_group: Required. Entry group ID for entries that is deleted. - :type entry_group: str :param entry: Entry ID that is deleted. - :type entry: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}/entries/{entry}" @@ -433,20 +384,14 @@ def delete_entry_group( Only entry groups that do not contain entries can be deleted. :param location: Required. The location of the entry group to delete. - :type location: str :param entry_group: Entry group ID that is deleted. - :type entry_group: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. 
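To make the create_tag_template/create_tag pairing above concrete, a hedged sketch using the dict form of the protobuf messages; the template id, field names, and project are assumptions, and the payload shapes mirror the datacatalog_v1beta1 types linked in the docstrings:

    from airflow.providers.google.cloud.hooks.datacatalog import CloudDataCatalogHook

    hook = CloudDataCatalogHook(gcp_conn_id="google_cloud_default")
    # Dict payloads must mirror the datacatalog_v1beta1 protobuf messages.
    hook.create_tag_template(
        location="us-central1",
        tag_template_id="env_template",
        tag_template={
            "display_name": "Environment",
            "fields": {"env": {"type": {"primitive_type": "STRING"}}},
        },
        project_id="my-project",
    )
    hook.create_tag(
        location="us-central1",
        entry_group="my_group",
        entry="my_entry",
        template_id="env_template",
        tag={"fields": {"env": {"string_value": "prod"}}},
        project_id="my-project",
    )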
- :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}" @@ -473,24 +418,16 @@ def delete_tag( Deletes a tag. :param location: Required. The location of the tag to delete. - :type location: str :param entry_group: Entry group ID for tag that is deleted. - :type entry_group: str :param entry: Entry ID for tag that is deleted. - :type entry: str :param tag: Identifier for TAG that is deleted. - :type tag: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = ( @@ -516,24 +453,17 @@ def delete_tag_template( Deletes a tag template and all tags using the template. :param location: Required. The location of the tag template to delete. - :type location: str :param tag_template: ID for tag template that is deleted. - :type tag_template: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param force: Required. Currently, this field must always be set to ``true``. This confirms the deletion of any possible tags using this template. ``force = false`` will be supported in the future. - :type force: bool :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = f"projects/{project_id}/locations/{location}/tagTemplates/{tag_template}" @@ -560,24 +490,16 @@ def delete_tag_template_field( Deletes a field in a tag template and all uses of that field. :param location: Required. The location of the tag template to delete. - :type location: str :param tag_template: Tag Template ID for tag template field that is deleted. - :type tag_template: str :param field: Name of field that is deleted. - :type field: str :param force: Required. This confirms the deletion of this field from any tags using this field. 
- :type force: bool :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = f"projects/{project_id}/locations/{location}/tagTemplates/{tag_template}/fields/{field}" @@ -603,22 +525,15 @@ def get_entry( Gets an entry. :param location: Required. The location of the entry to get. - :type location: str :param entry_group: Required. The entry group of the entry to get. - :type entry_group: str :param entry: The ID of the entry to get. - :type entry: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}/entries/{entry}" @@ -646,25 +561,18 @@ def get_entry_group( Gets an entry group. :param location: Required. The location of the entry group to get. - :type location: str :param entry_group: The ID of the entry group to get. - :type entry_group: str :param read_mask: The fields to return. If not set or empty, all fields are returned. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.protobuf.field_mask_pb2.FieldMask` - :type read_mask: Union[Dict, google.protobuf.field_mask_pb2.FieldMask] :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}" @@ -696,20 +604,14 @@ def get_tag_template( Gets a tag template. :param location: Required. The location of the tag template to get. - :type location: str :param tag_template: Required. The ID of the tag template to get. 
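A corresponding retrieval sketch for the get_entry and get_tag_template methods above; the identifiers are again illustrative and reuse the names from the creation sketch earlier:

    from airflow.providers.google.cloud.hooks.datacatalog import CloudDataCatalogHook

    hook = CloudDataCatalogHook(gcp_conn_id="google_cloud_default")
    entry = hook.get_entry(
        location="us-central1", entry_group="my_group", entry="my_entry", project_id="my-project"
    )
    template = hook.get_tag_template(
        location="us-central1", tag_template="env_template", project_id="my-project"
    )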
- :type tag_template: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = f"projects/{project_id}/locations/{location}/tagTemplates/{tag_template}" @@ -740,26 +642,18 @@ def list_tags( Lists the tags on an Entry. :param location: Required. The location of the tags to get. - :type location: str :param entry_group: Required. The entry group of the tags to get. - :type entry_group: str :param entry: Required. The entry of the tags to get. - :type entry: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per- resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. - :type page_size: int :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() parent = f"projects/{project_id}/locations/{location}/entryGroups/{entry_group}/entries/{entry}" @@ -793,24 +687,16 @@ def get_tag_for_template_name( Gets a tag with a specific template for a specific entry. :param location: Required. The location which contains the entry to search for. - :type location: str :param entry_group: The entry group ID which contains the entry to search for. - :type entry_group: str :param entry: The name of the entry to search for. - :type entry: str :param template_name: The name of the template that will be the search criterion. - :type template_name: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ tags_list = self.list_tags( location=location, @@ -842,17 +728,12 @@ def lookup_entry( represents.
See: https://cloud.google.com/apis/design/resource\_names#full\_resource\_name. Full names are case-sensitive. - :type linked_resource: str :param sql_resource: The SQL name of the entry. SQL names are case-sensitive. - :type sql_resource: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() if linked_resource and sql_resource: @@ -897,26 +778,18 @@ def rename_tag_template_field( Renames a field in a tag template. :param location: Required. The location of the tag template field to rename. - :type location: str :param tag_template: The tag template ID for field that is renamed. - :type tag_template: str :param field: Required. The old ID of this tag template field. For example, ``my_old_field``. - :type field: str :param new_tag_template_field_id: Required. The new ID of this tag template field. For example, ``my_new_field``. - :type new_tag_template_field_id: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() name = f"projects/{project_id}/locations/{location}/tagTemplates/{tag_template}/fields/{field}" @@ -960,7 +833,6 @@ def search_catalog( If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.Scope` - :type scope: Union[Dict, google.cloud.datacatalog_v1beta1.types.SearchCatalogRequest.Scope] :param query: Required. The query string in search query syntax. The query must be non-empty. Query strings can be as simple as "x" or more qualified as: @@ -972,11 +844,9 @@ def search_catalog( Note: Query tokens need to have a minimum of 3 characters for substring matching to work correctly. See `Data Catalog Search Syntax `__ for more information. - :type query: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per-resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. - :type page_size: int :param order_by: Specifies the ordering of results, currently supported case-sensitive choices are: - ``relevance``, only supports descending - ``last_modified_timestamp [asc|desc]``, defaults to descending if not specified If not specified, defaults to ``relevance`` descending. - :type order_by: str :param retry: A retry object used to retry requests.
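The lookup_entry body above rejects calls that pass both linked_resource and sql_resource, so a hedged usage sketch looks like this; the BigQuery resource path is an invented example:

    from airflow.providers.google.cloud.hooks.datacatalog import CloudDataCatalogHook

    hook = CloudDataCatalogHook(gcp_conn_id="google_cloud_default")
    # Pass exactly one of linked_resource / sql_resource; the hook raises if both are set.
    entry = hook.lookup_entry(
        linked_resource="//bigquery.googleapis.com/projects/my-project/datasets/my_ds/tables/my_table"
    )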
If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() @@ -1034,30 +900,21 @@ def update_entry( If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.Entry` - :type entry: Union[Dict, google.cloud.datacatalog_v1beta1.types.Entry] :param update_mask: The fields to update on the entry. If absent or empty, all modifiable fields are updated. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.protobuf.field_mask_pb2.FieldMask` - :type update_mask: Union[Dict, google.protobuf.field_mask_pb2.FieldMask] :param location: Required. The location of the entry to update. - :type location: str :param entry_group: The entry group ID for the entry that is being updated. - :type entry_group: str :param entry_id: The entry ID that is being updated. - :type entry_id: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() if project_id and location and entry_group and entry_id: @@ -1113,32 +970,22 @@ def update_tag( If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.Tag` - :type tag: Union[Dict, google.cloud.datacatalog_v1beta1.types.Tag] :param update_mask: The fields to update on the Tag. If absent or empty, all modifiable fields are updated. Currently the only modifiable field is the field ``fields``. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.FieldMask` - :type update_mask: Union[Dict, google.protobuf.field_mask_pb2.FieldMask] :param location: Required. The location of the tag to rename. - :type location: str :param entry_group: The entry group ID for the tag that is being updated. - :type entry_group: str :param entry: The entry ID for the tag that is being updated. - :type entry: str :param tag_id: The tag ID that is being updated. - :type tag_id: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. 
- :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() if project_id and location and entry_group and entry and tag_id: @@ -1197,29 +1044,21 @@ def update_tag_template( If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplate` - :type tag_template: Union[Dict, google.cloud.datacatalog_v1beta1.types.TagTemplate] :param update_mask: The field mask specifies the parts of the template to overwrite. If absent or empty, all of the allowed fields above will be updated. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.protobuf.field_mask_pb2.FieldMask` - :type update_mask: Union[Dict, google.protobuf.field_mask_pb2.FieldMask] :param location: Required. The location of the tag template to rename. - :type location: str :param tag_template_id: Optional. The tag template ID for the entry that is being updated. - :type tag_template_id: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() if project_id and location and tag_template: @@ -1275,7 +1114,6 @@ def update_tag_template_field( If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplateField` - :type tag_template_field: Union[Dict, google.cloud.datacatalog_v1beta1.types.TagTemplateField] :param update_mask: The field mask specifies the parts of the template to be updated. Allowed fields: - ``display_name`` @@ -1288,26 +1126,17 @@ def update_tag_template_field( If a dict is provided, it must be of the same form as the protobuf message :class:`~google.protobuf.field_mask_pb2.FieldMask` - :type update_mask: Union[Dict, google.protobuf.field_mask_pb2.FieldMask] :param tag_template_field_name: Optional. The name of the tag template field to rename. - :type tag_template_field_name: str :param location: Optional. The location of the tag to rename. - :type location: str :param tag_template: Optional. The tag template ID for tag template field to rename. - :type tag_template: str :param tag_template_field_id: Optional. The ID of tag template field to rename. - :type tag_template_field_id: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. 
- :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() if project_id and location and tag_template and tag_template_field_id: diff --git a/airflow/providers/google/cloud/hooks/dataflow.py b/airflow/providers/google/cloud/hooks/dataflow.py index c2c19ddaa32a3..6d4492a8cf9d8 100644 --- a/airflow/providers/google/cloud/hooks/dataflow.py +++ b/airflow/providers/google/cloud/hooks/dataflow.py @@ -56,7 +56,6 @@ def process_line_and_extract_dataflow_job_id_callback( :py:class:`~airflow.providers.apache.beam.hooks.beam.BeamCommandRunner` :param on_new_job_id_callback: Callback called when the job ID is known - :type on_new_job_id_callback: callback """ def _process_line_and_extract_job_id( @@ -251,7 +250,6 @@ def fetch_job_by_id(self, job_id: str) -> dict: Helper method to fetch the job with the specified Job ID. :param job_id: Job ID to get. - :type job_id: str :return: the Job :rtype: dict """ @@ -272,7 +270,6 @@ def fetch_job_metrics_by_id(self, job_id: str) -> dict: Helper method to fetch the job metrics with the specified Job ID. :param job_id: Job ID to get. - :type job_id: str :return: the JobMetrics. See: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/JobMetrics :rtype: dict @@ -293,7 +290,6 @@ def _fetch_list_job_messages_responses(self, job_id: str) -> Generator[dict, Non Helper method to fetch ListJobMessagesResponse with the specified Job ID. :param job_id: Job ID to get. - :type job_id: str :return: yields the ListJobMessagesResponse. See: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/ListJobMessagesResponse :rtype: Generator[dict, None, None] @@ -323,7 +319,6 @@ def fetch_job_messages_by_id(self, job_id: str) -> List[dict]: Helper method to fetch the job messages with the specified Job ID. :param job_id: Job ID to get. - :type job_id: str :return: the list of JobMessages. See: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/ListJobMessagesResponse#JobMessage :rtype: List[dict] @@ -338,7 +333,6 @@ def fetch_job_autoscaling_events_by_id(self, job_id: str) -> List[dict]: Helper method to fetch the job autoscaling events with the specified Job ID. :param job_id: Job ID to get. - :type job_id: str :return: the list of AutoscalingEvents. See: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/ListJobMessagesResponse#autoscalingevent :rtype: List[dict] @@ -440,7 +434,6 @@ def get_jobs(self, refresh: bool = False) -> List[dict]: Returns Dataflow jobs. :param refresh: Forces the latest data to be fetched. - :type refresh: bool :return: list of jobs :rtype: list """ @@ -564,23 +557,15 @@ def start_java_dataflow( Starts Dataflow java job. :param job_name: The name of the job. - :type job_name: str :param variables: Variables passed to the job. - :type variables: dict :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. :param jar: Name of the jar for the job - :type job_class: str :param job_class: Name of the java class for the job. - :type job_class: str :param append_job_name: True if unique suffix has to be appended to job name. - :type append_job_name: bool :param multiple_jobs: True if to check for multiple job in dataflow - :type multiple_jobs: bool :param on_new_job_id_callback: Callback called when the job ID is known. - :type on_new_job_id_callback: callable :param location: Job location. 
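The process_line_and_extract_dataflow_job_id_callback helper near the top of the dataflow.py diff wraps a user callback so it can be fed runner log output. A small sketch, under the assumption that the returned callable consumes one log line at a time (as when it is passed to a Beam command runner's process_line_callback); the sample line mimics the Java runner's "Submitted job" output and is illustrative only:

    from airflow.providers.google.cloud.hooks.dataflow import (
        process_line_and_extract_dataflow_job_id_callback,
    )

    def remember_job_id(job_id: str) -> None:
        print(f"Dataflow job id: {job_id}")

    # The returned callable scans each line and fires the callback once an id appears.
    on_line = process_line_and_extract_dataflow_job_id_callback(remember_job_id)
    on_line("INFO: Submitted job: 2021-01-01_00_00_00-1234567890123456789")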
- :type location: str """ warnings.warn( """"This method is deprecated. @@ -634,7 +619,6 @@ def start_template_dataflow( Starts Dataflow template job. :param job_name: The name of the job. - :type job_name: str :param variables: Map of job runtime environment options. It will update environment argument if passed. .. seealso:: For more information on possible configurations, look at the API documentation `https://cloud.google.com/dataflow/pipelines/specifying-exec-params `__ - :type variables: dict :param parameters: Parameters for the template - :type parameters: dict :param dataflow_template: GCS path to the template. - :type dataflow_template: str :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. :param append_job_name: True if unique suffix has to be appended to job name. - :type append_job_name: bool :param on_new_job_id_callback: (Deprecated) Callback called when the Job is known. - :type on_new_job_id_callback: callable :param on_new_job_callback: Callback called when the Job is known. - :type on_new_job_callback: callable :param location: Job location. - :type location: str - :type environment: Optional, Map of job runtime environment options. .. seealso:: For more information on possible configurations, look at the API documentation `https://cloud.google.com/dataflow/pipelines/specifying-exec-params `__ - :type environment: Optional[dict] """ name = self.build_dataflow_job_name(job_name, append_job_name) @@ -762,10 +737,8 @@ def start_flex_template( :param body: The request body. See: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.locations.flexTemplates/launch#request-body :param location: The location of the Dataflow job (for example europe-west1) - :type location: str :param project_id: The ID of the GCP project that owns the job. If set to ``None`` or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param on_new_job_id_callback: (Deprecated) A callback that is called when a Job ID is detected. :param on_new_job_callback: A callback that is called when a Job is detected. :return: the Job @@ -826,16 +799,11 @@ def start_python_dataflow( Starts Dataflow job. :param job_name: The name of the job. - :type job_name: str :param variables: Variables passed to the job. - :type variables: Dict :param dataflow: Name of the Dataflow process. - :type dataflow: str :param py_options: Additional options. - :type py_options: List[str] :param project_id: The ID of the GCP project that owns the job. If set to ``None`` or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param py_interpreter: Python version of the beam pipeline. If None, this defaults to the python3. To track python versions supported by beam and related You could also install the apache-beam package if it is not installed on your system or you want to use a different version. - :type py_requirements: List[str] :param py_system_site_packages: Whether to include system_site_packages in your virtualenv. See virtualenv documentation for more information. This option is only relevant if the ``py_requirements`` parameter is not None. - :type py_interpreter: str :param append_job_name: True if unique suffix has to be appended to job name. - :type append_job_name: bool :param project_id: Optional, the Google Cloud project ID in which to start a job.
If set to None or missing, the default project_id from the Google Cloud connection is used. :param on_new_job_id_callback: Callback called when the job ID is known. - :type on_new_job_id_callback: callable :param location: Job location. - :type location: str """ warnings.warn( """This method is deprecated. @@ -924,12 +887,9 @@ def is_job_dataflow_running( Helper method to check if job is still running in Dataflow :param name: The name of the job. - :type name: str :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: Job location. - :type location: str :return: True if job is running. :rtype: bool """ @@ -966,14 +926,10 @@ def cancel_job( Parameters ``name`` and ``job_id`` are mutually exclusive. :param job_name: Name prefix specifying which jobs are to be canceled. - :type job_name: str :param job_id: Job ID specifying which jobs are to be canceled. - :type job_id: str :param location: Job location. - :type location: str :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: """ jobs_controller = _DataflowJobsController( dataflow=self.get_conn(), @@ -1003,23 +959,17 @@ def start_sql_job( Starts Dataflow SQL query. :param job_name: The unique name to assign to the Cloud Dataflow job. - :type job_name: str :param query: The SQL query to execute. - :type query: str :param options: Job parameters to be executed. For more information, look at: `https://cloud.google.com/sdk/gcloud/reference/beta/dataflow/sql/query `__ command reference :param location: The location of the Dataflow job (for example europe-west1) - :type location: str :param project_id: The ID of the GCP project that owns the job. If set to ``None`` or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param on_new_job_id_callback: (Deprecated) Callback called when the job ID is known. - :type on_new_job_id_callback: callable :param on_new_job_callback: Callback called when the job is known. - :type on_new_job_callback: callable :return: the new job object """ cmd = [ @@ -1083,10 +1033,8 @@ def get_job( Gets the job with the specified Job ID. :param job_id: Job ID to get. - :type job_id: str :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: :param location: The location of the Dataflow job (for example europe-west1). See: https://cloud.google.com/dataflow/docs/concepts/regional-endpoints :return: the Job @@ -1110,10 +1058,8 @@ def fetch_job_metrics_by_id( Gets the job metrics with the specified Job ID. :param job_id: Job ID to get. - :type job_id: str :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: :param location: The location of the Dataflow job (for example europe-west1). See: https://cloud.google.com/dataflow/docs/concepts/regional-endpoints :return: the JobMetrics. See: @@ -1138,12 +1084,9 @@ def fetch_job_messages_by_id( Gets the job messages with the specified Job ID. :param job_id: Job ID to get. - :type job_id: str :param project_id: Optional, the Google Cloud project ID in which to start a job.
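Tying together is_job_dataflow_running and cancel_job from the hunks above, a hedged sketch; the job name, project, and location are invented:

    from airflow.providers.google.cloud.hooks.dataflow import DataflowHook

    hook = DataflowHook(gcp_conn_id="google_cloud_default")
    # Cancel by name prefix only if something with that name is still running.
    if hook.is_job_dataflow_running(name="example-job", project_id="my-project", location="europe-west1"):
        hook.cancel_job(job_name="example-job", project_id="my-project", location="europe-west1")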
If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: :param location: Job location. - :type location: str :return: the list of JobMessages. See: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/ListJobMessagesResponse#JobMessage :rtype: List[dict] @@ -1166,12 +1109,9 @@ def fetch_job_autoscaling_events_by_id( Gets the job autoscaling events with the specified Job ID. :param job_id: Job ID to get. - :type job_id: str :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: :param location: Job location. - :type location: str :return: the list of AutoscalingEvents. See: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/ListJobMessagesResponse#autoscalingevent :rtype: List[dict] @@ -1198,16 +1138,11 @@ def wait_for_done( :param job_name: The 'jobName' to use when executing the DataFlow job (templated). This ends up being set in the pipeline options, so any entry with key ``'jobName'`` in ``options`` will be overwritten. - :type job_name: str :param location: location the job is running - :type location: str :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: :param job_id: a Dataflow job ID - :type job_id: str :param multiple_jobs: If pipeline creates multiple jobs then monitor all jobs - :type multiple_jobs: boolean """ job_controller = _DataflowJobsController( dataflow=self.get_conn(), diff --git a/airflow/providers/google/cloud/hooks/datafusion.py b/airflow/providers/google/cloud/hooks/datafusion.py index 6f485c56536ef..f12d3339cf16b 100644 --- a/airflow/providers/google/cloud/hooks/datafusion.py +++ b/airflow/providers/google/cloud/hooks/datafusion.py @@ -170,11 +170,8 @@ def restart_instance(self, instance_name: str, location: str, project_id: str) - At the end of the operation the instance is fully restarted. :param instance_name: The name of the instance to restart. - :type instance_name: str :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param project_id: The ID of the Google Cloud project that the instance belongs to. - :type project_id: str """ operation = ( self.get_conn() @@ -192,11 +189,8 @@ def delete_instance(self, instance_name: str, location: str, project_id: str) -> Deletes a single Data Fusion instance. :param instance_name: The name of the instance to delete. - :type instance_name: str :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param project_id: The ID of the Google Cloud project that the instance belongs to. - :type project_id: str """ operation = ( self.get_conn() @@ -220,14 +214,10 @@ def create_instance( Creates a new Data Fusion instance in the specified project and location. :param instance_name: The name of the instance to create. - :type instance_name: str :param instance: An instance of Instance. https://cloud.google.com/data-fusion/docs/reference/rest/v1beta1/projects.locations.instances#Instance - :type instance: Dict[str, Any] :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param project_id: The ID of the Google Cloud project that the instance belongs to.
- :type project_id: str """ operation = ( self.get_conn() @@ -249,11 +239,8 @@ def get_instance(self, instance_name: str, location: str, project_id: str) -> Di Gets details of a single Data Fusion instance. :param instance_name: The name of the instance. - :type instance_name: str :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param project_id: The ID of the Google Cloud project that the instance belongs to. - :type project_id: str """ instance = ( self.get_conn() @@ -278,21 +265,16 @@ def patch_instance( Updates a single Data Fusion instance. :param instance_name: The name of the instance to update. - :type instance_name: str :param instance: An instance of Instance. https://cloud.google.com/data-fusion/docs/reference/rest/v1beta1/projects.locations.instances#Instance - :type instance: Dict[str, Any] :param update_mask: Field mask is used to specify the fields that the update will overwrite in an instance resource. The fields specified in the updateMask are relative to the resource, not the full request. A field will be overwritten if it is in the mask. If the user does not provide a mask, all the supported fields (labels and options currently) will be overwritten. A comma-separated list of fully qualified names of fields. Example: "user.displayName,photo". https://developers.google.com/protocol-buffers/docs/reference/google.protobuf?_ga=2.205612571.-968688242.1573564810#google.protobuf.FieldMask - :type update_mask: str :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param project_id: The ID of the Google Cloud project that the instance belongs to. - :type project_id: str """ operation = ( self.get_conn() @@ -319,16 +301,12 @@ def create_pipeline( Creates a Cloud Data Fusion pipeline. :param pipeline_name: Your pipeline name. - :type pipeline_name: str :param pipeline: The pipeline definition. For more information check: https://docs.cdap.io/cdap/current/en/developer-manual/pipelines/developing-pipelines.html#pipeline-configuration-file-format - :type pipeline: Dict[str, Any] :param instance_url: Endpoint on which the REST APIs are accessible for the instance. - :type instance_url: str :param namespace: If your pipeline belongs to a Basic edition instance, the namespace ID is always default. If your pipeline belongs to an Enterprise edition instance, you can create a namespace. - :type namespace: str """ url = os.path.join(self._base_url(instance_url, namespace), quote(pipeline_name)) response = self._cdap_request(url=url, method="PUT", body=pipeline) @@ -348,15 +326,11 @@ def delete_pipeline( Deletes a Cloud Data Fusion pipeline. :param pipeline_name: Your pipeline name. - :type pipeline_name: str :param version_id: Version of pipeline to delete - :type version_id: Optional[str] :param instance_url: Endpoint on which the REST APIs are accessible for the instance. - :type instance_url: str :param namespace: If your pipeline belongs to a Basic edition instance, the namespace ID is always default. If your pipeline belongs to an Enterprise edition instance, you can create a namespace. - :type namespace: str """ url = os.path.join(self._base_url(instance_url, namespace), quote(pipeline_name)) if version_id: @@ -377,15 +351,11 @@ def list_pipelines( Lists Cloud Data Fusion pipelines.
:param artifact_version: Artifact version to filter instances - :type artifact_version: Optional[str] :param artifact_name: Artifact name to filter instances - :type artifact_name: Optional[str] :param instance_url: Endpoint on which the REST API is accessible for the instance. - :type instance_url: str :param namespace: If your pipeline belongs to a Basic edition instance, the namespace ID is always default. If your pipeline belongs to an Enterprise edition instance, you can create a namespace. - :type namespace: str """ url = self._base_url(instance_url, namespace) query: Dict[str, str] = {} @@ -433,15 +403,11 @@ def start_pipeline( Starts a Cloud Data Fusion pipeline. Works for both batch and stream pipelines. :param pipeline_name: Your pipeline name. - :type pipeline_name: str :param instance_url: Endpoint on which the REST API is accessible for the instance. - :type instance_url: str :param runtime_args: Optional runtime JSON args to be passed to the pipeline - :type runtime_args: Optional[Dict[str, Any]] :param namespace: If your pipeline belongs to a Basic edition instance, the namespace ID is always default. If your pipeline belongs to an Enterprise edition instance, you can create a namespace. - :type namespace: str """ # TODO: This API endpoint starts multiple pipelines. There will eventually be a fix to # return the run Id as part of the API request to run a single pipeline. @@ -474,13 +440,10 @@ def stop_pipeline(self, pipeline_name: str, instance_url: str, namespace: str = Stops a Cloud Data Fusion pipeline. Works for both batch and stream pipelines. :param pipeline_name: Your pipeline name. - :type pipeline_name: str :param instance_url: Endpoint on which the REST API is accessible for the instance. - :type instance_url: str :param namespace: If your pipeline belongs to a Basic edition instance, the namespace ID is always default. If your pipeline belongs to an Enterprise edition instance, you can create a namespace. - :type namespace: str """ url = os.path.join( self._base_url(instance_url, namespace), diff --git a/airflow/providers/google/cloud/hooks/dataprep.py b/airflow/providers/google/cloud/hooks/dataprep.py index b46bcd7c92cf7..945aefe42ece2 100644 --- a/airflow/providers/google/cloud/hooks/dataprep.py +++ b/airflow/providers/google/cloud/hooks/dataprep.py @@ -64,7 +64,6 @@ def get_jobs_for_job_group(self, job_id: int) -> Dict[str, Any]: Get information about the batch jobs within a Cloud Dataprep job. :param job_id: The ID of the job that will be fetched - :type job_id: int """ endpoint_path = f"v4/jobGroups/{job_id}/jobs" url: str = os.path.join(self._base_url, endpoint_path) @@ -79,11 +78,8 @@ def get_job_group(self, job_group_id: int, embed: str, include_deleted: bool) -> A job group is a job that is executed from a specific node in a flow. :param job_group_id: The ID of the job that will be fetched - :type job_group_id: int :param embed: Comma-separated list of objects to pull in as part of the response - :type embed: str :param include_deleted: If set to "true", deleted objects will be included - :type include_deleted: bool """ params: Dict[str, Any] = {"embed": embed, "includeDeleted": include_deleted} endpoint_path = f"v4/jobGroups/{job_group_id}" @@ -101,7 +97,6 @@ def run_job_group(self, body_request: dict) -> Dict[str, Any]: https://clouddataprep.com/documentation/api#operation/runJobGroup :param body_request: The identifier for the recipe you would like to run.
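A pipeline-lifecycle sketch for the methods above; the endpoint URL is a placeholder and `pipeline_spec` stands in for a full pipeline configuration file export:

```python
from airflow.providers.google.cloud.hooks.datafusion import DataFusionHook

hook = DataFusionHook()
instance_url = "https://example.datafusion.googleusercontent.com/api"  # placeholder endpoint
pipeline_spec: dict = {}  # a real definition follows the configuration file format linked above

# Register the pipeline, start it with runtime args, and later stop it.
hook.create_pipeline(pipeline_name="example-pipeline", pipeline=pipeline_spec, instance_url=instance_url)
hook.start_pipeline(
    pipeline_name="example-pipeline",
    instance_url=instance_url,
    runtime_args={"input.path": "gs://example-bucket/input"},
)
hook.stop_pipeline(pipeline_name="example-pipeline", instance_url=instance_url)
```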
- :type body_request: dict """ endpoint_path = "v4/jobGroups" url: str = os.path.join(self._base_url, endpoint_path) diff --git a/airflow/providers/google/cloud/hooks/dataproc.py b/airflow/providers/google/cloud/hooks/dataproc.py index b2540f367f30a..6b3db8d2f60b3 100644 --- a/airflow/providers/google/cloud/hooks/dataproc.py +++ b/airflow/providers/google/cloud/hooks/dataproc.py @@ -74,7 +74,6 @@ def add_labels(self, labels: Optional[dict] = None) -> None: Set labels for Dataproc job. :param labels: Labels for the job query. - :type labels: dict """ if labels: self.job["job"]["labels"].update(labels) @@ -84,7 +83,6 @@ def add_variables(self, variables: Optional[Dict] = None) -> None: Set variables for Dataproc job. :param variables: Variables for the job query. - :type variables: Dict """ if variables is not None: self.job["job"][self.job_type]["script_variables"] = variables @@ -94,7 +92,6 @@ def add_args(self, args: Optional[List[str]] = None) -> None: Set args for Dataproc job. :param args: Args for the job query. - :type args: List[str] """ if args is not None: self.job["job"][self.job_type]["args"] = args @@ -104,7 +101,6 @@ def add_query(self, query: str) -> None: Set query for Dataproc job. :param query: Query for the job. - :type query: str """ self.job["job"][self.job_type]["query_list"] = {'queries': [query]} @@ -113,7 +109,6 @@ def add_query_uri(self, query_uri: str) -> None: Set query URI for Dataproc job. :param query_uri: URI for the job query. - :type query_uri: str """ self.job["job"][self.job_type]["query_file_uri"] = query_uri @@ -122,7 +117,6 @@ def add_jar_file_uris(self, jars: Optional[List[str]] = None) -> None: Set jar URIs for Dataproc job. :param jars: List of jar URIs - :type jars: List[str] """ if jars is not None: self.job["job"][self.job_type]["jar_file_uris"] = jars @@ -132,7 +126,6 @@ def add_archive_uris(self, archives: Optional[List[str]] = None) -> None: Set archive URIs for Dataproc job. :param archives: List of archive URIs - :type archives: List[str] """ if archives is not None: self.job["job"][self.job_type]["archive_uris"] = archives @@ -142,7 +135,6 @@ def add_file_uris(self, files: Optional[List[str]] = None) -> None: Set file URIs for Dataproc job. :param files: List of file URIs - :type files: List[str] """ if files is not None: self.job["job"][self.job_type]["file_uris"] = files @@ -152,7 +144,6 @@ def add_python_file_uris(self, pyfiles: Optional[List[str]] = None) -> None: Set Python file URIs for Dataproc job. :param pyfiles: List of Python file URIs - :type pyfiles: List[str] """ if pyfiles is not None: self.job["job"][self.job_type]["python_file_uris"] = pyfiles @@ -162,9 +153,7 @@ def set_main(self, main_jar: Optional[str] = None, main_class: Optional[str] = N Set Dataproc main class. :param main_jar: URI for the main file. - :type main_jar: str :param main_class: Name of the main class. - :type main_class: str :raises: Exception """ if main_class is not None and main_jar is not None: @@ -179,7 +168,6 @@ def set_python_main(self, main: str) -> None: Set Dataproc main Python file URI. :param main: URI for the main Python file. - :type main: str """ self.job["job"][self.job_type]["main_python_file_uri"] = main @@ -188,7 +176,6 @@ def set_job_name(self, name: str) -> None: Set Dataproc job name. :param name: Job name. - :type name: str """ self.job["job"]["reference"]["job_id"] = name + "_" + str(uuid.uuid4())[:8] @@ -315,29 +302,20 @@ def create_cluster( Creates a cluster in a project. :param project_id: Required.
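A short Dataprep sketch for the hook methods above; the connection ID parameter name and the dataset ID are assumptions, and the `wrangledDataset` body shape follows the runJobGroup API reference linked in the docstring:

```python
from airflow.providers.google.cloud.hooks.dataprep import GoogleDataprepHook

hook = GoogleDataprepHook(dataprep_conn_id="dataprep_default")  # assumed connection ID

# Run a job group for a wrangled dataset, then inspect its batch jobs.
response = hook.run_job_group(body_request={"wrangledDataset": {"id": 1234}})  # hypothetical ID
job_group_id = response["id"]

jobs = hook.get_jobs_for_job_group(job_id=job_group_id)
details = hook.get_job_group(job_group_id=job_group_id, embed="", include_deleted=False)
```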
The ID of the Google Cloud project that the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param cluster_name: Name of the cluster to create - :type cluster_name: str :param labels: Labels that will be assigned to created cluster - :type labels: Dict[str, str] :param cluster_config: Required. The cluster config to create. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.dataproc_v1.types.ClusterConfig` - :type cluster_config: Union[Dict, google.cloud.dataproc_v1.types.ClusterConfig] :param request_id: Optional. A unique id used to identify the request. If the server receives two ``CreateClusterRequest`` requests with the same id, then the second request will be ignored and the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :type request_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ # Dataproc labels must conform to the following regex: # [a-z]([-a-z0-9]*[a-z0-9])? (current airflow version string follows @@ -382,26 +360,18 @@ def delete_cluster( Deletes a cluster in a project. :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param cluster_name: Required. The cluster name. - :type cluster_name: str :param cluster_uuid: Optional. Specifying the ``cluster_uuid`` means the RPC should fail if cluster with specified UUID does not exist. - :type cluster_uuid: str :param request_id: Optional. A unique id used to identify the request. If the server receives two ``DeleteClusterRequest`` requests with the same id, then the second request will be ignored and the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :type request_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_cluster_client(region=region) result = client.delete_cluster( @@ -433,19 +403,13 @@ def diagnose_cluster( diagnose is returned :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param cluster_name: Required. The cluster name. - :type cluster_name: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. 
Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_cluster_client(region=region) operation = client.diagnose_cluster( @@ -472,19 +436,13 @@ def get_cluster( Gets the resource representation for a cluster in a project. :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param cluster_name: Required. The cluster name. - :type cluster_name: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_cluster_client(region=region) result = client.get_cluster( @@ -510,23 +468,16 @@ def list_clusters( Lists all regions/{region}/clusters in a project. :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param filter_: Optional. A filter constraining the clusters to list. Filters are case-sensitive. - :type filter_: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per- resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. - :type page_size: int :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_cluster_client(region=region) result = client.list_clusters( @@ -556,18 +507,13 @@ def update_cluster( Updates a cluster in a project. :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param location: (To be deprecated). The Cloud Dataproc region in which to handle the request. - :type location: str :param cluster_name: Required. The cluster name. - :type cluster_name: str :param cluster: Required. The changes to the cluster. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.dataproc_v1.types.Cluster` - :type cluster: Union[Dict, google.cloud.dataproc_v1.types.Cluster] :param update_mask: Required. Specifies the path, relative to ``Cluster``, of the field to update. 
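A read-only sketch for the cluster accessors above (project, region, and cluster name are placeholders; the filter syntax follows the case-sensitivity note in the docstring):

```python
from airflow.providers.google.cloud.hooks.dataproc import DataprocHook

hook = DataprocHook()

cluster = hook.get_cluster(
    project_id="example-project", region="us-central1", cluster_name="example-cluster"
)
print(cluster.status.state)

# Filters are case-sensitive, as noted above.
for found in hook.list_clusters(
    project_id="example-project", region="us-central1", filter_="status.state = ACTIVE"
):
    print(found.cluster_name)
```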
For example, to change the number of workers in a cluster to 5, the ``update_mask`` parameter would be specified as ``config.worker_config.num_instances``, and the ``PATCH`` request body would specify @@ -587,7 +533,6 @@ def update_cluster( If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.dataproc_v1.types.FieldMask` - :type update_mask: Union[Dict, google.cloud.dataproc_v1.types.FieldMask] :param graceful_decommission_timeout: Optional. Timeout for graceful YARN decommissioning. Graceful decommissioning allows removing nodes from the cluster without interrupting jobs in progress. Timeout specifies how long to wait for jobs in progress to finish before forcefully removing nodes @@ -598,19 +543,14 @@ def update_cluster( If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.dataproc_v1.types.Duration` - :type graceful_decommission_timeout: Union[Dict, google.cloud.dataproc_v1.types.Duration] :param request_id: Optional. A unique id used to identify the request. If the server receives two ``UpdateClusterRequest`` requests with the same id, then the second request will be ignored and the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :type request_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ if region is None: if location is not None: @@ -655,22 +595,15 @@ def create_workflow_template( Creates new workflow template. :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param location: (To be deprecated). The Cloud Dataproc region in which to handle the request. - :type location: str :param template: The Dataproc workflow template to create. If a dict is provided, it must be of the same form as the protobuf message WorkflowTemplate. - :type template: Union[dict, WorkflowTemplate] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ if region is None: if location is not None: @@ -708,34 +641,24 @@ def instantiate_workflow_template( Instantiates a template and begins execution. :param template_name: Name of template to instantiate. - :type template_name: str :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param location: (To be deprecated). The Cloud Dataproc region in which to handle the request. - :type location: str :param version: Optional. The version of workflow template to instantiate. 
If specified, the workflow will be instantiated only if the current version of the workflow template has the supplied version. This option cannot be used to instantiate a previous version of the workflow template. - :type version: int :param request_id: Optional. A tag that prevents multiple concurrent workflow instances with the same tag from running. This mitigates the risk of concurrent instances started due to retries. - :type request_id: str :param parameters: Optional. Map from parameter names to values that should be used for those parameters. Values may not exceed 100 characters. - :type parameters: Dict[str, str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ if region is None: if location is not None: @@ -776,25 +699,17 @@ def instantiate_inline_workflow_template( :param template: The workflow template to instantiate. If a dict is provided, it must be of the same form as the protobuf message WorkflowTemplate - :type template: Union[Dict, WorkflowTemplate] :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param location: (To be deprecated). The Cloud Dataproc region in which to handle the request. - :type location: str :param request_id: Optional. A tag that prevents multiple concurrent workflow instances with the same tag from running. This mitigates the risk of concurrent instances started due to retries. - :type request_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ if region is None: if location is not None: @@ -832,17 +747,11 @@ def wait_for_job( Helper method which polls a job to check if it finishes. :param job_id: Id of the Dataproc job - :type job_id: str :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param location: (To be deprecated). The Cloud Dataproc region in which to handle the request. - :type location: str :param wait_time: Number of seconds between checks - :type wait_time: int :param timeout: How many seconds to wait for the job to be ready. Used only if ``asynchronous`` is False - :type timeout: int """ if region is None: if location is not None: @@ -887,21 +796,14 @@ def get_job( Gets the resource representation for a job in a project. :param job_id: Id of the Dataproc job - :type job_id: str :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request.
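A workflow-template sketch for the methods above; the template name, project, region, and parameter map are placeholders, and the call is assumed to return a long-running operation as the surrounding docstrings describe:

```python
from airflow.providers.google.cloud.hooks.dataproc import DataprocHook

hook = DataprocHook()

# Kick off a stored workflow template.
operation = hook.instantiate_workflow_template(
    template_name="example-template",
    project_id="example-project",
    region="us-central1",
    parameters={"CLUSTER": "example-cluster"},  # values may not exceed 100 characters
)
operation.result()  # block until the whole workflow completes
```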
- :type region: str :param location: (To be deprecated). The Cloud Dataproc region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ if region is None: if location is not None: @@ -940,25 +842,17 @@ def submit_job( :param job: The job resource. If a dict is provided, it must be of the same form as the protobuf message Job - :type job: Union[Dict, Job] :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param location: (To be deprecated). The Cloud Dataproc region in which to handle the request. - :type location: str :param request_id: Optional. A tag that prevents multiple concurrent workflow instances with the same tag from running. This mitigates the risk of concurrent instances started due to retries. - :type request_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ if region is None: if location is not None: @@ -990,13 +884,9 @@ def submit( Submits a Google Cloud Dataproc job. :param project_id: The ID of the Google Cloud Dataproc project. - :type project_id: str :param job: The job to be submitted - :type job: dict :param region: The region of the Google Dataproc cluster. - :type region: str :param job_error_states: Job states that should be considered error states. - :type job_error_states: List[str] """ # TODO: Remove one day warnings.warn("This method is deprecated. Please use `submit_job`", DeprecationWarning, stacklevel=2) @@ -1019,21 +909,14 @@ def cancel_job( Starts a job cancellation request. :param project_id: Required. The ID of the Google Cloud project that the job belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param location: (To be deprecated). The Cloud Dataproc region in which to handle the request. - :type location: str :param job_id: Required. The job ID. - :type job_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ if region is None: if location is not None: @@ -1078,27 +961,19 @@ def create_batch( Creates a batch workload. :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to.
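A submit-and-wait sketch tying together `submit_job`, `wait_for_job`, and `cancel_job` from the hunks above; the job body, bucket, and cluster name are placeholders:

```python
from airflow.providers.google.cloud.hooks.dataproc import DataprocHook

hook = DataprocHook()
job = {
    "reference": {"project_id": "example-project"},
    "placement": {"cluster_name": "example-cluster"},
    "pyspark_job": {"main_python_file_uri": "gs://example-bucket/job.py"},
}

submitted = hook.submit_job(job=job, project_id="example-project", region="us-central1")
job_id = submitted.reference.job_id

# Poll to completion, or cancel instead if needed.
hook.wait_for_job(job_id=job_id, project_id="example-project", region="us-central1")
# hook.cancel_job(job_id=job_id, project_id="example-project", region="us-central1")
```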
- :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param batch: Required. The batch to create. - :type batch: google.cloud.dataproc_v1.types.Batch :param batch_id: Optional. The ID to use for the batch, which will become the final component of the batch's resource name. This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - :type batch_id: str :param request_id: Optional. A unique id used to identify the request. If the server receives two ``CreateBatchRequest`` requests with the same id, then the second request will be ignored and the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :type request_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_batch_client(region) parent = f'projects/{project_id}/regions/{region}' @@ -1132,19 +1007,13 @@ def delete_batch( :param batch_id: Required. The ID to use for the batch, which will become the final component of the batch's resource name. This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - :type batch_id: str :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_batch_client(region) name = f"projects/{project_id}/regions/{region}/batches/{batch_id}" @@ -1174,19 +1043,13 @@ def get_batch( :param batch_id: Required. The ID to use for the batch, which will become the final component of the batch's resource name. This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - :type batch_id: str :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_batch_client(region) name = f"projects/{project_id}/regions/{region}/batches/{batch_id}" @@ -1216,23 +1079,16 @@ def list_batches( Lists batch workloads. :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. 
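A batch-workload sketch for the methods above; all IDs are placeholders, and the create call is assumed to return a long-running operation per the ``google.longrunning.Operation`` note in the docstring:

```python
from airflow.providers.google.cloud.hooks.dataproc import DataprocHook

hook = DataprocHook()
batch = {"pyspark_batch": {"main_python_file_uri": "gs://example-bucket/job.py"}}

# batch_id must be 4-63 characters matching /[a-z][0-9]-/, per the docstring above.
operation = hook.create_batch(
    project_id="example-project", region="us-central1", batch=batch, batch_id="example-batch-0001"
)
operation.result()

result = hook.get_batch(batch_id="example-batch-0001", project_id="example-project", region="us-central1")
print(result.state)
```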
- :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param page_size: Optional. The maximum number of batches to return in each response. The service may return fewer than this value. The default page size is 20; the maximum page size is 1000. - :type page_size: int :param page_token: Optional. A page token received from a previous ``ListBatches`` call. Provide this token to retrieve the subsequent page. - :type page_token: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_batch_client(region) parent = f'projects/{project_id}/regions/{region}' diff --git a/airflow/providers/google/cloud/hooks/dataproc_metastore.py b/airflow/providers/google/cloud/hooks/dataproc_metastore.py index ddb72cad34948..d3a467311a0da 100644 --- a/airflow/providers/google/cloud/hooks/dataproc_metastore.py +++ b/airflow/providers/google/cloud/hooks/dataproc_metastore.py @@ -67,9 +67,7 @@ def create_backup( Creates a new backup in a given project and location. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -77,28 +75,21 @@ def create_backup( This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param backup: Required. The backup to create. The ``name`` field is ignored. The ID of the created backup must be provided in the request's ``backup_id`` field. This corresponds to the ``backup`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type backup: google.cloud.metastore_v1.types.Backup :param backup_id: Required. The ID of the backup, which is used as the final component of the backup's name. This value must be between 1 and 64 characters long, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or hyphens. This corresponds to the ``backup_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type backup_id: str :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request. - :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] """ parent = f'projects/{project_id}/locations/{region}/services/{service_id}' @@ -133,9 +124,7 @@ def create_metadata_import( Creates a new MetadataImport in a given project and location. :param project_id: Required. 
The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -143,13 +132,11 @@ def create_metadata_import( This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param metadata_import: Required. The metadata import to create. The ``name`` field is ignored. The ID of the created metadata import must be provided in the request's ``metadata_import_id`` field. This corresponds to the ``metadata_import`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type metadata_import: google.cloud.metastore_v1.types.MetadataImport :param metadata_import_id: Required. The ID of the metadata import, which is used as the final component of the metadata import's name. This value must be between 1 and 64 characters long, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -157,15 +144,10 @@ def create_metadata_import( This corresponds to the ``metadata_import_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type metadata_import_id: str :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request. - :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] """ parent = f'projects/{project_id}/locations/{region}/services/{service_id}' @@ -199,15 +181,12 @@ def create_service( Creates a metastore service in a project and location. :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param service: Required. The Metastore service to create. The ``name`` field is ignored. The ID of the created metastore service must be provided in the request's ``service_id`` field. This corresponds to the ``service`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service: google.cloud.metastore_v1.types.Service :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -215,15 +194,10 @@ def create_service( This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request. 
- :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] """ parent = f'projects/{project_id}/locations/{region}' @@ -257,9 +231,7 @@ def delete_backup( Deletes a single backup. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -267,22 +239,16 @@ def delete_backup( This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param backup_id: Required. The ID of the backup, which is used as the final component of the backup's name. This value must be between 1 and 64 characters long, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or hyphens. This corresponds to the ``backup_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type backup_id: str :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request. - :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] """ name = f'projects/{project_id}/locations/{region}/services/{service_id}/backups/{backup_id}' @@ -313,9 +279,7 @@ def delete_service( Deletes a single service. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -323,15 +287,10 @@ def delete_service( This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request. - :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] """ name = f'projects/{project_id}/locations/{region}/services/{service_id}' @@ -367,11 +326,8 @@ def export_metadata( ``gs:///``. A sub-folder ```` containing exported files will be created below it. - :type destination_gcs_folder: str :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. 
- :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -379,18 +335,12 @@ def export_metadata( This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param database_dump_type: Optional. The type of the database dump. If unspecified, defaults to ``MYSQL``. - :type database_dump_type: google.cloud.metastore_v1.types.DatabaseDumpSpec.Type :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request. - :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] """ service = f'projects/{project_id}/locations/{region}/services/{service_id}' @@ -422,9 +372,7 @@ def get_service( Gets the details of a single service. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -432,13 +380,9 @@ def get_service( This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request. - :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] """ name = f'projects/{project_id}/locations/{region}/services/{service_id}' @@ -468,9 +412,7 @@ def get_backup( Get backup from a service. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -478,15 +420,10 @@ def get_backup( This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param backup_id: Required. The ID of the metastore service backup to restore from - :type backup_id: str :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request. - :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. 
- :type metadata: Sequence[Tuple[str, str]] """ backup = f'projects/{project_id}/locations/{region}/services/{service_id}/backups/{backup_id}' client = self.get_dataproc_metastore_client() @@ -518,9 +455,7 @@ def list_backups( Lists backups in a service. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -528,13 +463,11 @@ def list_backups( This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param page_size: Optional. The maximum number of backups to return. The response may contain fewer than the maximum number. If unspecified, no more than 500 backups are returned. The maximum value is 1000; values above 1000 are changed to 1000. - :type page_size: int :param page_token: Optional. A page token, received from a previous [DataprocMetastore.ListBackups][google.cloud.metastore.v1.DataprocMetastore.ListBackups] call. Provide this token to retrieve the subsequent page. @@ -542,22 +475,16 @@ def list_backups( When paginating, other parameters provided to [DataprocMetastore.ListBackups][google.cloud.metastore.v1.DataprocMetastore.ListBackups] must match the call that provided the page token. - :type page_token: str :param filter: Optional. The filter to apply to list results. - :type filter: str :param order_by: Optional. Specify the ordering of results as described in `Sorting Order `__. If not specified, the results will be sorted in the default order. - :type order_by: str :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request. - :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] """ parent = f'projects/{project_id}/locations/{region}/services/{service_id}/backups' @@ -596,9 +523,7 @@ def restore_service( Restores a service from a backup. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -606,31 +531,21 @@ def restore_service( This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param backup_project_id: Required. The ID of the Google Cloud project that contains the metastore service backup to restore from. - :type backup_project_id: str :param backup_region: Required. The ID of the Google Cloud region that contains the metastore service backup to restore from. - :type backup_region: str :param backup_service_id: Required.
The ID of the metastore service backup to restore from, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or hyphens. - :type backup_service_id: str :param backup_id: Required. The ID of the metastore service backup to restore from - :type backup_id: str :param restore_type: Optional. The type of restore. If unspecified, defaults to ``METADATA_ONLY`` - :type restore_type: google.cloud.metastore_v1.types.Restore.RestoreType :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request. - :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] """ service = f'projects/{project_id}/locations/{region}/services/{service_id}' backup = ( @@ -669,9 +584,7 @@ def update_service( Updates the parameters of a single service. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -679,7 +592,6 @@ def update_service( This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param service: Required. The metastore service to update. The server only merges fields in the service if they are specified in ``update_mask``. @@ -687,22 +599,16 @@ def update_service( This corresponds to the ``service`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service: Union[Dict, google.cloud.metastore_v1.types.Service] :param update_mask: Required. A field mask used to specify the fields to be overwritten in the metastore service resource by the update. Fields specified in the ``update_mask`` are relative to the resource (not to the full request). A field is overwritten if it is in the mask. This corresponds to the ``update_mask`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type update_mask: google.protobuf.field_mask_pb2.FieldMask :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request. - :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. 
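A backup-and-restore sketch for the metastore methods above; every ID is a placeholder, and the parameter names mirror the `create_backup` and `restore_service` docstrings:

```python
from airflow.providers.google.cloud.hooks.dataproc_metastore import DataprocMetastoreHook

hook = DataprocMetastoreHook()

# Take a backup of one service, then restore a service from it.
hook.create_backup(
    project_id="example-project", region="us-central1", service_id="example-service",
    backup={"description": "nightly"}, backup_id="nightly-0001",
)
hook.restore_service(
    project_id="example-project", region="us-central1", service_id="example-service",
    backup_project_id="example-project", backup_region="us-central1",
    backup_service_id="example-service", backup_id="nightly-0001",
)
```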
- :type metadata: Sequence[Tuple[str, str]] """ client = self.get_dataproc_metastore_client() diff --git a/airflow/providers/google/cloud/hooks/datastore.py b/airflow/providers/google/cloud/hooks/datastore.py index af4c02dde99d1..53d42002aa496 100644 --- a/airflow/providers/google/cloud/hooks/datastore.py +++ b/airflow/providers/google/cloud/hooks/datastore.py @@ -36,7 +36,6 @@ class DatastoreHook(GoogleBaseHook): simultaneously, you will need to create a hook per thread. :param api_version: The version of the API it is going to connect to. - :type api_version: str """ def __init__( @@ -87,9 +86,7 @@ def allocate_ids(self, partial_keys: list, project_id: str) -> list: https://cloud.google.com/datastore/docs/reference/rest/v1/projects/allocateIds :param partial_keys: a list of partial keys. - :type partial_keys: list :param project_id: Google Cloud project ID against which to make the request. - :type project_id: str :return: a list of full keys. :rtype: list """ @@ -112,9 +109,7 @@ def begin_transaction(self, project_id: str, transaction_options: Dict[str, Any] https://cloud.google.com/datastore/docs/reference/rest/v1/projects/beginTransaction :param project_id: Google Cloud project ID against which to make the request. - :type project_id: str :param transaction_options: Options for a new transaction. - :type transaction_options: Dict[str, Any] :return: a transaction handle. :rtype: str """ @@ -137,9 +132,7 @@ def commit(self, body: dict, project_id: str) -> dict: https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit :param body: the body of the commit request. - :type body: dict :param project_id: Google Cloud project ID against which to make the request. - :type project_id: str :return: the response body of the commit request. :rtype: dict """ @@ -164,14 +157,10 @@ def lookup( https://cloud.google.com/datastore/docs/reference/rest/v1/projects/lookup :param keys: the keys to lookup. - :type keys: list :param read_consistency: the read consistency to use. default, strong or eventual. Cannot be used with a transaction. - :type read_consistency: str :param transaction: the transaction to use, if any. - :type transaction: str :param project_id: Google Cloud project ID against which to make the request. - :type project_id: str :return: the response body of the lookup request. :rtype: dict """ @@ -195,9 +184,7 @@ def rollback(self, transaction: str, project_id: str) -> None: https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback :param transaction: the transaction to roll back. - :type transaction: str :param project_id: Google Cloud project ID against which to make the request. - :type project_id: str """ conn: Any = self.get_conn() @@ -214,9 +201,7 @@ def run_query(self, body: dict, project_id: str) -> dict: https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery :param body: the body of the query request. - :type body: dict :param project_id: Google Cloud project ID against which to make the request. - :type project_id: str :return: the batch of query results. :rtype: dict """ @@ -234,7 +219,6 @@ def get_operation(self, name: str) -> dict: https://cloud.google.com/datastore/docs/reference/data/rest/v1/projects.operations/get :param name: the name of the operation resource. - :type name: str :return: a resource operation instance. 
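A transactional sketch for the Datastore hook methods above; the project ID and entity payload are placeholders, and the commit body follows the REST format linked in the docstrings:

```python
from airflow.providers.google.cloud.hooks.datastore import DatastoreHook

hook = DatastoreHook()
project_id = "example-project"

# Run a mutation inside an explicit transaction, rolling back on failure.
txn = hook.begin_transaction(project_id=project_id, transaction_options={})
try:
    hook.commit(
        body={
            "mode": "TRANSACTIONAL",
            "transaction": txn,
            "mutations": [{
                "upsert": {
                    "key": {"path": [{"kind": "Task"}]},
                    "properties": {"state": {"stringValue": "queued"}},
                }
            }],
        },
        project_id=project_id,
    )
except Exception:
    hook.rollback(transaction=txn, project_id=project_id)
    raise
```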
:rtype: dict """ @@ -252,7 +236,6 @@ def delete_operation(self, name: str) -> dict: https://cloud.google.com/datastore/docs/reference/data/rest/v1/projects.operations/delete :param name: the name of the operation resource. - :type name: str :return: none if successful. :rtype: dict """ @@ -267,9 +250,7 @@ def poll_operation_until_done(self, name: str, polling_interval_in_seconds: floa Poll backup operation state until it's completed. :param name: the name of the operation resource - :type name: str :param polling_interval_in_seconds: The number of seconds to wait before calling another request. - :type polling_interval_in_seconds: float :return: a resource operation instance. :rtype: dict """ @@ -304,15 +285,10 @@ def export_to_storage_bucket( https://cloud.google.com/datastore/docs/reference/admin/rest/v1/projects/export :param bucket: The name of the Cloud Storage bucket. - :type bucket: str :param namespace: The Cloud Storage namespace path. - :type namespace: str :param entity_filter: Description of what data from the project is included in the export. - :type entity_filter: dict :param labels: Client-assigned labels. - :type labels: dict of str :param project_id: Google Cloud project ID against which to make the request. - :type project_id: str :return: a resource operation instance. :rtype: dict """ @@ -356,17 +332,11 @@ def import_from_storage_bucket( https://cloud.google.com/datastore/docs/reference/admin/rest/v1/projects/import :param bucket: The name of the Cloud Storage bucket. - :type bucket: str :param file: the metadata file written by the projects.export operation. - :type file: str :param namespace: The Cloud Storage namespace path. - :type namespace: str :param entity_filter: specify which kinds/namespaces are to be imported. - :type entity_filter: dict :param labels: Client-assigned labels. - :type labels: dict of str :param project_id: Google Cloud project ID against which to make the request. - :type project_id: str :return: a resource operation instance. :rtype: dict """ diff --git a/airflow/providers/google/cloud/hooks/dlp.py b/airflow/providers/google/cloud/hooks/dlp.py index 95282aa1dfebb..2938159b5a15d 100644 --- a/airflow/providers/google/cloud/hooks/dlp.py +++ b/airflow/providers/google/cloud/hooks/dlp.py @@ -19,6 +19,11 @@ """ This module contains a CloudDLPHook which allows you to connect to Google Cloud DLP service. + +.. spelling:: + + ImageRedactionConfig + RedactImageRequest """ import re @@ -65,11 +70,9 @@ class CloudDLPHook(GoogleBaseHook): on Google Cloud based data sets. :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -78,7 +81,6 @@ class CloudDLPHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. 
- :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -118,20 +120,15 @@ def cancel_dlp_job( Starts asynchronous cancellation on a long-running DLP job. :param dlp_job_id: ID of the DLP job resource to be cancelled. - :type dlp_job_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() @@ -157,24 +154,17 @@ def create_deidentify_template( :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param deidentify_template: (Optional) The de-identify template to create. - :type deidentify_template: dict or google.cloud.dlp_v2.types.DeidentifyTemplate :param template_id: (Optional) The template ID. - :type template_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate """ client = self.get_conn() @@ -216,29 +206,20 @@ def create_dlp_job( :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param inspect_job: (Optional) The configuration for the inspect job. - :type inspect_job: dict or google.cloud.dlp_v2.types.InspectJobConfig :param risk_job: (Optional) The configuration for the risk job. - :type risk_job: dict or google.cloud.dlp_v2.types.RiskAnalysisJobConfig :param job_id: (Optional) The job ID. - :type job_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param wait_until_finished: (Optional) If true, it will keep polling the job state until it is set to DONE. - :type wait_until_finished: bool :rtype: google.cloud.dlp_v2.types.DlpJob :param time_to_sleep_in_seconds: (Optional) Time to sleep, in seconds, between active checks of the operation results. Defaults to 60. 
- :type time_to_sleep_in_seconds: int """ client = self.get_conn() @@ -297,24 +278,17 @@ def create_inspect_template( :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param inspect_template: (Optional) The inspect template to create. - :type inspect_template: dict or google.cloud.dlp_v2.types.InspectTemplate :param template_id: (Optional) The template ID. - :type template_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.InspectTemplate """ client = self.get_conn() @@ -355,20 +329,14 @@ def create_job_trigger( :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param job_trigger: (Optional) The job trigger to create. - :type job_trigger: dict or google.cloud.dlp_v2.types.JobTrigger :param trigger_id: (Optional) The job trigger ID. - :type trigger_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.JobTrigger """ client = self.get_conn() @@ -398,24 +366,17 @@ def create_stored_info_type( :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param config: (Optional) The config for the stored info type. - :type config: dict or google.cloud.dlp_v2.types.StoredInfoTypeConfig :param stored_info_type_id: (Optional) The stored info type ID. - :type stored_info_type_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. 
- :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.StoredInfoType """ client = self.get_conn() @@ -459,32 +420,23 @@ def deidentify_content( :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param deidentify_config: (Optional) Configuration for the de-identification of the content item. Items specified here will override the template referenced by the deidentify_template_name argument. - :type deidentify_config: dict or google.cloud.dlp_v2.types.DeidentifyConfig :param inspect_config: (Optional) Configuration for the inspector. Items specified here will override the template referenced by the inspect_template_name argument. - :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig :param item: (Optional) The item to de-identify. Will be treated as text. - :type item: dict or google.cloud.dlp_v2.types.ContentItem :param inspect_template_name: (Optional) Optional template to use. Any configuration directly specified in inspect_config will override those set in the template. - :type inspect_template_name: str :param deidentify_template_name: (Optional) Optional template to use. Any configuration directly specified in deidentify_config will override those set in the template. - :type deidentify_template_name: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.DeidentifyContentResponse """ client = self.get_conn() @@ -509,23 +461,17 @@ def delete_deidentify_template( Deletes a deidentify template. :param template_id: The ID of deidentify template to be deleted. - :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() @@ -558,20 +504,15 @@ def delete_dlp_job( interested in the DLP job result. The job will be cancelled if possible. :param dlp_job_id: The ID of the DLP job resource to be cancelled. - :type dlp_job_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. 
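A sketch of `deidentify_content` as documented above, replacing detected email addresses; the config dicts mirror the public DLP request types and every literal is a placeholder:

```python
from airflow.providers.google.cloud.hooks.dlp import CloudDLPHook

hook = CloudDLPHook()
response = hook.deidentify_content(
    project_id="my-project",
    item={"value": "Contact me at jane.doe@example.com"},
    inspect_config={"info_types": [{"name": "EMAIL_ADDRESS"}]},
    deidentify_config={
        "info_type_transformations": {
            "transformations": [
                # Replace each finding with its info type name.
                {"primitive_transformation": {"replace_with_info_type_config": {}}}
            ]
        }
    },
)
print(response.item.value)  # e.g. "Contact me at [EMAIL_ADDRESS]"
```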
- :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() @@ -594,23 +535,17 @@ def delete_inspect_template( Deletes an inspect template. :param template_id: The ID of the inspect template to be deleted. - :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() @@ -642,20 +577,15 @@ def delete_job_trigger( Deletes a job trigger. :param job_trigger_id: The ID of the DLP job trigger to be deleted. - :type job_trigger_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() @@ -678,23 +608,17 @@ def delete_stored_info_type( Deletes a stored info type. :param stored_info_type_id: The ID of the stored info type to be deleted. - :type stored_info_type_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ client = self.get_conn() @@ -726,23 +650,17 @@ def get_deidentify_template( Gets a deidentify template. :param template_id: The ID of deidentify template to be read. 
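These delete and get methods all share the `retry`, `timeout`, and `metadata` knobs; one hedged illustration (the IDs and retry parameters are arbitrary examples):

```python
from google.api_core.retry import Retry
from airflow.providers.google.cloud.hooks.dlp import CloudDLPHook

hook = CloudDLPHook()
hook.delete_dlp_job(
    dlp_job_id="i-1234567890",  # hypothetical job ID
    project_id="my-project",
    retry=Retry(initial=1.0, maximum=30.0, multiplier=2.0, deadline=120.0),
    timeout=10.0,  # applies to each attempt, not the call as a whole
)
```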
- :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate """ client = self.get_conn() @@ -775,20 +693,15 @@ def get_dlp_job( Gets the latest state of a long-running Dlp Job. :param dlp_job_id: The ID of the DLP job resource to be read. - :type dlp_job_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.DlpJob """ client = self.get_conn() @@ -812,23 +725,17 @@ def get_inspect_template( Gets an inspect template. :param template_id: The ID of inspect template to be read. - :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.InspectTemplate """ client = self.get_conn() @@ -861,20 +768,15 @@ def get_job_trigger( Gets a DLP job trigger. :param job_trigger_id: The ID of the DLP job trigger to be read. - :type job_trigger_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. 
Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.JobTrigger """ client = self.get_conn() @@ -898,23 +800,17 @@ def get_stored_info_type( Gets a stored info type. :param stored_info_type_id: The ID of the stored info type to be read. - :type stored_info_type_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.StoredInfoType """ client = self.get_conn() @@ -952,24 +848,17 @@ def inspect_content( :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param inspect_config: (Optional) Configuration for the inspector. Items specified here will override the template referenced by the inspect_template_name argument. - :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig :param item: (Optional) The item to de-identify. Will be treated as text. - :type item: dict or google.cloud.dlp_v2.types.ContentItem :param inspect_template_name: (Optional) Optional template to use. Any configuration directly specified in inspect_config will override those set in the template. - :type inspect_template_name: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.InspectContentResponse """ client = self.get_conn() @@ -1000,26 +889,19 @@ def list_deidentify_templates( :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param page_size: (Optional) The maximum number of resources contained in the underlying API response. - :type page_size: int :param order_by: (Optional) Optional comma separated list of fields to order by, followed by asc or desc postfix. - :type order_by: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. 
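`inspect_content`, documented above, is the synchronous counterpart to `create_dlp_job`; a sketch with placeholder values:

```python
from airflow.providers.google.cloud.hooks.dlp import CloudDLPHook

hook = CloudDLPHook()
response = hook.inspect_content(
    project_id="my-project",
    item={"value": "Call me on +1 212-555-0123"},
    inspect_config={
        "info_types": [{"name": "PHONE_NUMBER"}],
        "include_quote": True,  # echo the matched text back in findings
    },
)
for finding in response.result.findings:
    print(finding.info_type.name, "-", finding.quote)
```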
- :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: List[google.cloud.dlp_v2.types.DeidentifyTemplate] """ client = self.get_conn() @@ -1063,26 +945,18 @@ def list_dlp_jobs( :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param results_filter: (Optional) Filter used to specify a subset of results. - :type results_filter: str :param page_size: (Optional) The maximum number of resources contained in the underlying API response. - :type page_size: int :param job_type: (Optional) The type of job. - :type job_type: str :param order_by: (Optional) Optional comma separated list of fields to order by, followed by asc or desc postfix. - :type order_by: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: List[google.cloud.dlp_v2.types.DlpJob] """ client = self.get_conn() @@ -1114,18 +988,13 @@ def list_info_types( :param language_code: (Optional) Optional BCP-47 language code for localized info type friendly names. If omitted, or if localized strings are not available, en-US strings will be returned. - :type language_code: str :param results_filter: (Optional) Filter used to specify a subset of results. - :type results_filter: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.ListInfoTypesResponse """ client = self.get_conn() @@ -1153,26 +1022,19 @@ def list_inspect_templates( :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param page_size: (Optional) The maximum number of resources contained in the underlying API response. - :type page_size: int :param order_by: (Optional) Optional comma separated list of fields to order by, followed by asc or desc postfix. - :type order_by: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. 
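Of the list methods above, `list_info_types` is the simplest since it needs no parent resource; a minimal sketch:

```python
# Enumerate the built-in info type detectors with localized display names.
from airflow.providers.google.cloud.hooks.dlp import CloudDLPHook

hook = CloudDLPHook()
response = hook.list_info_types(language_code="en-US")
for info_type in response.info_types:
    print(info_type.name, "-", info_type.display_name)
```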
Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: List[google.cloud.dlp_v2.types.InspectTemplate] """ client = self.get_conn() @@ -1214,24 +1076,17 @@ def list_job_triggers( :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param page_size: (Optional) The maximum number of resources contained in the underlying API response. - :type page_size: int :param order_by: (Optional) Optional comma separated list of fields to order by, followed by asc or desc postfix. - :type order_by: str :param results_filter: (Optional) Filter used to specify a subset of results. - :type results_filter: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: List[google.cloud.dlp_v2.types.JobTrigger] """ client = self.get_conn() @@ -1263,26 +1118,19 @@ def list_stored_info_types( :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param page_size: (Optional) The maximum number of resources contained in the underlying API response. - :type page_size: int :param order_by: (Optional) Optional comma separated list of fields to order by, followed by asc or desc postfix. - :type order_by: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: List[google.cloud.dlp_v2.types.StoredInfoType] """ client = self.get_conn() @@ -1328,28 +1176,20 @@ def redact_image( :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param inspect_config: (Optional) Configuration for the inspector. Items specified here will override the template referenced by the inspect_template_name argument. - :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig :param image_redaction_configs: (Optional) The configuration for specifying what content to redact from images. - :type image_redaction_configs: List[dict] or List[google.cloud.dlp_v2.types.RedactImageRequest.ImageRedactionConfig] :param include_findings: (Optional) Whether the response should include findings along with the redacted image. 
- :type include_findings: bool :param byte_item: (Optional) The content must be PNG, JPEG, SVG or BMP. - :type byte_item: dict or google.cloud.dlp_v2.types.ByteContentItem :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.RedactImageResponse """ client = self.get_conn() @@ -1385,30 +1225,21 @@ def reidentify_content( :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param reidentify_config: (Optional) Configuration for the re-identification of the content item. - :type reidentify_config: dict or google.cloud.dlp_v2.types.DeidentifyConfig :param inspect_config: (Optional) Configuration for the inspector. - :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig :param item: (Optional) The item to re-identify. Will be treated as text. - :type item: dict or google.cloud.dlp_v2.types.ContentItem :param inspect_template_name: (Optional) Optional template to use. Any configuration directly specified in inspect_config will override those set in the template. - :type inspect_template_name: str :param reidentify_template_name: (Optional) Optional template to use. References an instance of deidentify template. Any configuration directly specified in reidentify_config or inspect_config will override those set in the template. - :type reidentify_template_name: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.ReidentifyContentResponse """ client = self.get_conn() @@ -1441,27 +1272,19 @@ def update_deidentify_template( Updates the deidentify template. :param template_id: The ID of deidentify template to be updated. - :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param deidentify_template: New deidentify template value. - :type deidentify_template: dict or google.cloud.dlp_v2.types.DeidentifyTemplate :param update_mask: Mask to control which fields get updated. - :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. 
Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate """ client = self.get_conn() @@ -1503,27 +1326,19 @@ def update_inspect_template( Updates the inspect template. :param template_id: The ID of the inspect template to be updated. - :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param inspect_template: New inspect template value. - :type inspect_template: dict or google.cloud.dlp_v2.types.InspectTemplate :param update_mask: Mask to control which fields get updated. - :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.InspectTemplate """ client = self.get_conn() @@ -1564,24 +1379,17 @@ def update_job_trigger( Updates a job trigger. :param job_trigger_id: The ID of the DLP job trigger to be updated. - :type job_trigger_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param job_trigger: New job trigger value. - :type job_trigger: dict or google.cloud.dlp_v2.types.JobTrigger :param update_mask: Mask to control which fields get updated. - :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.JobTrigger """ client = self.get_conn() @@ -1614,28 +1422,20 @@ def update_stored_info_type( Updates the stored info type by creating a new version. :param stored_info_type_id: The ID of the stored info type to be updated. - :type stored_info_type_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param config: Updated configuration for the stored info type. If not provided, a new version of the stored info type will be created with the existing configuration. 
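The update methods above all take an `update_mask` so only the named fields are written; a hedged sketch pausing a trigger, with placeholder IDs and the `FieldMask` given in its plain dict form:

```python
from airflow.providers.google.cloud.hooks.dlp import CloudDLPHook

hook = CloudDLPHook()
hook.update_job_trigger(
    job_trigger_id="my-trigger",  # hypothetical
    project_id="my-project",
    job_trigger={"status": "PAUSED"},
    update_mask={"paths": ["status"]},  # only `status` is updated
)
```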
- :type config: dict or google.cloud.dlp_v2.types.StoredInfoTypeConfig :param update_mask: Mask to control which fields get updated. - :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :rtype: google.cloud.dlp_v2.types.StoredInfoType """ client = self.get_conn() diff --git a/airflow/providers/google/cloud/hooks/functions.py b/airflow/providers/google/cloud/hooks/functions.py index 46700256f63ab..bf66e314e8e77 100644 --- a/airflow/providers/google/cloud/hooks/functions.py +++ b/airflow/providers/google/cloud/hooks/functions.py @@ -60,9 +60,7 @@ def _full_location(project_id: str, location: str) -> str: ``projects//locations/`` :param project_id: The Google Cloud Project project_id where the function belongs. - :type project_id: str :param location: The location where the function is created. - :type location: str :return: """ return f'projects/{project_id}/locations/{location}' @@ -86,7 +84,6 @@ def get_function(self, name: str) -> dict: Returns the Cloud Function with the given name. :param name: Name of the function. - :type name: str :return: A Cloud Functions object representing the function. :rtype: dict """ @@ -101,12 +98,9 @@ def create_new_function(self, location: str, body: dict, project_id: str) -> Non Creates a new function in Cloud Function in the location specified in the body. :param location: The location of the function. - :type location: str :param body: The body required by the Cloud Functions insert API. - :type body: dict :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ # fmt: off @@ -123,11 +117,8 @@ def update_function(self, name: str, body: dict, update_mask: List[str]) -> None Updates Cloud Functions according to the specified update mask. :param name: The name of the function. - :type name: str :param body: The body required by the cloud function patch API. - :type body: dict :param update_mask: The update mask - array of fields that should be patched. - :type update_mask: [str] :return: None """ # fmt: off @@ -146,12 +137,9 @@ def upload_function_zip(self, location: str, zip_path: str, project_id: str) -> Uploads zip file with sources. :param location: The location where the function is created. - :type location: str :param zip_path: The path of the valid .zip file to upload. - :type zip_path: str :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: The upload URL that was returned by generateUploadUrl method. :rtype: str """ @@ -183,7 +171,6 @@ def delete_function(self, name: str) -> None: Deletes the specified Cloud Function. :param name: The name of the function. - :type name: str :return: None """ # fmt: off @@ -206,14 +193,10 @@ def call_function( purposes as very limited traffic is allowed. 
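As the `call_function` docstring above notes, direct invocation suits testing only; a sketch where every identifier is a placeholder and the request body shape is assumed from the Cloud Functions v1 `call` API:

```python
from airflow.providers.google.cloud.hooks.functions import CloudFunctionsHook

hook = CloudFunctionsHook(api_version="v1")
result = hook.call_function(
    function_id="hello-world",                 # hypothetical function
    input_data={"data": '{"greeting": "hi"}'}, # body of the call request
    location="us-central1",
    project_id="my-project",
)
```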
:param function_id: ID of the function to be called - :type function_id: str :param input_data: Input to be passed to the function - :type input_data: Dict :param location: The location where the function is located. - :type location: str :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ name = f"projects/{project_id}/locations/{location}/functions/{function_id}" @@ -233,7 +216,6 @@ def _wait_for_operation_to_complete(self, operation_name: str) -> dict: asynchronous call. :param operation_name: The name of the operation. - :type operation_name: str :return: The response returned by the operation. :rtype: dict :exception: AirflowException in case error is returned. diff --git a/airflow/providers/google/cloud/hooks/gcs.py b/airflow/providers/google/cloud/hooks/gcs.py index ecadec7989944..8a1e9340477ba 100644 --- a/airflow/providers/google/cloud/hooks/gcs.py +++ b/airflow/providers/google/cloud/hooks/gcs.py @@ -60,11 +60,8 @@ def _fallback_object_url_to_object_name_and_bucket_name( Decorator factory that convert object URL parameter to object name and bucket name parameter. :param object_url_keyword_arg_name: Name of the object URL parameter - :type object_url_keyword_arg_name: str :param bucket_name_keyword_arg_name: Name of the bucket name parameter - :type bucket_name_keyword_arg_name: str :param object_name_keyword_arg_name: Name of the object name parameter - :type object_name_keyword_arg_name: str :return: Decorator """ @@ -177,15 +174,11 @@ def copy( source bucket/object is used, but not both. :param source_bucket: The bucket of the object to copy from. - :type source_bucket: str :param source_object: The object to copy. - :type source_object: str :param destination_bucket: The destination of the object to copied to. Can be omitted; then the same bucket is used. - :type destination_bucket: str :param destination_object: The (renamed) path of the object if given. Can be omitted; then the same name is used. - :type destination_object: str """ destination_bucket = destination_bucket or source_bucket destination_object = destination_object or source_object @@ -230,14 +223,10 @@ def rewrite( destination_object can be omitted, in which case source_object is used. :param source_bucket: The bucket of the object to copy from. - :type source_bucket: str :param source_object: The object to copy. - :type source_object: str :param destination_bucket: The destination of the object to copied to. - :type destination_bucket: str :param destination_object: The (renamed) path of the object if given. Can be omitted; then the same name is used. - :type destination_object: str """ destination_object = destination_object or source_object if source_bucket == destination_bucket and source_object == destination_object: @@ -315,17 +304,11 @@ def download( to write to a file. :param bucket_name: The bucket to fetch from. - :type bucket_name: str :param object_name: The object to fetch. - :type object_name: str :param filename: If set, a local file path where the file should be written to. - :type filename: str :param chunk_size: Blob chunk size. - :type chunk_size: int :param timeout: Request timeout in seconds. - :type timeout: int :param num_max_attempts: Number of attempts to download the file. 
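A minimal sketch of `GCSHook.download` with the retry parameter listed above; the bucket and paths are placeholders:

```python
from airflow.providers.google.cloud.hooks.gcs import GCSHook

hook = GCSHook(gcp_conn_id="google_cloud_default")
hook.download(
    bucket_name="my-bucket",
    object_name="data/file.csv",
    filename="/tmp/file.csv",  # omit to get the object contents back instead
    num_max_attempts=3,        # retry transient download failures
)
```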
- :type num_max_attempts: int """ # TODO: future improvement check file size before downloading, # to check for local space availability @@ -379,15 +362,10 @@ def download_as_byte_array( to write to a file. :param bucket_name: The bucket to fetch from. - :type bucket_name: str :param object_name: The object to fetch. - :type object_name: str :param chunk_size: Blob chunk size. - :type chunk_size: int :param timeout: Request timeout in seconds. - :type timeout: int :param num_max_attempts: Number of attempts to download the file. - :type num_max_attempts: int """ # We do not pass filename, so will never receive string as response return self.download( @@ -413,11 +391,8 @@ def provide_file( or just object_url parameter. :param bucket_name: The bucket to fetch from. - :type bucket_name: str :param object_name: The object to fetch. - :type object_name: str :param object_url: File reference url. Must start with "gs: //" - :type object_url: str :return: File handler """ if object_name is None: @@ -444,11 +419,8 @@ def provide_file_and_upload( or just object_url parameter. :param bucket_name: The bucket to fetch from. - :type bucket_name: str :param object_name: The object to fetch. - :type object_name: str :param object_url: File reference url. Must start with "gs: //" - :type object_url: str :return: File handler """ if object_name is None: @@ -477,31 +449,20 @@ def upload( Uploads a local file or file data as string or bytes to Google Cloud Storage. :param bucket_name: The bucket to upload to. - :type bucket_name: str :param object_name: The object name to set when uploading the file. - :type object_name: str :param filename: The local file path to the file to be uploaded. - :type filename: str :param data: The file's data as a string or bytes to be uploaded. - :type data: str :param mime_type: The file's mime type set when uploading the file. - :type mime_type: str :param gzip: Option to compress local file or file data for upload - :type gzip: bool :param encoding: bytes encoding for file data if provided as string - :type encoding: str :param chunk_size: Blob chunk size. - :type chunk_size: int :param timeout: Request timeout in seconds. - :type timeout: int :param num_max_attempts: Number of attempts to try to upload the file. - :type num_max_attempts: int """ def _call_with_retry(f: Callable[[], None]) -> None: """Helper functions to upload a file or a string with a retry mechanism and exponential back-off. :param f: Callable that should be retried. - :type f: Callable[[], None] """ num_file_attempts = 0 @@ -575,10 +536,8 @@ def exists(self, bucket_name: str, object_name: str) -> bool: Checks for the existence of a file in Google Cloud Storage. :param bucket_name: The Google Cloud Storage bucket where the object is. - :type bucket_name: str :param object_name: The name of the blob_name to check in the Google cloud storage bucket. - :type object_name: str """ client = self.get_conn() bucket = client.bucket(bucket_name) @@ -590,10 +549,8 @@ def get_blob_update_time(self, bucket_name: str, object_name: str): Get the update time of a file in Google Cloud Storage :param bucket_name: The Google Cloud Storage bucket where the object is. - :type bucket_name: str :param object_name: The name of the blob to get updated time from the Google cloud storage bucket. 
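`provide_file`, documented above, wraps `download` in a context manager that yields a temporary local copy; a sketch using the `object_url` form (the URL is a placeholder):

```python
from airflow.providers.google.cloud.hooks.gcs import GCSHook

hook = GCSHook()
with hook.provide_file(object_url="gs://my-bucket/data/file.csv") as tmp:
    print(tmp.name)       # path of the local temporary file
    data = tmp.read()     # contents, removed again when the block exits
```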
- :type object_name: str """ client = self.get_conn() bucket = client.bucket(bucket_name) @@ -607,12 +564,9 @@ def is_updated_after(self, bucket_name: str, object_name: str, ts: datetime) -> Checks if an blob_name is updated in Google Cloud Storage. :param bucket_name: The Google Cloud Storage bucket where the object is. - :type bucket_name: str :param object_name: The name of the object to check in the Google cloud storage bucket. - :type object_name: str :param ts: The timestamp to check against. - :type ts: datetime.datetime """ blob_update_time = self.get_blob_update_time(bucket_name, object_name) if blob_update_time is not None: @@ -631,14 +585,10 @@ def is_updated_between( Checks if an blob_name is updated in Google Cloud Storage. :param bucket_name: The Google Cloud Storage bucket where the object is. - :type bucket_name: str :param object_name: The name of the object to check in the Google cloud storage bucket. - :type object_name: str :param min_ts: The minimum timestamp to check against. - :type min_ts: datetime.datetime :param max_ts: The maximum timestamp to check against. - :type max_ts: datetime.datetime """ blob_update_time = self.get_blob_update_time(bucket_name, object_name) if blob_update_time is not None: @@ -657,12 +607,9 @@ def is_updated_before(self, bucket_name: str, object_name: str, ts: datetime) -> Checks if an blob_name is updated before given time in Google Cloud Storage. :param bucket_name: The Google Cloud Storage bucket where the object is. - :type bucket_name: str :param object_name: The name of the object to check in the Google cloud storage bucket. - :type object_name: str :param ts: The timestamp to check against. - :type ts: datetime.datetime """ blob_update_time = self.get_blob_update_time(bucket_name, object_name) if blob_update_time is not None: @@ -679,12 +626,9 @@ def is_older_than(self, bucket_name: str, object_name: str, seconds: int) -> boo Check if object is older than given time :param bucket_name: The Google Cloud Storage bucket where the object is. - :type bucket_name: str :param object_name: The name of the object to check in the Google cloud storage bucket. - :type object_name: str :param seconds: The time in seconds to check against - :type seconds: int """ blob_update_time = self.get_blob_update_time(bucket_name, object_name) if blob_update_time is not None: @@ -702,9 +646,7 @@ def delete(self, bucket_name: str, object_name: str) -> None: Deletes an object from the bucket. :param bucket_name: name of the bucket, where the object resides - :type bucket_name: str :param object_name: name of the object to delete - :type object_name: str """ client = self.get_conn() bucket = client.bucket(bucket_name) @@ -718,7 +660,6 @@ def delete_bucket(self, bucket_name: str, force: bool = False) -> None: Delete a bucket object from the Google Cloud Storage. 
:param bucket_name: name of the bucket which will be deleted - :type bucket_name: str :param force: false not allow to delete non empty bucket, set force=True allows to delete non empty bucket :type: bool @@ -738,16 +679,11 @@ def list(self, bucket_name, versions=None, max_results=None, prefix=None, delimi List all objects from the bucket with the give string prefix in name :param bucket_name: bucket name - :type bucket_name: str :param versions: if true, list all versions of the objects - :type versions: bool :param max_results: max count of items to return in a single page of responses - :type max_results: int :param prefix: prefix string which filters objects whose name begin with this prefix - :type prefix: str :param delimiter: filters objects based on the delimiter (for e.g '.csv') - :type delimiter: str :return: a stream of object names matching the filtering criteria """ client = self.get_conn() @@ -795,20 +731,13 @@ def list_by_timespan( updated in the time between ``timespan_start`` and ``timespan_end``. :param bucket_name: bucket name - :type bucket_name: str :param timespan_start: will return objects that were updated at or after this datetime (UTC) - :type timespan_start: datetime :param timespan_end: will return objects that were updated before this datetime (UTC) - :type timespan_end: datetime :param versions: if true, list all versions of the objects - :type versions: bool :param max_results: max count of items to return in a single page of responses - :type max_results: int :param prefix: prefix string which filters objects whose name begin with this prefix - :type prefix: str :param delimiter: filters objects based on the delimiter (for e.g '.csv') - :type delimiter: str :return: a stream of object names matching the filtering criteria """ client = self.get_conn() @@ -848,10 +777,8 @@ def get_size(self, bucket_name: str, object_name: str) -> int: Gets the size of a file in Google Cloud Storage. :param bucket_name: The Google Cloud Storage bucket where the blob_name is. - :type bucket_name: str :param object_name: The name of the object to check in the Google cloud storage bucket_name. - :type object_name: str """ self.log.info('Checking the file size of object: %s in bucket_name: %s', object_name, bucket_name) @@ -867,10 +794,8 @@ def get_crc32c(self, bucket_name: str, object_name: str): Gets the CRC32c checksum of an object in Google Cloud Storage. :param bucket_name: The Google Cloud Storage bucket where the blob_name is. - :type bucket_name: str :param object_name: The name of the object to check in the Google cloud storage bucket_name. - :type object_name: str """ self.log.info( 'Retrieving the crc32c checksum of object_name: %s in bucket_name: %s', @@ -889,10 +814,8 @@ def get_md5hash(self, bucket_name: str, object_name: str) -> str: Gets the MD5 hash of an object in Google Cloud Storage. :param bucket_name: The Google Cloud Storage bucket where the blob_name is. - :type bucket_name: str :param object_name: The name of the object to check in the Google cloud storage bucket_name. - :type object_name: str """ self.log.info('Retrieving the MD5 hash of object: %s in bucket: %s', object_name, bucket_name) client = self.get_conn() @@ -921,11 +844,9 @@ def create_bucket( https://cloud.google.com/storage/docs/bucketnaming.html#requirements :param bucket_name: The name of the bucket. - :type bucket_name: str :param resource: An optional dict with parameters for creating the bucket. 
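A sketch combining `list` and `get_size` as documented above; `prefix` and `delimiter` filter server-side, and all names here are placeholders:

```python
from airflow.providers.google.cloud.hooks.gcs import GCSHook

hook = GCSHook()
names = hook.list(
    bucket_name="my-bucket",
    prefix="exports/2021/",  # only objects under this prefix
    delimiter=".csv",        # only objects ending in .csv
)
for name in names:
    print(name, hook.get_size("my-bucket", name))
```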
For information on available parameters, see Cloud Storage API doc: https://cloud.google.com/storage/docs/json_api/v1/buckets/insert - :type resource: dict :param storage_class: This defines how objects in the bucket are stored and determines the SLA and the cost of storage. Values include @@ -937,7 +858,6 @@ def create_bucket( If this value is not specified when the bucket is created, it will default to STANDARD. - :type storage_class: str :param location: The location of the bucket. Object data for objects in the bucket resides in physical storage within this region. Defaults to US. @@ -945,11 +865,8 @@ def create_bucket( .. seealso:: https://developers.google.com/storage/docs/bucket-locations - :type location: str :param project_id: The ID of the Google Cloud Project. - :type project_id: str :param labels: User-provided labels, in key/value pairs. - :type labels: dict :return: If successful, it returns the ``id`` of the bucket. """ self.log.info( @@ -981,18 +898,14 @@ def insert_bucket_acl( See: https://cloud.google.com/storage/docs/json_api/v1/bucketAccessControls/insert :param bucket_name: Name of a bucket_name. - :type bucket_name: str :param entity: The entity holding the permission, in one of the following forms: user-userId, user-email, group-groupId, group-email, domain-domain, project-team-projectId, allUsers, allAuthenticatedUsers. See: https://cloud.google.com/storage/docs/access-control/lists#scopes - :type entity: str :param role: The access permission for the entity. Acceptable values are: "OWNER", "READER", "WRITER". - :type role: str :param user_project: (Optional) The project to be billed for this request. Required for Requester Pays buckets. - :type user_project: str """ self.log.info('Creating a new ACL entry in bucket: %s', bucket_name) client = self.get_conn() @@ -1019,24 +932,18 @@ def insert_object_acl( See: https://cloud.google.com/storage/docs/json_api/v1/objectAccessControls/insert :param bucket_name: Name of a bucket_name. - :type bucket_name: str :param object_name: Name of the object. For information about how to URL encode object names to be path safe, see: https://cloud.google.com/storage/docs/json_api/#encoding - :type object_name: str :param entity: The entity holding the permission, in one of the following forms: user-userId, user-email, group-groupId, group-email, domain-domain, project-team-projectId, allUsers, allAuthenticatedUsers See: https://cloud.google.com/storage/docs/access-control/lists#scopes - :type entity: str :param role: The access permission for the entity. Acceptable values are: "OWNER", "READER". - :type role: str :param generation: Optional. If present, selects a specific revision of this object. - :type generation: long :param user_project: (Optional) The project to be billed for this request. Required for Requester Pays buckets. - :type user_project: str """ self.log.info('Creating a new ACL entry for object: %s in bucket: %s', object_name, bucket_name) client = self.get_conn() @@ -1062,12 +969,9 @@ def compose(self, bucket_name: str, source_objects: List, destination_object: st :param bucket_name: The name of the bucket containing the source objects. This is also the same bucket to store the composed destination object. - :type bucket_name: str :param source_objects: The list of source objects that will be composed into a single object. - :type source_objects: list :param destination_object: The path of the object if given. 
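`compose`, documented above, stitches source objects into one object server-side, without downloading them; a hedged sketch with placeholder names:

```python
from airflow.providers.google.cloud.hooks.gcs import GCSHook

hook = GCSHook()
hook.compose(
    bucket_name="my-bucket",  # sources and destination share one bucket
    source_objects=["parts/part-0000.csv", "parts/part-0001.csv"],
    destination_object="merged/full.csv",
)
```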
- :type destination_object: str """ if not source_objects: raise ValueError('source_objects cannot be empty.') @@ -1107,27 +1011,19 @@ def sync( synchronized. :param source_bucket: The name of the bucket containing the source objects. - :type source_bucket: str :param destination_bucket: The name of the bucket containing the destination objects. - :type destination_bucket: str :param source_object: The root sync directory in the source bucket. - :type source_object: Optional[str] :param destination_object: The root sync directory in the destination bucket. - :type destination_object: Optional[str] :param recursive: If True, subdirectories will be considered - :type recursive: bool :param recursive: If True, subdirectories will be considered - :type recursive: bool :param allow_overwrite: if True, the files will be overwritten if a mismatched file is found. By default, overwriting files is not allowed - :type allow_overwrite: bool :param delete_extra_files: if True, deletes additional files from the source that not found in the destination. By default extra files are not deleted. .. note:: This option can delete data quickly if you specify the wrong source/destination combination. - :type delete_extra_files: bool :return: none """ client = self.get_conn() diff --git a/airflow/providers/google/cloud/hooks/gdm.py b/airflow/providers/google/cloud/hooks/gdm.py index 9cda909dde03d..768f63dffdd37 100644 --- a/airflow/providers/google/cloud/hooks/gdm.py +++ b/airflow/providers/google/cloud/hooks/gdm.py @@ -63,11 +63,8 @@ def list_deployments( Lists deployments in a google cloud project. :param project_id: The project ID for this request. - :type project_id: str :param deployment_filter: A filter expression which limits resources returned in the response. - :type deployment_filter: str :param order_by: A field name to order by, ex: "creationTimestamp desc" - :type order_by: Optional[str] :rtype: list """ deployments = [] # type: List[Dict] @@ -90,11 +87,8 @@ def delete_deployment( Deletes a deployment and all associated resources in a google cloud project. :param project_id: The project ID for this request. - :type project_id: str :param deployment: The name of the deployment for this request. - :type deployment: str :param delete_policy: Sets the policy to use for deleting resources. (ABANDON | DELETE) - :type delete_policy: string :rtype: None """ diff --git a/airflow/providers/google/cloud/hooks/kms.py b/airflow/providers/google/cloud/hooks/kms.py index 7800394539943..b2d7a80b75e22 100644 --- a/airflow/providers/google/cloud/hooks/kms.py +++ b/airflow/providers/google/cloud/hooks/kms.py @@ -43,11 +43,9 @@ class CloudKMSHook(GoogleBaseHook): Hook for Google Cloud Key Management service. :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
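A sketch of `GCSHook.sync` with the safety flags spelled out at their conservative settings; the bucket names are placeholders:

```python
from airflow.providers.google.cloud.hooks.gcs import GCSHook

hook = GCSHook()
hook.sync(
    source_bucket="my-source-bucket",
    destination_bucket="my-dest-bucket",
    source_object="exports/",  # sync just this "directory"
    recursive=True,            # include subdirectories
    allow_overwrite=False,     # keep mismatched destination files
    delete_extra_files=False,  # never prune the destination here
)
```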
@@ -56,7 +54,6 @@ class CloudKMSHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -100,20 +97,14 @@ def encrypt( :param key_name: The Resource Name for the key (or key version) to be used for encryption. Of the form ``projects/*/locations/*/keyRings/*/cryptoKeys/**`` - :type key_name: str :param plaintext: The message to be encrypted. - :type plaintext: bytes :param authenticated_data: Optional additional authenticated data that must also be provided to decrypt the message. - :type authenticated_data: bytes :param retry: A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :return: The base 64 encoded ciphertext of the original message. :rtype: str """ @@ -145,20 +136,14 @@ def decrypt( :param key_name: The Resource Name for the key to be used for decryption. Of the form ``projects/*/locations/*/keyRings/*/cryptoKeys/**`` - :type key_name: str :param ciphertext: The message to be decrypted. - :type ciphertext: str :param authenticated_data: Any additional authenticated data that was provided when encrypting the message. - :type authenticated_data: bytes :param retry: A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :return: The original message. :rtype: bytes """ diff --git a/airflow/providers/google/cloud/hooks/kubernetes_engine.py b/airflow/providers/google/cloud/hooks/kubernetes_engine.py index 0f70ff62fe756..a91c882f55ab0 100644 --- a/airflow/providers/google/cloud/hooks/kubernetes_engine.py +++ b/airflow/providers/google/cloud/hooks/kubernetes_engine.py @@ -16,7 +16,14 @@ # specific language governing permissions and limitations # under the License. # -"""This module contains a Google Kubernetes Engine Hook.""" +""" +This module contains a Google Kubernetes Engine Hook. + +.. 
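The KMS hook's `encrypt`/`decrypt` pair round-trips as follows; the key resource name is a placeholder:

```python
from airflow.providers.google.cloud.hooks.kms import CloudKMSHook

hook = CloudKMSHook()
key = "projects/my-project/locations/global/keyRings/my-ring/cryptoKeys/my-key"

ciphertext = hook.encrypt(key_name=key, plaintext=b"secret")   # base64 str
plaintext = hook.decrypt(key_name=key, ciphertext=ciphertext)  # bytes
assert plaintext == b"secret"
```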
spelling:: + + gapic + enums +""" import time import warnings @@ -90,9 +97,7 @@ def wait_for_operation(self, operation: Operation, project_id: Optional[str] = N completion or an error occurring :param operation: The Operation to wait for - :type operation: google.cloud.container_V1.gapic.enums.Operation :param project_id: Google Cloud project ID - :type project_id: str :return: A new, updated operation fetched from Google Cloud """ self.log.info("Waiting for OPERATION_NAME %s", operation.name) @@ -111,9 +116,7 @@ def get_operation(self, operation_name: str, project_id: Optional[str] = None) - Fetches the operation from Google Cloud :param operation_name: Name of operation to fetch - :type operation_name: str :param project_id: Google Cloud project ID - :type project_id: str :return: The new, updated operation from Google Cloud """ return self.get_conn().get_operation( @@ -131,11 +134,8 @@ def _append_label(cluster_proto: Cluster, key: str, val: str) -> Cluster: :param cluster_proto: The proto to append resource_label airflow version to - :type cluster_proto: google.cloud.container_v1.types.Cluster :param key: The key label - :type key: str :param val: The value of the label - :type val: str :return: The cluster proto updated with the new label """ val = val.replace('.', '-').replace('+', '-') @@ -159,16 +159,12 @@ def delete_cluster( initial create time. :param name: The name of the cluster to delete - :type name: str :param project_id: Google Cloud project ID - :type project_id: str :param retry: Retry object used to determine when/if to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :return: The full url to the delete operation if successful, else None """ self.log.info("Deleting (project_id=%s, location=%s, cluster_id=%s)", project_id, self.location, name) @@ -201,17 +197,13 @@ def create_cluster( :param cluster: A Cluster protobuf or dict. If dict is provided, it must be of the same form as the protobuf message :class:`google.cloud.container_v1.types.Cluster` - :type cluster: dict or google.cloud.container_v1.types.Cluster :param project_id: Google Cloud project ID - :type project_id: str :param retry: A retry object (``google.api_core.retry.Retry``) used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :return: The full url to the new, or existing, cluster :raises: ParseError: On JSON parsing problems when trying to convert dict @@ -254,16 +246,12 @@ def get_cluster( Gets details of the specified cluster :param name: The name of the cluster to retrieve - :type name: str :param project_id: Google Cloud project ID - :type project_id: str :param retry: A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt.
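A minimal sketch of the read path documented above, assuming the hook class in this module is ``GKEHook`` and that the zone is passed as the ``location`` constructor argument (all names are placeholders)::

    from airflow.providers.google.cloud.hooks.kubernetes_engine import GKEHook

    hook = GKEHook(gcp_conn_id="google_cloud_default", location="europe-west1-b")
    # Blocks until the API call returns the Cluster proto.
    cluster = hook.get_cluster(name="my-cluster", project_id="my-project")
    print(cluster.name, cluster.current_node_count)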
- :type timeout: float :return: google.cloud.container_v1.types.Cluster """ self.log.info( diff --git a/airflow/providers/google/cloud/hooks/life_sciences.py b/airflow/providers/google/cloud/hooks/life_sciences.py index c87f0b2ad1398..551d21980c396 100644 --- a/airflow/providers/google/cloud/hooks/life_sciences.py +++ b/airflow/providers/google/cloud/hooks/life_sciences.py @@ -38,13 +38,10 @@ class LifeSciencesHook(GoogleBaseHook): keyword arguments rather than positional. :param api_version: API version used (for example v1 or v1beta1). - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -53,7 +50,6 @@ class LifeSciencesHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ _conn = None # type: Optional[Any] @@ -89,12 +85,9 @@ def run_pipeline(self, body: dict, location: str, project_id: str) -> dict: Runs a pipeline :param body: The request body. - :type body: dict :param location: The location of the project. For example: "us-east1". - :type location: str :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :rtype: dict """ parent = self._location_path(project_id=project_id, location=location) @@ -118,9 +111,7 @@ def _location_path(self, project_id: str, location: str) -> str: :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: The location of the project. For example: "us-east1". - :type location: str """ return google.api_core.path_template.expand( 'projects/{project}/locations/{location}', @@ -134,7 +125,6 @@ def _wait_for_operation_to_complete(self, operation_name: str) -> None: asynchronous call. :param operation_name: The name of the operation. - :type operation_name: str :return: The response returned by the operation. :rtype: dict :exception: AirflowException in case error is returned. diff --git a/airflow/providers/google/cloud/hooks/mlengine.py b/airflow/providers/google/cloud/hooks/mlengine.py index bbf0d71d05dc9..fdff4a0f2cded 100644 --- a/airflow/providers/google/cloud/hooks/mlengine.py +++ b/airflow/providers/google/cloud/hooks/mlengine.py @@ -41,15 +41,10 @@ def _poll_with_exponential_delay(request, execute_num_retries, max_n, is_done_fu lower level errors like `ConnectionError`/`socket.timeout`/`ssl.SSLError`. :param request: request to be executed. - :type request: googleapiclient.http.HttpRequest :param execute_num_retries: num_retries for `request.execute` method. - :type execute_num_retries: int :param max_n: number of times to retry request in this method. 
- :type max_n: int :param is_done_func: callable to determine if operation is done. - :type is_done_func: callable :param is_error_func: callable to determine if operation is failed. - :type is_error_func: callable :return: response :rtype: httplib2.Response """ @@ -98,7 +93,6 @@ def create_job(self, job: dict, project_id: str, use_existing_job_fn: Optional[C :param project_id: The Google Cloud project id within which MLEngine job will be launched. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param job: MLEngine Job object that should be provided to the MLEngine API, such as: :: @@ -110,7 +104,6 @@ def create_job(self, job: dict, project_id: str, use_existing_job_fn: Optional[C } } - :type job: dict :param use_existing_job_fn: In case that a MLEngine job with the same job_id already exist, this method (if provided) will decide whether we should use this existing job, continue waiting for it to finish @@ -118,7 +111,6 @@ def create_job(self, job: dict, project_id: str, use_existing_job_fn: Optional[C object, and returns a boolean value indicating whether it is OK to reuse the existing job. If 'use_existing_job_fn' is not provided, we by default reuse the existing MLEngine job. - :type use_existing_job_fn: function :return: The MLEngine job object if the job successfully reach a terminal state (which might be FAILED or CANCELLED state). :rtype: dict @@ -164,9 +156,7 @@ def cancel_job( :param project_id: The Google Cloud project id within which MLEngine job will be cancelled. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param job_id: A unique id for the want-to-be cancelled Google MLEngine training job. - :type job_id: str :return: Empty dict if cancelled successfully :rtype: dict @@ -195,9 +185,7 @@ def _get_job(self, project_id: str, job_id: str) -> dict: :param project_id: The project in which the Job is located. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :param job_id: A unique id for the Google MLEngine job. (templated) - :type job_id: str :return: MLEngine job object if succeed. :rtype: dict :raises: googleapiclient.errors.HttpError @@ -225,11 +213,8 @@ def _wait_for_job_done(self, project_id: str, job_id: str, interval: int = 30): :param project_id: The project in which the Job is located. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :param job_id: A unique id for the Google MLEngine job. (templated) - :type job_id: str :param interval: Time expressed in seconds after which the job status is checked again. (templated) - :type interval: int :raises: googleapiclient.errors.HttpError """ self.log.info("Waiting for job. job_id=%s", job_id) @@ -253,14 +238,11 @@ def create_version( Creates the Version on Google Cloud ML Engine. :param version_spec: A dictionary containing the information about the version. (templated) - :type version_spec: dict :param model_name: The name of the Google Cloud ML Engine model that the version belongs to. (templated) - :type model_name: str :param project_id: The Google Cloud project name to which MLEngine model belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :return: If the version was created successfully, returns the operation. Otherwise raises an error . 
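The ``use_existing_job_fn`` contract described above is clearer in code. A hedged sketch reusing the job dict shape from the docstring (the project id is a placeholder)::

    from airflow.providers.google.cloud.hooks.mlengine import MLEngineHook

    hook = MLEngineHook()
    job = {"jobId": "my_job_id", "trainingInput": {"scaleTier": "STANDARD_1"}}
    finished = hook.create_job(
        job=job,
        project_id="my-project",
        # Reuse a pre-existing job with this id only if it was started
        # from the same training input.
        use_existing_job_fn=lambda existing: existing.get("trainingInput")
        == job["trainingInput"],
    )
    print(finished["state"])  # a terminal state: SUCCEEDED, FAILED or CANCELLED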
:rtype: dict @@ -294,12 +276,9 @@ def set_default_version( :param model_name: The name of the Google Cloud ML Engine model that the version belongs to. (templated) - :type model_name: str :param version_name: A name to use for the version being operated upon. (templated) - :type version_name: str :param project_id: The Google Cloud project name to which MLEngine model belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :return: If successful, returns an instance of Version. Otherwise raises an error. :rtype: dict @@ -329,10 +308,8 @@ def list_versions( :param model_name: The name of the Google Cloud ML Engine model that the version belongs to. (templated) - :type model_name: str :param project_id: The Google Cloud project name to which MLEngine model belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :return: Returns a list of instances of Version. :rtype: List[Dict] :raises: googleapiclient.errors.HttpError @@ -368,10 +345,8 @@ def delete_version( :param model_name: The name of the Google Cloud ML Engine model that the version belongs to. (templated) - :type model_name: str :param project_id: The Google Cloud project name to which MLEngine model belongs. - :type project_id: str :return: If the version was deleted successfully, returns the operation. Otherwise raises an error. :rtype: Dict @@ -400,10 +375,8 @@ def create_model( Create a Model. Blocks until finished. :param model: A dictionary containing the information about the model. - :type model: dict :param project_id: The Google Cloud project name to which MLEngine model belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :return: If the model was created successfully, returns the instance of Model. Otherwise raises an error. :rtype: Dict @@ -452,10 +425,8 @@ def get_model( Gets a Model. Blocks until finished. :param model_name: The name of the model. - :type model_name: str :param project_id: The Google Cloud project name to which MLEngine model belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :return: If the model exists, returns the instance of Model. Otherwise returns None. :rtype: Dict @@ -485,14 +456,11 @@ def delete_model( Delete a Model. Blocks until finished. :param model_name: The name of the model. - :type model_name: str :param delete_contents: Whether to force the deletion even if the model is not empty. Will delete all versions (if any) in the model if set to True. The default value is False. - :type delete_contents: bool :param project_id: The Google Cloud project name to which MLEngine model belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :raises: googleapiclient.errors.HttpError """ hook = self.get_conn() diff --git a/airflow/providers/google/cloud/hooks/natural_language.py b/airflow/providers/google/cloud/hooks/natural_language.py index 2174c8f883ba7..1eb07d06f0207 100644 --- a/airflow/providers/google/cloud/hooks/natural_language.py +++ b/airflow/providers/google/cloud/hooks/natural_language.py @@ -39,11 +39,9 @@ class CloudNaturalLanguageHook(GoogleBaseHook): Hook for Google Cloud Natural Language Service. :param gcp_conn_id: The connection ID to use when fetching connection info.
- :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -52,7 +50,6 @@ class CloudNaturalLanguageHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -96,17 +93,12 @@ def analyze_entities( :param document: Input document. If a dict is provided, it must be of the same form as the protobuf message Document - :type document: dict or google.cloud.language_v1.types.Document :param encoding_type: The encoding type used by the API to calculate offsets. - :type encoding_type: google.cloud.language_v1.enums.EncodingType :param retry: A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.language_v1.types.AnalyzeEntitiesResponse """ client = self.get_conn() @@ -130,17 +122,12 @@ def analyze_entity_sentiment( :param document: Input document. If a dict is provided, it must be of the same form as the protobuf message Document - :type document: dict or google.cloud.language_v1.types.Document :param encoding_type: The encoding type used by the API to calculate offsets. - :type encoding_type: google.cloud.language_v1.enums.EncodingType :param retry: A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.language_v1.types.AnalyzeEntitiesResponse """ client = self.get_conn() @@ -163,17 +150,12 @@ def analyze_sentiment( :param document: Input document. If a dict is provided, it must be of the same form as the protobuf message Document - :type document: dict or google.cloud.language_v1.types.Document :param encoding_type: The encoding type used by the API to calculate offsets. - :type encoding_type: google.cloud.language_v1.enums.EncodingType :param retry: A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. 
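All of the ``analyze_*`` methods in this file accept the same ``document`` argument. A short sketch, assuming the dict form of ``Document`` with ``content`` and ``type`` keys (the sample text is a placeholder)::

    from airflow.providers.google.cloud.hooks.natural_language import CloudNaturalLanguageHook

    hook = CloudNaturalLanguageHook()
    document = {"content": "Apache Airflow was created at Airbnb.", "type": "PLAIN_TEXT"}
    response = hook.analyze_entities(document=document)
    for entity in response.entities:
        print(entity.name)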
- :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.language_v1.types.AnalyzeSentimentResponse """ client = self.get_conn() @@ -197,17 +179,12 @@ def analyze_syntax( :param document: Input document. If a dict is provided, it must be of the same form as the protobuf message Document - :type document: dict or google.cloud.language_v1.types.Document :param encoding_type: The encoding type used by the API to calculate offsets. - :type encoding_type: google.cloud.language_v1.enums.EncodingType :param retry: A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.language_v1.types.AnalyzeSyntaxResponse """ client = self.get_conn() @@ -232,20 +209,14 @@ def annotate_text( :param document: Input document. If a dict is provided, it must be of the same form as the protobuf message Document - :type document: dict or google.cloud.language_v1.types.Document :param features: The enabled features. If a dict is provided, it must be of the same form as the protobuf message Features - :type features: dict or google.cloud.language_v1.types.AnnotateTextRequest.Features :param encoding_type: The encoding type used by the API to calculate offsets. - :type encoding_type: google.cloud.language_v1.enums.EncodingType :param retry: A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.language_v1.types.AnnotateTextResponse """ client = self.get_conn() @@ -272,15 +243,11 @@ def classify_text( :param document: Input document. If a dict is provided, it must be of the same form as the protobuf message Document - :type document: dict or google.cloud.language_v1.types.Document :param retry: A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.language_v1.types.ClassifyTextResponse """ client = self.get_conn() diff --git a/airflow/providers/google/cloud/hooks/os_login.py b/airflow/providers/google/cloud/hooks/os_login.py index a2e5957beee3e..34e0798aacde4 100644 --- a/airflow/providers/google/cloud/hooks/os_login.py +++ b/airflow/providers/google/cloud/hooks/os_login.py @@ -14,6 +14,13 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +""" +.. spelling:: + + ImportSshPublicKeyResponse + oslogin +""" + from typing import Dict, Optional, Sequence, Tuple, Union @@ -68,19 +75,13 @@ def import_ssh_public_key( login profile. 
:param user: The unique ID for the user - :type user: str :param ssh_public_key: The SSH public key and expiration time. - :type ssh_public_key: dict :param project_id: The project ID of the Google Cloud project. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :return: A :class:`~google.cloud.oslogin_v1.ImportSshPublicKeyResponse` instance. """ conn = self.get_conn() diff --git a/airflow/providers/google/cloud/hooks/pubsub.py b/airflow/providers/google/cloud/hooks/pubsub.py index 11d7c3bebce6d..6301425b6c55d 100644 --- a/airflow/providers/google/cloud/hooks/pubsub.py +++ b/airflow/providers/google/cloud/hooks/pubsub.py @@ -15,7 +15,14 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""This module contains a Google Pub/Sub Hook.""" +""" +This module contains a Google Pub/Sub Hook. + +.. spelling:: + + MessageStoragePolicy + ReceivedMessage +""" import sys import warnings from base64 import b64decode @@ -104,14 +111,11 @@ def publish( :param topic: the Pub/Sub topic to which to publish; do not include the ``projects/{project}/topics/`` prefix. - :type topic: str :param messages: messages to publish; if the data field in a message is set, it should be a bytestring (utf-8 encoded) - :type messages: list of PubSub messages; see http://cloud.google.com/pubsub/docs/reference/rest/v1/PubsubMessage :param project_id: Optional, the Google Cloud project ID in which to publish. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str """ self._validate_messages(messages) @@ -178,36 +182,27 @@ def create_topic( :param topic: the Pub/Sub topic name to create; do not include the ``projects/{project}/topics/`` prefix. - :type topic: str :param project_id: Optional, the Google Cloud project ID in which to create the topic If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param fail_if_exists: if set, raise an exception if the topic already exists - :type fail_if_exists: bool :param labels: Client-assigned labels; see https://cloud.google.com/pubsub/docs/labels - :type labels: Dict[str, str] :param message_storage_policy: Policy constraining the set of Google Cloud regions where messages published to the topic may be stored. If not present, then no constraints are in effect. - :type message_storage_policy: Union[Dict, google.cloud.pubsub_v1.types.MessageStoragePolicy] :param kms_key_name: The resource name of the Cloud KMS CryptoKey to be used to protect access to messages published on this topic. The expected format is ``projects/*/locations/*/keyRings/*/cryptoKeys/*``. - :type kms_key_name: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. 
Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]]] """ publisher = self.get_conn() topic_path = f"projects/{project_id}/topics/{topic}" @@ -254,22 +249,16 @@ def delete_topic( :param topic: the Pub/Sub topic name to delete; do not include the ``projects/{project}/topics/`` prefix. - :type topic: str :param project_id: Optional, the Google Cloud project ID in which to delete the topic. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param fail_if_not_exists: if set, raise an exception if the topic does not exist - :type fail_if_not_exists: bool :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]]] """ publisher = self.get_conn() topic_path = f"projects/{project_id}/topics/{topic}" @@ -315,78 +304,60 @@ def create_subscription( :param topic: the Pub/Sub topic name that the subscription will be bound to create; do not include the ``projects/{project}/subscriptions/`` prefix. - :type topic: str :param project_id: Optional, the Google Cloud project ID of the topic that the subscription will be bound to. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param subscription: the Pub/Sub subscription name. If empty, a random name will be generated using the uuid module - :type subscription: str :param subscription_project_id: the Google Cloud project ID where the subscription will be created. If unspecified, ``project_id`` will be used. - :type subscription_project_id: str :param ack_deadline_secs: Number of seconds that a subscriber has to acknowledge each message pulled from the subscription - :type ack_deadline_secs: int :param fail_if_exists: if set, raise an exception if the topic already exists - :type fail_if_exists: bool :param push_config: If push delivery is used with this subscription, this field is used to configure it. An empty ``pushConfig`` signifies that the subscriber will pull and ack messages using API methods. - :type push_config: Union[Dict, google.cloud.pubsub_v1.types.PushConfig] :param retain_acked_messages: Indicates whether to retain acknowledged messages. If true, then messages are not expunged from the subscription's backlog, even if they are acknowledged, until they fall out of the ``message_retention_duration`` window. This must be true if you would like to Seek to a timestamp. - :type retain_acked_messages: bool :param message_retention_duration: How long to retain unacknowledged messages in the subscription's backlog, from the moment a message is published. If ``retain_acked_messages`` is true, then this also configures the retention of acknowledged messages, and thus configures how far back in time a ``Seek`` can be done. Defaults to 7 days. Cannot be more than 7 days or less than 10 minutes. 
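A usage sketch tying ``create_topic`` and ``publish`` together; per the ``publish`` docstring, the ``data`` field must be a bytestring, and the project and topic names below are placeholders::

    from airflow.providers.google.cloud.hooks.pubsub import PubSubHook

    hook = PubSubHook()
    hook.create_topic(project_id="my-project", topic="my-topic", fail_if_exists=False)
    hook.publish(
        project_id="my-project",
        topic="my-topic",
        messages=[{"data": b"hello", "attributes": {"source": "example"}}],
    )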
- :type message_retention_duration: Union[Dict, google.cloud.pubsub_v1.types.Duration] :param labels: Client-assigned labels; see https://cloud.google.com/pubsub/docs/labels - :type labels: Dict[str, str] :param enable_message_ordering: If true, messages published with the same ordering_key in PubsubMessage will be delivered to the subscribers in the order in which they are received by the Pub/Sub system. Otherwise, they may be delivered in any order. - :type enable_message_ordering: bool :param expiration_policy: A policy that specifies the conditions for this subscription’s expiration. A subscription is considered active as long as any connected subscriber is successfully consuming messages from the subscription or is issuing operations on the subscription. If expiration_policy is not set, a default policy with ttl of 31 days will be used. The minimum allowed value for expiration_policy.ttl is 1 day. - :type expiration_policy: Union[Dict, google.cloud.pubsub_v1.types.ExpirationPolicy`] :param filter_: An expression written in the Cloud Pub/Sub filter language. If non-empty, then only PubsubMessages whose attributes field matches the filter are delivered on this subscription. If empty, then no messages are filtered out. - :type filter_: str :param dead_letter_policy: A policy that specifies the conditions for dead lettering messages in this subscription. If dead_letter_policy is not set, dead lettering is disabled. - :type dead_letter_policy: Union[Dict, google.cloud.pubsub_v1.types.DeadLetterPolicy] :param retry_policy: A policy that specifies how Pub/Sub retries message delivery for this subscription. If not set, the default retry policy is applied. This generally implies that messages will be retried as soon as possible for healthy subscribers. RetryPolicy will be triggered on NACKs or acknowledgement deadline exceeded events for a given message. - :type retry_policy: Union[Dict, google.cloud.pubsub_v1.types.RetryPolicy] :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]]] :return: subscription name which will be the system-generated value if the ``subscription`` parameter is not supplied :rtype: str @@ -453,19 +424,13 @@ def delete_subscription( include the ``projects/{project}/subscriptions/`` prefix. :param project_id: Optional, the Google Cloud project ID where the subscription exists If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str - :type subscription: str :param fail_if_not_exists: if set, raise an exception if the topic does not exist - :type fail_if_not_exists: bool :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. 
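The consuming side of the same API, sketched with the same placeholder names; ``create_subscription`` returns the subscription name, and the ``ReceivedMessage`` objects returned by ``pull`` can be handed straight to ``acknowledge``::

    from airflow.providers.google.cloud.hooks.pubsub import PubSubHook

    hook = PubSubHook()
    subscription = hook.create_subscription(
        project_id="my-project", topic="my-topic", subscription="my-sub"
    )
    received = hook.pull(
        project_id="my-project",
        subscription=subscription,
        max_messages=10,
        return_immediately=True,
    )
    if received:
        hook.acknowledge(
            project_id="my-project", subscription=subscription, messages=received
        )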
- :type metadata: Sequence[Tuple[str, str]]] """ subscriber = self.subscriber_client # E501 @@ -506,26 +471,19 @@ def pull( :param subscription: the Pub/Sub subscription name to pull from; do not include the 'projects/{project}/topics/' prefix. - :type subscription: str :param max_messages: The maximum number of messages to return from the Pub/Sub API. - :type max_messages: int :param project_id: Optional, the Google Cloud project ID where the subscription exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param return_immediately: If set, the Pub/Sub API will immediately return if no messages are available. Otherwise, the request will block for an undisclosed, but bounded period of time - :type return_immediately: bool :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]]] :return: A list of Pub/Sub ReceivedMessage objects each containing an ``ackId`` property and a ``message`` property, which includes the base64-encoded message content. See @@ -570,25 +528,18 @@ def acknowledge( :param subscription: the Pub/Sub subscription name to delete; do not include the 'projects/{project}/topics/' prefix. - :type subscription: str :param ack_ids: List of ReceivedMessage ackIds from a previous pull response. Mutually exclusive with ``messages`` argument. - :type ack_ids: list :param messages: List of ReceivedMessage objects to acknowledge. Mutually exclusive with ``ack_ids`` argument. - :type messages: list :param project_id: Optional, the Google Cloud project name or ID in which to create the topic If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]]] """ if ack_ids is not None and messages is None: pass diff --git a/airflow/providers/google/cloud/hooks/secret_manager.py b/airflow/providers/google/cloud/hooks/secret_manager.py index 0561e6ade58c5..a584da267e05c 100644 --- a/airflow/providers/google/cloud/hooks/secret_manager.py +++ b/airflow/providers/google/cloud/hooks/secret_manager.py @@ -32,11 +32,9 @@ class SecretsManagerHook(GoogleBaseHook): keyword arguments rather than positional. :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. 
- :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -45,7 +43,6 @@ class SecretsManagerHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -78,11 +75,8 @@ def get_secret( Get secret value from the Secret Manager. :param secret_id: Secret Key - :type secret_id: str :param secret_version: version of the secret (default is 'latest') - :type secret_version: str :param project_id: Project id (if you want to override the project_id from credentials) - :type project_id: str """ return self.get_conn().get_secret( secret_id=secret_id, secret_version=secret_version, project_id=project_id # type: ignore diff --git a/airflow/providers/google/cloud/hooks/spanner.py b/airflow/providers/google/cloud/hooks/spanner.py index 654d06b201087..acab8ee1dd165 100644 --- a/airflow/providers/google/cloud/hooks/spanner.py +++ b/airflow/providers/google/cloud/hooks/spanner.py @@ -55,7 +55,6 @@ def _get_client(self, project_id: str) -> Client: Provides a client for interacting with the Cloud Spanner API. :param project_id: The ID of the Google Cloud project. - :type project_id: str :return: Client :rtype: google.cloud.spanner_v1.client.Client """ @@ -77,9 +76,7 @@ def get_instance( :param project_id: Optional, The ID of the Google Cloud project that owns the Cloud Spanner database. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param instance_id: The ID of the Cloud Spanner instance. - :type instance_id: str :return: Spanner instance :rtype: google.cloud.spanner_v1.instance.Instance """ @@ -101,20 +98,14 @@ def _apply_to_instance( Invokes a method on a given instance by applying a specified Callable. :param project_id: The ID of the Google Cloud project that owns the Cloud Spanner database. - :type project_id: str :param instance_id: The ID of the instance. - :type instance_id: str :param configuration_name: Name of the instance configuration defining how the instance will be created. Required for instances which do not yet exist. - :type configuration_name: str :param node_count: (Optional) Number of nodes allocated to the instance. - :type node_count: int :param display_name: (Optional) The display name for the instance in the Cloud Console UI. (Must be between 4 and 30 characters.) If this value is not set in the constructor, will fall back to the instance ID. - :type display_name: str :param func: Method of the instance to be called. - :type func: Callable[google.cloud.spanner_v1.instance.Instance] """ instance = self._get_client(project_id=project_id).instance( instance_id=instance_id, @@ -145,22 +136,17 @@ def create_instance( Creates a new Cloud Spanner instance. :param instance_id: The ID of the Cloud Spanner instance. - :type instance_id: str :param configuration_name: The name of the instance configuration defining how the instance will be created. 
Possible configuration values can be retrieved via https://cloud.google.com/spanner/docs/reference/rest/v1/projects.instanceConfigs/list - :type configuration_name: str :param node_count: (Optional) The number of nodes allocated to the Cloud Spanner instance. - :type node_count: int :param display_name: (Optional) The display name for the instance in the Google Cloud Console. Must be between 4 and 30 characters. If this value is not passed, the name falls back to the instance ID. - :type display_name: str :param project_id: Optional, the ID of the Google Cloud project that owns the Cloud Spanner database. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ self._apply_to_instance( @@ -180,22 +166,17 @@ def update_instance( Updates an existing Cloud Spanner instance. :param instance_id: The ID of the Cloud Spanner instance. - :type instance_id: str :param configuration_name: The name of the instance configuration defining how the instance will be created. Possible configuration values can be retrieved via https://cloud.google.com/spanner/docs/reference/rest/v1/projects.instanceConfigs/list - :type configuration_name: str :param node_count: (Optional) The number of nodes allocated to the Cloud Spanner instance. - :type node_count: int :param display_name: (Optional) The display name for the instance in the Google Cloud Console. Must be between 4 and 30 characters. If this value is not set in the constructor, the name falls back to the instance ID. - :type display_name: str :param project_id: Optional, the ID of the Google Cloud project that owns the Cloud Spanner database. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ self._apply_to_instance( @@ -208,11 +189,9 @@ def delete_instance(self, instance_id: str, project_id: str) -> None: Deletes an existing Cloud Spanner instance. :param instance_id: The ID of the Cloud Spanner instance. - :type instance_id: str :param project_id: Optional, the ID of the Google Cloud project that owns the Cloud Spanner database. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: None """ instance = self._get_client(project_id=project_id).instance(instance_id) @@ -235,13 +214,10 @@ def get_database( in the specified instance, it returns None. :param instance_id: The ID of the Cloud Spanner instance. - :type instance_id: str :param database_id: The ID of the database in Cloud Spanner. - :type database_id: str :param project_id: Optional, the ID of the Google Cloud project that owns the Cloud Spanner database. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :return: Database object or None if database does not exist :rtype: google.cloud.spanner_v1.database.Database or None """ @@ -265,13 +241,9 @@ def create_database( """ Creates a new database in Cloud Spanner. - :type project_id: str :param instance_id: The ID of the Cloud Spanner instance. - :type instance_id: str :param database_id: The ID of the database to create in Cloud Spanner. - :type database_id: str :param ddl_statements: The string list containing DDL for the new database. - :type ddl_statements: list[str] :param project_id: Optional, the ID of the Google Cloud project that owns the Cloud Spanner database. If set to None or missing, the default project_id from the Google Cloud connection is used. 
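To make the database-level calls above concrete, a short sketch; the instance, database and project ids are placeholders, and the DDL/DML strings are illustrative Cloud Spanner SQL::

    from airflow.providers.google.cloud.hooks.spanner import SpannerHook

    hook = SpannerHook()
    hook.create_database(
        instance_id="my-instance",
        database_id="my-db",
        ddl_statements=["CREATE TABLE users (id INT64) PRIMARY KEY (id)"],
        project_id="my-project",
    )
    hook.execute_dml(
        instance_id="my-instance",
        database_id="my-db",
        queries=["INSERT INTO users (id) VALUES (1)"],
        project_id="my-project",
    )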
@@ -303,19 +275,14 @@ def update_database( """ Updates DDL of a database in Cloud Spanner. - :type project_id: str :param instance_id: The ID of the Cloud Spanner instance. - :type instance_id: str :param database_id: The ID of the database in Cloud Spanner. - :type database_id: str :param ddl_statements: The string list containing DDL for the new database. - :type ddl_statements: list[str] :param project_id: Optional, the ID of the Google Cloud project that owns the Cloud Spanner database. If set to None or missing, the default project_id from the Google Cloud connection is used. :param operation_id: (Optional) The unique per database operation ID that can be specified to implement idempotency check. - :type operation_id: str :return: None """ instance = self._get_client(project_id=project_id).instance(instance_id=instance_id) @@ -344,11 +311,8 @@ def delete_database(self, instance_id: str, database_id, project_id: str) -> boo """ Drops a database in Cloud Spanner. - :type project_id: str :param instance_id: The ID of the Cloud Spanner instance. - :type instance_id: str :param database_id: The ID of the database in Cloud Spanner. - :type database_id: str :param project_id: Optional, the ID of the Google Cloud project that owns the Cloud Spanner database. If set to None or missing, the default project_id from the Google Cloud connection is used. @@ -384,15 +348,11 @@ def execute_dml( Executes an arbitrary DML query (INSERT, UPDATE, DELETE). :param instance_id: The ID of the Cloud Spanner instance. - :type instance_id: str :param database_id: The ID of the database in Cloud Spanner. - :type database_id: str :param queries: The queries to execute. - :type queries: List[str] :param project_id: Optional, the ID of the Google Cloud project that owns the Cloud Spanner database. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str """ self._get_client(project_id=project_id).instance(instance_id=instance_id).database( database_id=database_id diff --git a/airflow/providers/google/cloud/hooks/speech_to_text.py b/airflow/providers/google/cloud/hooks/speech_to_text.py index fb74a0703087c..ecde2affb0bbb 100644 --- a/airflow/providers/google/cloud/hooks/speech_to_text.py +++ b/airflow/providers/google/cloud/hooks/speech_to_text.py @@ -30,11 +30,9 @@ class CloudSpeechToTextHook(GoogleBaseHook): Hook for Google Cloud Speech API. :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -43,7 +41,6 @@ class CloudSpeechToTextHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -83,16 +80,12 @@ def recognize_speech( :param config: information to the recognizer that specifies how to process the request. 
https://googleapis.github.io/google-cloud-python/latest/speech/gapic/v1/types.html#google.cloud.speech_v1.types.RecognitionConfig - :type config: dict or google.cloud.speech_v1.types.RecognitionConfig :param audio: audio data to be recognized https://googleapis.github.io/google-cloud-python/latest/speech/gapic/v1/types.html#google.cloud.speech_v1.types.RecognitionAudio - :type audio: dict or google.cloud.speech_v1.types.RecognitionAudio :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float """ client = self.get_conn() response = client.recognize(config=config, audio=audio, retry=retry, timeout=timeout) diff --git a/airflow/providers/google/cloud/hooks/stackdriver.py b/airflow/providers/google/cloud/hooks/stackdriver.py index afcd91772fe3b..a884566822f7e 100644 --- a/airflow/providers/google/cloud/hooks/stackdriver.py +++ b/airflow/providers/google/cloud/hooks/stackdriver.py @@ -81,33 +81,25 @@ def list_alert_policies( :param format_: (Optional) Desired output format of the result. The supported formats are "dict", "json" and None which returns python dictionary, stringified JSON and protobuf respectively. - :type format_: str :param filter_: If provided, this field specifies the criteria that must be met by alert policies to be included in the response. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type filter_: str :param order_by: A comma-separated list of fields by which to sort the result. Supports the same set of field references as the ``filter`` field. Entries can be prefixed with a minus sign to sort by the field in descending order. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type order_by: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per- resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. - :type page_size: int :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: str :param project_id: The project to fetch alerts from. - :type project_id: str """ client = self._get_policy_client() policies_ = client.list_alert_policies( @@ -165,20 +157,15 @@ def enable_alert_policies( parameter. Inoperative in case the policy is already enabled. :param project_id: The project in which alert needs to be enabled. - :type project_id: str :param filter_: If provided, this field specifies the criteria that must be met by alert policies to be enabled. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type filter_: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. 
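A sketch of the policy toggles documented here; the ``filter_`` expression follows the Cloud Monitoring filter syntax linked above, and the project id and display name are placeholders::

    from airflow.providers.google.cloud.hooks.stackdriver import StackdriverHook

    hook = StackdriverHook()
    # Enable only the policies whose display name matches the filter;
    # already-enabled policies are left untouched.
    hook.enable_alert_policies(
        project_id="my-project",
        filter_='display_name="nightly-smoke-test"',
    )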
- :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: str """ self._toggle_policy_status( new_state=True, @@ -203,20 +190,15 @@ def disable_alert_policies( parameter. Inoperative in case the policy is already disabled. :param project_id: The project in which alert needs to be disabled. - :type project_id: str :param filter_: If provided, this field specifies the criteria that must be met by alert policies to be disabled. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type filter_: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: str """ self._toggle_policy_status( filter_=filter_, @@ -241,21 +223,16 @@ def upsert_alert( the name field in the alerts parameter. :param project_id: The project in which alert needs to be created/updated. - :type project_id: str :param alerts: A JSON string or file that specifies all the alerts that needs to be either created or updated. For more details, see https://cloud.google.com/monitoring/api/ref_v3/rest/v3/projects.alertPolicies#AlertPolicy. (templated) - :type alerts: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: str """ policy_client = self._get_policy_client() channel_client = self._get_channel_client() @@ -342,16 +319,12 @@ def delete_alert_policy( :param name: The alerting policy to delete. The format is: ``projects/[PROJECT_ID]/alertPolicies/[ALERT_POLICY_ID]``. - :type name: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: str """ policy_client = self._get_policy_client() try: @@ -383,33 +356,25 @@ def list_notification_channels( :param format_: (Optional) Desired output format of the result. The supported formats are "dict", "json" and None which returns python dictionary, stringified JSON and protobuf respectively. - :type format_: str :param filter_: If provided, this field specifies the criteria that must be met by notification channels to be included in the response. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type filter_: str :param order_by: A comma-separated list of fields by which to sort the result. 
Supports the same set of field references as the ``filter`` field. Entries can be prefixed with a minus sign to sort by the field in descending order. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type order_by: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per- resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. - :type page_size: int :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: str :param project_id: The project to fetch notification channels from. - :type project_id: str """ client = self._get_channel_client() channels = client.list_notification_channels( @@ -469,20 +434,15 @@ def enable_notification_channels( parameter. Inoperative in case the policy is already enabled. :param project_id: The project in which notification channels need to be enabled. - :type project_id: str :param filter_: If provided, this field specifies the criteria that must be met by notification channels to be enabled. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type filter_: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: str """ self._toggle_channel_status( project_id=project_id, @@ -507,20 +467,15 @@ def disable_notification_channels( parameter. Inoperative in case the policy is already disabled. :param project_id: The project in which notification channels need to be disabled. - :type project_id: str :param filter_: If provided, this field specifies the criteria that must be met by notification channels to be disabled. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type filter_: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: str """ self._toggle_channel_status( filter_=filter_, @@ -548,18 +503,13 @@ def upsert_channel( to be either created or updated. For more details, see https://cloud.google.com/monitoring/api/ref_v3/rest/v3/projects.notificationChannels. (templated) - :type channels: str :param project_id: The project in which notification channels need to be created/updated. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration.
- :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: str """ channel_client = self._get_channel_client() @@ -611,16 +561,12 @@ def delete_notification_channel( :param name: The alerting policy to delete. The format is: ``projects/[PROJECT_ID]/notificationChannels/[CHANNEL_ID]``. - :type name: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: str """ channel_client = self._get_channel_client() try: diff --git a/airflow/providers/google/cloud/hooks/tasks.py b/airflow/providers/google/cloud/hooks/tasks.py index ff9e9b19be27f..e21fb9686923e 100644 --- a/airflow/providers/google/cloud/hooks/tasks.py +++ b/airflow/providers/google/cloud/hooks/tasks.py @@ -42,11 +42,9 @@ class CloudTasksHook(GoogleBaseHook): keyword arguments rather than positional. :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -55,7 +53,6 @@ class CloudTasksHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -97,26 +94,19 @@ def create_queue( Creates a queue in Cloud Tasks. :param location: The location name in which the queue will be created. - :type location: str :param task_queue: The task queue to create. Queue's name cannot be the same as an existing queue. If a dict is provided, it must be of the same form as the protobuf message Queue. - :type task_queue: dict or google.cloud.tasks_v2.types.Queue :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param queue_name: (Optional) The queue's name. If provided, it will be used to construct the full queue path. - :type queue_name: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. 
- :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.tasks_v2.types.Queue """ client = self.get_conn() @@ -155,29 +145,21 @@ def update_queue( :param task_queue: The task queue to update. This method creates the queue if it does not exist and updates the queue if it does exist. The queue's name must be specified. - :type task_queue: dict or google.cloud.tasks_v2.types.Queue :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: (Optional) The location name in which the queue will be updated. If provided, it will be used to construct the full queue path. - :type location: str :param queue_name: (Optional) The queue's name. If provided, it will be used to construct the full queue path. - :type queue_name: str :param update_mask: A mask used to specify which fields of the queue are being updated. If empty, then all fields will be updated. If a dict is provided, it must be of the same form as the protobuf message. - :type update_mask: dict or google.protobuf.field_mask_pb2.FieldMask :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.tasks_v2.types.Queue """ client = self.get_conn() @@ -211,21 +193,15 @@ def get_queue( Gets a queue from Cloud Tasks. :param location: The location name in which the queue was created. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.tasks_v2.types.Queue """ client = self.get_conn() @@ -253,24 +229,17 @@ def list_queues( Lists queues from Cloud Tasks. :param location: The location name in which the queues were created. - :type location: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param results_filter: (Optional) Filter used to specify a subset of queues. - :type results_filter: str :param page_size: (Optional) The maximum number of resources contained in the underlying API response. - :type page_size: int :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried.
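A short sketch of ``update_queue`` with a ``FieldMask``, as documented above; the queue values are hypothetical. Only the masked fields are updated, and an empty mask updates everything.

# Illustrative sketch, not part of this patch.
from google.protobuf.field_mask_pb2 import FieldMask

from airflow.providers.google.cloud.hooks.tasks import CloudTasksHook

hook = CloudTasksHook(gcp_conn_id="google_cloud_default")
hook.update_queue(
    task_queue={"rate_limits": {"max_dispatches_per_second": 10}},  # dict in protobuf Queue form
    location="europe-west1",
    queue_name="my-queue",
    update_mask=FieldMask(paths=["rate_limits.max_dispatches_per_second"]),
)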
- :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :rtype: list[google.cloud.tasks_v2.types.Queue] """ client = self.get_conn() @@ -298,21 +267,15 @@ def delete_queue( Deletes a queue from Cloud Tasks, even if it has tasks in it. :param location: The location name in which the queue will be deleted. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] """ client = self.get_conn() @@ -338,21 +301,15 @@ def purge_queue( Purges a queue by deleting all of its tasks from Cloud Tasks. :param location: The location name in which the queue will be purged. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :rtype: list[google.cloud.tasks_v2.types.Queue] """ client = self.get_conn() @@ -379,21 +336,15 @@ def pause_queue( Pauses a queue in Cloud Tasks. :param location: The location name in which the queue will be paused. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :rtype: list[google.cloud.tasks_v2.types.Queue] """ client = self.get_conn() @@ -420,21 +371,15 @@ def resume_queue( Resumes a queue in Cloud Tasks. 
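The queue-management methods documented above compose into a simple lifecycle; a hedged sketch follows (resource names are hypothetical, and ``project_id`` falls back to the connection default when omitted):

# Illustrative sketch, not part of this patch.
from airflow.providers.google.cloud.hooks.tasks import CloudTasksHook

hook = CloudTasksHook()
hook.pause_queue(location="europe-west1", queue_name="my-queue")   # stop dispatching
hook.resume_queue(location="europe-west1", queue_name="my-queue")  # start dispatching again
hook.purge_queue(location="europe-west1", queue_name="my-queue")   # delete all tasks, keep the queue
hook.delete_queue(location="europe-west1", queue_name="my-queue")  # delete the queue itself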
:param location: The location name in which the queue will be resumed. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :rtype: list[google.cloud.tasks_v2.types.Queue] """ client = self.get_conn() @@ -464,30 +409,21 @@ def create_task( Creates a task in Cloud Tasks. :param location: The location name in which the task will be created. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param task: The task to add. If a dict is provided, it must be of the same form as the protobuf message Task. - :type task: dict or google.cloud.tasks_v2.types.Task :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param task_name: (Optional) The task's name. If provided, it will be used to construct the full task path. - :type task_name: str :param response_view: (Optional) This field specifies which subset of the Task will be returned. - :type response_view: google.cloud.tasks_v2.Task.View :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.tasks_v2.types.Task """ client = self.get_conn() @@ -526,26 +462,18 @@ def get_task( Gets a task from Cloud Tasks. :param location: The location name in which the task was created. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param task_name: The task's name. - :type task_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param response_view: (Optional) This field specifies which subset of the Task will be returned. - :type response_view: google.cloud.tasks_v2.Task.View :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. 
- :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.tasks_v2.types.Task """ client = self.get_conn() @@ -574,27 +502,19 @@ def list_tasks( Lists the tasks in Cloud Tasks. :param location: The location name in which the tasks were created. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param response_view: (Optional) This field specifies which subset of the Task will be returned. - :type response_view: google.cloud.tasks_v2.Task.View :param page_size: (Optional) The maximum number of resources contained in the underlying API response. - :type page_size: int :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :rtype: list[google.cloud.tasks_v2.types.Task] """ client = self.get_conn() @@ -622,23 +542,16 @@ def delete_task( Deletes a task from Cloud Tasks. :param location: The location name in which the task will be deleted. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param task_name: The task's name. - :type task_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] """ client = self.get_conn() @@ -666,26 +579,18 @@ def run_task( Forces a task to run in Cloud Tasks. :param location: The location name in which the task was created. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param task_name: The task's name. - :type task_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param response_view: (Optional) This field specifies which subset of the Task will be returned. - :type response_view: google.cloud.tasks_v2.Task.View :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method.
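To illustrate the task-level methods documented above, a sketch that creates a task and then forces it to run immediately; the HTTP target is hypothetical, and a dict in the protobuf ``Task`` form is accepted.

# Illustrative sketch, not part of this patch.
from airflow.providers.google.cloud.hooks.tasks import CloudTasksHook

hook = CloudTasksHook()
hook.create_task(
    location="europe-west1",
    queue_name="my-queue",
    task={"http_request": {"http_method": "POST", "url": "https://example.com/handler"}},
    task_name="my-task",
)
# Dispatches the task immediately, regardless of the queue's schedule.
hook.run_task(location="europe-west1", queue_name="my-queue", task_name="my-task")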
- :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.tasks_v2.types.Task """ client = self.get_conn() diff --git a/airflow/providers/google/cloud/hooks/text_to_speech.py b/airflow/providers/google/cloud/hooks/text_to_speech.py index 25e39ca1f5007..2916f4f8ccb4a 100644 --- a/airflow/providers/google/cloud/hooks/text_to_speech.py +++ b/airflow/providers/google/cloud/hooks/text_to_speech.py @@ -38,11 +38,9 @@ class CloudTextToSpeechHook(GoogleBaseHook): keyword arguments rather than positional. :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -51,7 +49,6 @@ class CloudTextToSpeechHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -96,19 +93,14 @@ def synthesize_speech( :param input_data: text input to be synthesized. See more: https://googleapis.github.io/google-cloud-python/latest/texttospeech/gapic/v1/types.html#google.cloud.texttospeech_v1.types.SynthesisInput - :type input_data: dict or google.cloud.texttospeech_v1.types.SynthesisInput :param voice: configuration of voice to be used in synthesis. See more: https://googleapis.github.io/google-cloud-python/latest/texttospeech/gapic/v1/types.html#google.cloud.texttospeech_v1.types.VoiceSelectionParams - :type voice: dict or google.cloud.texttospeech_v1.types.VoiceSelectionParams :param audio_config: configuration of the synthesized audio. See more: https://googleapis.github.io/google-cloud-python/latest/texttospeech/gapic/v1/types.html#google.cloud.texttospeech_v1.types.AudioConfig - :type audio_config: dict or google.cloud.texttospeech_v1.types.AudioConfig :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :return: SynthesizeSpeechResponse See more: https://googleapis.github.io/google-cloud-python/latest/texttospeech/gapic/v1/types.html#google.cloud.texttospeech_v1.types.SynthesizeSpeechResponse :rtype: object diff --git a/airflow/providers/google/cloud/hooks/translate.py b/airflow/providers/google/cloud/hooks/translate.py index d6c1f6c9f99c7..a1524e1329e15 100644 --- a/airflow/providers/google/cloud/hooks/translate.py +++ b/airflow/providers/google/cloud/hooks/translate.py @@ -68,19 +68,14 @@ def translate( See https://cloud.google.com/translate/docs/translating-text - :type values: str or list :param values: String or list of strings to translate. - :type target_language: str :param target_language: The language to translate results into. This is required by the API and defaults to the target language of the current instance. 
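A sketch of ``synthesize_speech`` with dict arguments in the protobuf shapes linked above; the voice and audio settings are hypothetical.

# Illustrative sketch, not part of this patch.
from airflow.providers.google.cloud.hooks.text_to_speech import CloudTextToSpeechHook

hook = CloudTextToSpeechHook(gcp_conn_id="google_cloud_default")
response = hook.synthesize_speech(
    input_data={"text": "Hello from Airflow"},
    voice={"language_code": "en-US", "ssml_gender": "NEUTRAL"},
    audio_config={"audio_encoding": "MP3"},
)
# SynthesizeSpeechResponse carries the raw audio bytes.
with open("/tmp/hello.mp3", "wb") as f:
    f.write(response.audio_content)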
- :type format_: str :param format_: (Optional) One of ``text`` or ``html``, to specify if the input text is plain text or HTML. - :type source_language: str or None :param source_language: (Optional) The language of the text to be translated. - :type model: str or None :param model: (Optional) The model used to translate the text, such as ``'base'`` or ``'nmt'``. :rtype: str or list diff --git a/airflow/providers/google/cloud/hooks/video_intelligence.py b/airflow/providers/google/cloud/hooks/video_intelligence.py index 2174b2215dc6f..9d2d9b8bd4380 100644 --- a/airflow/providers/google/cloud/hooks/video_intelligence.py +++ b/airflow/providers/google/cloud/hooks/video_intelligence.py @@ -34,11 +34,9 @@ class CloudVideoIntelligenceHook(GoogleBaseHook): keyword arguments rather than positional. :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -47,7 +45,6 @@ class CloudVideoIntelligenceHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -93,31 +90,22 @@ def annotate_video( :param input_uri: Input video location. Currently, only Google Cloud Storage URIs are supported, which must be specified in the following format: ``gs://bucket-id/object-id``. - :type input_uri: str :param input_content: The video data bytes. If unset, the input video(s) should be specified via ``input_uri``. If set, ``input_uri`` should be unset. - :type input_content: bytes :param features: Requested video annotation features. - :type features: list[google.cloud.videointelligence_v1.VideoIntelligenceServiceClient.enums.Feature] :param output_uri: Optional, location where the output (in JSON format) should be stored. Currently, only Google Cloud Storage URIs are supported, which must be specified in the following format: ``gs://bucket-id/object-id``. - :type output_uri: str :param video_context: Optional, Additional video context and/or feature-specific parameters. - :type video_context: dict or google.cloud.videointelligence_v1.types.VideoContext :param location: Optional, cloud region where annotation should take place. Supported cloud regions: us-east1, us-west1, europe-west1, asia-east1. If no region is specified, a region will be determined based on video file location. - :type location: str :param retry: Retry object used to determine when/if to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: Optional, The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Optional, Additional metadata that is provided to the method. 
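A hedged sketch of ``annotate_video`` as documented above; the bucket path is hypothetical, and the ``Feature`` enum's import location varies across client-library versions.

# Illustrative sketch, not part of this patch.
from google.cloud.videointelligence_v1 import Feature  # enum home differs by library version

from airflow.providers.google.cloud.hooks.video_intelligence import CloudVideoIntelligenceHook

hook = CloudVideoIntelligenceHook()
operation = hook.annotate_video(
    input_uri="gs://my-bucket/my-video.mp4",
    features=[Feature.LABEL_DETECTION],
)
result = operation.result()  # long-running operation; blocks until annotation completes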
- :type metadata: seq[tuple[str, str]] """ client = self.get_conn() return client.annotate_video( diff --git a/airflow/providers/google/cloud/hooks/vision.py b/airflow/providers/google/cloud/hooks/vision.py index d5e6fdc8b1c8f..afed8b9e64fc3 100644 --- a/airflow/providers/google/cloud/hooks/vision.py +++ b/airflow/providers/google/cloud/hooks/vision.py @@ -79,13 +79,9 @@ def get_entity_with_name( :param entity: Entity - :type entity: any :param entity_id: Entity id - :type entity_id: str :param location: Location - :type location: str :param project_id: The id of Google Cloud Vision project. - :type project_id: str :return: The same entity or entity with new name :rtype: str :raises: AirflowException diff --git a/airflow/providers/google/cloud/hooks/workflows.py b/airflow/providers/google/cloud/hooks/workflows.py index 90889243277a9..2e1385d7dc7e5 100644 --- a/airflow/providers/google/cloud/hooks/workflows.py +++ b/airflow/providers/google/cloud/hooks/workflows.py @@ -62,21 +62,14 @@ def create_workflow( [ALREADY_EXISTS][google.rpc.Code.ALREADY_EXISTS] error. :param workflow: Required. Workflow to be created. - :type workflow: Dict :param workflow_id: Required. The ID of the workflow to be created. - :type workflow_id: str :param project_id: Required. The ID of the Google Cloud project the workflow belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ metadata = metadata or () client = self.get_workflows_client() @@ -102,19 +95,13 @@ def get_workflow( Gets details of a single Workflow. :param workflow_id: Required. The ID of the workflow to be retrieved. - :type workflow_id: str :param project_id: Required. The ID of the Google Cloud project the workflow belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ metadata = metadata or () client = self.get_workflows_client() @@ -138,18 +125,13 @@ def update_workflow( used in new workflow executions. :param workflow: Required. Workflow to be updated. - :type workflow: Dict :param update_mask: List of fields to be updated. If not present, the entire workflow will be updated. - :type update_mask: FieldMask :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt.
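For context, a sketch of creating and then fetching a workflow with the methods above, assuming the enclosing class is ``WorkflowsHook`` from this module; the workflow body and IDs are hypothetical.

# Illustrative sketch, not part of this patch.
from airflow.providers.google.cloud.hooks.workflows import WorkflowsHook

hook = WorkflowsHook()
hook.create_workflow(
    workflow={"source_contents": "main:\n  steps:\n  - done:\n      return: 'OK'"},
    workflow_id="my-workflow",
    project_id="my-project",
    location="europe-west1",
)
workflow = hook.get_workflow(workflow_id="my-workflow", project_id="my-project", location="europe-west1")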
- :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ metadata = metadata or () client = self.get_workflows_client() @@ -176,19 +158,13 @@ def delete_workflow( executions of the workflow. :param workflow_id: Required. The ID of the workflow to be deleted. - :type workflow_id: str :param project_id: Required. The ID of the Google Cloud project the workflow belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ metadata = metadata or () client = self.get_workflows_client() @@ -211,24 +187,17 @@ def list_workflows( The default order is not specified. :param filter_: Filter to restrict results to specific workflows. - :type filter_: str :param order_by: Comma-separated list of fields that specifies the order of the results. Default sorting order for a field is ascending. To specify descending order for a field, append a "desc" suffix. If not specified, the results will be returned in an unspecified order. - :type order_by: str :param project_id: Required. The ID of the Google Cloud project the workflow belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ metadata = metadata or () client = self.get_workflows_client() @@ -257,21 +226,14 @@ def create_execution( the given workflow. :param execution: Required. Input parameters of the execution represented as a dictionary. - :type execution: Dict :param workflow_id: Required. The ID of the workflow. - :type workflow_id: str :param project_id: Required. The ID of the Google Cloud project the workflow belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ metadata = metadata or () client = self.get_executions_client() @@ -298,21 +260,14 @@ def get_execution( Returns an execution for the given ``workflow_id`` and ``execution_id``. :param workflow_id: Required. The ID of the workflow. - :type workflow_id: str :param execution_id: Required. The ID of the execution.
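Similarly for executions, a sketch that starts one and reads back its state; the IDs are hypothetical, and the execution ID is the last segment of the returned resource name.

# Illustrative sketch, not part of this patch.
from airflow.providers.google.cloud.hooks.workflows import WorkflowsHook

hook = WorkflowsHook()
execution = hook.create_execution(
    execution={"argument": '{"name": "airflow"}'},  # JSON-encoded input parameters
    workflow_id="my-workflow",
    project_id="my-project",
    location="europe-west1",
)
execution_id = execution.name.split("/")[-1]
state = hook.get_execution(
    workflow_id="my-workflow",
    execution_id=execution_id,
    project_id="my-project",
    location="europe-west1",
).state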
- :type execution_id: str :param project_id: Required. The ID of the Google Cloud project the workflow belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ metadata = metadata or () client = self.get_executions_client() @@ -334,21 +289,14 @@ def cancel_execution( Cancels an execution using the given ``workflow_id`` and ``execution_id``. :param workflow_id: Required. The ID of the workflow. - :type workflow_id: str :param execution_id: Required. The ID of the execution. - :type execution_id: str :param project_id: Required. The ID of the Google Cloud project the workflow belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ metadata = metadata or () client = self.get_executions_client() @@ -375,19 +323,13 @@ def list_executions( first). :param workflow_id: Required. The ID of the workflow whose executions should be listed. - :type workflow_id: str :param project_id: Required. The ID of the Google Cloud project the workflow belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ metadata = metadata or () client = self.get_executions_client() diff --git a/airflow/providers/google/cloud/log/gcs_task_handler.py b/airflow/providers/google/cloud/log/gcs_task_handler.py index 978829cc1f6d5..c531807af909b 100644 --- a/airflow/providers/google/cloud/log/gcs_task_handler.py +++ b/airflow/providers/google/cloud/log/gcs_task_handler.py @@ -49,25 +49,18 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin): failure, it reads from host machine's local disk. :param base_log_folder: Base log folder to place logs. - :type base_log_folder: str :param gcs_log_folder: Path to a remote location where logs will be saved. It must have the prefix ``gs://``. For example: ``gs://bucket/remote/log/location`` - :type gcs_log_folder: str :param filename_template: template filename string - :type filename_template: str :param gcp_key_path: Path to Google Cloud Service Account file (JSON). Mutually exclusive with gcp_keyfile_dict.
If omitted, authorization based on `the Application Default Credentials <https://cloud.google.com/docs/authentication/production>`__ will be used. - :type gcp_key_path: str :param gcp_keyfile_dict: Dictionary of keyfile parameters. Mutually exclusive with gcp_key_path. - :type gcp_keyfile_dict: dict :param gcp_scopes: Comma-separated string containing OAuth2 scopes - :type gcp_scopes: str :param project_id: Project ID to read the secrets from. If not passed, the project ID from credentials will be used. - :type project_id: str """ def __init__( @@ -174,9 +167,7 @@ def gcs_write(self, log, remote_log_location): was created. :param log: the log to write to the remote_log_location - :type log: str :param remote_log_location: the log's location in remote storage - :type remote_log_location: str (path) """ try: blob = storage.Blob.from_string(remote_log_location, self.client) diff --git a/airflow/providers/google/cloud/log/stackdriver_task_handler.py b/airflow/providers/google/cloud/log/stackdriver_task_handler.py index 203fc527d348e..37365cd33fcbf 100644 --- a/airflow/providers/google/cloud/log/stackdriver_task_handler.py +++ b/airflow/providers/google/cloud/log/stackdriver_task_handler.py @@ -61,24 +61,18 @@ class StackdriverTaskHandler(logging.Handler): If omitted, authorization based on `the Application Default Credentials <https://cloud.google.com/docs/authentication/production>`__ will be used. - :type gcp_key_path: str :param scopes: OAuth scopes for the credentials, - :type scopes: Sequence[str] :param name: the name of the custom log in Stackdriver Logging. Defaults to 'airflow'. The name of the Python logger will be represented in the ``python_logger`` field. - :type name: str :param transport: Class for creating new transport objects. It should extend from the base :class:`google.cloud.logging.handlers.Transport` type and implement :meth:`google.cloud.logging.handlers.Transport.send`. Defaults to :class:`google.cloud.logging.handlers.BackgroundThreadTransport`. The other option is :class:`google.cloud.logging.handlers.SyncTransport`. - :type transport: :class:`type` :param resource: (Optional) Monitored resource of the entry, defaults to the global resource type. - :type resource: :class:`~google.cloud.logging.resource.Resource` :param labels: (Optional) Mapping of labels for the entry. - :type labels: dict """ LABEL_TASK_ID = "task_id" @@ -146,7 +140,6 @@ def emit(self, record: logging.LogRecord) -> None: """Actually log the specified logging record. :param record: The record to be logged. - :type record: logging.LogRecord """ message = self.format(record) labels: Optional[Dict[str, str]] @@ -167,7 +160,6 @@ def set_context(self, task_instance: TaskInstance) -> None: Configures the logger to add information about the current task :param task_instance: Currently executed task - :type task_instance: :class:`airflow.models.TaskInstance` """ self.task_instance_labels = self._task_instance_to_labels(task_instance) self.task_instance_hostname = task_instance.hostname @@ -179,12 +171,9 @@ def read( Read logs of given task instance from Stackdriver logging. :param task_instance: task instance object - :type task_instance: :class:`airflow.models.TaskInstance` :param try_number: task instance try_number to read logs from. If None it returns all logs - :type try_number: Optional[int] :param metadata: log metadata. It is used for streaming log reading and auto-tailing.
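A sketch of constructing the two handlers documented above by hand; paths and the filename template are hypothetical, and in practice Airflow wires these up through its logging configuration rather than manually.

# Illustrative sketch, not part of this patch.
import logging

from airflow.providers.google.cloud.log.gcs_task_handler import GCSTaskHandler
from airflow.providers.google.cloud.log.stackdriver_task_handler import StackdriverTaskHandler

gcs_handler = GCSTaskHandler(
    base_log_folder="/tmp/airflow/logs",
    gcs_log_folder="gs://my-bucket/remote/log/location",
    filename_template="{dag_id}/{task_id}/{execution_date}/{try_number}.log",  # hypothetical template
)
stackdriver_handler = StackdriverTaskHandler(name="airflow")  # logger name lands in python_logger
logging.getLogger("airflow.task").addHandler(stackdriver_handler)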
- :type metadata: Dict :return: a tuple of ( list of (one element tuple with two element tuple - hostname and logs) and list of metadata) @@ -257,7 +246,6 @@ def _read_logs( Sends requests to the Stackdriver service and downloads logs. :param log_filter: Filter specifying the logs to be downloaded. - :type log_filter: str :param next_page_token: The token of the page from which the log download will start. If None is passed, it will start from the first page. :param all_pages: If True is passed, all subpages will be downloaded. Otherwise, only the first @@ -294,10 +282,8 @@ def _read_single_logs_page(self, log_filter: str, page_token: Optional[str] = No Sends requests to the Stackdriver service and downloads single pages with logs. :param log_filter: Filter specifying the logs to be downloaded. - :type log_filter: str :param page_token: The token of the page to be downloaded. If None is passed, the first page will be downloaded. - :type page_token: str :return: Downloaded logs and next page token :rtype: Tuple[str, str] """ @@ -347,7 +333,6 @@ def get_external_log_url(self, task_instance: TaskInstance, try_number: int) -> :param task_instance: task instance object :type: task_instance: TaskInstance :param try_number: task instance try_number to read logs from. - :type try_number: Optional[int] :return: URL to the external log collection service :rtype: str """ diff --git a/airflow/providers/google/cloud/operators/automl.py b/airflow/providers/google/cloud/operators/automl.py index 2a739faec1cd8..11a4d5ae0dc15 100644 --- a/airflow/providers/google/cloud/operators/automl.py +++ b/airflow/providers/google/cloud/operators/automl.py @@ -49,22 +49,15 @@ class AutoMLTrainModelOperator(BaseOperator): :ref:`howto/operator:AutoMLTrainModelOperator` :param model: Model definition. - :type model: dict :param project_id: ID of the Google Cloud project where model will be created if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -73,7 +66,6 @@ class AutoMLTrainModelOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -138,26 +130,17 @@ class AutoMLPredictOperator(BaseOperator): :ref:`howto/operator:AutoMLPredictOperator` :param model_id: Name of the model requested to serve the batch prediction. - :type model_id: str :param payload: Name of the model used for the prediction.
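As a usage sketch for the operator whose docstring starts above, a minimal DAG with ``AutoMLTrainModelOperator``; the model spec and IDs are hypothetical.

# Illustrative sketch, not part of this patch.
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.automl import AutoMLTrainModelOperator

with DAG(dag_id="example_automl", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    train_model = AutoMLTrainModelOperator(
        task_id="train_model",
        model={
            "display_name": "my_model",  # hypothetical
            "dataset_id": "TBL123456789",  # hypothetical
            "tables_model_metadata": {"train_budget_milli_node_hours": 1000},
        },
        project_id="my-project",
        location="us-central1",
    )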
- :type payload: dict :param project_id: ID of the Google Cloud project where model is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param operation_params: Additional domain-specific parameters for the predictions. - :type operation_params: Optional[Dict[str, str]] :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -166,7 +149,6 @@ class AutoMLPredictOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -232,37 +214,25 @@ class AutoMLBatchPredictOperator(BaseOperator): :param project_id: ID of the Google Cloud project where model will be created if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param model_id: Name of the model_id requested to serve the batch prediction. - :type model_id: str :param input_config: Required. The input configuration for batch prediction. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.automl_v1beta1.types.BatchPredictInputConfig` - :type input_config: Union[dict, ~google.cloud.automl_v1beta1.types.BatchPredictInputConfig] :param output_config: Required. The Configuration specifying where output predictions should be written. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.automl_v1beta1.types.BatchPredictOutputConfig` - :type output_config: Union[dict, ~google.cloud.automl_v1beta1.types.BatchPredictOutputConfig] :param prediction_params: Additional domain-specific parameters for the predictions, any string must be up to 25000 characters long. - :type prediction_params: Optional[Dict[str, str]] :param project_id: ID of the Google Cloud project where model is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use to connect to Google Cloud. 
- :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -271,7 +241,6 @@ class AutoMLBatchPredictOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -345,24 +314,16 @@ class AutoMLCreateDatasetOperator(BaseOperator): :param dataset: The dataset to create. If a dict is provided, it must be of the same form as the protobuf message Dataset. - :type dataset: Union[dict, Dataset] :param project_id: ID of the Google Cloud project where dataset is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param params: Additional domain-specific parameters for the predictions. - :type params: Optional[Dict[str, str]] :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -371,7 +332,6 @@ class AutoMLCreateDatasetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -436,27 +396,18 @@ class AutoMLImportDataOperator(BaseOperator): :ref:`howto/operator:AutoMLImportDataOperator` :param dataset_id: ID of dataset to be updated. - :type dataset_id: str :param input_config: The desired input location and its domain specific semantics, if any. If a dict is provided, it must be of the same form as the protobuf message InputConfig. - :type input_config: dict :param project_id: ID of the Google Cloud project where dataset is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param params: Additional domain-specific parameters for the predictions. - :type params: Optional[Dict[str, str]] :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. 
- :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -465,7 +416,6 @@ class AutoMLImportDataOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -530,35 +480,24 @@ class AutoMLTablesListColumnSpecsOperator(BaseOperator): :ref:`howto/operator:AutoMLTablesListColumnSpecsOperator` :param dataset_id: Name of the dataset. - :type dataset_id: str :param table_spec_id: table_spec_id for path builder. - :type table_spec_id: str :param field_mask: Mask specifying which fields to read. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.automl_v1beta1.types.FieldMask` - :type field_mask: Union[dict, google.cloud.automl_v1beta1.types.FieldMask] :param filter_: Filter expression, see go/filtering. - :type filter_: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per resource, this parameter does not affect the return value. If page streaming is performed per page, this determines the maximum number of resources in a page. - :type page_size: int :param project_id: ID of the Google Cloud project where dataset is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -567,7 +506,6 @@ class AutoMLTablesListColumnSpecsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -645,24 +583,16 @@ class AutoMLTablesUpdateDatasetOperator(BaseOperator): :param dataset: The dataset which replaces the resource on the server. If a dict is provided, it must be of the same form as the protobuf message Dataset. - :type dataset: Union[dict, Dataset] :param update_mask: The update mask applies to the resource. 
If a dict is provided, it must be of the same form as the protobuf message FieldMask. - :type update_mask: Union[dict, FieldMask] :param location: The location of the project. - :type location: str :param params: Additional domain-specific parameters for the predictions. - :type params: Optional[Dict[str, str]] :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -671,7 +601,6 @@ class AutoMLTablesUpdateDatasetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -731,24 +660,16 @@ class AutoMLGetModelOperator(BaseOperator): :ref:`howto/operator:AutoMLGetModelOperator` :param model_id: Name of the model requested to serve the prediction. - :type model_id: str :param project_id: ID of the Google Cloud project where model is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param params: Additional domain-specific parameters for the predictions. - :type params: Optional[Dict[str, str]] :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -757,7 +678,6 @@ class AutoMLGetModelOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -816,24 +736,16 @@ class AutoMLDeleteModelOperator(BaseOperator): :ref:`howto/operator:AutoMLDeleteModelOperator` :param model_id: Name of the model requested to serve the prediction. 
- :type model_id: str :param project_id: ID of the Google Cloud project where model is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param params: Additional domain-specific parameters for the predictions. - :type params: Optional[Dict[str, str]] :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -842,7 +754,6 @@ class AutoMLDeleteModelOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -906,28 +817,19 @@ class AutoMLDeployModelOperator(BaseOperator): :ref:`howto/operator:AutoMLDeployModelOperator` :param model_id: Name of the model to be deployed. - :type model_id: str :param image_detection_metadata: Model deployment metadata specific to Image Object Detection. If a dict is provided, it must be of the same form as the protobuf message ImageObjectDetectionModelDeploymentMetadata - :type image_detection_metadata: dict :param project_id: ID of the Google Cloud project where model is located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param params: Additional domain-specific parameters for the predictions. - :type params: Optional[Dict[str, str]] :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -936,7 +838,6 @@ class AutoMLDeployModelOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
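Since ``impersonation_chain`` recurs across all of these operators, a sketch of the two accepted forms; the account names are hypothetical, and each account in a chain must grant Service Account Token Creator to the identity before it, with the first account granting it to the originating account.

# Illustrative sketch, not part of this patch.
from airflow.providers.google.cloud.operators.automl import AutoMLDeployModelOperator

# Single account: that account must grant the originating account Token Creator.
deploy_single = AutoMLDeployModelOperator(
    task_id="deploy_single",
    model_id="TBL987654321",  # hypothetical
    location="us-central1",
    impersonation_chain="target@my-project.iam.gserviceaccount.com",
)

# Chain: identities are traversed in order, ending with the account used for the request.
deploy_chained = AutoMLDeployModelOperator(
    task_id="deploy_chained",
    model_id="TBL987654321",  # hypothetical
    location="us-central1",
    impersonation_chain=[
        "first-hop@my-project.iam.gserviceaccount.com",
        "target@my-project.iam.gserviceaccount.com",
    ],
)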
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1001,30 +902,21 @@ class AutoMLTablesListTableSpecsOperator(BaseOperator): :ref:`howto/operator:AutoMLTablesListTableSpecsOperator` :param dataset_id: Name of the dataset. - :type dataset_id: str :param filter_: Filter expression, see go/filtering. - :type filter_: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. - :type page_size: int :param project_id: ID of the Google Cloud project if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1033,7 +925,6 @@ class AutoMLTablesListTableSpecsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1103,19 +994,13 @@ class AutoMLListDatasetOperator(BaseOperator): :param project_id: ID of the Google Cloud project where datasets are located if None then default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1124,7 +1009,6 @@ class AutoMLListDatasetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1188,22 +1072,15 @@ class AutoMLDeleteDatasetOperator(BaseOperator): :param dataset_id: Name of the dataset_id, a list of dataset_ids, or a comma-separated string of dataset_ids to be deleted. - :type dataset_id: Union[str, List[str]] :param project_id: ID of the Google Cloud project where the dataset is located. If None, the default project_id is used. - :type project_id: str :param location: The location of the project. - :type location: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1212,7 +1089,6 @@ class AutoMLDeleteDatasetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/operators/bigquery.py b/airflow/providers/google/cloud/operators/bigquery.py index 184e9714da756..89daa17a446d2 100644 --- a/airflow/providers/google/cloud/operators/bigquery.py +++ b/airflow/providers/google/cloud/operators/bigquery.py @@ -139,18 +139,13 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator): without stopping the progress of the DAG. :param sql: the sql to be executed - :type sql: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param bigquery_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type bigquery_conn_id: str :param use_legacy_sql: Whether to use legacy SQL (true) or standard SQL (false). - :type use_legacy_sql: bool :param location: The geographic location of the job. See details at: https://cloud.google.com/bigquery/docs/locations#specifying_your_location - :type location: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -159,9 +154,7 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated).
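A short sketch of the AutoMLDeleteDatasetOperator documented above; note that ``dataset_id`` accepts a single ID, a list, or a comma-separated string (all values below are placeholders)::

    from airflow.providers.google.cloud.operators.automl import AutoMLDeleteDatasetOperator

    delete_datasets = AutoMLDeleteDatasetOperator(
        task_id="delete_datasets",
        dataset_id="TBL123,TBL456",  # hypothetical IDs; a list like ["TBL123", "TBL456"] also works
        location="us-central1",
        project_id="my-project",
    )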
- :type impersonation_chain: Union[str, Sequence[str]] :param labels: a dictionary containing labels for the table, passed to BigQuery - :type labels: dict """ template_fields: Sequence[str] = ( @@ -207,18 +200,13 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator): :ref:`howto/operator:BigQueryValueCheckOperator` :param sql: the sql to be executed - :type sql: str :param use_legacy_sql: Whether to use legacy SQL (true) or standard SQL (false). - :type use_legacy_sql: bool :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param bigquery_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type bigquery_conn_id: str :param location: The geographic location of the job. See details at: https://cloud.google.com/bigquery/docs/locations#specifying_your_location - :type location: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -227,9 +215,7 @@ class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :param labels: a dictionary containing labels for the table, passed to BigQuery - :type labels: dict """ template_fields: Sequence[str] = ( @@ -284,25 +270,18 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperat :ref:`howto/operator:BigQueryIntervalCheckOperator` :param table: the table name - :type table: str :param days_back: number of days between ds and the ds we want to check against. Defaults to 7 days - :type days_back: int :param metrics_thresholds: a dictionary of ratios indexed by metrics, for example 'COUNT(*)': 1.5 would require a 50 percent or less difference between the current day, and the prior days_back. - :type metrics_thresholds: dict :param use_legacy_sql: Whether to use legacy SQL (true) or standard SQL (false). - :type use_legacy_sql: bool :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param bigquery_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type bigquery_conn_id: str :param location: The geographic location of the job. See details at: https://cloud.google.com/bigquery/docs/locations#specifying_your_location - :type location: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -311,9 +290,7 @@ class BigQueryIntervalCheckOperator(_BigQueryDbHookMixin, SQLIntervalCheckOperat If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
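A minimal sketch of the BigQueryValueCheckOperator described above; ``pass_value`` comes from the ``SQLValueCheckOperator`` base class, and the query, table, and expected value are illustrative::

    from airflow.providers.google.cloud.operators.bigquery import BigQueryValueCheckOperator

    check_count = BigQueryValueCheckOperator(
        task_id="check_count",
        sql="SELECT COUNT(*) FROM my_dataset.my_table",  # hypothetical table
        pass_value=100,                                  # expected result
        use_legacy_sql=False,
    )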
- :type impersonation_chain: Union[str, Sequence[str]] :param labels: a dictionary containing labels for the table, passed to BigQuery - :type labels: dict """ template_fields: Sequence[str] = ( @@ -393,26 +370,18 @@ class BigQueryGetDataOperator(BaseOperator): ) :param dataset_id: The dataset ID of the requested table. (templated) - :type dataset_id: str :param table_id: The table ID of the requested table. (templated) - :type table_id: str :param max_results: The maximum number of records (rows) to be fetched from the table. (templated) - :type max_results: int :param selected_fields: List of fields to return (comma-separated). If unspecified, all fields are returned. - :type selected_fields: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param bigquery_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type bigquery_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param location: The location used for the operation. - :type location: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -421,7 +390,6 @@ class BigQueryGetDataOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -500,92 +468,70 @@ class BigQueryExecuteQueryOperator(BaseOperator): This operator is deprecated. Please use :class:`airflow.providers.google.cloud.operators.bigquery.BigQueryInsertJobOperator` - :param sql: the sql code to be executed (templated) - :type sql: Can receive a str representing a sql statement, - a list of str (sql statements), or reference to a template file. - Template reference are recognized by str ending in '.sql'. + :param sql: the SQL code to be executed as a single string, or + a list of str (sql statements), or a reference to a template file. + Template references are recognized by str ending in '.sql' :param destination_dataset_table: A dotted ``(<project>.|<project>:)<dataset>.<table>`` that, if set, will store the results of the query. (templated)
- :type destination_dataset_table: str :param write_disposition: Specifies the action that occurs if the destination table already exists. (default: 'WRITE_EMPTY') - :type write_disposition: str :param create_disposition: Specifies whether the job is allowed to create new tables. (default: 'CREATE_IF_NEEDED') - :type create_disposition: str :param allow_large_results: Whether to allow large results. - :type allow_large_results: bool :param flatten_results: If true and query uses legacy SQL dialect, flattens all nested and repeated fields in the query results. ``allow_large_results`` must be ``true`` if this is set to ``false``. For standard SQL queries, this flag is ignored and results are never flattened. - :type flatten_results: bool :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param bigquery_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type bigquery_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param udf_config: The User Defined Function configuration for the query. See https://cloud.google.com/bigquery/user-defined-functions for details. - :type udf_config: list :param use_legacy_sql: Whether to use legacy SQL (true) or standard SQL (false). - :type use_legacy_sql: bool :param maximum_billing_tier: Positive integer that serves as a multiplier of the basic price. Defaults to None, in which case it uses the value set in the project. - :type maximum_billing_tier: int :param maximum_bytes_billed: Limits the bytes billed for this job. Queries that will have bytes billed beyond this limit will fail (without incurring a charge). If unspecified, this will be set to your project default. - :type maximum_bytes_billed: float :param api_resource_configs: a dictionary that contains 'configuration' params to be applied to the Google BigQuery Jobs API: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs for example, {'query': {'useQueryCache': False}}. You could use it if you need to provide some params that are not supported by BigQueryOperator, like args. - :type api_resource_configs: dict :param schema_update_options: Allows the schema of the destination table to be updated as a side effect of the load job. - :type schema_update_options: Optional[Union[list, tuple, set]] :param query_params: a list of dictionaries containing query parameter types and values, passed to BigQuery. The structure of each dictionary should look like 'queryParameters' in Google BigQuery Jobs API: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs. For example, [{ 'name': 'corpus', 'parameterType': { 'type': 'STRING' }, 'parameterValue': { 'value': 'romeoandjuliet' } }]. (templated) - :type query_params: list :param labels: a dictionary containing labels for the job/query, passed to BigQuery - :type labels: dict :param priority: Specifies a priority for the query. Possible values include INTERACTIVE and BATCH. The default value is INTERACTIVE. - :type priority: str :param time_partitioning: configure optional time partitioning fields i.e. partition by field, type and expiration as per API specifications.
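Since the operator above is deprecated, here is a hedged sketch of the replacement it points to, ``BigQueryInsertJobOperator``, using the ``configuration`` field documented later in this patch (the query and location values are placeholders)::

    from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator

    insert_job = BigQueryInsertJobOperator(
        task_id="insert_job",
        configuration={
            "query": {
                "query": "SELECT 1",   # hypothetical SQL
                "useLegacySql": False,
            }
        },
        location="US",                 # optional job location
    )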
- :type time_partitioning: dict :param cluster_fields: Request that the result of this query be stored sorted by one or more columns. BigQuery supports clustering for both partitioned and non-partitioned tables. The order of columns given determines the sort order. - :type cluster_fields: list[str] :param location: The geographic location of the job. Required except for US and EU. See details at https://cloud.google.com/bigquery/docs/locations#specifying_your_location - :type location: str :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). **Example**: :: encryption_configuration = { "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" } - :type encryption_configuration: dict :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -594,7 +540,6 @@ class BigQueryExecuteQueryOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -763,15 +708,11 @@ class BigQueryCreateEmptyTableOperator(BaseOperator): :ref:`howto/operator:BigQueryCreateEmptyTableOperator` :param project_id: The project to create the table into. (templated) - :type project_id: str :param dataset_id: The dataset to create the table into. (templated) - :type dataset_id: str :param table_id: The name of the table to be created. (templated) - :type table_id: str :param table_resource: Table resource as described in documentation: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table If provided, all other parameters are ignored. - :type table_resource: Dict[str, Any] :param schema_fields: If set, the schema field list as defined here: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema @@ -780,27 +721,21 @@ class BigQueryCreateEmptyTableOperator(BaseOperator): schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"}, {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}] - :type schema_fields: list :param gcs_schema_object: Full path to the JSON file containing schema (templated). For example: ``gs://test-bucket/dir1/dir2/employee_schema.json`` - :type gcs_schema_object: str :param time_partitioning: configure optional time partitioning fields i.e. partition by field, type and expiration as per API specifications. .. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#timePartitioning - :type time_partitioning: dict :param bigquery_conn_id: [Optional] The connection ID used to connect to Google Cloud and interact with the BigQuery service. - :type bigquery_conn_id: str :param google_cloud_storage_conn_id: [Optional] The connection ID used to connect to Google Cloud and interact with the Google Cloud Storage service. - :type google_cloud_storage_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled.
- :type delegate_to: str :param labels: a dictionary containing labels for the table, passed to BigQuery **Example (with schema JSON in GCS)**: :: @@ -842,31 +777,25 @@ class BigQueryCreateEmptyTableOperator(BaseOperator): bigquery_conn_id='airflow-conn-id-account', google_cloud_storage_conn_id='airflow-conn-id' ) - :type labels: dict :param view: [Optional] A dictionary containing definition for the view. If set, it will create a view instead of a table: .. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition - :type view: dict :param materialized_view: [Optional] The materialized view definition. - :type materialized_view: dict :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). **Example**: :: encryption_configuration = { "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" } - :type encryption_configuration: dict :param location: The location used for the operation. - :type location: str :param cluster_fields: [Optional] The fields used for clustering. BigQuery supports clustering for both partitioned and non-partitioned tables. .. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clustering.fields - :type cluster_fields: list :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -875,9 +804,7 @@ class BigQueryCreateEmptyTableOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :param exists_ok: If ``True``, ignore "already exists" errors when creating the table. - :type exists_ok: bool """ template_fields: Sequence[str] = ( @@ -995,14 +922,11 @@ class BigQueryCreateExternalTableOperator(BaseOperator): :ref:`howto/operator:BigQueryCreateExternalTableOperator` :param bucket: The bucket to point the external table to. (templated) - :type bucket: str :param source_objects: List of Google Cloud Storage URIs to point table to. If source_format is 'DATASTORE_BACKUP', the list must only contain a single URI. - :type source_objects: list :param destination_project_dataset_table: The dotted ``(<project>.)<dataset>.<table>``
BigQuery table to load data into (templated). If ``<project>`` is not included, project will be the project defined in the connection json. - :type destination_project_dataset_table: str :param schema_fields: If set, the schema field list as defined here: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema @@ -1015,59 +939,41 @@ class BigQueryCreateExternalTableOperator(BaseOperator): :param table_resource: Table resource as described in documentation: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table If provided, all other parameters are ignored. External schema from object will be resolved. - :type table_resource: Dict[str, Any] - :type schema_fields: list :param schema_object: If set, a GCS object path pointing to a .json file that contains the schema for the table. (templated) - :type schema_object: str :param source_format: File format of the data. - :type source_format: str :param compression: [Optional] The compression type of the data source. Possible values include GZIP and NONE. The default value is NONE. This setting is ignored for Google Cloud Bigtable, Google Cloud Datastore backups and Avro formats. - :type compression: str :param skip_leading_rows: Number of rows to skip when loading from a CSV. - :type skip_leading_rows: int :param field_delimiter: The delimiter to use for the CSV. - :type field_delimiter: str :param max_bad_records: The maximum number of bad records that BigQuery can ignore when running the job. - :type max_bad_records: int :param quote_character: The value that is used to quote data sections in a CSV file. - :type quote_character: str :param allow_quoted_newlines: Whether to allow quoted newlines (true) or not (false). - :type allow_quoted_newlines: bool :param allow_jagged_rows: Accept rows that are missing trailing optional columns. The missing values are treated as nulls. If false, records with missing trailing columns are treated as bad records, and if there are too many bad records, an invalid error is returned in the job result. Only applicable to CSV, ignored for other formats. - :type allow_jagged_rows: bool :param bigquery_conn_id: (Optional) The connection ID used to connect to Google Cloud and interact with the BigQuery service. - :type bigquery_conn_id: str :param google_cloud_storage_conn_id: (Optional) The connection ID used to connect to Google Cloud and interact with the Google Cloud Storage service. - :type google_cloud_storage_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param src_fmt_configs: configure optional fields specific to the source format - :type src_fmt_configs: dict :param labels: a dictionary containing labels for the table, passed to BigQuery - :type labels: dict :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). **Example**: :: encryption_configuration = { "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" } - :type encryption_configuration: dict :param location: The location used for the operation. - :type location: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request.
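A hedged usage sketch of the external-table operator above; the bucket, object paths, and table name are placeholders::

    from airflow.providers.google.cloud.operators.bigquery import BigQueryCreateExternalTableOperator

    create_external_table = BigQueryCreateExternalTableOperator(
        task_id="create_external_table",
        bucket="my-bucket",                                         # hypothetical GCS bucket
        source_objects=["data/part-001.csv"],
        destination_project_dataset_table="my_dataset.my_ext_table",
        schema_object="schemas/my_table.json",                      # or pass schema_fields inline
        source_format="CSV",
        skip_leading_rows=1,
    )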
@@ -1076,7 +982,6 @@ class BigQueryCreateExternalTableOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1245,23 +1150,17 @@ class BigQueryDeleteDatasetOperator(BaseOperator): :ref:`howto/operator:BigQueryDeleteDatasetOperator` :param project_id: The project id of the dataset. - :type project_id: str :param dataset_id: The dataset to be deleted. - :type dataset_id: str :param delete_contents: (Optional) Whether to force the deletion even if the dataset is not empty. Will delete all tables (if any) in the dataset if set to True. Will raise HttpError 400: "{dataset_id} is still in use" if set to False and dataset is not empty. The default value is False. - :type delete_contents: bool :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param bigquery_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type bigquery_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1270,7 +1169,6 @@ class BigQueryDeleteDatasetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] **Example**: :: @@ -1344,24 +1242,17 @@ class BigQueryCreateEmptyDatasetOperator(BaseOperator): :ref:`howto/operator:BigQueryCreateEmptyDatasetOperator` :param project_id: The name of the project where we want to create the dataset. - :type project_id: str :param dataset_id: The id of dataset. Don't need to provide, if datasetId in dataset_reference. - :type dataset_id: str :param location: The geographic location where the dataset should reside. - :type location: str :param dataset_reference: Dataset reference that could be provided with request body. More info: https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource - :type dataset_reference: dict :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param bigquery_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type bigquery_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. 
- :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1370,9 +1261,7 @@ class BigQueryCreateEmptyDatasetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :param exists_ok: If ``True``, ignore "already exists" errors when creating the dataset. - :type exists_ok: bool **Example**: :: create_new_dataset = BigQueryCreateEmptyDatasetOperator( @@ -1459,16 +1348,12 @@ class BigQueryGetDatasetOperator(BaseOperator): :param dataset_id: The id of dataset. Don't need to provide, if datasetId in dataset_reference. - :type dataset_id: str :param project_id: The name of the project where we want to create the dataset. Don't need to provide, if projectId in dataset_reference. - :type project_id: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1477,7 +1362,6 @@ class BigQueryGetDatasetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: dataset https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource @@ -1529,18 +1413,13 @@ class BigQueryGetDatasetTablesOperator(BaseOperator): :ref:`howto/operator:BigQueryGetDatasetTablesOperator` :param dataset_id: the dataset ID of the requested dataset. - :type dataset_id: str :param project_id: (Optional) the project of the requested dataset. If None, self.project_id will be used. - :type project_id: str :param max_results: (Optional) the maximum number of tables to return. - :type max_results: int :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1549,7 +1428,6 @@ class BigQueryGetDatasetTablesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
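A minimal sketch of the BigQueryGetDatasetTablesOperator documented above (IDs are illustrative)::

    from airflow.providers.google.cloud.operators.bigquery import BigQueryGetDatasetTablesOperator

    get_tables = BigQueryGetDatasetTablesOperator(
        task_id="get_tables",
        dataset_id="my_dataset",  # hypothetical dataset
        max_results=50,           # optional cap on the number of returned tables
    )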
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1602,19 +1480,14 @@ class BigQueryPatchDatasetOperator(BaseOperator): :param dataset_id: The id of dataset. Don't need to provide, if datasetId in dataset_reference. - :type dataset_id: str :param dataset_resource: Dataset resource that will be provided with request body. https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource - :type dataset_resource: dict :param project_id: The name of the project where we want to create the dataset. Don't need to provide, if projectId in dataset_reference. - :type project_id: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1623,7 +1496,6 @@ class BigQueryPatchDatasetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: dataset https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource @@ -1689,22 +1561,16 @@ class BigQueryUpdateTableOperator(BaseOperator): if datasetId in table_reference. :param table_id: The id of table. Don't need to provide, if tableId in table_reference. - :type table_id: str :param table_resource: Table resource that will be provided with the request body. https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource - :type table_resource: Dict[str, Any] :param fields: The fields of ``table`` to change, spelled as the Table properties (e.g. "friendly_name"). - :type fields: List[str] :param project_id: The name of the project where we want to create the table. Don't need to provide, if projectId in table_reference. - :type project_id: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1713,7 +1579,6 @@ class BigQueryUpdateTableOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: table https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource @@ -1781,21 +1646,15 @@ class BigQueryUpdateDatasetOperator(BaseOperator): :param dataset_id: The id of dataset. Don't need to provide, if datasetId in dataset_reference.
- :type dataset_id: str :param dataset_resource: Dataset resource that will be provided with request body. https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource - :type dataset_resource: Dict[str, Any] :param fields: The properties of dataset to change (e.g. "friendly_name"). - :type fields: Sequence[str] :param project_id: The name of the project where we want to create the dataset. Don't need to provide, if projectId in dataset_reference. - :type project_id: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1804,7 +1663,6 @@ class BigQueryUpdateDatasetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: dataset https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource @@ -1867,21 +1725,15 @@ class BigQueryDeleteTableOperator(BaseOperator): :param deletion_dataset_table: A dotted ``(<project>.|<project>:)<dataset>.<table>``
that indicates which table will be deleted. (templated) - :type deletion_dataset_table: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param bigquery_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type bigquery_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param ignore_if_missing: if True, then return success even if the requested table does not exist. - :type ignore_if_missing: bool :param location: The location used for the operation. - :type location: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1890,7 +1742,6 @@ class BigQueryDeleteTableOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1951,24 +1802,17 @@ class BigQueryUpsertTableOperator(BaseOperator): :param dataset_id: A dotted ``(<project>.|<project>:)<dataset>`` that indicates which dataset will be updated. (templated) - :type dataset_id: str :param table_resource: a table resource. see https://cloud.google.com/bigquery/docs/reference/v2/tables#resource - :type table_resource: dict :param project_id: The name of the project where we want to update the dataset. Don't need to provide, if projectId in dataset_reference. - :type project_id: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param bigquery_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type bigquery_conn_id: str :param delegate_to: The account to impersonate, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param location: The location used for the operation. - :type location: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1977,7 +1821,6 @@ class BigQueryUpsertTableOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated).
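A hedged sketch of the BigQueryUpsertTableOperator above; ``table_resource`` follows the BigQuery tables API shape, and all IDs and values are placeholders::

    from airflow.providers.google.cloud.operators.bigquery import BigQueryUpsertTableOperator

    upsert_table = BigQueryUpsertTableOperator(
        task_id="upsert_table",
        dataset_id="my_dataset",
        table_resource={
            "tableReference": {"tableId": "my_table"},
            "expirationTime": 4102444800000,  # hypothetical expiration (ms since epoch)
        },
    )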
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -2061,31 +1904,22 @@ class BigQueryUpdateTableSchemaOperator(BaseOperator): ]}, ] - :type schema_fields_updates: List[dict] :param include_policy_tags: (Optional) If set to True, policy tags will be included in the update request, which requires special permissions even if unchanged (default False). See https://cloud.google.com/bigquery/docs/column-level-security#roles - :type include_policy_tags: bool :param dataset_id: A dotted ``(<project>.|<project>:)<dataset>`` that indicates which dataset will be updated. (templated) - :type dataset_id: str :param table_id: The table ID of the requested table. (templated) - :type table_id: str :param project_id: The name of the project where we want to update the dataset. Don't need to provide, if projectId in dataset_reference. - :type project_id: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param bigquery_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type bigquery_conn_id: str :param delegate_to: The account to impersonate, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param location: The location used for the operation. - :type location: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2094,7 +1928,6 @@ class BigQueryUpdateTableSchemaOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -2172,27 +2005,20 @@ class BigQueryInsertJobOperator(BaseOperator): :param configuration: The configuration parameter maps directly to BigQuery's configuration field in the job object. For more details see https://cloud.google.com/bigquery/docs/reference/v2/jobs - :type configuration: Dict[str, Any] :param job_id: The ID of the job. It will be suffixed with hash of job configuration unless ``force_rerun`` is True. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or dashes (-). The maximum length is 1,024 characters. If not provided, then a uuid will be generated. - :type job_id: str :param force_rerun: If True then operator will use hash of uuid as job id suffix - :type force_rerun: bool :param reattach_states: Set of BigQuery job's states in case of which we should reattach to the job. Should be other than final states. :param project_id: Google Cloud Project where the job is running - :type project_id: str :param location: location the job is running - :type location: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled.
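A minimal sketch of the BigQueryUpdateTableSchemaOperator documented above; ``schema_fields_updates`` needs only the fields being changed, and all names are illustrative::

    from airflow.providers.google.cloud.operators.bigquery import BigQueryUpdateTableSchemaOperator

    update_schema = BigQueryUpdateTableSchemaOperator(
        task_id="update_schema",
        dataset_id="my_dataset",
        table_id="my_table",
        schema_fields_updates=[
            {"name": "emp_name", "description": "Employee name"},  # hypothetical field update
        ],
    )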
- :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2201,9 +2027,7 @@ class BigQueryInsertJobOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :param cancel_on_kill: Flag which indicates whether to cancel the hook's job or not, when on_kill is called - :type cancel_on_kill: bool """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/operators/bigquery_dts.py b/airflow/providers/google/cloud/operators/bigquery_dts.py index 0637734eb3524..3d67879cad7fe 100644 --- a/airflow/providers/google/cloud/operators/bigquery_dts.py +++ b/airflow/providers/google/cloud/operators/bigquery_dts.py @@ -37,27 +37,19 @@ class BigQueryCreateDataTransferOperator(BaseOperator): :ref:`howto/operator:BigQueryCreateDataTransferOperator` :param transfer_config: Data transfer configuration to create. - :type transfer_config: dict :param project_id: The BigQuery project id where the transfer configuration should be created. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: BigQuery Transfer Service location for regional transfers. - :type location: Optional[str] :param authorization_code: authorization code to use with this transfer configuration. This is required if new credentials are needed. - :type authorization_code: Optional[str] :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -66,7 +58,6 @@ class BigQueryCreateDataTransferOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -130,23 +121,16 @@ class BigQueryDeleteDataTransferConfigOperator(BaseOperator): :ref:`howto/operator:BigQueryDeleteDataTransferConfigOperator` :param transfer_config_id: ID of the transfer config to be used. - :type transfer_config_id: str :param project_id: The BigQuery project id where the transfer configuration should be created. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: BigQuery Transfer Service location for regional transfers.
- :type location: Optional[str] :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -155,7 +139,6 @@ class BigQueryDeleteDataTransferConfigOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -213,32 +196,23 @@ class BigQueryDataTransferServiceStartTransferRunsOperator(BaseOperator): :ref:`howto/operator:BigQueryDataTransferServiceStartTransferRunsOperator` :param transfer_config_id: ID of the transfer config to be used. - :type transfer_config_id: str :param requested_time_range: Time range for the transfer runs that should be started. If a dict is provided, it must be of the same form as the protobuf message `~google.cloud.bigquery_datatransfer_v1.types.TimeRange` - :type requested_time_range: Union[dict, ~google.cloud.bigquery_datatransfer_v1.types.TimeRange] :param requested_run_time: Specific run_time for a transfer run to be started. The requested_run_time must not be in the future. If a dict is provided, it must be of the same form as the protobuf message `~google.cloud.bigquery_datatransfer_v1.types.Timestamp` - :type requested_run_time: Union[dict, ~google.cloud.bigquery_datatransfer_v1.types.Timestamp] :param project_id: The BigQuery project id where the transfer configuration should be created. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: BigQuery Transfer Service location for regional transfers. - :type location: Optional[str] :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request.
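A hedged sketch combining the create and start-runs operators above; the ``transfer_config`` body follows the Data Transfer Service API shape, and every ID and value is a placeholder::

    from airflow.providers.google.cloud.operators.bigquery_dts import (
        BigQueryCreateDataTransferOperator,
        BigQueryDataTransferServiceStartTransferRunsOperator,
    )

    create_transfer = BigQueryCreateDataTransferOperator(
        task_id="create_transfer",
        transfer_config={
            "destination_dataset_id": "my_dataset",  # hypothetical
            "display_name": "example transfer",
            "data_source_id": "scheduled_query",     # hypothetical source
            "params": {"query": "SELECT 1"},
        },
    )

    start_runs = BigQueryDataTransferServiceStartTransferRunsOperator(
        task_id="start_runs",
        transfer_config_id="12345",                  # hypothetical config ID
        requested_run_time={"seconds": 1640995200},  # hypothetical timestamp
    )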
@@ -247,7 +221,6 @@ class BigQueryDataTransferServiceStartTransferRunsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/operators/bigtable.py b/airflow/providers/google/cloud/operators/bigtable.py index 2895d0d2a258d..7520a655ca285 100644 --- a/airflow/providers/google/cloud/operators/bigtable.py +++ b/airflow/providers/google/cloud/operators/bigtable.py @@ -56,42 +56,28 @@ class BigtableCreateInstanceOperator(BaseOperator, BigtableValidationMixin): For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:BigtableCreateInstanceOperator` - :type instance_id: str :param instance_id: The ID of the Cloud Bigtable instance to create. - :type main_cluster_id: str :param main_cluster_id: The ID for main cluster for the new instance. - :type main_cluster_zone: str :param main_cluster_zone: The zone for the main cluster. See https://cloud.google.com/bigtable/docs/locations for more details. - :type project_id: str :param project_id: Optional, the ID of the Google Cloud project. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type replica_clusters: List[Dict[str, str]] :param replica_clusters: (optional) A list of replica clusters for the new instance. Each cluster dictionary contains an id and a zone. Example: [{"id": "replica-1", "zone": "us-west1-a"}] - :type replica_cluster_id: str :param replica_cluster_id: (deprecated) The ID for replica cluster for the new instance. - :type replica_cluster_zone: str :param replica_cluster_zone: (deprecated) The zone for replica cluster. - :type instance_type: enum.IntEnum :param instance_type: (optional) The type of the instance. - :type instance_display_name: str :param instance_display_name: (optional) Human-readable name of the instance. Defaults to ``instance_id``. - :type instance_labels: dict :param instance_labels: (optional) Dictionary of labels to associate with the instance. - :type cluster_nodes: int :param cluster_nodes: (optional) Number of nodes for cluster. - :type cluster_storage_type: enum.IntEnum :param cluster_storage_type: (optional) The type of storage. - :type timeout: int :param timeout: (optional) timeout (in seconds) for instance creation. If not specified, the Operator will wait indefinitely. :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -100,7 +86,6 @@ class BigtableCreateInstanceOperator(BaseOperator, BigtableValidationMixin): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated).
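A hedged sketch of the BigtableCreateInstanceOperator above (instance and cluster IDs, zone, and node count are placeholders)::

    from airflow.providers.google.cloud.operators.bigtable import BigtableCreateInstanceOperator

    create_instance = BigtableCreateInstanceOperator(
        task_id="create_instance",
        instance_id="my-instance",         # hypothetical
        main_cluster_id="my-instance-c1",
        main_cluster_zone="us-central1-a",
        cluster_nodes=3,                   # optional node count
    )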
- :type impersonation_chain: Union[str, Sequence[str]] """ REQUIRED_ATTRIBUTES: Iterable[str] = ('instance_id', 'main_cluster_id', 'main_cluster_zone') @@ -196,23 +181,16 @@ class BigtableUpdateInstanceOperator(BaseOperator, BigtableValidationMixin): For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:BigtableUpdateInstanceOperator` - :type instance_id: str :param instance_id: The ID of the Cloud Bigtable instance to update. - :type project_id: str :param project_id: Optional, the ID of the Google Cloud project. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type instance_display_name: str :param instance_display_name: (optional) Human-readable name of the instance. - :type instance_type: enums.Instance.Type or enum.IntEnum :param instance_type: (optional) The type of the instance. - :type instance_labels: dict :param instance_labels: (optional) Dictionary of labels to associate with the instance. - :type timeout: int :param timeout: (optional) timeout (in seconds) for instance update. If not specified, the Operator will wait indefinitely. :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -221,7 +199,6 @@ class BigtableUpdateInstanceOperator(BaseOperator, BigtableValidationMixin): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ REQUIRED_ATTRIBUTES: Iterable[str] = ['instance_id'] @@ -289,13 +266,10 @@ class BigtableDeleteInstanceOperator(BaseOperator, BigtableValidationMixin): For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:BigtableDeleteInstanceOperator` - :type instance_id: str :param instance_id: The ID of the Cloud Bigtable instance to delete. :param project_id: Optional, the ID of the Google Cloud project. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -304,7 +278,6 @@ class BigtableDeleteInstanceOperator(BaseOperator, BigtableValidationMixin): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ REQUIRED_ATTRIBUTES = ('instance_id',) # type: Iterable[str] @@ -359,23 +332,17 @@ class BigtableCreateTableOperator(BaseOperator, BigtableValidationMixin): For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:BigtableCreateTableOperator` - :type instance_id: str :param instance_id: The ID of the Cloud Bigtable instance that will hold the new table.
- :type table_id: str :param table_id: The ID of the table to be created. - :type project_id: str :param project_id: Optional, the ID of the Google Cloud project. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type initial_split_keys: list :param initial_split_keys: (Optional) list of row keys in bytes that will be used to initially split the table into several tablets. - :type column_families: dict :param column_families: (Optional) A map of columns to create. The key is the column_id str and the value is a :class:`google.cloud.bigtable.column_family.GarbageCollectionRule` :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -384,7 +351,6 @@ class BigtableCreateTableOperator(BaseOperator, BigtableValidationMixin): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ REQUIRED_ATTRIBUTES = ('instance_id', 'table_id') # type: Iterable[str] @@ -474,17 +440,12 @@ class BigtableDeleteTableOperator(BaseOperator, BigtableValidationMixin): For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:BigtableDeleteTableOperator` - :type instance_id: str :param instance_id: The ID of the Cloud Bigtable instance. - :type table_id: str :param table_id: The ID of the table to be deleted. - :type project_id: str :param project_id: Optional, the ID of the Google Cloud project. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type app_profile_id: str :param app_profile_id: Application profile. :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -493,7 +454,6 @@ class BigtableDeleteTableOperator(BaseOperator, BigtableValidationMixin): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ REQUIRED_ATTRIBUTES = ('instance_id', 'table_id') # type: Iterable[str] @@ -559,16 +519,11 @@ class BigtableUpdateClusterOperator(BaseOperator, BigtableValidationMixin): For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:BigtableUpdateClusterOperator` - :type instance_id: str :param instance_id: The ID of the Cloud Bigtable instance. - :type cluster_id: str :param cluster_id: The ID of the Cloud Bigtable cluster to update. - :type nodes: int :param nodes: The desired number of nodes for the Cloud Bigtable cluster. - :type project_id: str :param project_id: Optional, the ID of the Google Cloud project. :param gcp_conn_id: The connection ID to use to connect to Google Cloud.
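A minimal sketch of the BigtableUpdateClusterOperator above, which resizes a cluster (IDs are placeholders)::

    from airflow.providers.google.cloud.operators.bigtable import BigtableUpdateClusterOperator

    resize_cluster = BigtableUpdateClusterOperator(
        task_id="resize_cluster",
        instance_id="my-instance",      # hypothetical
        cluster_id="my-instance-c1",
        nodes=5,                        # desired node count
    )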
- :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -577,7 +532,6 @@ class BigtableUpdateClusterOperator(BaseOperator, BigtableValidationMixin): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ REQUIRED_ATTRIBUTES = ('instance_id', 'cluster_id', 'nodes') # type: Iterable[str] diff --git a/airflow/providers/google/cloud/operators/cloud_build.py b/airflow/providers/google/cloud/operators/cloud_build.py index 9ca36c7231c43..74af85f9062fa 100644 --- a/airflow/providers/google/cloud/operators/cloud_build.py +++ b/airflow/providers/google/cloud/operators/cloud_build.py @@ -49,20 +49,14 @@ class CloudBuildCancelBuildOperator(BaseOperator): :ref:`howto/operator:CloudBuildCancelBuildOperator` :param id_: The ID of the build. - :type id_: str :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: Optional, the connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: Optional[str] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -71,7 +65,6 @@ class CloudBuildCancelBuildOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: dict """ @@ -122,25 +115,17 @@ class CloudBuildCreateBuildOperator(BaseOperator): :param build: Optional, the build resource to create. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.devtools.cloudbuild_v1.types.Build`. Only either build or body should be passed. - :type build: Optional[Union[dict, `google.cloud.devtools.cloudbuild_v1.types.Build`]] :param body: (Deprecated) The build resource to create. This parameter has been deprecated. You should pass the build parameter instead. - :type body: Optional[dict] :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param wait: Optional, wait for operation to finish. - :type wait: Optional[bool] :param retry: Optional, a retry object used to retry requests. 
If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: Optional, the connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: Optional[str] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -149,7 +134,6 @@ class CloudBuildCreateBuildOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: dict """ @@ -234,20 +218,14 @@ class CloudBuildCreateBuildTriggerOperator(BaseOperator): :param trigger: The BuildTrigger to create. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.devtools.cloudbuild_v1.types.BuildTrigger` - :type trigger: Union[dict, `google.cloud.devtools.cloudbuild_v1.types.BuildTrigger`] :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: Optional, the connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: Optional[str] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -256,7 +234,6 @@ class CloudBuildCreateBuildTriggerOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: dict """ @@ -305,20 +282,14 @@ class CloudBuildDeleteBuildTriggerOperator(BaseOperator): :ref:`howto/operator:CloudBuildDeleteBuildTriggerOperator` :param trigger_id: The ID of the BuildTrigger to delete. - :type trigger_id: str :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. 
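Since ``body`` is deprecated in favour of ``build``, a hedged sketch of the replacement call; the bucket, build steps, and image are placeholders, and the dict mirrors the shape of ``google.cloud.devtools.cloudbuild_v1.types.Build``:

```python
# Inside a `with models.DAG(...):` block.
from airflow.providers.google.cloud.operators.cloud_build import CloudBuildCreateBuildOperator

create_build = CloudBuildCreateBuildOperator(
    task_id="create_build",
    build={
        # A gs:// URL for storage_source is expanded into the proper dict
        # by the BuildProcessor helper defined later in this module.
        "source": {"storage_source": "gs://my-bucket/source.tar.gz"},
        "steps": [{"name": "ubuntu", "args": ["echo", "hello"]}],
    },
    wait=True,  # block until the build finishes instead of returning immediately
)
```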
- :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: Optional, the connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: Optional[str] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -327,7 +298,6 @@ class CloudBuildDeleteBuildTriggerOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ("project_id", "trigger_id", "gcp_conn_id") @@ -373,20 +343,14 @@ class CloudBuildGetBuildOperator(BaseOperator): :ref:`howto/operator:CloudBuildGetBuildOperator` :param id_: The ID of the build. - :type id_: str :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: Optional, the connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: Optional[str] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -395,7 +359,6 @@ class CloudBuildGetBuildOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: dict """ @@ -444,20 +407,14 @@ class CloudBuildGetBuildTriggerOperator(BaseOperator): :ref:`howto/operator:CloudBuildGetBuildTriggerOperator` :param trigger_id: The ID of the BuildTrigger to get. - :type trigger_id: str :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. 
- :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: Optional, the connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: Optional[str] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -466,7 +423,6 @@ class CloudBuildGetBuildTriggerOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: dict """ @@ -515,24 +471,16 @@ class CloudBuildListBuildTriggersOperator(BaseOperator): :ref:`howto/operator:CloudBuildListBuildTriggersOperator` :param location: The location of the project. - :type location: string :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param page_size: Optional, number of results to return in the list. - :type page_size: Optional[int] :param page_token: Optional, token to provide to skip to a particular spot in the list. - :type page_token: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: Optional, the connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: Optional[str] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -541,7 +489,6 @@ class CloudBuildListBuildTriggersOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: List[dict] """ @@ -596,24 +543,16 @@ class CloudBuildListBuildsOperator(BaseOperator): :ref:`howto/operator:CloudBuildListBuildsOperator` :param location: The location of the project. - :type location: string :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: str :param page_size: Optional, number of results to return in the list. - :type page_size: Optional[int] :param filter_: Optional, the raw filter text to constrain the results. - :type filter_: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. 
- :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: Optional, the connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: Optional[str] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -622,7 +561,6 @@ class CloudBuildListBuildsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: List[dict] """ @@ -678,22 +616,15 @@ class CloudBuildRetryBuildOperator(BaseOperator): :ref:`howto/operator:CloudBuildRetryBuildOperator` :param id_: Build ID of the original build. - :type id_: str :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: str :param wait: Optional, wait for operation to finish. - :type wait: Optional[bool] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: Optional, the connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: Optional[str] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -702,7 +633,6 @@ class CloudBuildRetryBuildOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: dict """ @@ -754,25 +684,17 @@ class CloudBuildRunBuildTriggerOperator(BaseOperator): :ref:`howto/operator:CloudBuildRunBuildTriggerOperator` :param trigger_id: The ID of the trigger. - :type trigger_id: str :param source: Source to build against this trigger. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.devtools.cloudbuild_v1.types.RepoSource` - :type source: Union[dict, `google.cloud.devtools.cloudbuild_v1.types.RepoSource`] :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: str :param wait: Optional, wait for operation to finish. 
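The trigger-based counterpart takes a ``RepoSource``-shaped dict; a sketch with placeholder project, repository, and trigger values:

```python
# Inside a `with models.DAG(...):` block.
from airflow.providers.google.cloud.operators.cloud_build import CloudBuildRunBuildTriggerOperator

run_trigger = CloudBuildRunBuildTriggerOperator(
    task_id="run_build_trigger",
    trigger_id="my-trigger-id",  # placeholder trigger ID
    # Same shape as google.cloud.devtools.cloudbuild_v1.types.RepoSource.
    source={"project_id": "my-project", "repo_name": "my-repo", "branch_name": "main"},
    wait=True,
)
```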
- :type wait: Optional[bool] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: Optional, the connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: Optional[str] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -781,7 +703,6 @@ class CloudBuildRunBuildTriggerOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: dict """ @@ -836,23 +757,16 @@ class CloudBuildUpdateBuildTriggerOperator(BaseOperator): :ref:`howto/operator:CloudBuildUpdateBuildTriggerOperator` :param trigger_id: The ID of the trigger. - :type trigger_id: str :param trigger: The BuildTrigger to create. If a dict is provided, it must be of the same form as the protobuf message `google.cloud.devtools.cloudbuild_v1.types.BuildTrigger` - :type trigger: Union[dict, `google.cloud.devtools.cloudbuild_v1.types.BuildTrigger`] :param project_id: Optional, Google Cloud Project project_id where the function belongs. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: Optional, the connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: Optional[str] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -861,7 +775,6 @@ class CloudBuildUpdateBuildTriggerOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: dict """ @@ -913,7 +826,6 @@ class BuildProcessor: :param build: The request body of the build. 
See: https://cloud.google.com/cloud-build/docs/api/reference/rest/Shared.Types/Build - :type build: Union[Dict, Build] """ def __init__(self, build: Union[Dict, Build]) -> None: diff --git a/airflow/providers/google/cloud/operators/cloud_memorystore.py b/airflow/providers/google/cloud/operators/cloud_memorystore.py index bcae7938e6921..cca8e3a40d4da 100644 --- a/airflow/providers/google/cloud/operators/cloud_memorystore.py +++ b/airflow/providers/google/cloud/operators/cloud_memorystore.py @@ -15,7 +15,14 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Operators for Google Cloud Memorystore service""" +""" +Operators for Google Cloud Memorystore service. + +.. spelling:: + + FieldMask + memcache +""" from typing import TYPE_CHECKING, Dict, Optional, Sequence, Tuple, Union from google.api_core.retry import Retry @@ -45,7 +52,6 @@ class CloudMemorystoreCreateInstanceOperator(BaseOperator): :ref:`howto/operator:CloudMemorystoreCreateInstanceOperator` :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance_id: Required. The logical name of the Redis instance in the customer project with the following restrictions: @@ -54,25 +60,18 @@ class CloudMemorystoreCreateInstanceOperator(BaseOperator): - Must be between 1-40 characters. - Must end with a number or a letter. - Must be unique within the customer project / location - :type instance_id: str :param instance: Required. A Redis [Instance] resource If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.redis_v1.types.Instance` - :type instance: Union[Dict, google.cloud.redis_v1.types.Instance] :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -81,7 +80,6 @@ class CloudMemorystoreCreateInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -146,22 +144,15 @@ class CloudMemorystoreDeleteInstanceOperator(BaseOperator): :ref:`howto/operator:CloudMemorystoreDeleteInstanceOperator` :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Redis instance in the customer project. 
- :type instance: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -170,7 +161,6 @@ class CloudMemorystoreDeleteInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -232,26 +222,19 @@ class CloudMemorystoreExportInstanceOperator(BaseOperator): :ref:`howto/operator:CloudMemorystoreExportInstanceOperator` :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Redis instance in the customer project. - :type instance: str :param output_config: Required. Specify data to be exported. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.redis_v1.types.OutputConfig` - :type output_config: Union[Dict, google.cloud.redis_v1.types.OutputConfig] :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -260,7 +243,6 @@ class CloudMemorystoreExportInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
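The ``impersonation_chain`` paragraph repeated throughout these docstrings allows either a single account or a chain; a sketch against the export operator just described, with placeholder accounts, bucket, and instance:

```python
# Inside a `with models.DAG(...):` block.
from airflow.providers.google.cloud.operators.cloud_memorystore import CloudMemorystoreExportInstanceOperator

export_instance = CloudMemorystoreExportInstanceOperator(
    task_id="export_instance",
    location="europe-west1",
    instance="my-redis",
    # Same shape as google.cloud.redis_v1.types.OutputConfig.
    output_config={"gcs_destination": {"uri": "gs://my-bucket/my-redis.rdb"}},
    # Single-account form: this account is impersonated directly and must grant
    # the originating account the Service Account Token Creator IAM role.
    impersonation_chain="transfer-sa@my-project.iam.gserviceaccount.com",
    # Chained form instead: each identity grants the role to the preceding one.
    # impersonation_chain=["first-sa@...", "transfer-sa@..."],
)
```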
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -326,24 +308,17 @@ class CloudMemorystoreFailoverInstanceOperator(BaseOperator): :ref:`howto/operator:CloudMemorystoreFailoverInstanceOperator` :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Redis instance in the customer project. - :type instance: str :param data_protection_mode: Optional. Available data protection modes that the user can choose. If it's unspecified, data protection mode will be LIMITED_DATA_LOSS by default. - :type data_protection_mode: google.cloud.redis_v1.gapic.enums.FailoverInstanceRequest.DataProtectionMode :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -352,7 +327,6 @@ class CloudMemorystoreFailoverInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -416,21 +390,15 @@ class CloudMemorystoreGetInstanceOperator(BaseOperator): :ref:`howto/operator:CloudMemorystoreGetInstanceOperator` :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Redis instance in the customer project. - :type instance: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
@@ -439,7 +407,6 @@ class CloudMemorystoreGetInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -503,26 +470,19 @@ class CloudMemorystoreImportOperator(BaseOperator): :ref:`howto/operator:CloudMemorystoreImportOperator` :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Redis instance in the customer project. - :type instance: str :param input_config: Required. Specify data to be imported. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.redis_v1.types.InputConfig` - :type input_config: Union[Dict, google.cloud.redis_v1.types.InputConfig] :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -531,7 +491,6 @@ class CloudMemorystoreImportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -597,23 +556,17 @@ class CloudMemorystoreListInstancesOperator(BaseOperator): :param location: The location of the Cloud Memorystore instance (for example europe-west1) If it is specified as ``-`` (wildcard), then all regions available to the project are queried, and the results are aggregated. - :type location: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per- resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. - :type page_size: int :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. 
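The ``retry``/``timeout`` interplay described above (once a retry object is supplied, the timeout bounds each attempt rather than the whole call) looks like this in practice; the backoff numbers are arbitrary:

```python
# Inside a `with models.DAG(...):` block.
from google.api_core.retry import Retry

from airflow.providers.google.cloud.operators.cloud_memorystore import CloudMemorystoreListInstancesOperator

list_instances = CloudMemorystoreListInstancesOperator(
    task_id="list_instances",
    location="-",   # wildcard: query every region available to the project
    page_size=100,
    # Exponential backoff: 1s, 2s, 4s, ... capped at 30s, up to 120s overall.
    retry=Retry(initial=1.0, maximum=30.0, multiplier=2.0, deadline=120.0),
    timeout=10.0,   # seconds per individual attempt, not for the whole call
)
```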
- :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -622,7 +575,6 @@ class CloudMemorystoreListInstancesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -694,29 +646,20 @@ class CloudMemorystoreUpdateInstanceOperator(BaseOperator): For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:CloudMemorystoreUpdateInstanceOperator` - :type update_mask: Union[Dict, google.cloud.redis_v1.types.FieldMask] :param instance: Required. Update description. Only fields specified in update_mask are updated. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.redis_v1.types.Instance` - :type instance: Union[Dict, google.cloud.redis_v1.types.Instance] :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance_id: The logical name of the Redis instance in the customer project. - :type instance_id: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -725,7 +668,6 @@ class CloudMemorystoreUpdateInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -793,24 +735,16 @@ class CloudMemorystoreScaleInstanceOperator(BaseOperator): :ref:`howto/operator:CloudMemorystoreScaleInstanceOperator` :param memory_size_gb: Redis memory size in GiB. - :type memory_size_gb: int :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance_id: The logical name of the Redis instance in the customer project. 
- :type instance_id: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -819,7 +753,6 @@ class CloudMemorystoreScaleInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -889,7 +822,6 @@ class CloudMemorystoreCreateInstanceAndImportOperator(BaseOperator): :ref:`howto/operator:CloudMemorystoreCreateInstanceAndImportOperator` :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance_id: Required. The logical name of the Redis instance in the customer project with the following restrictions: @@ -898,30 +830,22 @@ class CloudMemorystoreCreateInstanceAndImportOperator(BaseOperator): - Must be between 1-40 characters. - Must end with a number or a letter. - Must be unique within the customer project / location - :type instance_id: str :param instance: Required. A Redis [Instance] resource If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.redis_v1.types.Instance` - :type instance: Union[Dict, google.cloud.redis_v1.types.Instance] :param input_config: Required. Specify data to be imported. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.redis_v1.types.InputConfig` - :type input_config: Union[Dict, google.cloud.redis_v1.types.InputConfig] :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
@@ -930,7 +854,6 @@ class CloudMemorystoreCreateInstanceAndImportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1011,26 +934,19 @@ class CloudMemorystoreExportAndDeleteInstanceOperator(BaseOperator): :ref:`howto/operator:CloudMemorystoreExportAndDeleteInstanceOperator` :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Redis instance in the customer project. - :type instance: str :param output_config: Required. Specify data to be exported. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.redis_v1.types.OutputConfig` - :type output_config: Union[Dict, google.cloud.redis_v1.types.OutputConfig] :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1039,7 +955,6 @@ class CloudMemorystoreExportAndDeleteInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1113,26 +1028,18 @@ class CloudMemorystoreMemcachedApplyParametersOperator(BaseOperator): :ref:`howto/operator:CloudMemorystoreMemcachedApplyParametersOperator` :param node_ids: Nodes to which we should apply the instance-level parameter group. - :type node_ids: Sequence[str] :param apply_all: Whether to apply instance-level parameter group to all nodes. If set to true, will explicitly restrict users from specifying any nodes, and apply parameter group updates to all nodes within the instance. - :type apply_all: bool :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance_id: The logical name of the Memcached instance in the customer project. - :type instance_id: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. 
- :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ template_fields: Sequence[str] = ( @@ -1203,7 +1110,6 @@ class CloudMemorystoreMemcachedCreateInstanceOperator(BaseOperator): :ref:`howto/operator:CloudMemorystoreMemcachedCreateInstanceOperator` :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance_id: Required. The logical name of the Memcached instance in the customer project with the following restrictions: @@ -1212,25 +1118,18 @@ class CloudMemorystoreMemcachedCreateInstanceOperator(BaseOperator): - Must be between 1-40 characters. - Must end with a number or a letter. - Must be unique within the customer project / location - :type instance_id: str :param instance: Required. A Memcached [Instance] resource If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.memcache_v1beta2.types.cloud_memcache.Instance` - :type instance: Union[Dict, google.cloud.memcache_v1beta2.types.cloud_memcache.Instance] :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud Platform. - :type gcp_conn_id: str """ template_fields: Sequence[str] = ( @@ -1290,22 +1189,15 @@ class CloudMemorystoreMemcachedDeleteInstanceOperator(BaseOperator): :ref:`howto/operator:CloudMemorystoreMemcachedDeleteInstanceOperator` :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Memcached instance in the customer project. - :type instance: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the GCP connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud Platform. 
- :type gcp_conn_id: str """ template_fields: Sequence[str] = ( @@ -1360,21 +1252,15 @@ class CloudMemorystoreMemcachedGetInstanceOperator(BaseOperator): :ref:`howto/operator:CloudMemorystoreMemcachedGetInstanceOperator` :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance: The logical name of the Memcached instance in the customer project. - :type instance: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud Platform. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1383,7 +1269,6 @@ class CloudMemorystoreMemcachedGetInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1447,19 +1332,14 @@ class CloudMemorystoreMemcachedListInstancesOperator(BaseOperator): :param location: The location of the Cloud Memorystore instance (for example europe-west1) If it is specified as ``-`` (wildcard), then all regions available to the project are queried, and the results are aggregated. - :type location: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1468,7 +1348,6 @@ class CloudMemorystoreMemcachedListInstancesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1533,29 +1412,20 @@ class CloudMemorystoreMemcachedUpdateInstanceOperator(BaseOperator): For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:CloudMemorystoreMemcachedUpdateInstanceOperator` - :type update_mask: Union[Dict, google.cloud.memcache_v1beta2.types.cloud_memcache.field_mask.FieldMask] :param instance: Required. Update description. Only fields specified in update_mask are updated. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.memcache_v1beta2.types.cloud_memcache.Instance` - :type instance: Union[Dict, google.cloud.memcache_v1beta2.types.cloud_memcache.Instance] :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance_id: The logical name of the Memcached instance in the customer project. - :type instance_id: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1564,7 +1434,6 @@ class CloudMemorystoreMemcachedUpdateInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1636,27 +1505,19 @@ class CloudMemorystoreMemcachedUpdateParametersOperator(BaseOperator): :param update_mask: Required. Mask of fields to update. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.memcache_v1beta2.types.cloud_memcache.field_mask.FieldMask` - :type update_mask: Union[Dict, google.cloud.memcache_v1beta2.types.cloud_memcache.field_mask.FieldMask] :param parameters: The parameters to apply to the instance. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.memcache_v1beta2.types.cloud_memcache.MemcacheParameters` - :type parameters: Union[Dict, google.cloud.memcache_v1beta2.types.cloud_memcache.MemcacheParameters] :param location: The location of the Cloud Memorystore instance (for example europe-west1) - :type location: str :param instance_id: The logical name of the Memcached instance in the customer project. - :type instance_id: str :param project_id: Project ID of the project that contains the instance. If set to None or missing, the default project_id from the Google Cloud connection is used. 
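The two Memcached parameter operators are designed to be used together: update the instance-level parameter group, then apply it to the nodes. A sketch with placeholder names, where the dict values follow the ``FieldMask`` and ``MemcacheParameters`` shapes named above:

```python
# Inside a `with models.DAG(...):` block.
from airflow.providers.google.cloud.operators.cloud_memorystore import (
    CloudMemorystoreMemcachedApplyParametersOperator,
    CloudMemorystoreMemcachedUpdateParametersOperator,
)

update_parameters = CloudMemorystoreMemcachedUpdateParametersOperator(
    task_id="update_parameters",
    location="europe-west1",
    instance_id="my-memcached",
    update_mask={"paths": ["params"]},             # FieldMask as a plain dict
    parameters={"params": {"protocol": "ascii"}},  # MemcacheParameters as a dict
)

apply_parameters = CloudMemorystoreMemcachedApplyParametersOperator(
    task_id="apply_parameters",
    location="europe-west1",
    instance_id="my-memcached",
    node_ids=["node-a-1"],  # placeholder node; ignored when apply_all=True
    apply_all=False,
)

update_parameters >> apply_parameters
```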
- :type project_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/operators/cloud_sql.py b/airflow/providers/google/cloud/operators/cloud_sql.py index 13b72d5e62e4e..40ad97dd791ff 100644 --- a/airflow/providers/google/cloud/operators/cloud_sql.py +++ b/airflow/providers/google/cloud/operators/cloud_sql.py @@ -195,14 +195,10 @@ class CloudSQLBaseOperator(BaseOperator): Abstract base operator for Google Cloud SQL operators to inherit from. :param instance: Cloud SQL instance ID. This does not include the project ID. - :type instance: str :param project_id: Optional, Google Cloud Project ID. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1beta4). - :type api_version: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -211,7 +207,6 @@ class CloudSQLBaseOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -277,18 +272,12 @@ class CloudSQLCreateInstanceOperator(CloudSQLBaseOperator): :param body: Body required by the Cloud SQL insert API, as described in https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/insert #request-body - :type body: dict :param instance: Cloud SQL instance ID. This does not include the project ID. - :type instance: str :param project_id: Optional, Google Cloud Project ID. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1beta4). - :type api_version: str :param validate_body: True if body should be validated, False otherwise. - :type validate_body: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -297,7 +286,6 @@ class CloudSQLCreateInstanceOperator(CloudSQLBaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_sql_create_template_fields] @@ -380,16 +368,11 @@ class CloudSQLInstancePatchOperator(CloudSQLBaseOperator): :param body: Body required by the Cloud SQL patch API, as described in https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/patch#request-body - :type body: dict :param instance: Cloud SQL instance ID. This does not include the project ID. - :type instance: str :param project_id: Optional, Google Cloud Project ID. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1beta4). - :type api_version: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -398,7 +381,6 @@ class CloudSQLInstancePatchOperator(CloudSQLBaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_sql_patch_template_fields] @@ -462,14 +444,10 @@ class CloudSQLDeleteInstanceOperator(CloudSQLBaseOperator): :ref:`howto/operator:CloudSQLDeleteInstanceOperator` :param instance: Cloud SQL instance ID. This does not include the project ID. - :type instance: str :param project_id: Optional, Google Cloud Project ID. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1beta4). - :type api_version: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -478,7 +456,6 @@ class CloudSQLDeleteInstanceOperator(CloudSQLBaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_sql_delete_template_fields] @@ -513,19 +490,13 @@ class CloudSQLCreateInstanceDatabaseOperator(CloudSQLBaseOperator): :ref:`howto/operator:CloudSQLCreateInstanceDatabaseOperator` :param instance: Database instance ID. This does not include the project ID. - :type instance: str :param body: The request body, as described in https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/insert#request-body - :type body: dict :param project_id: Optional, Google Cloud Project ID. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1beta4). - :type api_version: str :param validate_body: Whether the body should be validated. Defaults to True. 
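As an illustration of the CloudSQLCreateInstanceOperator parameters above, a minimal sketch inside a DAG; the instance name and tier are placeholder values for the instances/insert request body.

```python
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.cloud_sql import CloudSQLCreateInstanceOperator

with DAG("example_cloud_sql", start_date=datetime(2022, 1, 1), schedule_interval=None) as dag:
    create_instance = CloudSQLCreateInstanceOperator(
        task_id="create_instance",
        instance="my-instance",  # hypothetical instance ID
        body={
            "name": "my-instance",
            "settings": {"tier": "db-n1-standard-1"},
        },
        # project_id omitted: falls back to the default from the GCP connection
    )
```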
- :type validate_body: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -534,7 +505,6 @@ class CloudSQLCreateInstanceDatabaseOperator(CloudSQLBaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_sql_db_create_template_fields] @@ -619,20 +589,13 @@ class CloudSQLPatchInstanceDatabaseOperator(CloudSQLBaseOperator): :ref:`howto/operator:CloudSQLPatchInstanceDatabaseOperator` :param instance: Database instance ID. This does not include the project ID. - :type instance: str :param database: Name of the database to be updated in the instance. - :type database: str :param body: The request body, as described in https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/databases/patch#request-body - :type body: dict :param project_id: Optional, Google Cloud Project ID. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1beta4). - :type api_version: str :param validate_body: Whether the body should be validated. Defaults to True. - :type validate_body: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -641,7 +604,6 @@ class CloudSQLPatchInstanceDatabaseOperator(CloudSQLBaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_sql_db_patch_template_fields] @@ -721,16 +683,11 @@ class CloudSQLDeleteInstanceDatabaseOperator(CloudSQLBaseOperator): :ref:`howto/operator:CloudSQLDeleteInstanceDatabaseOperator` :param instance: Database instance ID. This does not include the project ID. - :type instance: str :param database: Name of the database to be deleted in the instance. - :type database: str :param project_id: Optional, Google Cloud Project ID. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1beta4). - :type api_version: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -739,7 +696,6 @@ class CloudSQLDeleteInstanceDatabaseOperator(CloudSQLBaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
- :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_sql_db_delete_template_fields] @@ -810,19 +766,13 @@ class CloudSQLExportInstanceOperator(CloudSQLBaseOperator): :ref:`howto/operator:CloudSQLExportInstanceOperator` :param instance: Cloud SQL instance ID. This does not include the project ID. - :type instance: str :param body: The request body, as described in https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/export#request-body - :type body: dict :param project_id: Optional, Google Cloud Project ID. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1beta4). - :type api_version: str :param validate_body: Whether the body should be validated. Defaults to True. - :type validate_body: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -831,7 +781,6 @@ class CloudSQLExportInstanceOperator(CloudSQLBaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_sql_export_template_fields] @@ -914,19 +863,13 @@ class CloudSQLImportInstanceOperator(CloudSQLBaseOperator): :ref:`howto/operator:CloudSQLImportInstanceOperator` :param instance: Cloud SQL instance ID. This does not include the project ID. - :type instance: str :param body: The request body, as described in https://cloud.google.com/sql/docs/mysql/admin-api/v1beta4/instances/import#request-body - :type body: dict :param project_id: Optional, Google Cloud Project ID. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1beta4). - :type api_version: str :param validate_body: Whether the body should be validated. Defaults to True. - :type validate_body: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -935,7 +878,6 @@ class CloudSQLImportInstanceOperator(CloudSQLBaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_sql_import_template_fields] @@ -1007,20 +949,15 @@ class CloudSQLExecuteQueryOperator(BaseOperator): so it is useless to pass it DQL queries. Note that it is the responsibility of the author of the queries to make sure that the queries are idempotent. For example, you can use CREATE TABLE IF NOT EXISTS to create a table. - :type sql: str or list[str] :param parameters: (optional) the parameters to render the SQL query with.
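To make the CloudSQLExportInstanceOperator body format concrete, a hedged sketch with placeholder bucket, database, and instance names; the dict follows the instances/export request body linked in the docstring.

```python
from airflow.providers.google.cloud.operators.cloud_sql import CloudSQLExportInstanceOperator

# Placeholder names; exportContext mirrors the REST request body.
export_body = {
    "exportContext": {
        "fileType": "sql",
        "uri": "gs://my-bucket/my-instance-export.sql",
        "databases": ["my_database"],
    }
}
sql_export = CloudSQLExportInstanceOperator(
    task_id="sql_export",
    instance="my-instance",
    body=export_body,
)
```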
- :type parameters: dict or iterable :param autocommit: if True, each command is automatically committed. (default value: False) - :type autocommit: bool :param gcp_conn_id: The connection ID used to connect to Google Cloud for cloud-sql-proxy authentication. - :type gcp_conn_id: str :param gcp_cloudsql_conn_id: The connection ID used to connect to Google Cloud SQL its schema should be gcpcloudsql://. See :class:`~airflow.providers.google.cloud.hooks.cloud_sql.CloudSQLDatabaseHook` for details on how to define ``gcpcloudsql://`` connection. - :type gcp_cloudsql_conn_id: str """ # [START gcp_sql_query_template_fields] diff --git a/airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py b/airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py index 29bcd1bf65b96..7a9bc4d4b5e8f 100644 --- a/airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +++ b/airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py @@ -195,14 +195,10 @@ class CloudDataTransferServiceCreateJobOperator(BaseOperator): * credentials to Amazon Web Service should be stored in the connection and indicated by the aws_conn_id parameter - :type body: dict :param aws_conn_id: The connection ID used to retrieve credentials to Amazon Web Service. - :type aws_conn_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1). - :type api_version: str :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -211,7 +207,6 @@ class CloudDataTransferServiceCreateJobOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_transfer_job_create_template_fields] @@ -263,7 +258,6 @@ class CloudDataTransferServiceUpdateJobOperator(BaseOperator): :ref:`howto/operator:CloudDataTransferServiceUpdateJobOperator` :param job_name: (Required) Name of the job to be updated - :type job_name: str :param body: (Required) The request body, as described in https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/patch#request-body With three additional improvements: @@ -273,14 +267,10 @@ class CloudDataTransferServiceUpdateJobOperator(BaseOperator): * credentials to Amazon Web Service should be stored in the connection and indicated by the aws_conn_id parameter - :type body: dict :param aws_conn_id: The connection ID used to retrieve credentials to Amazon Web Service. - :type aws_conn_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1). - :type api_version: str :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
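A minimal sketch of the CloudSQLExecuteQueryOperator described above; the gcpcloudsql:// connection ID is a placeholder, and the statements are deliberately idempotent, as the docstring recommends.

```python
from airflow.providers.google.cloud.operators.cloud_sql import CloudSQLExecuteQueryOperator

# Hypothetical gcpcloudsql:// connection; queries are idempotent DDL/DML,
# since the operator does not return query results.
run_queries = CloudSQLExecuteQueryOperator(
    task_id="run_queries",
    gcp_cloudsql_conn_id="my_gcpcloudsql_connection",
    sql=[
        "CREATE TABLE IF NOT EXISTS users (id INT, name VARCHAR(50))",
        "INSERT INTO users VALUES (1, 'alice')",
    ],
    autocommit=True,
)
```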
@@ -289,7 +279,6 @@ class CloudDataTransferServiceUpdateJobOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_transfer_job_update_template_fields] @@ -349,15 +338,11 @@ class CloudDataTransferServiceDeleteJobOperator(BaseOperator): :ref:`howto/operator:CloudDataTransferServiceDeleteJobOperator` :param job_name: (Required) Name of the TRANSFER operation - :type job_name: str :param project_id: (Optional) the ID of the project that owns the Transfer Job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1). - :type api_version: str :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -366,7 +351,6 @@ class CloudDataTransferServiceDeleteJobOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_transfer_job_delete_template_fields] @@ -421,12 +405,9 @@ class CloudDataTransferServiceGetOperationOperator(BaseOperator): :ref:`howto/operator:CloudDataTransferServiceGetOperationOperator` :param operation_name: (Required) Name of the transfer operation. - :type operation_name: str :param gcp_conn_id: The connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1). - :type api_version: str :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -435,7 +416,6 @@ class CloudDataTransferServiceGetOperationOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_transfer_operation_get_template_fields] @@ -487,12 +467,9 @@ class CloudDataTransferServiceListOperationsOperator(BaseOperator): :param request_filter: (Required) A request filter, as described in https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/list#body.QUERY_PARAMETERS.filter - :type request_filter: dict :param gcp_conn_id: The connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1). - :type api_version: str :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
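For the CloudDataTransferServiceUpdateJobOperator above, a hedged sketch of patching a job's description. The job name is a placeholder, and the snake_case body keys are an assumption based on the "additional improvements" the docstring describes on top of the raw REST patch body.

```python
from airflow.providers.google.cloud.operators.cloud_storage_transfer_service import (
    CloudDataTransferServiceUpdateJobOperator,
)

# Hypothetical job name; body keys assumed per the docstring's note that
# the operator accepts an adjusted form of the transferJobs/patch body.
update_transfer_job = CloudDataTransferServiceUpdateJobOperator(
    task_id="update_transfer_job",
    job_name="transferJobs/123456789",
    body={
        "transfer_job": {"description": "updated description"},
        "update_transfer_job_field_mask": "description",
    },
)
```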
@@ -501,7 +478,6 @@ class CloudDataTransferServiceListOperationsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_transfer_operations_list_template_fields] @@ -560,11 +536,8 @@ class CloudDataTransferServicePauseOperationOperator(BaseOperator): :ref:`howto/operator:CloudDataTransferServicePauseOperationOperator` :param operation_name: (Required) Name of the transfer operation. - :type operation_name: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (e.g. v1). - :type api_version: str :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -573,7 +546,6 @@ class CloudDataTransferServicePauseOperationOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_transfer_operation_pause_template_fields] @@ -623,11 +595,8 @@ class CloudDataTransferServiceResumeOperationOperator(BaseOperator): :ref:`howto/operator:CloudDataTransferServiceResumeOperationOperator` :param operation_name: (Required) Name of the transfer operation. - :type operation_name: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. :param api_version: API version used (e.g. v1). - :type api_version: str - :type gcp_conn_id: str :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -636,7 +605,6 @@ class CloudDataTransferServiceResumeOperationOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_transfer_operation_resume_template_fields] @@ -686,12 +654,9 @@ class CloudDataTransferServiceCancelOperationOperator(BaseOperator): :ref:`howto/operator:CloudDataTransferServiceCancelOperationOperator` :param operation_name: (Required) Name of the transfer operation. - :type operation_name: str :param api_version: API version used (e.g. v1). - :type api_version: str :param gcp_conn_id: The connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: str :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
@@ -700,7 +665,6 @@ class CloudDataTransferServiceCancelOperationOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_transfer_operation_cancel_template_fields] @@ -764,27 +728,19 @@ class CloudDataTransferServiceS3ToGCSOperator(BaseOperator): ) :param s3_bucket: The S3 bucket where to find the objects. (templated) - :type s3_bucket: str :param gcs_bucket: The destination Google Cloud Storage bucket where you want to store the files. (templated) - :type gcs_bucket: str :param s3_path: Optional root path where the source objects are. (templated) - :type s3_path: str :param gcs_path: Optional root path for transferred objects. (templated) - :type gcs_path: str :param project_id: Optional ID of the Google Cloud Console project that owns the job - :type project_id: str :param aws_conn_id: The source S3 connection - :type aws_conn_id: str :param gcp_conn_id: The destination connection ID to use when connecting to Google Cloud Storage. - :type gcp_conn_id: str :param delegate_to: Google account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. :param description: Optional transfer service job description - :type description: str :param schedule: Optional transfer service schedule; If not set, run transfer job once as soon as the operator runs The format is described @@ -794,18 +750,13 @@ class CloudDataTransferServiceS3ToGCSOperator(BaseOperator): * dates they can be passed as :class:`datetime.date` * times they can be passed as :class:`datetime.time` - :type schedule: dict :param object_conditions: Optional transfer service object conditions; see https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec - :type object_conditions: dict :param transfer_options: Optional transfer service transfer options; see https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec - :type transfer_options: dict :param wait: Wait for transfer to finish. It must be set to True if 'delete_job_after_completion' is set to True. - :type wait: bool :param timeout: Time to wait for the operation to end in seconds. Defaults to 60 seconds if not specified. - :type timeout: Optional[Union[float, timedelta]] :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -814,10 +765,8 @@ class CloudDataTransferServiceS3ToGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] :param delete_job_after_completion: If True, delete the job after completion. If set to True, 'wait' must be set to True. - :type delete_job_after_completion: bool """ template_fields: Sequence[str] = ( @@ -952,25 +901,18 @@ class CloudDataTransferServiceGCSToGCSOperator(BaseOperator): :param source_bucket: The source Google Cloud Storage bucket where the object is.
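A hedged sketch of the CloudDataTransferServiceS3ToGCSOperator documented above; bucket names are placeholders, and with no schedule set the transfer job runs once as soon as the task executes.

```python
from datetime import timedelta

from airflow.providers.google.cloud.operators.cloud_storage_transfer_service import (
    CloudDataTransferServiceS3ToGCSOperator,
)

# Placeholder buckets; wait=True blocks until the transfer finishes,
# bounded by the timeout below.
s3_to_gcs = CloudDataTransferServiceS3ToGCSOperator(
    task_id="s3_to_gcs",
    s3_bucket="my-s3-bucket",
    gcs_bucket="my-gcs-bucket",
    description="example one-off S3 to GCS transfer",
    wait=True,
    timeout=timedelta(minutes=30),
)
```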
(templated) - :type source_bucket: str :param destination_bucket: The destination Google Cloud Storage bucket where the object should be. (templated) - :type destination_bucket: str :param source_path: Optional root path where the source objects are. (templated) - :type source_path: str :param destination_path: Optional root path for transferred objects. (templated) - :type destination_path: str :param project_id: The ID of the Google Cloud Console project that owns the job - :type project_id: str :param gcp_conn_id: Optional connection ID to use when connecting to Google Cloud Storage. - :type gcp_conn_id: str :param delegate_to: Google account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. :param description: Optional transfer service job description - :type description: str :param schedule: Optional transfer service schedule; If not set, run transfer job once as soon as the operator runs See: @@ -980,18 +922,13 @@ class CloudDataTransferServiceGCSToGCSOperator(BaseOperator): * dates they can be passed as :class:`datetime.date` * times they can be passed as :class:`datetime.time` - :type schedule: dict :param object_conditions: Optional transfer service object conditions; see https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#ObjectConditions - :type object_conditions: dict :param transfer_options: Optional transfer service transfer options; see https://cloud.google.com/storage-transfer/docs/reference/rest/v1/TransferSpec#TransferOptions - :type transfer_options: dict :param wait: Wait for transfer to finish. It must be set to True if 'delete_job_after_completion' is set to True. - :type wait: bool :param timeout: Time to wait for the operation to end in seconds. Defaults to 60 seconds if not specified. - :type timeout: Optional[Union[float, timedelta]] :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1000,10 +937,8 @@ class CloudDataTransferServiceGCSToGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] :param delete_job_after_completion: If True, delete the job after completion. If set to True, 'wait' must be set to True. - :type delete_job_after_completion: bool """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/operators/compute.py b/airflow/providers/google/cloud/operators/compute.py index 090d131844f5d..7b45ede859918 100644 --- a/airflow/providers/google/cloud/operators/compute.py +++ b/airflow/providers/google/cloud/operators/compute.py @@ -77,19 +77,14 @@ class ComputeEngineStartInstanceOperator(ComputeEngineBaseOperator): :ref:`howto/operator:ComputeEngineStartInstanceOperator` :param zone: Google Cloud zone where the instance exists. - :type zone: str :param resource_id: Name of the Compute Engine instance resource. - :type resource_id: str :param project_id: Optional, Google Cloud Project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used.
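And the GCS-to-GCS counterpart whose docstring closes above, as a hedged sketch with placeholder buckets and project; note the wait/delete_job_after_completion coupling the docstring calls out.

```python
from airflow.providers.google.cloud.operators.cloud_storage_transfer_service import (
    CloudDataTransferServiceGCSToGCSOperator,
)

# Placeholder buckets/project; delete_job_after_completion=True requires
# wait=True, per the docstring.
gcs_to_gcs = CloudDataTransferServiceGCSToGCSOperator(
    task_id="gcs_to_gcs",
    source_bucket="my-source-bucket",
    destination_bucket="my-destination-bucket",
    project_id="my-project",
    wait=True,
    delete_job_after_completion=True,
)
```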
- :type project_id: str :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param api_version: Optional, API version used (for example v1 - or beta). Defaults to v1. - :type api_version: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -98,7 +93,6 @@ class ComputeEngineStartInstanceOperator(ComputeEngineBaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gce_instance_start_template_fields] @@ -130,19 +124,14 @@ class ComputeEngineStopInstanceOperator(ComputeEngineBaseOperator): :ref:`howto/operator:ComputeEngineStopInstanceOperator` :param zone: Google Cloud zone where the instance exists. - :type zone: str :param resource_id: Name of the Compute Engine instance resource. - :type resource_id: str :param project_id: Optional, Google Cloud Project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param api_version: Optional, API version used (for example v1 - or beta). Defaults to v1. - :type api_version: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -151,7 +140,6 @@ class ComputeEngineStopInstanceOperator(ComputeEngineBaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gce_instance_stop_template_fields] @@ -189,25 +177,18 @@ class ComputeEngineSetMachineTypeOperator(ComputeEngineBaseOperator): :ref:`howto/operator:ComputeEngineSetMachineTypeOperator` :param zone: Google Cloud zone where the instance exists. - :type zone: str :param resource_id: Name of the Compute Engine instance resource. - :type resource_id: str :param body: Body required by the Compute Engine setMachineType API, as described in https://cloud.google.com/compute/docs/reference/rest/v1/instances/setMachineType#request-body - :type body: dict :param project_id: Optional, Google Cloud Project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param api_version: Optional, API version used (for example v1 - or beta). Defaults to v1. - :type api_version: str :param validate_body: Optional, If set to False, body validation is not performed. Defaults to False. 
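A minimal sketch of the Compute Engine start/stop operators documented above; the zone and instance name are placeholders, and project_id/gcp_conn_id fall back to the defaults described in the docstrings.

```python
from airflow.providers.google.cloud.operators.compute import (
    ComputeEngineStartInstanceOperator,
    ComputeEngineStopInstanceOperator,
)

# Hypothetical zone and instance name.
gce_start = ComputeEngineStartInstanceOperator(
    task_id="gce_start",
    zone="europe-west1-b",
    resource_id="my-instance",
)
gce_stop = ComputeEngineStopInstanceOperator(
    task_id="gce_stop",
    zone="europe-west1-b",
    resource_id="my-instance",
)
```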
- :type validate_body: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -216,7 +197,6 @@ class ComputeEngineSetMachineTypeOperator(ComputeEngineBaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gce_instance_set_machine_type_template_fields] @@ -344,7 +324,6 @@ class ComputeEngineCopyInstanceTemplateOperator(ComputeEngineBaseOperator): :ref:`howto/operator:ComputeEngineCopyInstanceTemplateOperator` :param resource_id: Name of the Instance Template - :type resource_id: str :param body_patch: Patch to the body of instanceTemplates object following rfc7386 PATCH semantics. The body_patch content follows https://cloud.google.com/compute/docs/reference/rest/v1/instanceTemplates @@ -352,25 +331,19 @@ class ComputeEngineCopyInstanceTemplateOperator(ComputeEngineBaseOperator): all the other fields are optional. It is important to follow PATCH semantics - arrays are replaced fully, so if you need to update an array you should provide the whole target array as patch element. - :type body_patch: dict :param project_id: Optional, Google Cloud Project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param request_id: Optional, unique request_id that you might add to achieve full idempotence (for example when client call times out repeating the request with the same request id will not create a new instance template again). It should be in UUID format as defined in RFC 4122. - :type request_id: str :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param api_version: Optional, API version used (for example v1 - or beta). Defaults to v1. - :type api_version: str :param validate_body: Optional, If set to False, body validation is not performed. Defaults to False. - :type validate_body: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -379,7 +352,6 @@ class ComputeEngineCopyInstanceTemplateOperator(ComputeEngineBaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gce_instance_template_copy_operator_template_fields] @@ -482,28 +454,20 @@ class ComputeEngineInstanceGroupUpdateManagerTemplateOperator(ComputeEngineBaseO :ref:`howto/operator:ComputeEngineInstanceGroupUpdateManagerTemplateOperator` :param resource_id: Name of the Instance Group Manager - :type resource_id: str :param zone: Google Cloud zone where the Instance Group Manager exists. - :type zone: str :param source_template: URL of the template to replace. - :type source_template: str :param destination_template: URL of the target template. 
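To ground the ComputeEngineSetMachineTypeOperator body format above, a hedged sketch; zone, instance, and machine type are placeholders, and the body mirrors the setMachineType request body linked in the docstring.

```python
from airflow.providers.google.cloud.operators.compute import ComputeEngineSetMachineTypeOperator

# Placeholder zone/instance; the instance must be stopped before its
# machine type can be changed.
set_machine_type = ComputeEngineSetMachineTypeOperator(
    task_id="gce_set_machine_type",
    zone="europe-west1-b",
    resource_id="my-instance",
    body={"machineType": "zones/europe-west1-b/machineTypes/n1-standard-2"},
)
```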
- :type destination_template: str :param project_id: Optional, Google Cloud Project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param request_id: Optional, unique request_id that you might add to achieve full idempotence (for example when client call times out repeating the request with the same request id will not create a new instance template again). It should be in UUID format as defined in RFC 4122. - :type request_id: str :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param api_version: Optional, API version used (for example v1 - or beta). Defaults to v1. - :type api_version: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -512,7 +476,6 @@ class ComputeEngineInstanceGroupUpdateManagerTemplateOperator(ComputeEngineBaseO If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gce_igm_update_template_operator_template_fields] diff --git a/airflow/providers/google/cloud/operators/datacatalog.py b/airflow/providers/google/cloud/operators/datacatalog.py index 53a8d77770204..79b448accbb25 100644 --- a/airflow/providers/google/cloud/operators/datacatalog.py +++ b/airflow/providers/google/cloud/operators/datacatalog.py @@ -50,29 +50,20 @@ class CloudDataCatalogCreateEntryOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogCreateEntryOperator` :param location: Required. The location of the entry to create. - :type location: str :param entry_group: Required. Entry group ID under which the entry is created. - :type entry_group: str :param entry_id: Required. The id of the entry to create. - :type entry_id: str :param entry: Required. The entry to create. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.Entry` - :type entry: Union[Dict, google.cloud.datacatalog_v1beta1.types.Entry] :param project_id: The ID of the Google Cloud project that owns the entry. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If set to ``None`` or missing, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
@@ -81,7 +72,6 @@ class CloudDataCatalogCreateEntryOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -167,30 +157,22 @@ class CloudDataCatalogCreateEntryGroupOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogCreateEntryGroupOperator` :param location: Required. The location of the entry group to create. - :type location: str :param entry_group_id: Required. The id of the entry group to create. The id must begin with a letter or underscore, contain only English letters, numbers and underscores, and be at most 64 characters. - :type entry_group_id: str :param entry_group: The entry group to create. Defaults to an empty entry group. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.EntryGroup` - :type entry_group: Union[Dict, google.cloud.datacatalog_v1beta1.types.EntryGroup] :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -199,7 +181,6 @@ class CloudDataCatalogCreateEntryGroupOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -281,31 +262,21 @@ class CloudDataCatalogCreateTagOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogCreateTagOperator` :param location: Required. The location of the tag to create. - :type location: str :param entry_group: Required. Entry group ID under which the tag is created. - :type entry_group: str :param entry: Required. Entry ID under which the tag is created. - :type entry: str :param tag: Required. The tag to create. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.Tag` - :type tag: Union[Dict, google.cloud.datacatalog_v1beta1.types.Tag] :param template_id: Required. Template ID used to create tag - :type template_id: Optional[str] :param project_id: The ID of the Google Cloud project that owns the tag.
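For the CloudDataCatalogCreateEntryOperator above, a hedged sketch creating a fileset entry; all IDs are placeholders, and the entry dict is the dict form of the Entry protobuf message (field naming such as `type_` can vary across client-library versions).

```python
from airflow.providers.google.cloud.operators.datacatalog import CloudDataCatalogCreateEntryOperator

# Placeholder IDs; the entry dict mirrors the Entry message.
create_entry = CloudDataCatalogCreateEntryOperator(
    task_id="create_entry",
    location="us-central1",
    entry_group="my_entry_group",
    entry_id="my_entry",
    entry={
        "display_name": "Example fileset",
        "type_": "FILESET",  # proto-plus renders the reserved word "type" as "type_"
        "gcs_fileset_spec": {"file_patterns": ["gs://my-bucket/**"]},
    },
)
```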
If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -314,7 +285,6 @@ class CloudDataCatalogCreateTagOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -419,28 +389,20 @@ class CloudDataCatalogCreateTagTemplateOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogCreateTagTemplateOperator` :param location: Required. The location of the tag template to create. - :type location: str :param tag_template_id: Required. The id of the tag template to create. - :type tag_template_id: str :param tag_template: Required. The tag template to create. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplate` - :type tag_template: Union[Dict, google.cloud.datacatalog_v1beta1.types.TagTemplate] :param project_id: The ID of the Google Cloud project that owns the tag template. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -449,7 +411,6 @@ class CloudDataCatalogCreateTagTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -530,33 +491,24 @@ class CloudDataCatalogCreateTagTemplateFieldOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogCreateTagTemplateFieldOperator` :param location: Required. The location of the tag template field to create. - :type location: str :param tag_template: Required. The id of the tag template to create. - :type tag_template: str :param tag_template_field_id: Required. The ID of the tag template field to create. Field ids can contain letters (both uppercase and lowercase), numbers (0-9), underscores (\_) and dashes (-). Field IDs must be at least 1 character long and at most 128 characters long. Field IDs must also be unique within their template. - :type tag_template_field_id: str :param tag_template_field: Required. The tag template field to create. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplateField` - :type tag_template_field: Union[Dict, google.cloud.datacatalog_v1beta1.types.TagTemplateField] :param project_id: The ID of the Google Cloud project that owns the tag template field. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -565,7 +517,6 @@ class CloudDataCatalogCreateTagTemplateFieldOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -649,25 +600,17 @@ class CloudDataCatalogDeleteEntryOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogDeleteEntryOperator` :param location: Required. The location of the entry to delete. - :type location: str :param entry_group: Required. Entry group ID for entries that is deleted. - :type entry_group: str :param entry: Entry ID that is deleted. - :type entry: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. 
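A hedged sketch of the CloudDataCatalogCreateTagTemplateOperator documented above; the template ID and field definitions are placeholders, and the tag_template dict is the dict form of the TagTemplate protobuf message.

```python
from airflow.providers.google.cloud.operators.datacatalog import (
    CloudDataCatalogCreateTagTemplateOperator,
)

# Placeholder IDs; fields map field IDs to TagTemplateField definitions.
create_tag_template = CloudDataCatalogCreateTagTemplateOperator(
    task_id="create_tag_template",
    location="us-central1",
    tag_template_id="my_template",
    tag_template={
        "display_name": "Example template",
        "fields": {
            "owner": {"display_name": "Owner", "type_": {"primitive_type": "STRING"}},
        },
    },
)
```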
- :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -676,7 +619,6 @@ class CloudDataCatalogDeleteEntryOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -745,23 +687,16 @@ class CloudDataCatalogDeleteEntryGroupOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogDeleteEntryGroupOperator` :param location: Required. The location of the entry group to delete. - :type location: str :param entry_group: Entry group ID that is deleted. - :type entry_group: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -770,7 +705,6 @@ class CloudDataCatalogDeleteEntryGroupOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -833,27 +767,18 @@ class CloudDataCatalogDeleteTagOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogDeleteTagOperator` :param location: Required. The location of the tag to delete. - :type location: str :param entry_group: Entry group ID for tag that is deleted. - :type entry_group: str :param entry: Entry ID for tag that is deleted. - :type entry: str :param tag: Identifier for TAG that is deleted. - :type tag: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. 
- :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -862,7 +787,6 @@ class CloudDataCatalogDeleteTagOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -933,27 +857,19 @@ class CloudDataCatalogDeleteTagTemplateOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogDeleteTagTemplateOperator` :param location: Required. The location of the tag template to delete. - :type location: str :param tag_template: ID for tag template that is deleted. - :type tag_template: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param force: Required. Currently, this field must always be set to ``true``. This confirms the deletion of any possible tags using this template. ``force = false`` will be supported in the future. - :type force: bool :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -962,7 +878,6 @@ class CloudDataCatalogDeleteTagTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1029,27 +944,18 @@ class CloudDataCatalogDeleteTagTemplateFieldOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogDeleteTagTemplateFieldOperator` :param location: Required. The location of the tag template to delete. - :type location: str :param tag_template: Tag Template ID for tag template field that is deleted. - :type tag_template: str :param field: Name of field that is deleted. 
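A minimal sketch of the CloudDataCatalogDeleteTagOperator above; every identifier is a placeholder, and project_id falls back to the default from the Google Cloud connection when omitted.

```python
from airflow.providers.google.cloud.operators.datacatalog import CloudDataCatalogDeleteTagOperator

# Hypothetical identifiers for the tag to delete.
delete_tag = CloudDataCatalogDeleteTagOperator(
    task_id="delete_tag",
    location="us-central1",
    entry_group="my_entry_group",
    entry="my_entry",
    tag="my_tag_id",
)
```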
- :type field: str :param force: Required. This confirms the deletion of this field from any tags using this field. - :type force: bool :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1058,7 +964,6 @@ class CloudDataCatalogDeleteTagTemplateFieldOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1129,25 +1034,17 @@ class CloudDataCatalogGetEntryOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogGetEntryOperator` :param location: Required. The location of the entry to get. - :type location: str :param entry_group: Required. The entry group of the entry to get. - :type entry_group: str :param entry: The ID of the entry to get. - :type entry: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1156,7 +1053,6 @@ class CloudDataCatalogGetEntryOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1221,28 +1117,20 @@ class CloudDataCatalogGetEntryGroupOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogGetEntryGroupOperator` :param location: Required. The location of the entry group to get. - :type location: str :param entry_group: The ID of the entry group to get. - :type entry_group: str :param read_mask: The fields to return. If not set or empty, all fields are returned. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.protobuf.field_mask_pb2.FieldMask` - :type read_mask: Union[Dict, google.protobuf.field_mask_pb2.FieldMask] :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1251,7 +1139,6 @@ class CloudDataCatalogGetEntryGroupOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1316,23 +1203,16 @@ class CloudDataCatalogGetTagTemplateOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogGetTagTemplateOperator` :param location: Required. The location of the tag template to get. - :type location: str :param tag_template: Required. The ID of the tag template to get. - :type tag_template: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. 
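The get-style operators return the fetched resource, so a downstream task can read it back from XCom. A short sketch under the same illustrative assumptions as above:

from airflow.providers.google.cloud.operators.datacatalog import CloudDataCatalogGetEntryOperator

get_entry = CloudDataCatalogGetEntryOperator(
    task_id="get_entry",
    location="us-central1",  # illustrative
    entry_group="my_group",  # illustrative
    entry="my_entry",        # illustrative
)
# The serialized entry is pushed as the task's return value, so a templated field
# downstream could read it with: {{ task_instance.xcom_pull('get_entry') }}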
- :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1341,7 +1221,6 @@ class CloudDataCatalogGetTagTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1402,30 +1281,21 @@ class CloudDataCatalogListTagsOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogListTagsOperator` :param location: Required. The location of the tags to get. - :type location: str :param entry_group: Required. The entry group of the tags to get. - :type entry_group: str :param entry: Required. The entry of the tags to get. - :type entry: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per- resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. (Default: 100) - :type page_size: int :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1434,7 +1304,6 @@ class CloudDataCatalogListTagsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1508,20 +1377,14 @@ class CloudDataCatalogLookupEntryOperator(BaseOperator): :param linked_resource: The full name of the Google Cloud resource the Data Catalog entry represents. See: https://cloud.google.com/apis/design/resource\_names#full\_resource\_name. Full names are case-sensitive. - :type linked_resource: str :param sql_resource: The SQL name of the entry. SQL names are case-sensitive. - :type sql_resource: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. 
Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1530,7 +1393,6 @@ class CloudDataCatalogLookupEntryOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1590,29 +1452,20 @@ class CloudDataCatalogRenameTagTemplateFieldOperator(BaseOperator): :ref:`howto/operator:CloudDataCatalogRenameTagTemplateFieldOperator` :param location: Required. The location of the tag template field to rename. - :type location: str :param tag_template: The tag template ID for field that is renamed. - :type tag_template: str :param field: Required. The old ID of this tag template field. For example, ``my_old_field``. - :type field: str :param new_tag_template_field_id: Required. The new ID of this tag template field. For example, ``my_new_field``. - :type new_tag_template_field_id: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1621,7 +1474,6 @@ class CloudDataCatalogRenameTagTemplateFieldOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1699,7 +1551,6 @@ class CloudDataCatalogSearchCatalogOperator(BaseOperator): If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.Scope` - :type scope: Union[Dict, google.cloud.datacatalog_v1beta1.types.SearchCatalogRequest.Scope] :param query: Required. The query string in search query syntax. The query must be non-empty. 
Query strings can be simple as "x" or more qualified as: @@ -1711,11 +1562,9 @@ class CloudDataCatalogSearchCatalogOperator(BaseOperator): Note: Query tokens need to have a minimum of 3 characters for substring matching to work correctly. See `Data Catalog Search Syntax `__ for more information. - :type query: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per-resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. - :type page_size: int :param order_by: Specifies the ordering of results, currently supported case-sensitive choices are: - ``relevance``, only supports descending @@ -1723,18 +1572,13 @@ class CloudDataCatalogSearchCatalogOperator(BaseOperator): - ``last_modified_timestamp [asc|desc]``, defaults to descending if not specified If not specified, defaults to ``relevance`` descending. - :type order_by: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1743,7 +1587,6 @@ class CloudDataCatalogSearchCatalogOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1811,33 +1654,23 @@ class CloudDataCatalogUpdateEntryOperator(BaseOperator): If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.Entry` - :type entry: Union[Dict, google.cloud.datacatalog_v1beta1.types.Entry] :param update_mask: The fields to update on the entry. If absent or empty, all modifiable fields are updated. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.protobuf.field_mask_pb2.FieldMask` - :type update_mask: Union[Dict, google.protobuf.field_mask_pb2.FieldMask] :param location: Required. The location of the entry to update. - :type location: str :param entry_group: The entry group ID for the entry that is being updated. - :type entry_group: str :param entry_id: The entry ID that is being updated. - :type entry_id: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. 
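A hedged sketch of the search operator described above, using the dict form of the scope; the project ID and query are assumptions chosen to illustrate the search syntax:

from airflow.providers.google.cloud.operators.datacatalog import CloudDataCatalogSearchCatalogOperator

search_catalog = CloudDataCatalogSearchCatalogOperator(
    task_id="search_catalog",
    scope={"include_project_ids": ["my-project"]},  # dict form of the Scope protobuf message
    query="type=table name:orders",                 # illustrative query in search syntax
    order_by="relevance",
    page_size=100,
)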
- :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1846,7 +1679,6 @@ class CloudDataCatalogUpdateEntryOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1921,33 +1753,23 @@ class CloudDataCatalogUpdateTagOperator(BaseOperator): If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.Tag` - :type tag: Union[Dict, google.cloud.datacatalog_v1beta1.types.Tag] :param update_mask: The fields to update on the Tag. If absent or empty, all modifiable fields are updated. Currently the only modifiable field is the field ``fields``. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.protobuf.field_mask_pb2.FieldMask` - :type update_mask: Union[Dict, google.protobuf.field_mask_pb2.FieldMask] :param location: Required. The location of the tag to rename. - :type location: str :param entry_group: The entry group ID for the tag that is being updated. - :type entry_group: str :param entry: The entry ID for the tag that is being updated. - :type entry: str :param tag_id: The tag ID that is being updated. - :type tag_id: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1956,7 +1778,6 @@ class CloudDataCatalogUpdateTagOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
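The update operators accept the field mask either as a protobuf message or as a plain dict. A minimal sketch of the dict form for a tag update, with all IDs assumed for illustration:

from airflow.providers.google.cloud.operators.datacatalog import CloudDataCatalogUpdateTagOperator

update_tag = CloudDataCatalogUpdateTagOperator(
    task_id="update_tag",
    tag={"fields": {"quality": {"string_value": "gold"}}},  # dict form of the Tag message
    update_mask={"paths": ["fields"]},  # per the docstring, only ``fields`` is modifiable
    location="us-central1",  # illustrative
    entry_group="my_group",  # illustrative
    entry="my_entry",        # illustrative
    tag_id="my_tag",         # illustrative
)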
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -2039,32 +1860,23 @@ class CloudDataCatalogUpdateTagTemplateOperator(BaseOperator): If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplate` - :type tag_template: Union[Dict, google.cloud.datacatalog_v1beta1.types.TagTemplate] :param update_mask: The field mask specifies the parts of the template to overwrite. If absent or empty, all of the allowed fields above will be updated. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.protobuf.field_mask_pb2.FieldMask` - :type update_mask: Union[Dict, google.protobuf.field_mask_pb2.FieldMask] :param location: Required. The location of the tag template to rename. - :type location: str :param tag_template_id: Optional. The tag template ID for the entry that is being updated. - :type tag_template_id: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2073,7 +1885,6 @@ class CloudDataCatalogUpdateTagTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -2144,7 +1955,6 @@ class CloudDataCatalogUpdateTagTemplateFieldOperator(BaseOperator): If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.datacatalog_v1beta1.types.TagTemplateField` - :type tag_template_field: Union[Dict, google.cloud.datacatalog_v1beta1.types.TagTemplateField] :param update_mask: The field mask specifies the parts of the template to be updated. Allowed fields: - ``display_name`` @@ -2157,29 +1967,19 @@ class CloudDataCatalogUpdateTagTemplateFieldOperator(BaseOperator): If a dict is provided, it must be of the same form as the protobuf message :class:`~google.protobuf.field_mask_pb2.FieldMask` - :type update_mask: Union[Dict, google.protobuf.field_mask_pb2.FieldMask] :param tag_template_field_name: Optional. The name of the tag template field to rename. - :type tag_template_field_name: str :param location: Optional. The location of the tag to rename. - :type location: str :param tag_template: Optional. The tag template ID for tag template field to rename. - :type tag_template: str :param tag_template_field_id: Optional. 
The ID of tag template field to rename. - :type tag_template_field_id: str :param project_id: The ID of the Google Cloud project that owns the entry group. If set to ``None`` or missing, the default project_id from the Google Cloud connection is used. - :type project_id: Optional[str] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2188,7 +1988,6 @@ class CloudDataCatalogUpdateTagTemplateFieldOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/operators/dataflow.py b/airflow/providers/google/cloud/operators/dataflow.py index bc67a2994efb5..ce0c6fff06322 100644 --- a/airflow/providers/google/cloud/operators/dataflow.py +++ b/airflow/providers/google/cloud/operators/dataflow.py @@ -58,24 +58,17 @@ class DataflowConfiguration: :param job_name: The 'jobName' to use when executing the Dataflow job (templated). This ends up being set in the pipeline options, so any entry with key ``'jobName'`` or ``'job_name'``in ``options`` will be overwritten. - :type job_name: str :param append_job_name: True if unique suffix has to be appended to job name. - :type append_job_name: bool :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: Job location. - :type location: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param poll_sleep: The time in seconds to sleep between polling Google Cloud Platform for the dataflow job status while the job is in the JOB_STATE_RUNNING state. - :type poll_sleep: int :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -84,14 +77,11 @@ class DataflowConfiguration: If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
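DataflowConfiguration is not an operator itself; it is handed to the Apache Beam operators that run pipelines on Dataflow. A minimal sketch, in which the pipeline file, project, and job name are assumptions:

from airflow.providers.apache.beam.operators.beam import BeamRunPythonPipelineOperator
from airflow.providers.google.cloud.operators.dataflow import DataflowConfiguration

run_beam_on_dataflow = BeamRunPythonPipelineOperator(
    task_id="run_beam_on_dataflow",
    py_file="gs://my-bucket/pipelines/wordcount.py",  # illustrative
    runner="DataflowRunner",
    dataflow_config=DataflowConfiguration(
        job_name="wordcount-{{ ds_nodash }}",  # illustrative, templated
        project_id="my-project",               # illustrative
        location="us-central1",
        wait_until_finished=True,
    ),
)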
- :type impersonation_chain: Union[str, Sequence[str]] :param drain_pipeline: Optional, set to True if you want to stop a streaming job by draining it instead of canceling it when the task instance is killed. See: https://cloud.google.com/dataflow/docs/guides/stopping-a-pipeline - :type drain_pipeline: bool :param cancel_timeout: How long (in seconds) operator should wait for the pipeline to be successfully cancelled when task is being killed. - :type cancel_timeout: Optional[int] :param wait_until_finished: (Optional) If True, wait for the end of pipeline execution before exiting. If False, only submits job. @@ -125,17 +115,14 @@ class DataflowConfiguration: If you do not call the wait_for_pipeline method in your pipeline and pass wait_until_finished=False to the operator, the second loop will check once whether the job is in a terminal state and then exit the loop. - :type wait_until_finished: Optional[bool] :param multiple_jobs: If the pipeline creates multiple jobs then monitor all jobs. Supported only by :py:class:`~airflow.providers.apache.beam.operators.beam.BeamRunJavaPipelineOperator` - :type multiple_jobs: boolean :param check_if_running: Before running the job, validate that a previous run is not in process. IgnoreJob = do not check if running. FinishIfRunning = if the job is running, finish with nothing. WaitForRun = wait until the job finishes and then run the job. Supported only by: :py:class:`~airflow.providers.apache.beam.operators.beam.BeamRunJavaPipelineOperator` - :type check_if_running: CheckJobRunning """ template_fields: Sequence[str] = ("job_name", "location") @@ -223,13 +210,10 @@ class DataflowCreateJavaJobOperator(BaseOperator): :ref:`howto/operator:DataflowCreateJavaJobOperator` :param jar: The reference to a self-executing Dataflow jar (templated). - :type jar: str :param job_name: The 'jobName' to use when executing the Dataflow job (templated). This ends up being set in the pipeline options, so any entry with key ``'jobName'`` in ``options`` will be overwritten. - :type job_name: str :param dataflow_default_options: Map of default job options. - :type dataflow_default_options: dict :param options: Map of job specific options. The key must be a dictionary. The value can contain different types: @@ -242,35 +226,25 @@ class DataflowCreateJavaJobOperator(BaseOperator): * Other value types will be replaced with the Python textual representation. When defining labels (``labels`` option), you can also provide a dictionary. - :type options: dict :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: Job location. - :type location: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param poll_sleep: The time in seconds to sleep between polling Google Cloud Platform for the dataflow job status while the job is in the JOB_STATE_RUNNING state. - :type poll_sleep: int :param job_class: The name of the dataflow job class to be executed; it is often not the main class configured in the dataflow jar file.
- :type job_class: str :param multiple_jobs: If the pipeline creates multiple jobs then monitor all jobs - :type multiple_jobs: boolean :param check_if_running: before running the job, validate that a previous run is not in process - :type check_if_running: CheckJobRunning(IgnoreJob = do not check if running, FinishIfRunning= if job is running finish with nothing, WaitForRun= wait until job finished and the run job) ``jar``, ``options``, and ``job_name`` are templated so you can use variables in them. :param cancel_timeout: How long (in seconds) operator should wait for the pipeline to be successfully cancelled when task is being killed. - :type cancel_timeout: Optional[int] :param wait_until_finished: (Optional) If True, wait for the end of pipeline execution before exiting. If False, only submits job. @@ -304,7 +278,6 @@ class DataflowCreateJavaJobOperator(BaseOperator): If you do not call the wait_for_pipeline method in your pipeline and pass wait_until_finished=False to the operator, the second loop will check once whether the job is in a terminal state and then exit the loop. - :type wait_until_finished: Optional[bool] Note that both ``dataflow_default_options`` and ``options`` will be merged to specify pipeline @@ -484,10 +457,8 @@ class DataflowTemplatedJobStartOperator(BaseOperator): :ref:`howto/operator:DataflowTemplatedJobStartOperator` :param template: The reference to the Dataflow template. - :type template: str :param job_name: The 'jobName' to use when executing the Dataflow template (templated). - :type job_name: Optional[str] :param options: Map of job runtime environment options. It will update environment argument if passed. .. seealso:: For more information on possible configurations, look at the API documentation `https://cloud.google.com/dataflow/pipelines/specifying-exec-params `__ - :type options: dict :param dataflow_default_options: Map of default job environment options. - :type dataflow_default_options: dict :param parameters: Map of job specific parameters for the template. - :type parameters: dict :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: Job location. - :type location: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param poll_sleep: The time in seconds to sleep between polling Google Cloud Platform for the dataflow job status while the job is in the JOB_STATE_RUNNING state. - :type poll_sleep: int :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -524,17 +487,14 @@ class DataflowTemplatedJobStartOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :param environment: Optional, Map of job runtime environment options. ..
seealso:: For more information on possible configurations, look at the API documentation `https://cloud.google.com/dataflow/pipelines/specifying-exec-params `__ - :type environment: Optional[dict] :param cancel_timeout: How long (in seconds) operator should wait for the pipeline to be successfully cancelled when task is being killed. - :type cancel_timeout: Optional[int] :param wait_until_finished: (Optional) If True, wait for the end of pipeline execution before exiting. If False, only submits job. @@ -568,7 +528,6 @@ class DataflowTemplatedJobStartOperator(BaseOperator): If you do not call the wait_for_pipeline method in your pipeline and pass wait_until_finished=False to the operator, the second loop will check once whether the job is in a terminal state and then exit the loop. - :type wait_until_finished: Optional[bool] It's a good practice to define dataflow_* parameters in the default_args of the dag, such as the project, zone and staging location. @@ -716,24 +675,18 @@ class DataflowStartFlexTemplateOperator(BaseOperator): :param body: The request body. See: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.locations.flexTemplates/launch#request-body :param location: The location of the Dataflow job (for example europe-west1) - :type location: str :param project_id: The ID of the GCP project that owns the job. If set to ``None`` or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param gcp_conn_id: The connection ID to use connecting to Google Cloud Platform. - :type gcp_conn_id: str :param delegate_to: The account to impersonate, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param drain_pipeline: Optional, set to True if you want to stop a streaming job by draining it instead of canceling it when the task instance is killed. See: https://cloud.google.com/dataflow/docs/guides/stopping-a-pipeline - :type drain_pipeline: bool :param cancel_timeout: How long (in seconds) operator should wait for the pipeline to be successfully cancelled when task is being killed. - :type cancel_timeout: Optional[int] :param wait_until_finished: (Optional) If True, wait for the end of pipeline execution before exiting. If False, only submits job. @@ -767,7 +720,6 @@ class DataflowStartFlexTemplateOperator(BaseOperator): If you do not call the wait_for_pipeline method in your pipeline and pass wait_until_finished=False to the operator, the second loop will check once whether the job is in a terminal state and then exit the loop. - :type wait_until_finished: Optional[bool] """ template_fields: Sequence[str] = ("body", "location", "project_id", "gcp_conn_id") @@ -841,9 +793,7 @@ class DataflowStartSqlJobOperator(BaseOperator): `__ :param job_name: The unique name to assign to the Cloud Dataflow job. - :type job_name: str :param query: The SQL query to execute. - :type query: str :param options: Job parameters to be executed. It can be a dictionary with the following keys. For more information, look at: @@ -851,23 +801,17 @@ class DataflowStartSqlJobOperator(BaseOperator): `__ command reference - :type options: dict :param location: The location of the Dataflow job (for example europe-west1) - :type location: str :param project_id: The ID of the GCP project that owns the job. If set to ``None`` or missing, the default project_id from the GCP connection is used. - :type project_id: Optional[str] :param gcp_conn_id: The connection ID to use connecting to Google Cloud Platform.
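For the flex-template launch described above, the request body nests a launchParameter object as defined by the linked REST reference. A sketch in which every concrete value is an assumption:

from airflow.providers.google.cloud.operators.dataflow import DataflowStartFlexTemplateOperator

start_flex_template = DataflowStartFlexTemplateOperator(
    task_id="start_flex_template",
    location="europe-west1",
    project_id="my-project",  # illustrative
    body={
        "launchParameter": {
            "jobName": "flex-job-{{ ds_nodash }}",                          # illustrative
            "containerSpecGcsPath": "gs://my-bucket/templates/spec.json",   # illustrative
            "parameters": {"inputTopic": "projects/my-project/topics/in"},  # illustrative
        }
    },
)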
- :type gcp_conn_id: str :param delegate_to: The account to impersonate, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param drain_pipeline: Optional, set to True if you want to stop a streaming job by draining it instead of canceling it when the task instance is killed. See: https://cloud.google.com/dataflow/docs/guides/stopping-a-pipeline - :type drain_pipeline: bool """ template_fields: Sequence[str] = ( @@ -956,15 +900,11 @@ class DataflowCreatePythonJobOperator(BaseOperator): :param py_file: Reference to the python dataflow pipeline file.py, e.g., /some/local/file/path/to/your/python/pipeline/file. (templated) - :type py_file: str :param job_name: The 'job_name' to use when executing the Dataflow job (templated). This ends up being set in the pipeline options, so any entry with key ``'jobName'`` or ``'job_name'`` in ``options`` will be overwritten. - :type job_name: str :param py_options: Additional python options, e.g., ["-m", "-v"]. - :type py_options: list[str] :param dataflow_default_options: Map of default job options. - :type dataflow_default_options: dict :param options: Map of job specific options. The key must be a dictionary. The value can contain different types: @@ -977,45 +917,35 @@ class DataflowCreatePythonJobOperator(BaseOperator): * Other value types will be replaced with the Python textual representation. When defining labels (``labels`` option), you can also provide a dictionary. - :type options: dict :param py_interpreter: Python version of the beam pipeline. If None, this defaults to python3. To track python versions supported by beam and related issues, check: https://issues.apache.org/jira/browse/BEAM-1251 - :type py_interpreter: str :param py_requirements: Additional python package(s) to install. If a value is passed to this parameter, a new virtual environment will be created with the additional packages installed. You could also install the apache_beam package if it is not installed on your system, or if you want to use a different version. - :type py_requirements: List[str] :param py_system_site_packages: Whether to include system_site_packages in your virtualenv. See virtualenv documentation for more information. This option is only relevant if the ``py_requirements`` parameter is not None. :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: Job location. - :type location: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param poll_sleep: The time in seconds to sleep between polling Google Cloud Platform for the dataflow job status while the job is in the JOB_STATE_RUNNING state. - :type poll_sleep: int :param drain_pipeline: Optional, set to True if you want to stop a streaming job by draining it instead of canceling it when the task instance is killed. See: https://cloud.google.com/dataflow/docs/guides/stopping-a-pipeline - :type drain_pipeline: bool :param cancel_timeout: How long (in seconds) operator should wait for the pipeline to be successfully cancelled when task is being killed.
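A sketch of the Python job operator with a fresh virtualenv, as the py_requirements description above implies; paths and versions are illustrative assumptions:

from airflow.providers.google.cloud.operators.dataflow import DataflowCreatePythonJobOperator

start_python_job = DataflowCreatePythonJobOperator(
    task_id="start_python_job",
    py_file="gs://my-bucket/pipelines/wordcount.py",  # illustrative
    job_name="wordcount-job",                         # illustrative
    py_interpreter="python3",
    py_requirements=["apache-beam[gcp]==2.34.0"],     # installed into a new virtualenv
    py_system_site_packages=False,
    options={"output": "gs://my-bucket/output"},      # illustrative
    location="us-central1",
)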
- :type cancel_timeout: Optional[int] :param wait_until_finished: (Optional) If True, wait for the end of pipeline execution before exiting. If False, only submits job. @@ -1049,7 +979,6 @@ class DataflowCreatePythonJobOperator(BaseOperator): If you do not call the wait_for_pipeline method in your pipeline and pass wait_until_finished=False to the operator, the second loop will check once whether the job is in a terminal state and then exit the loop. - :type wait_until_finished: Optional[bool] """ template_fields: Sequence[str] = ("options", "dataflow_default_options", "job_name", "py_file") diff --git a/airflow/providers/google/cloud/operators/datafusion.py b/airflow/providers/google/cloud/operators/datafusion.py index fa6baa44f745d..5244ddbf972c5 100644 --- a/airflow/providers/google/cloud/operators/datafusion.py +++ b/airflow/providers/google/cloud/operators/datafusion.py @@ -39,19 +39,13 @@ class CloudDataFusionRestartInstanceOperator(BaseOperator): :ref:`howto/operator:CloudDataFusionRestartInstanceOperator` :param instance_name: The name of the instance to restart. - :type instance_name: str :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param project_id: The ID of the Google Cloud project that the instance belongs to. - :type project_id: str :param api_version: The version of the api that will be requested, for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -60,7 +54,6 @@ class CloudDataFusionRestartInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -115,19 +108,13 @@ class CloudDataFusionDeleteInstanceOperator(BaseOperator): :ref:`howto/operator:CloudDataFusionDeleteInstanceOperator` :param instance_name: The name of the instance to delete. - :type instance_name: str :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param project_id: The ID of the Google Cloud project that the instance belongs to. - :type project_id: str :param api_version: The version of the api that will be requested, for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request.
@@ -136,7 +123,6 @@ class CloudDataFusionDeleteInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -191,22 +177,15 @@ class CloudDataFusionCreateInstanceOperator(BaseOperator): :ref:`howto/operator:CloudDataFusionCreateInstanceOperator` :param instance_name: The name of the instance to create. - :type instance_name: str :param instance: An instance of Instance. https://cloud.google.com/data-fusion/docs/reference/rest/v1beta1/projects.locations.instances#Instance - :type instance: Dict[str, Any] :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param project_id: The ID of the Google Cloud project that the instance belongs to. - :type project_id: str :param api_version: The version of the api that will be requested, for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -215,7 +194,6 @@ class CloudDataFusionCreateInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -291,29 +269,21 @@ class CloudDataFusionUpdateInstanceOperator(BaseOperator): :ref:`howto/operator:CloudDataFusionUpdateInstanceOperator` :param instance_name: The name of the instance to update. - :type instance_name: str :param instance: An instance of Instance. https://cloud.google.com/data-fusion/docs/reference/rest/v1beta1/projects.locations.instances#Instance - :type instance: Dict[str, Any] :param update_mask: Field mask is used to specify the fields that the update will overwrite in an instance resource. The fields specified in the updateMask are relative to the resource, not the full request. A field will be overwritten if it is in the mask. If the user does not provide a mask, all the supported fields (labels and options currently) will be overwritten. A comma-separated list of fully qualified names of fields. Example: "user.displayName,photo". https://developers.google.com/protocol-buffers/docs/reference/google.protobuf?_ga=2.205612571.-968688242.1573564810#google.protobuf.FieldMask - :type update_mask: str :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param project_id: The ID of the Google Cloud project that the instance belongs to. - :type project_id: str :param api_version: The version of the api that will be requested, for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info.
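The update_mask above follows protobuf FieldMask semantics: only the named fields are touched. A hedged sketch restricting an instance update to its labels, with all names assumed:

from airflow.providers.google.cloud.operators.datafusion import CloudDataFusionUpdateInstanceOperator

update_instance = CloudDataFusionUpdateInstanceOperator(
    task_id="update_instance",
    instance_name="my-instance",              # illustrative
    instance={"labels": {"env": "staging"}},  # only fields named in the mask are overwritten
    update_mask="labels",                     # comma-separated list of field names
    location="us-central1",                   # illustrative
    project_id="my-project",                  # illustrative
)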
- :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -322,7 +292,6 @@ class CloudDataFusionUpdateInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -384,19 +353,13 @@ class CloudDataFusionGetInstanceOperator(BaseOperator): :ref:`howto/operator:CloudDataFusionGetInstanceOperator` :param instance_name: The name of the instance. - :type instance_name: str :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param project_id: The ID of the Google Cloud project that the instance belongs to. - :type project_id: str :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -405,7 +368,6 @@ class CloudDataFusionGetInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -459,26 +421,18 @@ class CloudDataFusionCreatePipelineOperator(BaseOperator): :ref:`howto/operator:CloudDataFusionCreatePipelineOperator` :param pipeline_name: Your pipeline name. - :type pipeline_name: str :param pipeline: The pipeline definition. For more information check: https://docs.cdap.io/cdap/current/en/developer-manual/pipelines/developing-pipelines.html#pipeline-configuration-file-format - :type pipeline: Dict[str, Any] :param instance_name: The name of the instance. - :type instance_name: str :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param namespace: If your pipeline belongs to a Basic edition instance, the namespace ID is always default. If your pipeline belongs to an Enterprise edition instance, you can create a namespace. - :type namespace: str :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. 
For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -487,7 +441,6 @@ class CloudDataFusionCreatePipelineOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -555,25 +508,17 @@ class CloudDataFusionDeletePipelineOperator(BaseOperator): :ref:`howto/operator:CloudDataFusionDeletePipelineOperator` :param pipeline_name: Your pipeline name. - :type pipeline_name: str :param version_id: Version of pipeline to delete - :type version_id: Optional[str] :param instance_name: The name of the instance. - :type instance_name: str :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param namespace: If your pipeline belongs to a Basic edition instance, the namespace ID is always default. If your pipeline belongs to an Enterprise edition instance, you can create a namespace. - :type namespace: str :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -582,7 +527,6 @@ class CloudDataFusionDeletePipelineOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -652,25 +596,17 @@ class CloudDataFusionListPipelinesOperator(BaseOperator): :param instance_name: The name of the instance. - :type instance_name: str :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param artifact_version: Artifact version to filter instances - :type artifact_version: Optional[str] :param artifact_name: Artifact name to filter instances - :type artifact_name: Optional[str] :param namespace: If your pipeline belongs to a Basic edition instance, the namespace ID is always default. If your pipeline belongs to an Enterprise edition instance, you can create a namespace. - :type namespace: str :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. 
For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -679,7 +615,6 @@ class CloudDataFusionListPipelinesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -749,31 +684,21 @@ class CloudDataFusionStartPipelineOperator(BaseOperator): :ref:`howto/operator:CloudDataFusionStartPipelineOperator` :param pipeline_name: Your pipeline name. - :type pipeline_name: str :param instance_name: The name of the instance. - :type instance_name: str :param success_states: If provided the operator will wait for pipeline to be in one of the provided states. - :type success_states: List[str] :param pipeline_timeout: How long (in seconds) operator should wait for the pipeline to be in one of ``success_states``. Works only if ``success_states`` are provided. - :type pipeline_timeout: int :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param runtime_args: Optional runtime args to be passed to the pipeline - :type runtime_args: dict :param namespace: If your pipeline belongs to a Basic edition instance, the namespace ID is always default. If your pipeline belongs to an Enterprise edition instance, you can create a namespace. - :type namespace: str :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -782,11 +707,9 @@ class CloudDataFusionStartPipelineOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :param asynchronous: Flag to return after submitting the pipeline Id to the Data Fusion API. This is useful for submitting long running pipelines and waiting on them asynchronously using the CloudDataFusionPipelineStateSensor - :type asynchronous: bool """ template_fields: Sequence[str] = ( @@ -878,23 +801,16 @@ class CloudDataFusionStopPipelineOperator(BaseOperator): :ref:`howto/operator:CloudDataFusionStopPipelineOperator` :param pipeline_name: Your pipeline name. - :type pipeline_name: str :param instance_name: The name of the instance. - :type instance_name: str :param location: The Cloud Data Fusion location in which to handle the request. 
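The asynchronous flag pairs with the sensor the docstring mentions: submit the pipeline, then wait on its state separately. A sketch under assumed pipeline and instance names:

from airflow.providers.google.cloud.operators.datafusion import CloudDataFusionStartPipelineOperator
from airflow.providers.google.cloud.sensors.datafusion import CloudDataFusionPipelineStateSensor

start_pipeline = CloudDataFusionStartPipelineOperator(
    task_id="start_pipeline",
    pipeline_name="my_pipeline",  # illustrative
    instance_name="my-instance",  # illustrative
    location="us-central1",
    asynchronous=True,            # return immediately with the pipeline run id
)

wait_for_pipeline = CloudDataFusionPipelineStateSensor(
    task_id="wait_for_pipeline",
    pipeline_name="my_pipeline",
    pipeline_id=start_pipeline.output,  # run id pushed to XCom by the start operator
    expected_statuses=["COMPLETED"],
    instance_name="my-instance",
    location="us-central1",
)

start_pipeline >> wait_for_pipeline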
- :type location: str :param namespace: If your pipeline belongs to a Basic edition instance, the namespace ID is always default. If your pipeline belongs to an Enterprise edition instance, you can create a namespace. - :type namespace: str :param api_version: The version of the api that will be requested, for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -903,7 +819,6 @@ class CloudDataFusionStopPipelineOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/operators/dataprep.py b/airflow/providers/google/cloud/operators/dataprep.py index 6db0c07e619ae..54f76dc381b47 100644 --- a/airflow/providers/google/cloud/operators/dataprep.py +++ b/airflow/providers/google/cloud/operators/dataprep.py @@ -35,7 +35,6 @@ class DataprepGetJobsForJobGroupOperator(BaseOperator): :ref:`howto/operator:DataprepGetJobsForJobGroupOperator` :param job_id: The ID of the job that will be requested - :type job_id: int """ template_fields: Sequence[str] = ("job_id",) @@ -65,11 +64,8 @@ class DataprepGetJobGroupOperator(BaseOperator): :ref:`howto/operator:DataprepGetJobGroupOperator` :param job_group_id: The ID of the job that will be requested - :type job_group_id: int :param embed: Comma-separated list of objects to pull in as part of the response - :type embed: string :param include_deleted: If set to "true", deleted objects will be included - :type include_deleted: bool """ template_fields: Sequence[str] = ("job_group_id", "embed") @@ -112,10 +108,8 @@ class DataprepRunJobGroupOperator(BaseOperator): :ref:`howto/operator:DataprepRunJobGroupOperator` :param dataprep_conn_id: The Dataprep connection ID - :type dataprep_conn_id: str :param body_request: Passed as the body_request to GoogleDataprepHook's run_job_group, where it's the identifier for the recipe to run - :type body_request: dict """ template_fields: Sequence[str] = ("body_request",) diff --git a/airflow/providers/google/cloud/operators/dataproc.py b/airflow/providers/google/cloud/operators/dataproc.py index dc04fdddbbc13..c58ab1324baf6 100644 --- a/airflow/providers/google/cloud/operators/dataproc.py +++ b/airflow/providers/google/cloud/operators/dataproc.py @@ -96,107 +96,71 @@ class ClusterGenerator: Create a new Dataproc Cluster. :param cluster_name: The name of the DataProc cluster to create. (templated) - :type cluster_name: str :param project_id: The ID of the Google Cloud project in which to create the cluster. (templated) - :type project_id: str :param num_workers: The # of workers to spin up. If set to zero, the cluster will spin up in single node mode.
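Returning to the Dataprep run operator above: body_request carries the identifier of the recipe (wrangled dataset) to run. The payload shape below is an assumption based on the Dataprep jobGroups API, and the id is illustrative:

from airflow.providers.google.cloud.operators.dataprep import DataprepRunJobGroupOperator

run_job_group = DataprepRunJobGroupOperator(
    task_id="run_job_group",
    dataprep_conn_id="dataprep_default",
    body_request={"wrangledDataset": {"id": 12345}},  # assumed payload shape; id is illustrative
)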
If set to zero will spin up cluster in a single node mode - :type num_workers: int :param storage_bucket: The storage bucket to use, setting to None lets dataproc generate a custom one for you - :type storage_bucket: str :param init_actions_uris: List of GCS uri's containing dataproc initialization scripts - :type init_actions_uris: list[str] :param init_action_timeout: Amount of time executable scripts in init_actions_uris has to complete - :type init_action_timeout: str :param metadata: dict of key-value google compute engine metadata entries to add to all instances - :type metadata: dict :param image_version: the version of software inside the Dataproc cluster - :type image_version: str :param custom_image: custom Dataproc image for more info see https://cloud.google.com/dataproc/docs/guides/dataproc-images - :type custom_image: str :param custom_image_project_id: project id for the custom Dataproc image, for more info see https://cloud.google.com/dataproc/docs/guides/dataproc-images - :type custom_image_project_id: str :param custom_image_family: family for the custom Dataproc image, family name can be provide using --family flag while creating custom image, for more info see https://cloud.google.com/dataproc/docs/guides/dataproc-images - :type custom_image_family: str :param autoscaling_policy: The autoscaling policy used by the cluster. Only resource names including projectid and location (region) are valid. Example: ``projects/[projectId]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]`` - :type autoscaling_policy: str :param properties: dict of properties to set on config files (e.g. spark-defaults.conf), see https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#SoftwareConfig - :type properties: dict :param optional_components: List of optional cluster components, for more info see https://cloud.google.com/dataproc/docs/reference/rest/v1/ClusterConfig#Component - :type optional_components: list[str] :param num_masters: The # of master nodes to spin up - :type num_masters: int :param master_machine_type: Compute engine machine type to use for the primary node - :type master_machine_type: str :param master_disk_type: Type of the boot disk for the primary node (default is ``pd-standard``). Valid values: ``pd-ssd`` (Persistent Disk Solid State Drive) or ``pd-standard`` (Persistent Disk Hard Disk Drive). - :type master_disk_type: str :param master_disk_size: Disk size for the primary node - :type master_disk_size: int :param worker_machine_type: Compute engine machine type to use for the worker nodes - :type worker_machine_type: str :param worker_disk_type: Type of the boot disk for the worker node (default is ``pd-standard``). Valid values: ``pd-ssd`` (Persistent Disk Solid State Drive) or ``pd-standard`` (Persistent Disk Hard Disk Drive). - :type worker_disk_type: str :param worker_disk_size: Disk size for the worker nodes - :type worker_disk_size: int :param num_preemptible_workers: The # of preemptible worker nodes to spin up - :type num_preemptible_workers: int :param labels: dict of labels to add to the cluster - :type labels: dict :param zone: The zone where the cluster will be located. Set to None to auto-zone. 
(templated) - :type zone: str :param network_uri: The network uri to be used for machine communication, cannot be specified with subnetwork_uri - :type network_uri: str :param subnetwork_uri: The subnetwork uri to be used for machine communication, cannot be specified with network_uri - :type subnetwork_uri: str :param internal_ip_only: If true, all instances in the cluster will only have internal IP addresses. This can only be enabled for subnetwork enabled networks - :type internal_ip_only: bool :param tags: The GCE tags to add to all instances - :type tags: list[str] :param region: The specified region where the dataproc cluster is created. - :type region: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param service_account: The service account of the dataproc instances. - :type service_account: str :param service_account_scopes: The URIs of service account scopes to be included. - :type service_account_scopes: list[str] :param idle_delete_ttl: The longest duration that cluster would keep alive while staying idle. Passing this threshold will cause cluster to be auto-deleted. A duration in seconds. - :type idle_delete_ttl: int :param auto_delete_time: The time when cluster will be auto-deleted. - :type auto_delete_time: datetime.datetime :param auto_delete_ttl: The life duration of cluster, the cluster will be auto-deleted at the end of this duration. A duration in seconds. (If auto_delete_time is set this parameter will be ignored) - :type auto_delete_ttl: int :param customer_managed_key: The customer-managed key used for disk encryption ``projects/[PROJECT_STORING_KEYS]/locations/[LOCATION]/keyRings/[KEY_RING_NAME]/cryptoKeys/[KEY_NAME]`` # noqa - :type customer_managed_key: str """ def __init__( @@ -471,36 +435,24 @@ class DataprocCreateClusterOperator(BaseOperator): :param project_id: The ID of the google cloud project in which to create the cluster. (templated) - :type project_id: str :param cluster_name: Name of the cluster to create - :type cluster_name: str :param labels: Labels that will be assigned to created cluster - :type labels: Dict[str, str] :param cluster_config: Required. The cluster config to create. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.dataproc_v1.types.ClusterConfig` - :type cluster_config: Union[Dict, google.cloud.dataproc_v1.types.ClusterConfig] :param region: The specified region where the dataproc cluster is created. - :type region: str :param delete_on_error: If true the cluster will be deleted if created with ERROR state. Default value is true. - :type delete_on_error: bool :param use_if_exists: If true use existing cluster - :type use_if_exists: bool :param request_id: Optional. A unique id used to identify the request. If the server receives two ``DeleteClusterRequest`` requests with the same id, then the second request will be ignored and the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :type request_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. 
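Taken together, the ClusterGenerator parameters above are usually consumed through its make() helper and handed to DataprocCreateClusterOperator. A minimal sketch under assumed placeholder names (project, bucket, region, machine types):

from airflow.providers.google.cloud.operators.dataproc import (
    ClusterGenerator,
    DataprocCreateClusterOperator,
)

# Build a ClusterConfig-shaped dict rather than writing the protobuf by hand.
cluster_config = ClusterGenerator(
    project_id="example-project",
    num_workers=2,
    storage_bucket="example-staging-bucket",
    master_machine_type="n1-standard-4",
    worker_machine_type="n1-standard-4",
    worker_disk_size=100,
).make()

create_cluster = DataprocCreateClusterOperator(
    task_id="create_cluster",
    project_id="example-project",
    region="us-west1",
    cluster_name="example-cluster",
    cluster_config=cluster_config,
)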
- :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -509,7 +461,6 @@ class DataprocCreateClusterOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -721,21 +672,14 @@ class DataprocScaleClusterOperator(BaseOperator): https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters :param cluster_name: The name of the cluster to scale. (templated) - :type cluster_name: str :param project_id: The ID of the google cloud project in which the cluster runs. (templated) - :type project_id: str :param region: The region for the dataproc cluster. (templated) - :type region: str :param num_workers: The new number of workers - :type num_workers: int :param num_preemptible_workers: The new number of preemptible workers - :type num_preemptible_workers: int :param graceful_decommission_timeout: Timeout for graceful YARN decommissioning. Maximum value is 1d - :type graceful_decommission_timeout: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -744,7 +688,6 @@ class DataprocScaleClusterOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ('cluster_name', 'project_id', 'region', 'impersonation_chain') @@ -855,28 +798,19 @@ class DataprocDeleteClusterOperator(BaseOperator): Deletes a cluster in a project. :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to (templated). - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request (templated). - :type region: str :param cluster_name: Required. The cluster name (templated). - :type cluster_name: str :param cluster_uuid: Optional. Specifying the ``cluster_uuid`` means the RPC should fail if cluster with specified UUID does not exist. - :type cluster_uuid: str :param request_id: Optional. A unique id used to identify the request. If the server receives two ``DeleteClusterRequest`` requests with the same id, then the second request will be ignored and the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :type request_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. 
Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -885,7 +819,6 @@ class DataprocDeleteClusterOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ('project_id', 'region', 'cluster_name', 'impersonation_chain') @@ -941,37 +874,27 @@ class DataprocJobBaseOperator(BaseOperator): :param job_name: The job name used in the DataProc cluster. This name by default is the task_id appended with the execution data, but can be templated. The name will always be appended with a random number to avoid name clashes. - :type job_name: str :param cluster_name: The name of the DataProc cluster. - :type cluster_name: str :param project_id: The ID of the Google Cloud project the cluster belongs to, if not specified the project will be inferred from the provided GCP connection. - :type project_id: str :param dataproc_properties: Map for the Hive properties. Ideal to put in default arguments (templated) - :type dataproc_properties: dict :param dataproc_jars: HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes and UDFs. (templated) - :type dataproc_jars: list :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param labels: The labels to associate with this job. Label keys must contain 1 to 63 characters, and must conform to RFC 1035. Label values may be empty, but, if present, must contain 1 to 63 characters, and must conform to RFC 1035. No more than 32 labels can be associated with a job. - :type labels: dict :param region: The specified region where the dataproc cluster is created. - :type region: str :param job_error_states: Job states that should be considered error states. Any states in this set will result in an error being raised and failure of the task. Eg, if the ``CANCELLED`` state should also be considered a task failure, pass in ``{'ERROR', 'CANCELLED'}``. Possible values are currently only ``'ERROR'`` and ``'CANCELLED'``, but could change in the future. Defaults to ``{'ERROR'}``. - :type job_error_states: set :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
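For the delete operator documented above, the usual teardown pattern runs the deletion even when an upstream job task failed. A short sketch, with all identifiers as placeholders:

from airflow.providers.google.cloud.operators.dataproc import (
    DataprocDeleteClusterOperator,
)
from airflow.utils.trigger_rule import TriggerRule

delete_cluster = DataprocDeleteClusterOperator(
    task_id="delete_cluster",
    project_id="example-project",
    region="us-west1",
    cluster_name="example-cluster",
    trigger_rule=TriggerRule.ALL_DONE,  # tear down even after upstream failures
)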
@@ -980,11 +903,9 @@ class DataprocJobBaseOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :param asynchronous: Flag to return after submitting the job to the Dataproc API. This is useful for submitting long running jobs and waiting on them asynchronously using the DataprocJobSensor - :type asynchronous: bool :var dataproc_job_id: The actual "jobId" as submitted to the Dataproc API. This is useful for identifying or linking to the job in the Google Cloud Console @@ -1137,11 +1058,8 @@ class DataprocSubmitPigJobOperator(DataprocJobBaseOperator): :param query: The query or reference to the query file (pg or pig extension). (templated) - :type query: str :param query_uri: The HCFS URI of the script that contains the Pig queries. - :type query_uri: str :param variables: Map of named parameters for the query. (templated) - :type variables: dict """ template_fields: Sequence[str] = ( @@ -1216,11 +1134,8 @@ class DataprocSubmitHiveJobOperator(DataprocJobBaseOperator): Start a Hive query Job on a Cloud DataProc cluster. :param query: The query or reference to the query file (q extension). - :type query: str :param query_uri: The HCFS URI of the script that contains the Hive queries. - :type query_uri: str :param variables: Map of named parameters for the query. - :type variables: dict """ template_fields: Sequence[str] = ( @@ -1293,11 +1208,8 @@ class DataprocSubmitSparkSqlJobOperator(DataprocJobBaseOperator): Start a Spark SQL query Job on a Cloud DataProc cluster. :param query: The query or reference to the query file (q extension). (templated) - :type query: str :param query_uri: The HCFS URI of the script that contains the SQL queries. - :type query_uri: str :param variables: Map of named parameters for the query. (templated) - :type variables: dict """ template_fields: Sequence[str] = ( @@ -1369,17 +1281,12 @@ class DataprocSubmitSparkJobOperator(DataprocJobBaseOperator): :param main_jar: The HCFS URI of the jar file that contains the main class (use this or the main_class, not both together). - :type main_jar: str :param main_class: Name of the job class. (use this or the main_jar, not both together). - :type main_class: str :param arguments: Arguments for the job. (templated) - :type arguments: list :param archives: List of archived files that will be unpacked in the work directory. Should be stored in Cloud Storage. - :type archives: list :param files: List of files to be copied to the working directory - :type files: list """ template_fields: Sequence[str] = ( @@ -1447,17 +1354,12 @@ class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator): :param main_jar: The HCFS URI of the jar file containing the main class (use this or the main_class, not both together). - :type main_jar: str :param main_class: Name of the job class. (use this or the main_jar, not both together). - :type main_class: str :param arguments: Arguments for the job. (templated) - :type arguments: list :param archives: List of archived files that will be unpacked in the work directory. Should be stored in Cloud Storage. 
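To ground the job-wrapper operators above, a hedged sketch of DataprocSubmitSparkJobOperator pointing at the Spark examples jar that ships on Dataproc images; region and cluster name are placeholders.

from airflow.providers.google.cloud.operators.dataproc import (
    DataprocSubmitSparkJobOperator,
)

spark_pi = DataprocSubmitSparkJobOperator(
    task_id="spark_pi",
    region="us-west1",
    cluster_name="example-cluster",
    main_class="org.apache.spark.examples.SparkPi",
    dataproc_jars=["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
)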
- :type archives: list :param files: List of files to be copied to the working directory - :type files: list """ template_fields: Sequence[str] = ( @@ -1525,17 +1427,12 @@ class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator): :param main: [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file. (templated) - :type main: str :param arguments: Arguments for the job. (templated) - :type arguments: list :param archives: List of archived files that will be unpacked in the work directory. Should be stored in Cloud Storage. - :type archives: list :param files: List of files to be copied to the working directory - :type files: list :param pyfiles: List of Python files to pass to the PySpark framework. Supported file types: .py, .egg, and .zip - :type pyfiles: list """ template_fields: Sequence[str] = ( @@ -1645,22 +1542,15 @@ class DataprocCreateWorkflowTemplateOperator(BaseOperator): Creates new workflow template. :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param location: (To be deprecated). The Cloud Dataproc region in which to handle the request. - :type location: str :param template: The Dataproc workflow template to create. If a dict is provided, it must be of the same form as the protobuf message WorkflowTemplate. - :type template: Union[dict, WorkflowTemplate] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ template_fields: Sequence[str] = ("region", "template") @@ -1728,33 +1618,24 @@ class DataprocInstantiateWorkflowTemplateOperator(BaseOperator): https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiate :param template_id: The id of the template. (templated) - :type template_id: str :param project_id: The ID of the google cloud project in which the template runs - :type project_id: str :param region: The specified region where the dataproc cluster is created. - :type region: str :param parameters: a map of parameters for Dataproc Template in key-value format: map (key: string, value: string) Example: { "date_from": "2019-08-01", "date_to": "2019-08-02"}. Values may not exceed 100 characters. Please refer to: https://cloud.google.com/dataproc/docs/concepts/workflows/workflow-parameters - :type parameters: Dict[str, str] :param request_id: Optional. A unique id used to identify the request. If the server receives two ``SubmitJobRequest`` requests with the same id, then the second request will be ignored and the first ``Job`` created and stored in the backend is returned. It is recommended to always set this value to a UUID. - :type request_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. 
- :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1763,7 +1644,6 @@ class DataprocInstantiateWorkflowTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ('template_id', 'impersonation_chain', 'request_id', 'parameters') @@ -1827,33 +1707,24 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(BaseOperator): https://cloud.google.com/dataproc/docs/reference/rest/v1beta2/projects.regions.workflowTemplates/instantiateInline :param template: The template contents. (templated) - :type template: dict :param project_id: The ID of the google cloud project in which the template runs - :type project_id: str :param region: The specified region where the dataproc cluster is created. - :type region: str :param parameters: a map of parameters for Dataproc Template in key-value format: map (key: string, value: string) Example: { "date_from": "2019-08-01", "date_to": "2019-08-02"}. Values may not exceed 100 characters. Please refer to: https://cloud.google.com/dataproc/docs/concepts/workflows/workflow-parameters - :type parameters: Dict[str, str] :param request_id: Optional. A unique id used to identify the request. If the server receives two ``SubmitJobRequest`` requests with the same id, then the second request will be ignored and the first ``Job`` created and stored in the backend is returned. It is recommended to always set this value to a UUID. - :type request_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1862,7 +1733,6 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ('template', 'impersonation_chain') @@ -1915,30 +1785,21 @@ class DataprocSubmitJobOperator(BaseOperator): Submits a job to a cluster. :param project_id: Required. The ID of the Google Cloud project that the job belongs to. 
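The workflow-template operators above are commonly chained: create a template, then instantiate it by id. The sketch below shows only the shape of a template body (a single Spark step on a managed cluster); it is an assumed minimal example, not a validated configuration.

from airflow.providers.google.cloud.operators.dataproc import (
    DataprocCreateWorkflowTemplateOperator,
    DataprocInstantiateWorkflowTemplateOperator,
)

WORKFLOW_TEMPLATE = {
    "id": "example-template",  # placeholder template id
    "placement": {"managed_cluster": {"cluster_name": "example-cluster", "config": {}}},
    "jobs": [
        {
            "step_id": "spark_pi",
            "spark_job": {
                "main_class": "org.apache.spark.examples.SparkPi",
                "jar_file_uris": ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
            },
        }
    ],
}

create_template = DataprocCreateWorkflowTemplateOperator(
    task_id="create_template",
    template=WORKFLOW_TEMPLATE,
    project_id="example-project",
    region="us-west1",
)

trigger_template = DataprocInstantiateWorkflowTemplateOperator(
    task_id="trigger_template",
    template_id="example-template",
    project_id="example-project",
    region="us-west1",
)

create_template >> trigger_template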
- :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param location: (To be deprecated). The Cloud Dataproc region in which to handle the request. - :type location: str :param job: Required. The job resource. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.dataproc_v1.types.Job` - :type job: Dict :param request_id: Optional. A unique id used to identify the request. If the server receives two ``SubmitJobRequest`` requests with the same id, then the second request will be ignored and the first ``Job`` created and stored in the backend is returned. It is recommended to always set this value to a UUID. - :type request_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1947,15 +1808,11 @@ class DataprocSubmitJobOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :param asynchronous: Flag to return after submitting the job to the Dataproc API. This is useful for submitting long running jobs and waiting on them asynchronously using the DataprocJobSensor - :type asynchronous: bool :param cancel_on_kill: Flag which indicates whether cancel the hook's job or not, when on_kill is called - :type cancel_on_kill: bool :param wait_timeout: How many seconds wait for job to be ready. Used only if ``asynchronous`` is False - :type wait_timeout: int """ template_fields: Sequence[str] = ('project_id', 'region', 'job', 'impersonation_chain', 'request_id') @@ -2053,44 +1910,32 @@ class DataprocUpdateClusterOperator(BaseOperator): Updates a cluster in a project. :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param location: (To be deprecated). The Cloud Dataproc region in which to handle the request. - :type location: str :param cluster_name: Required. The cluster name. - :type cluster_name: str :param cluster: Required. The changes to the cluster. If a dict is provided, it must be of the same form as the protobuf message :class:`~google.cloud.dataproc_v1.types.Cluster` - :type cluster: Union[Dict, google.cloud.dataproc_v1.types.Cluster] :param update_mask: Required. Specifies the path, relative to ``Cluster``, of the field to update. For example, to change the number of workers in a cluster to 5, the ``update_mask`` parameter would be specified as ``config.worker_config.num_instances``, and the ``PATCH`` request body would specify the new value. 
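Because DataprocSubmitJobOperator takes the raw Job message shape, a small PySpark example may help; the GCS path and all identifiers are placeholders.

from airflow.providers.google.cloud.operators.dataproc import (
    DataprocSubmitJobOperator,
)

PYSPARK_JOB = {
    "reference": {"project_id": "example-project"},
    "placement": {"cluster_name": "example-cluster"},
    "pyspark_job": {"main_python_file_uri": "gs://example-bucket/jobs/hello.py"},
}

pyspark_task = DataprocSubmitJobOperator(
    task_id="pyspark_task",
    job=PYSPARK_JOB,
    region="us-west1",
    project_id="example-project",
)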
If a dict is provided, it must be of the same form as the protobuf message :class:`~google.protobuf.field_mask_pb2.FieldMask` - :type update_mask: Union[Dict, google.protobuf.field_mask_pb2.FieldMask] :param graceful_decommission_timeout: Optional. Timeout for graceful YARN decommissioning. Graceful decommissioning allows removing nodes from the cluster without interrupting jobs in progress. Timeout specifies how long to wait for jobs in progress to finish before forcefully removing nodes (and potentially interrupting jobs). Default timeout is 0 (for forceful decommission), and the maximum allowed timeout is 1 day. - :type graceful_decommission_timeout: Union[Dict, google.protobuf.duration_pb2.Duration] :param request_id: Optional. A unique id used to identify the request. If the server receives two ``UpdateClusterRequest`` requests with the same id, then the second request will be ignored and the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :type request_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2099,7 +1944,6 @@ class DataprocUpdateClusterOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ('impersonation_chain', 'cluster_name') @@ -2182,29 +2026,20 @@ class DataprocCreateBatchOperator(BaseOperator): Creates a batch workload. :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param batch: Required. The batch to create. - :type batch: google.cloud.dataproc_v1.types.Batch :param batch_id: Optional. The ID to use for the batch, which will become the final component of the batch's resource name. This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - :type batch_id: str :param request_id: Optional. A unique id used to identify the request. If the server receives two ``CreateBatchRequest`` requests with the same id, then the second request will be ignored and the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :type request_id: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. 
- :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2213,7 +2048,6 @@ class DataprocCreateBatchOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -2297,21 +2131,14 @@ class DataprocDeleteBatchOperator(BaseOperator): :param batch_id: Required. The ID to use for the batch, which will become the final component of the batch's resource name. This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - :type batch_id: str :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2320,7 +2147,6 @@ class DataprocDeleteBatchOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ("batch_id", "region", "project_id", "impersonation_chain") @@ -2369,21 +2195,14 @@ class DataprocGetBatchOperator(BaseOperator): :param batch_id: Required. The ID to use for the batch, which will become the final component of the batch's resource name. This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - :type batch_id: str :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. 
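A hedged sketch of the serverless batch operators covered in these hunks, creating a Spark batch and then fetching it; the batch id, project, and region are placeholders.

from airflow.providers.google.cloud.operators.dataproc import (
    DataprocCreateBatchOperator,
    DataprocGetBatchOperator,
)

create_batch = DataprocCreateBatchOperator(
    task_id="create_batch",
    project_id="example-project",
    region="us-west1",
    batch={
        "spark_batch": {
            "jar_file_uris": ["file:///usr/lib/spark/examples/jars/spark-examples.jar"],
            "main_class": "org.apache.spark.examples.SparkPi",
        }
    },
    batch_id="example-batch",
)

get_batch = DataprocGetBatchOperator(
    task_id="get_batch",
    project_id="example-project",
    region="us-west1",
    batch_id="example-batch",
)

create_batch >> get_batch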
- :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2392,7 +2211,6 @@ class DataprocGetBatchOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ("batch_id", "region", "project_id", "impersonation_chain") @@ -2439,25 +2257,17 @@ class DataprocListBatchesOperator(BaseOperator): Lists batch workloads. :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :type project_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. - :type region: str :param page_size: Optional. The maximum number of batches to return in each response. The service may return fewer than this value. The default page size is 20; the maximum page size is 1000. - :type page_size: int :param page_token: Optional. A page token received from a previous ``ListBatches`` call. Provide this token to retrieve the subsequent page. - :type page_token: str :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[Retry] :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete. Note that if `retry` is specified, the timeout applies to each individual attempt. - :type timeout: Optional[float] :param metadata: Optional, additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param gcp_conn_id: Optional, the connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: Optional[str] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2466,7 +2276,6 @@ class DataprocListBatchesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: List[dict] """ diff --git a/airflow/providers/google/cloud/operators/dataproc_metastore.py b/airflow/providers/google/cloud/operators/dataproc_metastore.py index ae9a921adbb5b..78db1f27a4c86 100644 --- a/airflow/providers/google/cloud/operators/dataproc_metastore.py +++ b/airflow/providers/google/cloud/operators/dataproc_metastore.py @@ -41,9 +41,7 @@ class DataprocMetastoreCreateBackupOperator(BaseOperator): Creates a new backup in a given project and location. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. 
The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -51,30 +49,22 @@ class DataprocMetastoreCreateBackupOperator(BaseOperator): This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param backup: Required. The backup to create. The ``name`` field is ignored. The ID of the created backup must be provided in the request's ``backup_id`` field. This corresponds to the ``backup`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type backup: google.cloud.metastore_v1.types.Backup :param backup_id: Required. The ID of the backup, which is used as the final component of the backup's name. This value must be between 1 and 64 characters long, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or hyphens. This corresponds to the ``backup_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type backup_id: str :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Optional. Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: Optional. The timeout for this request. - :type timeout: float :param metadata: Optional. Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -83,7 +73,6 @@ class DataprocMetastoreCreateBackupOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -163,9 +152,7 @@ class DataprocMetastoreCreateMetadataImportOperator(BaseOperator): Creates a new MetadataImport in a given project and location. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -173,30 +160,22 @@ class DataprocMetastoreCreateMetadataImportOperator(BaseOperator): This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param metadata_import: Required. The metadata import to create. The ``name`` field is ignored. The ID of the created metadata import must be provided in the request's ``metadata_import_id`` field. 
This corresponds to the ``metadata_import`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type metadata_import: google.cloud.metastore_v1.types.MetadataImport :param metadata_import_id: Required. The ID of the metadata import, which is used as the final component of the metadata import's name. This value must be between 1 and 64 characters long, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or hyphens. This corresponds to the ``metadata_import_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type metadata_import_id: str :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Optional. Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: Optional. The timeout for this request. - :type timeout: float :param metadata: Optional. Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -205,7 +184,6 @@ class DataprocMetastoreCreateMetadataImportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -270,15 +248,12 @@ class DataprocMetastoreCreateServiceOperator(BaseOperator): Creates a metastore service in a project and location. :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param service: Required. The Metastore service to create. The ``name`` field is ignored. The ID of the created metastore service must be provided in the request's ``service_id`` field. This corresponds to the ``service`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service: google.cloud.metastore_v1.types.Service :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -286,17 +261,11 @@ class DataprocMetastoreCreateServiceOperator(BaseOperator): This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request. - :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. 
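For orientation, a sketch of the service-creation call described above. The service body is an assumed minimal Service message and should be checked against google.cloud.metastore_v1 before use; every identifier is a placeholder.

from airflow.providers.google.cloud.operators.dataproc_metastore import (
    DataprocMetastoreCreateServiceOperator,
)

create_service = DataprocMetastoreCreateServiceOperator(
    task_id="create_service",
    project_id="example-project",
    region="us-west1",
    service={"hive_metastore_config": {"version": "3.1.2"}},  # assumed minimal body
    service_id="example-service",
)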
- :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -305,7 +274,6 @@ class DataprocMetastoreCreateServiceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -380,9 +348,7 @@ class DataprocMetastoreDeleteBackupOperator(BaseOperator): Deletes a single backup. :param project_id: Required. The ID of the Google Cloud project that the backup belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the backup belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -390,24 +356,17 @@ class DataprocMetastoreDeleteBackupOperator(BaseOperator): This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param backup_id: Required. The ID of the backup, which is used as the final component of the backup's name. This value must be between 1 and 64 characters long, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or hyphens. This corresponds to the ``backup_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type backup_id: str :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Optional. Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: Optional. The timeout for this request. - :type timeout: float :param metadata: Optional. Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -416,7 +375,6 @@ class DataprocMetastoreDeleteBackupOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -476,17 +434,11 @@ class DataprocMetastoreDeleteServiceOperator(BaseOperator): :param request: The request object. Request message for [DataprocMetastore.DeleteService][google.cloud.metastore.v1.DataprocMetastore.DeleteService]. - :type request: google.cloud.metastore_v1.types.DeleteServiceRequest :param project_id: Required. 
The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request. - :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: - :type gcp_conn_id: str """ template_fields: Sequence[str] = ( @@ -542,28 +494,19 @@ class DataprocMetastoreExportMetadataOperator(BaseOperator): ``gs://<bucket_name>/<path_inside_bucket>``. A sub-folder ``<export_folder>`` containing exported files will be created below it. - :type destination_gcs_folder: str :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or hyphens. This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Optional. Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: Optional. The timeout for this request. - :type timeout: float :param metadata: Optional. Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -572,7 +515,6 @@ class DataprocMetastoreExportMetadataOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -659,9 +601,7 @@ class DataprocMetastoreGetServiceOperator(BaseOperator): Gets the details of a single service. :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -669,15 +609,10 @@ class DataprocMetastoreGetServiceOperator(BaseOperator): This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param retry: Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: The timeout for this request.
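To illustrate the export call whose docstring is trimmed in this hunk, a minimal sketch; the destination folder and service identifiers are placeholders.

from airflow.providers.google.cloud.operators.dataproc_metastore import (
    DataprocMetastoreExportMetadataOperator,
)

export_metadata = DataprocMetastoreExportMetadataOperator(
    task_id="export_metadata",
    destination_gcs_folder="gs://example-bucket/metadata-exports",  # placeholder URI
    project_id="example-project",
    region="us-west1",
    service_id="example-service",
)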
- :type timeout: float :param metadata: Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -686,7 +621,6 @@ class DataprocMetastoreGetServiceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -738,9 +672,7 @@ class DataprocMetastoreListBackupsOperator(BaseOperator): Lists backups in a service. :param project_id: Required. The ID of the Google Cloud project that the backup belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the backup belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -748,15 +680,10 @@ class DataprocMetastoreListBackupsOperator(BaseOperator): This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param retry: Optional. Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: Optional. The timeout for this request. - :type timeout: float :param metadata: Optional. Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -765,7 +692,6 @@ class DataprocMetastoreListBackupsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -829,9 +755,7 @@ class DataprocMetastoreRestoreServiceOperator(BaseOperator): Restores a service from a backup. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. 
This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -839,33 +763,22 @@ class DataprocMetastoreRestoreServiceOperator(BaseOperator): This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param backup_project_id: Required. The ID of the Google Cloud project that the metastore service backup to restore from. - :type backup_project_id: str :param backup_region: Required. The ID of the Google Cloud region that the metastore service backup to restore from. - :type backup_region: str :param backup_service_id: Required. The ID of the metastore service backup to restore from, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or hyphens. - :type backup_service_id: str :param backup_id: Required. The ID of the metastore service backup to restore from - :type backup_id: str :param restore_type: Optional. The type of restore. If unspecified, defaults to ``METADATA_ONLY`` - :type restore_type: google.cloud.metastore_v1.types.Restore.RestoreType :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Optional. Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: Optional. The timeout for this request. - :type timeout: float :param metadata: Optional. Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -874,7 +787,6 @@ class DataprocMetastoreRestoreServiceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -969,9 +881,7 @@ class DataprocMetastoreUpdateServiceOperator(BaseOperator): Updates the parameters of a single service. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. - :type project_id: str :param region: Required. The ID of the Google Cloud region that the service belongs to. - :type region: str :param service_id: Required. The ID of the metastore service, which is used as the final component of the metastore service's name. This value must be between 2 and 63 characters long inclusive, begin with a letter, end with a letter or number, and consist of alphanumeric ASCII characters or @@ -979,7 +889,6 @@ class DataprocMetastoreUpdateServiceOperator(BaseOperator): This corresponds to the ``service_id`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service_id: str :param service: Required. The metastore service to update. The server only merges fields in the service if they are specified in ``update_mask``. 
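A hedged sketch of the restore operator documented above, restoring a service from one of its own backups; all ids are placeholders.

from airflow.providers.google.cloud.operators.dataproc_metastore import (
    DataprocMetastoreRestoreServiceOperator,
)

restore_service = DataprocMetastoreRestoreServiceOperator(
    task_id="restore_service",
    project_id="example-project",
    region="us-west1",
    service_id="example-service",
    backup_project_id="example-project",
    backup_region="us-west1",
    backup_service_id="example-service",
    backup_id="example-backup",
)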
@@ -987,24 +896,17 @@ class DataprocMetastoreUpdateServiceOperator(BaseOperator): This corresponds to the ``service`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type service: Union[Dict, google.cloud.metastore_v1.types.Service] :param update_mask: Required. A field mask used to specify the fields to be overwritten in the metastore service resource by the update. Fields specified in the ``update_mask`` are relative to the resource (not to the full request). A field is overwritten if it is in the mask. This corresponds to the ``update_mask`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - :type update_mask: google.protobuf.field_mask_pb2.FieldMask :param request_id: Optional. A unique id used to identify the request. - :type request_id: str :param retry: Optional. Designation of what errors, if any, should be retried. - :type retry: google.api_core.retry.Retry :param timeout: Optional. The timeout for this request. - :type timeout: float :param metadata: Optional. Strings which should be sent along with the request as metadata. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1013,7 +915,6 @@ class DataprocMetastoreUpdateServiceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/operators/datastore.py b/airflow/providers/google/cloud/operators/datastore.py index acd8c2080a8e1..82820575184fc 100644 --- a/airflow/providers/google/cloud/operators/datastore.py +++ b/airflow/providers/google/cloud/operators/datastore.py @@ -40,31 +40,22 @@ class CloudDatastoreExportEntitiesOperator(BaseOperator): https://cloud.google.com/datastore/docs/export-import-entities :param bucket: name of the cloud storage bucket to backup data - :type bucket: str :param namespace: optional namespace path in the specified Cloud Storage bucket to backup data. If this namespace does not exist in GCS, it will be created. - :type namespace: str :param datastore_conn_id: the name of the Datastore connection id to use - :type datastore_conn_id: str :param cloud_storage_conn_id: the name of the cloud storage connection id to force-write backup - :type cloud_storage_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. 
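To round off the metastore hunks before the datastore ones: ``DataprocMetastoreUpdateServiceOperator`` merges only the fields named in ``update_mask``, which (per the removed ``:type`` line) is a protobuf ``FieldMask``. A sketch with an invented label change:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.dataproc_metastore import (
    DataprocMetastoreUpdateServiceOperator,
)
from google.protobuf.field_mask_pb2 import FieldMask

with DAG("metastore_update_demo", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    # Only "labels" is listed in the mask, so no other service field is touched.
    update_service = DataprocMetastoreUpdateServiceOperator(
        task_id="update_service",
        project_id="my-project",   # hypothetical
        region="europe-west1",
        service_id="my-metastore",
        service={"labels": {"team": "data-platform"}},
        update_mask=FieldMask(paths=["labels"]),
    )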
- :type delegate_to: str :param entity_filter: description of what data from the project is included in the export, refer to https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter - :type entity_filter: dict :param labels: client-assigned labels for cloud storage - :type labels: dict :param polling_interval_in_seconds: number of seconds to wait before polling for execution status again - :type polling_interval_in_seconds: int :param overwrite_existing: if the storage bucket + namespace is not empty, it will be emptied prior to exports. This enables overwriting existing backups. - :type overwrite_existing: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -73,7 +64,6 @@ class CloudDatastoreExportEntitiesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -157,28 +147,20 @@ class CloudDatastoreImportEntitiesOperator(BaseOperator): https://cloud.google.com/datastore/docs/export-import-entities :param bucket: container in Cloud Storage to store data - :type bucket: str :param file: path of the backup metadata file in the specified Cloud Storage bucket. It should have the extension .overall_export_metadata - :type file: str :param namespace: optional namespace of the backup metadata file in the specified Cloud Storage bucket. - :type namespace: str :param entity_filter: description of what data from the project is included in the export, refer to https://cloud.google.com/datastore/docs/reference/rest/Shared.Types/EntityFilter - :type entity_filter: dict :param labels: client-assigned labels for cloud storage - :type labels: dict :param datastore_conn_id: the name of the connection id to use - :type datastore_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param polling_interval_in_seconds: number of seconds to wait before polling for execution status again - :type polling_interval_in_seconds: float :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -187,7 +169,6 @@ class CloudDatastoreImportEntitiesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -265,15 +246,11 @@ class CloudDatastoreAllocateIdsOperator(BaseOperator): https://cloud.google.com/datastore/docs/reference/rest/v1/projects/allocateIds :param partial_keys: a list of partial keys. - :type partial_keys: list :param project_id: Google Cloud project ID against which to make the request. 
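The export and import operators above are natural mirror images, so a paired sketch; the bucket is hypothetical, and the ``file`` argument points at whatever ``.overall_export_metadata`` file an earlier export actually produced:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.datastore import (
    CloudDatastoreExportEntitiesOperator,
    CloudDatastoreImportEntitiesOperator,
)

with DAG("datastore_backup_demo", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    export_entities = CloudDatastoreExportEntitiesOperator(
        task_id="export_entities",
        bucket="my-backup-bucket",  # hypothetical GCS bucket
        overwrite_existing=True,    # empty bucket + namespace before exporting
    )
    import_entities = CloudDatastoreImportEntitiesOperator(
        task_id="import_entities",
        bucket="my-backup-bucket",
        file="exports/2021/my.overall_export_metadata",  # hypothetical path
    )
    export_entities >> import_entities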
- :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -282,7 +259,6 @@ class CloudDatastoreAllocateIdsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -332,15 +308,11 @@ class CloudDatastoreBeginTransactionOperator(BaseOperator): https://cloud.google.com/datastore/docs/reference/rest/v1/projects/beginTransaction :param transaction_options: Options for a new transaction. - :type transaction_options: Dict[str, Any] :param project_id: Google Cloud project ID against which to make the request. - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -349,7 +321,6 @@ class CloudDatastoreBeginTransactionOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -399,15 +370,11 @@ class CloudDatastoreCommitOperator(BaseOperator): https://cloud.google.com/datastore/docs/reference/rest/v1/projects/commit :param body: the body of the commit request. - :type body: dict :param project_id: Google Cloud project ID against which to make the request. - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -416,7 +383,6 @@ class CloudDatastoreCommitOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
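Both operators above link to the ``projects.allocateIds`` and ``projects.beginTransaction`` REST references, so their payloads are plain REST-style dicts. A sketch, assuming a kind named ``Task`` and a hypothetical project:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.datastore import (
    CloudDatastoreAllocateIdsOperator,
    CloudDatastoreBeginTransactionOperator,
)

with DAG("datastore_keys_demo", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    # Two incomplete keys of kind "Task"; Datastore fills in the numeric IDs.
    allocate_ids = CloudDatastoreAllocateIdsOperator(
        task_id="allocate_ids",
        partial_keys=[{"path": [{"kind": "Task"}]}, {"path": [{"kind": "Task"}]}],
        project_id="my-project",  # hypothetical
    )
    begin_transaction = CloudDatastoreBeginTransactionOperator(
        task_id="begin_transaction",
        transaction_options={"readWrite": {}},  # REST TransactionOptions shape
        project_id="my-project",
    )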
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -466,15 +432,11 @@ class CloudDatastoreRollbackOperator(BaseOperator): https://cloud.google.com/datastore/docs/reference/rest/v1/projects/rollback :param transaction: the transaction to roll back. - :type transaction: str :param project_id: Google Cloud project ID against which to make the request. - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -483,7 +445,6 @@ class CloudDatastoreRollbackOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -532,15 +493,11 @@ class CloudDatastoreRunQueryOperator(BaseOperator): https://cloud.google.com/datastore/docs/reference/rest/v1/projects/runQuery :param body: the body of the query request. - :type body: dict :param project_id: Google Cloud project ID against which to make the request. - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -549,7 +506,6 @@ class CloudDatastoreRunQueryOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -599,13 +555,10 @@ class CloudDatastoreGetOperationOperator(BaseOperator): https://cloud.google.com/datastore/docs/reference/data/rest/v1/projects.operations/get :param name: the name of the operation resource. - :type name: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
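The query operator follows the same convention: ``body`` is a ``projects.runQuery`` REST payload. A sketch that fetches every entity of a hypothetical kind:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.datastore import CloudDatastoreRunQueryOperator

with DAG("datastore_query_demo", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    # An unfiltered query over kind "Task"; the response batch ends up in XCom.
    run_query = CloudDatastoreRunQueryOperator(
        task_id="run_query",
        body={"query": {"kind": [{"name": "Task"}]}},
        project_id="my-project",  # hypothetical
    )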
@@ -614,7 +567,6 @@ class CloudDatastoreGetOperationOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -659,13 +611,10 @@ class CloudDatastoreDeleteOperationOperator(BaseOperator): https://cloud.google.com/datastore/docs/reference/data/rest/v1/projects.operations/delete :param name: the name of the operation resource. - :type name: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -674,7 +623,6 @@ class CloudDatastoreDeleteOperationOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/operators/dlp.py b/airflow/providers/google/cloud/operators/dlp.py index 09803999d02ac..d967e7357b05e 100644 --- a/airflow/providers/google/cloud/operators/dlp.py +++ b/airflow/providers/google/cloud/operators/dlp.py @@ -58,22 +58,16 @@ class CloudDLPCancelDLPJobOperator(BaseOperator): :ref:`howto/operator:CloudDLPCancelDLPJobOperator` :param dlp_job_id: ID of the DLP job resource to be cancelled. - :type dlp_job_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -82,7 +76,6 @@ class CloudDLPCancelDLPJobOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -138,26 +131,18 @@ class CloudDLPCreateDeidentifyTemplateOperator(BaseOperator): :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param deidentify_template: (Optional) The DeidentifyTemplate to create. - :type deidentify_template: dict or google.cloud.dlp_v2.types.DeidentifyTemplate :param template_id: (Optional) The template ID. - :type template_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -166,7 +151,6 @@ class CloudDLPCreateDeidentifyTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate """ @@ -246,27 +230,18 @@ class CloudDLPCreateDLPJobOperator(BaseOperator): :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param inspect_job: (Optional) The configuration for the inspect job. - :type inspect_job: dict or google.cloud.dlp_v2.types.InspectJobConfig :param risk_job: (Optional) The configuration for the risk job. - :type risk_job: dict or google.cloud.dlp_v2.types.RiskAnalysisJobConfig :param job_id: (Optional) The job ID. - :type job_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param wait_until_finished: (Optional) If true, it will keep polling the job state until it is set to DONE. - :type wait_until_finished: bool :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. 
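A sketch for ``CloudDLPCreateDeidentifyTemplateOperator`` with a project-level parent; ``deidentify_template`` is a dict shaped like the ``DeidentifyTemplate`` message, here with a single replace-with-infoType transformation (project and template IDs invented):

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.dlp import CloudDLPCreateDeidentifyTemplateOperator

# Replace every finding with its infoType name, e.g. "[EMAIL_ADDRESS]".
DEIDENTIFY_TEMPLATE = {
    "deidentify_config": {
        "info_type_transformations": {
            "transformations": [
                {"primitive_transformation": {"replace_with_info_type_config": {}}}
            ]
        }
    }
}

with DAG("dlp_template_demo", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    create_template = CloudDLPCreateDeidentifyTemplateOperator(
        task_id="create_deidentify_template",
        project_id="my-project",          # parent is a project, so no organization_id
        deidentify_template=DEIDENTIFY_TEMPLATE,
        template_id="redact_info_types",  # hypothetical
    )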
- :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -275,7 +250,6 @@ class CloudDLPCreateDLPJobOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.DlpJob """ @@ -356,26 +330,18 @@ class CloudDLPCreateInspectTemplateOperator(BaseOperator): :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param inspect_template: (Optional) The InspectTemplate to create. - :type inspect_template: dict or google.cloud.dlp_v2.types.InspectTemplate :param template_id: (Optional) The template ID. - :type template_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -384,7 +350,6 @@ class CloudDLPCreateInspectTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.InspectTemplate """ @@ -464,22 +429,15 @@ class CloudDLPCreateJobTriggerOperator(BaseOperator): :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param job_trigger: (Optional) The JobTrigger to create. - :type job_trigger: dict or google.cloud.dlp_v2.types.JobTrigger :param trigger_id: (Optional) The JobTrigger ID. - :type trigger_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. 
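For ``CloudDLPCreateDLPJobOperator``, ``inspect_job`` is an ``InspectJobConfig``-shaped dict. The sketch below scans a hypothetical BigQuery table for email addresses; ``wait_until_finished`` makes the task block until the job reaches DONE:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.dlp import CloudDLPCreateDLPJobOperator

INSPECT_JOB = {
    "storage_config": {
        "big_query_options": {
            "table_reference": {  # hypothetical table
                "project_id": "my-project",
                "dataset_id": "my_dataset",
                "table_id": "raw_events",
            }
        }
    },
    "inspect_config": {"info_types": [{"name": "EMAIL_ADDRESS"}]},
}

with DAG("dlp_job_demo", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    create_dlp_job = CloudDLPCreateDLPJobOperator(
        task_id="create_dlp_job",
        project_id="my-project",   # hypothetical
        inspect_job=INSPECT_JOB,
        wait_until_finished=True,  # poll the job state until DONE
    )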
- :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -488,7 +446,6 @@ class CloudDLPCreateJobTriggerOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.JobTrigger """ @@ -563,26 +520,18 @@ class CloudDLPCreateStoredInfoTypeOperator(BaseOperator): :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param config: (Optional) The config for the StoredInfoType. - :type config: dict or google.cloud.dlp_v2.types.StoredInfoTypeConfig :param stored_info_type_id: (Optional) The StoredInfoType ID. - :type stored_info_type_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -591,7 +540,6 @@ class CloudDLPCreateStoredInfoTypeOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.StoredInfoType """ @@ -673,34 +621,24 @@ class CloudDLPDeidentifyContentOperator(BaseOperator): :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param deidentify_config: (Optional) Configuration for the de-identification of the content item. Items specified here will override the template referenced by the deidentify_template_name argument. - :type deidentify_config: dict or google.cloud.dlp_v2.types.DeidentifyConfig :param inspect_config: (Optional) Configuration for the inspector. Items specified here will override the template referenced by the inspect_template_name argument. - :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig :param item: (Optional) The item to de-identify. Will be treated as text. 
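``CloudDLPCreateJobTriggerOperator`` takes a ``JobTrigger``-shaped dict. A sketch that re-runs a Datastore inspection once a day; every name is invented, and the schedule uses the API's ``Duration``-based recurrence:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.dlp import CloudDLPCreateJobTriggerOperator

JOB_TRIGGER = {
    "inspect_job": {
        "storage_config": {
            "datastore_options": {
                "partition_id": {"project_id": "my-project"},  # hypothetical
                "kind": {"name": "Task"},
            }
        }
    },
    "triggers": [{"schedule": {"recurrence_period_duration": {"seconds": 24 * 60 * 60}}}],
    "status": "HEALTHY",
}

with DAG("dlp_trigger_demo", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    create_trigger = CloudDLPCreateJobTriggerOperator(
        task_id="create_job_trigger",
        project_id="my-project",
        job_trigger=JOB_TRIGGER,
        trigger_id="daily_datastore_scan",  # hypothetical
    )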
- :type item: dict or google.cloud.dlp_v2.types.ContentItem :param inspect_template_name: (Optional) Optional template to use. Any configuration directly specified in inspect_config will override those set in the template. - :type inspect_template_name: str :param deidentify_template_name: (Optional) Optional template to use. Any configuration directly specified in deidentify_config will override those set in the template. - :type deidentify_template_name: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -709,7 +647,6 @@ class CloudDLPDeidentifyContentOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.DeidentifyContentResponse """ @@ -782,25 +719,18 @@ class CloudDLPDeleteDeidentifyTemplateOperator(BaseOperator): :ref:`howto/operator:CloudDLPDeleteDeidentifyTemplateOperator` :param template_id: The ID of deidentify template to be deleted. - :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -809,7 +739,6 @@ class CloudDLPDeleteDeidentifyTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
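A sketch for ``CloudDLPDeidentifyContentOperator`` using inline configs rather than templates, on an invented piece of text; the response (a ``DeidentifyContentResponse``, per the ``:rtype`` above) carries the rewritten item:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.dlp import CloudDLPDeidentifyContentOperator

ITEM = {"value": "My phone number is (206) 555-0123"}  # invented sample text
INSPECT_CONFIG = {"info_types": [{"name": "PHONE_NUMBER"}]}
# Swap each finding for its infoType name, yielding "... is [PHONE_NUMBER]".
DEIDENTIFY_CONFIG = {
    "info_type_transformations": {
        "transformations": [
            {"primitive_transformation": {"replace_with_info_type_config": {}}}
        ]
    }
}

with DAG("dlp_deidentify_demo", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    deidentify_content = CloudDLPDeidentifyContentOperator(
        task_id="deidentify_content",
        project_id="my-project",  # hypothetical
        item=ITEM,
        inspect_config=INSPECT_CONFIG,
        deidentify_config=DEIDENTIFY_CONFIG,
    )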
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -871,22 +800,16 @@ class CloudDLPDeleteDLPJobOperator(BaseOperator): :ref:`howto/operator:CloudDLPDeleteDLPJobOperator` :param dlp_job_id: The ID of the DLP job resource to be deleted. - :type dlp_job_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -895,7 +818,6 @@ class CloudDLPDeleteDLPJobOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -952,25 +874,18 @@ class CloudDLPDeleteInspectTemplateOperator(BaseOperator): :ref:`howto/operator:CloudDLPDeleteInspectTemplateOperator` :param template_id: The ID of the inspect template to be deleted. - :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -979,7 +894,6 @@ class CloudDLPDeleteInspectTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1040,22 +954,16 @@ class CloudDLPDeleteJobTriggerOperator(BaseOperator): :ref:`howto/operator:CloudDLPDeleteJobTriggerOperator` :param job_trigger_id: The ID of the DLP job trigger to be deleted. - :type job_trigger_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1064,7 +972,6 @@ class CloudDLPDeleteJobTriggerOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1121,25 +1028,18 @@ class CloudDLPDeleteStoredInfoTypeOperator(BaseOperator): :ref:`howto/operator:CloudDLPDeleteStoredInfoTypeOperator` :param stored_info_type_id: The ID of the stored info type to be deleted. - :type stored_info_type_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1148,7 +1048,6 @@ class CloudDLPDeleteStoredInfoTypeOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1209,25 +1108,18 @@ class CloudDLPGetDeidentifyTemplateOperator(BaseOperator): :ref:`howto/operator:CloudDLPGetDeidentifyTemplateOperator` :param template_id: The ID of deidentify template to be read. - :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1236,7 +1128,6 @@ class CloudDLPGetDeidentifyTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate """ @@ -1297,22 +1188,16 @@ class CloudDLPGetDLPJobOperator(BaseOperator): :ref:`howto/operator:CloudDLPGetDLPJobOperator` :param dlp_job_id: The ID of the DLP job resource to be read. - :type dlp_job_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1321,7 +1206,6 @@ class CloudDLPGetDLPJobOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
- :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.DlpJob """ @@ -1378,25 +1262,18 @@ class CloudDLPGetInspectTemplateOperator(BaseOperator): :ref:`howto/operator:CloudDLPGetInspectTemplateOperator` :param template_id: The ID of inspect template to be read. - :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1405,7 +1282,6 @@ class CloudDLPGetInspectTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.InspectTemplate """ @@ -1466,22 +1342,16 @@ class CloudDLPGetDLPJobTriggerOperator(BaseOperator): :ref:`howto/operator:CloudDLPGetDLPJobTriggerOperator` :param job_trigger_id: The ID of the DLP job trigger to be read. - :type job_trigger_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1490,7 +1360,6 @@ class CloudDLPGetDLPJobTriggerOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
- :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.JobTrigger """ @@ -1547,25 +1416,18 @@ class CloudDLPGetStoredInfoTypeOperator(BaseOperator): :ref:`howto/operator:CloudDLPGetStoredInfoTypeOperator` :param stored_info_type_id: The ID of the stored info type to be read. - :type stored_info_type_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1574,7 +1436,6 @@ class CloudDLPGetStoredInfoTypeOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.StoredInfoType """ @@ -1638,26 +1499,18 @@ class CloudDLPInspectContentOperator(BaseOperator): :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param inspect_config: (Optional) Configuration for the inspector. Items specified here will override the template referenced by the inspect_template_name argument. - :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig :param item: (Optional) The item to inspect. Will be treated as text. - :type item: dict or google.cloud.dlp_v2.types.ContentItem :param inspect_template_name: (Optional) Optional template to use. Any configuration directly specified in inspect_config will override those set in the template. - :type inspect_template_name: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
- :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1666,7 +1519,6 @@ class CloudDLPInspectContentOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.InspectContentResponse """ @@ -1732,28 +1584,20 @@ class CloudDLPListDeidentifyTemplatesOperator(BaseOperator): :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param page_size: (Optional) The maximum number of resources contained in the underlying API response. - :type page_size: int :param order_by: (Optional) Optional comma separated list of fields to order by, followed by asc or desc postfix. - :type order_by: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1762,7 +1606,6 @@ class CloudDLPListDeidentifyTemplatesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: list[google.cloud.dlp_v2.types.DeidentifyTemplate] """ @@ -1828,28 +1671,19 @@ class CloudDLPListDLPJobsOperator(BaseOperator): :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param results_filter: (Optional) Filter used to specify a subset of results. - :type results_filter: str :param page_size: (Optional) The maximum number of resources contained in the underlying API response. - :type page_size: int :param job_type: (Optional) The type of job. - :type job_type: str :param order_by: (Optional) Optional comma separated list of fields to order by, followed by asc or desc postfix. - :type order_by: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried.
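The inspect variant returns findings rather than rewritten content. A sketch in the same inline style, with invented text and project ID:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.dlp import CloudDLPInspectContentOperator

with DAG("dlp_inspect_demo", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    # The InspectContentResponse with any EMAIL_ADDRESS findings lands in XCom.
    inspect_content = CloudDLPInspectContentOperator(
        task_id="inspect_content",
        project_id="my-project",  # hypothetical
        item={"value": "Contact me at jane.doe@example.com"},
        inspect_config={"info_types": [{"name": "EMAIL_ADDRESS"}]},
    )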
- :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1858,7 +1692,6 @@ class CloudDLPListDLPJobsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: list[google.cloud.dlp_v2.types.DlpJob] """ @@ -1926,20 +1759,14 @@ class CloudDLPListInfoTypesOperator(BaseOperator): :param language_code: (Optional) Optional BCP-47 language code for localized infoType friendly names. If omitted, or if localized strings are not available, en-US strings will be returned. - :type language_code: str :param results_filter: (Optional) Filter used to specify a subset of results. - :type results_filter: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1948,7 +1775,6 @@ class CloudDLPListInfoTypesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: ListInfoTypesResponse """ @@ -2005,28 +1831,20 @@ class CloudDLPListInspectTemplatesOperator(BaseOperator): :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param page_size: (Optional) The maximum number of resources contained in the underlying API response. - :type page_size: int :param order_by: (Optional) Optional comma separated list of fields to order by, followed by asc or desc postfix. - :type order_by: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. 
- :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2035,7 +1853,6 @@ class CloudDLPListInspectTemplatesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: list[google.cloud.dlp_v2.types.InspectTemplate] """ @@ -2100,26 +1917,18 @@ class CloudDLPListJobTriggersOperator(BaseOperator): :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param page_size: (Optional) The maximum number of resources contained in the underlying API response. - :type page_size: int :param order_by: (Optional) Optional comma separated list of fields to order by, followed by asc or desc postfix. - :type order_by: str :param results_filter: (Optional) Filter used to specify a subset of results. - :type results_filter: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2128,7 +1937,6 @@ class CloudDLPListJobTriggersOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: list[google.cloud.dlp_v2.types.JobTrigger] """ @@ -2191,28 +1999,20 @@ class CloudDLPListStoredInfoTypesOperator(BaseOperator): :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param page_size: (Optional) The maximum number of resources contained in the underlying API response. 
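The list operators all share the paging and ordering parameters documented above. A sketch for ``CloudDLPListJobTriggersOperator``, assuming "create_time" is a sortable field on job triggers:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.dlp import CloudDLPListJobTriggersOperator

with DAG("dlp_list_demo", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    list_job_triggers = CloudDLPListJobTriggersOperator(
        task_id="list_job_triggers",
        project_id="my-project",      # hypothetical
        page_size=100,                # max resources per underlying API response
        order_by="create_time desc",  # comma-separated fields, asc/desc postfix
    )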
- :type page_size: int :param order_by: (Optional) Optional comma separated list of fields to order by, followed by asc or desc postfix. - :type order_by: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2221,7 +2021,6 @@ class CloudDLPListStoredInfoTypesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: list[google.cloud.dlp_v2.types.StoredInfoType] """ @@ -2287,30 +2086,20 @@ class CloudDLPRedactImageOperator(BaseOperator): :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param inspect_config: (Optional) Configuration for the inspector. Items specified here will override the template referenced by the inspect_template_name argument. - :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig :param image_redaction_configs: (Optional) The configuration for specifying what content to redact from images. - :type image_redaction_configs: list[dict] or - list[google.cloud.dlp_v2.types.RedactImageRequest.ImageRedactionConfig] :param include_findings: (Optional) Whether the response should include findings along with the redacted image. - :type include_findings: bool :param byte_item: (Optional) The content must be PNG, JPEG, SVG or BMP. - :type byte_item: dict or google.cloud.dlp_v2.types.ByteContentItem :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
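To make the redaction parameters above concrete, a hedged sketch of CloudDLPRedactImageOperator, reusing the DAG context from the earlier example. The payload values are placeholders, and the dict field names follow the dict form of ByteContentItem and ImageRedactionConfig (the exact key, ``type_`` versus ``type``, depends on the installed google-cloud-dlp client version)::

    from airflow.providers.google.cloud.operators.dlp import CloudDLPRedactImageOperator

    redact_image = CloudDLPRedactImageOperator(
        task_id="redact_image",
        project_id="my-gcp-project",
        byte_item={
            "type_": "IMAGE_PNG",  # may be "type" on older client versions
            "data": b"...",        # placeholder for the raw PNG bytes
        },
        image_redaction_configs=[{"redact_all_text": True}],  # dict form of ImageRedactionConfig
        include_findings=False,
    )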
@@ -2319,7 +2108,6 @@ class CloudDLPRedactImageOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.RedactImageResponse """ @@ -2392,32 +2180,22 @@ class CloudDLPReidentifyContentOperator(BaseOperator): :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param reidentify_config: (Optional) Configuration for the re-identification of the content item. - :type reidentify_config: dict or google.cloud.dlp_v2.types.DeidentifyConfig :param inspect_config: (Optional) Configuration for the inspector. - :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig :param item: (Optional) The item to re-identify. Will be treated as text. - :type item: dict or google.cloud.dlp_v2.types.ContentItem :param inspect_template_name: (Optional) Optional template to use. Any configuration directly specified in inspect_config will override those set in the template. - :type inspect_template_name: str :param reidentify_template_name: (Optional) Optional template to use. References an instance of DeidentifyTemplate. Any configuration directly specified in reidentify_config or inspect_config will override those set in the template. - :type reidentify_template_name: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2426,7 +2204,6 @@ class CloudDLPReidentifyContentOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.ReidentifyContentResponse """ @@ -2499,29 +2276,20 @@ class CloudDLPUpdateDeidentifyTemplateOperator(BaseOperator): :ref:`howto/operator:CloudDLPUpdateDeidentifyTemplateOperator` :param template_id: The ID of deidentify template to be updated. - :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param deidentify_template: New DeidentifyTemplate value. 
- :type deidentify_template: dict or google.cloud.dlp_v2.types.DeidentifyTemplate :param update_mask: Mask to control which fields get updated. - :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2530,7 +2298,6 @@ class CloudDLPUpdateDeidentifyTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate """ @@ -2599,29 +2366,20 @@ class CloudDLPUpdateInspectTemplateOperator(BaseOperator): :ref:`howto/operator:CloudDLPUpdateInspectTemplateOperator` :param template_id: The ID of the inspect template to be updated. - :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organization. - :type organization_id: str :param project_id: (Optional) Google Cloud project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organization. - :type project_id: str :param inspect_template: New InspectTemplate value. - :type inspect_template: dict or google.cloud.dlp_v2.types.InspectTemplate :param update_mask: Mask to control which fields get updated. - :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2630,7 +2388,6 @@ class CloudDLPUpdateInspectTemplateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
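A sketch of the update pattern shared by these Update* operators, using CloudDLPUpdateInspectTemplateOperator from the same DAG context. The IDs and display name are placeholders, and the ``update_mask`` uses the dict form of a FieldMask::

    from airflow.providers.google.cloud.operators.dlp import CloudDLPUpdateInspectTemplateOperator

    update_template = CloudDLPUpdateInspectTemplateOperator(
        task_id="update_inspect_template",
        template_id="my-inspect-template",            # hypothetical template ID
        organization_id="123456789",                  # parent is an organization here, not a project
        inspect_template={"display_name": "PII scan v2"},
        update_mask={"paths": ["display_name"]},      # only the listed fields are updated
    )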
- :type impersonation_chain: Union[str, Sequence[str]]
:rtype: google.cloud.dlp_v2.types.InspectTemplate
"""
@@ -2699,26 +2456,18 @@ class CloudDLPUpdateJobTriggerOperator(BaseOperator):
:ref:`howto/operator:CloudDLPUpdateJobTriggerOperator`
:param job_trigger_id: The ID of the DLP job trigger to be updated.
- :type job_trigger_id: str
:param project_id: (Optional) Google Cloud project ID where the DLP Instance exists.
    If set to None or missing, the default project_id from the Google Cloud connection is used.
- :type project_id: str
:param job_trigger: New JobTrigger value.
- :type job_trigger: dict or google.cloud.dlp_v2.types.JobTrigger
:param update_mask: Mask to control which fields get updated.
- :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask
:param retry: (Optional) A retry object used to retry requests.
    If None is specified, requests will not be retried.
- :type retry: google.api_core.retry.Retry
:param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete.
    Note that if retry is specified, the timeout applies to each individual attempt.
- :type timeout: float
:param metadata: (Optional) Additional metadata that is provided to the method.
- :type metadata: sequence[tuple[str, str]]]
:param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
- :type gcp_conn_id: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or chained list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -2727,7 +2476,6 @@ class CloudDLPUpdateJobTriggerOperator(BaseOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
:rtype: google.cloud.dlp_v2.types.JobTrigger
"""
@@ -2792,30 +2540,21 @@ class CloudDLPUpdateStoredInfoTypeOperator(BaseOperator):
:ref:`howto/operator:CloudDLPUpdateStoredInfoTypeOperator`
:param stored_info_type_id: The ID of the stored info type to be updated.
- :type stored_info_type_id: str
:param organization_id: (Optional) The organization ID. Required to set this
    field if parent resource is an organization.
- :type organization_id: str
:param project_id: (Optional) Google Cloud project ID where the DLP Instance exists.
    Only set this field if the parent resource is a project instead of an organization.
- :type project_id: str
:param config: Updated configuration for the storedInfoType. If not provided, a new
    version of the storedInfoType will be created with the existing configuration.
- :type config: dict or google.cloud.dlp_v2.types.StoredInfoTypeConfig
:param update_mask: Mask to control which fields get updated.
- :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask
:param retry: (Optional) A retry object used to retry requests.
    If None is specified, requests will not be retried.
- :type retry: google.api_core.retry.Retry
:param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete.
    Note that if retry is specified, the timeout applies to each individual attempt.
- :type timeout: float
:param metadata: (Optional) Additional metadata that is provided to the method.
- :type metadata: sequence[tuple[str, str]]]
:param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
- :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -2824,7 +2563,6 @@ class CloudDLPUpdateStoredInfoTypeOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.dlp_v2.types.StoredInfoType """ diff --git a/airflow/providers/google/cloud/operators/functions.py b/airflow/providers/google/cloud/operators/functions.py index a204e4a24522b..6c84a1fdb60fa 100644 --- a/airflow/providers/google/cloud/operators/functions.py +++ b/airflow/providers/google/cloud/operators/functions.py @@ -107,29 +107,22 @@ class CloudFunctionDeployFunctionOperator(BaseOperator): :ref:`howto/operator:CloudFunctionDeployFunctionOperator` :param location: Google Cloud region where the function should be created. - :type location: str :param body: Body of the Cloud Functions definition. The body must be a Cloud Functions dictionary as described in: https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions . Different API versions require different variants of the Cloud Functions dictionary. - :type body: dict or google.cloud.functions.v1.CloudFunction :param project_id: (Optional) Google Cloud project ID where the function should be created. - :type project_id: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. Default 'google_cloud_default'. - :type gcp_conn_id: str :param api_version: (Optional) API version used (for example v1 - default - or v1beta1). - :type api_version: str :param zip_path: Path to zip file containing source code of the function. If the path is set, the sourceUploadUrl should not be specified in the body or it should be empty. Then the zip file will be uploaded using the upload URL generated via generateUploadUrl from the Cloud Functions API. - :type zip_path: str :param validate_body: If set to False, body validation is not performed. - :type validate_body: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -138,7 +131,6 @@ class CloudFunctionDeployFunctionOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcf_function_deploy_template_fields] @@ -255,12 +247,10 @@ class ZipPathPreprocessor: Function API method. :param body: Body passed to the create/update method calls. - :type body: dict :param zip_path: (optional) Path to zip file containing source code of the function. If the path is set, the sourceUploadUrl should not be specified in the body or it should be empty. Then the zip file will be uploaded using the upload URL generated via generateUploadUrl from the Cloud Functions API. 
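For the deploy operator documented above, a minimal sketch within the same DAG context. The function ``body`` fields follow the v1 ``projects.locations.functions`` resource, and all names and paths are hypothetical::

    from airflow.providers.google.cloud.operators.functions import CloudFunctionDeployFunctionOperator

    deploy_fn = CloudFunctionDeployFunctionOperator(
        task_id="deploy_function",
        location="europe-west1",
        project_id="my-gcp-project",
        body={
            "name": "projects/my-gcp-project/locations/europe-west1/functions/hello-gcf",
            "entryPoint": "hello",  # callable inside the zipped source
            "runtime": "python39",
            "httpsTrigger": {},
        },
        zip_path="/files/hello-gcf.zip",  # sourceUploadUrl is then generated via generateUploadUrl
        validate_body=True,
    )

Because ``zip_path`` is set, ``sourceUploadUrl`` is deliberately left out of the body, matching the rule stated in the docstring above.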
- :type zip_path: str
"""
@@ -336,11 +326,8 @@ class CloudFunctionDeleteFunctionOperator(BaseOperator):
:param name: A fully-qualified function name, matching the pattern:
    `^projects/[^/]+/locations/[^/]+/functions/[^/]+$`
- :type name: str
:param gcp_conn_id: The connection ID to use to connect to Google Cloud.
- :type gcp_conn_id: str
:param api_version: API version used (for example v1 or v1beta1).
- :type api_version: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or chained list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -349,7 +336,6 @@ class CloudFunctionDeleteFunctionOperator(BaseOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
"""
# [START gcf_function_delete_template_fields]
@@ -413,14 +399,10 @@ class CloudFunctionInvokeFunctionOperator(BaseOperator):
:ref:`howto/operator:CloudFunctionInvokeFunctionOperator`
:param function_id: ID of the function to be called
- :type function_id: str
:param input_data: Input to be passed to the function
- :type input_data: Dict
:param location: The location where the function is located.
- :type location: str
:param project_id: Optional, Google Cloud Project project_id where the function belongs.
    If set to None or missing, the default project_id from the Google Cloud connection is used.
- :type project_id: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or chained list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -429,7 +411,6 @@ class CloudFunctionInvokeFunctionOperator(BaseOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
:return: None
"""
diff --git a/airflow/providers/google/cloud/operators/gcs.py b/airflow/providers/google/cloud/operators/gcs.py
index c9fd6d885e790..963b429a86002 100644
--- a/airflow/providers/google/cloud/operators/gcs.py
+++ b/airflow/providers/google/cloud/operators/gcs.py
@@ -47,11 +47,9 @@ class GCSCreateBucketOperator(BaseOperator):
    https://cloud.google.com/storage/docs/bucketnaming.html#requirements
:param bucket_name: The name of the bucket. (templated)
- :type bucket_name: str
:param resource: An optional dict with parameters for creating the bucket.
    For information on available parameters, see Cloud Storage API doc:
    https://cloud.google.com/storage/docs/json_api/v1/buckets/insert
- :type resource: dict
:param storage_class: This defines how objects in the bucket are stored
    and determines the SLA and the cost of storage (templated). Values include
@@ -63,27 +61,20 @@ class GCSCreateBucketOperator(BaseOperator):
    If this value is not specified when the bucket is created, it will default to STANDARD.
- :type storage_class: str
:param location: The location of the bucket. (templated)
    Object data for objects in the bucket resides in physical storage within this region.
    Defaults to US.
    .. seealso:: https://developers.google.com/storage/docs/bucket-locations
- :type location: str
:param project_id: The ID of the Google Cloud Project. (templated)
- :type project_id: str
:param labels: User-provided labels, in key/value pairs.
- :type labels: dict
:param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
- :type gcp_conn_id: str
:param google_cloud_storage_conn_id: (Deprecated) The connection ID used to connect to Google Cloud.
    This parameter has been deprecated. You should pass the gcp_conn_id parameter instead.
- :type google_cloud_storage_conn_id: str
:param delegate_to: The account to impersonate using domain-wide delegation of authority,
    if any. For this to work, the service account making the request must have
    domain-wide delegation enabled.
- :type delegate_to: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or chained list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -92,7 +83,6 @@ class GCSCreateBucketOperator(BaseOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
    The following Operator would create a new bucket ``test-bucket``
    with ``MULTI_REGIONAL`` storage class in ``EU`` region
@@ -182,23 +172,17 @@ class GCSListObjectsOperator(BaseOperator):
    `xcom` in the downstream task.
:param bucket: The Google Cloud Storage bucket to find the objects. (templated)
- :type bucket: str
:param prefix: Prefix string which filters objects whose name begin with this prefix. (templated)
- :type prefix: str
:param delimiter: The delimiter by which you want to filter the objects. (templated)
    For example, to list the CSV files in a directory in GCS you would use delimiter='.csv'.
- :type delimiter: str
:param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
- :type gcp_conn_id: str
:param google_cloud_storage_conn_id: (Deprecated) The connection ID used to connect to Google Cloud.
    This parameter has been deprecated. You should pass the gcp_conn_id parameter instead.
- :type google_cloud_storage_conn_id:
:param delegate_to: The account to impersonate using domain-wide delegation of authority,
    if any. For this to work, the service account making the request must have
    domain-wide delegation enabled.
- :type delegate_to: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or chained list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -207,7 +191,6 @@ class GCSListObjectsOperator(BaseOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
**Example**:
    The following Operator would list all the Avro files from ``sales/sales-2017``
@@ -286,21 +269,16 @@ class GCSDeleteObjectsOperator(BaseOperator):
    matching a prefix.
:param bucket_name: The GCS bucket to delete from
- :type bucket_name: str
:param objects: List of objects to delete.
These should be the names of objects in the bucket, not including gs://bucket/ - :type objects: Iterable[str] :param prefix: Prefix of objects to delete. All objects matching this prefix in the bucket will be deleted. :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param google_cloud_storage_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type google_cloud_storage_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -309,7 +287,6 @@ class GCSDeleteObjectsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -379,22 +356,16 @@ class GCSBucketCreateAclEntryOperator(BaseOperator): :ref:`howto/operator:GCSBucketCreateAclEntryOperator` :param bucket: Name of a bucket. - :type bucket: str :param entity: The entity holding the permission, in one of the following forms: user-userId, user-email, group-groupId, group-email, domain-domain, project-team-projectId, allUsers, allAuthenticatedUsers - :type entity: str :param role: The access permission for the entity. Acceptable values are: "OWNER", "READER", "WRITER". - :type role: str :param user_project: (Optional) The project to be billed for this request. Required for Requester Pays buckets. - :type user_project: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param google_cloud_storage_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type google_cloud_storage_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -403,7 +374,6 @@ class GCSBucketCreateAclEntryOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcs_bucket_create_acl_template_fields] @@ -465,28 +435,20 @@ class GCSObjectCreateAclEntryOperator(BaseOperator): :ref:`howto/operator:GCSObjectCreateAclEntryOperator` :param bucket: Name of a bucket. - :type bucket: str :param object_name: Name of the object. 
    For information about how to URL encode object names to be path safe, see:
    https://cloud.google.com/storage/docs/json_api/#encoding
- :type object_name: str
:param entity: The entity holding the permission, in one of the following forms:
    user-userId, user-email, group-groupId, group-email, domain-domain,
    project-team-projectId, allUsers, allAuthenticatedUsers
- :type entity: str
:param role: The access permission for the entity. Acceptable values are:
    "OWNER", "READER".
- :type role: str
:param generation: Optional. If present, selects a specific revision of this object.
- :type generation: long
:param user_project: (Optional) The project to be billed for this request.
    Required for Requester Pays buckets.
- :type user_project: str
:param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
- :type gcp_conn_id: str
:param google_cloud_storage_conn_id: (Deprecated) The connection ID used to connect to Google Cloud.
    This parameter has been deprecated. You should pass the gcp_conn_id parameter instead.
- :type google_cloud_storage_conn_id: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or chained list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -495,7 +457,6 @@ class GCSObjectCreateAclEntryOperator(BaseOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
"""
# [START gcs_object_create_acl_template_fields]
@@ -574,14 +535,10 @@ class GCSFileTransformOperator(BaseOperator):
    destination file.
:param source_bucket: The bucket to fetch data from. (templated)
- :type source_bucket: str
:param destination_bucket: The bucket to write data to. (templated)
- :type destination_bucket: str
:param transform_script: location of the executable transformation script or list of arguments
    passed to subprocess ex. `['python', 'script.py', 10]`. (templated)
- :type transform_script: Union[str, List[str]]
:param gcp_conn_id: The connection ID to use connecting to Google Cloud.
- :type gcp_conn_id: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or chained list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -590,7 +547,6 @@ class GCSFileTransformOperator(BaseOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
"""
template_fields: Sequence[str] = (
@@ -678,13 +634,10 @@ class GCSTimeSpanFileTransformOperator(BaseOperator):
    destination file.
:param source_bucket: The bucket to fetch data from. (templated)
- :type source_bucket: str
:param source_prefix: Prefix string which filters objects whose name begin with this prefix.
    Can interpolate execution date and time components. (templated)
- :type source_prefix: str
:param source_gcp_conn_id: The connection ID to use connecting to Google Cloud
    to download files to be processed.
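A sketch of GCSFileTransformOperator as documented above. The ``source_object`` and ``destination_object`` parameters are assumed from the operator's full signature (they are not shown in this hunk), and all bucket and script names are placeholders::

    from airflow.providers.google.cloud.operators.gcs import GCSFileTransformOperator

    transform_file = GCSFileTransformOperator(
        task_id="transform_file",
        source_bucket="raw-data",                     # placeholder source bucket
        source_object="incoming/records.csv",         # assumed parameter, not in this hunk
        destination_bucket="processed-data",          # placeholder destination bucket
        destination_object="clean/records.csv",       # assumed parameter, not in this hunk
        transform_script=["python", "transform.py"],  # argv passed to subprocess
    )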
- :type source_gcp_conn_id: str
:param source_impersonation_chain: Optional service account to impersonate using short-term
    credentials (to download files to be processed), or chained list of accounts required to
    get the access_token of the last account in the list, which will be impersonated in the
@@ -693,16 +646,12 @@ class GCSTimeSpanFileTransformOperator(BaseOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type source_impersonation_chain: Union[str, Sequence[str]]
:param destination_bucket: The bucket to write data to. (templated)
- :type destination_bucket: str
:param destination_prefix: Prefix string for the upload location.
    Can interpolate execution date and time components. (templated)
- :type destination_prefix: str
:param destination_gcp_conn_id: The connection ID to use connecting to Google Cloud
    to upload processed files.
- :type destination_gcp_conn_id: str
:param destination_impersonation_chain: Optional service account to impersonate using short-term
    credentials (to upload processed files), or chained list of accounts required to get the
    access_token of the last account in the list, which will be impersonated in the request.
@@ -711,27 +660,20 @@ class GCSTimeSpanFileTransformOperator(BaseOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type destination_impersonation_chain: Union[str, Sequence[str]]
:param transform_script: location of the executable transformation script or list of arguments
    passed to subprocess ex. `['python', 'script.py', 10]`. (templated)
- :type transform_script: Union[str, List[str]]
:param chunk_size: The size of a chunk of data when downloading or uploading (in bytes).
    This must be a multiple of 256 KB (per the Google Cloud Storage API specification).
- :type chunk_size: Optional[int]
:param download_continue_on_fail: With this set to true, if a download fails the task
    does not error out but will still continue.
- :type download_num_attempts: int
:param upload_chunk_size: The size of a chunk of data when uploading (in bytes).
    This must be a multiple of 256 KB (per the Google Cloud Storage API specification).
- :type download_chunk_size: Optional[int]
:param upload_continue_on_fail: With this set to true, if an upload fails the task
    does not error out but will still continue.
- :type download_chunk_size: Optional[bool]
:param upload_num_attempts: Number of attempts to try to upload a single file.
- :type upload_num_attempts: int
"""
template_fields: Sequence[str] = (
@@ -749,9 +691,7 @@ def interpolate_prefix(prefix: str, dt: datetime.datetime) -> Optional[str]:
"""Interpolate prefix with datetime.
:param prefix: The prefix to interpolate
- :type prefix: str
:param dt: The datetime to interpolate
- :type dt: datetime
"""
return dt.strftime(prefix) if prefix else None
@@ -923,12 +863,10 @@ class GCSDeleteBucketOperator(BaseOperator):
:ref:`howto/operator:GCSDeleteBucketOperator`
:param bucket_name: name of the bucket which will be deleted
- :type bucket_name: str
:param force: if False, deleting a non-empty bucket is not allowed; set force=True
    to allow deletion of a non-empty bucket
    :type: bool
:param gcp_conn_id: The connection ID to use connecting to Google Cloud.
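The ``force`` flag above combines with the bucket name as in this minimal sketch (bucket name hypothetical)::

    from airflow.providers.google.cloud.operators.gcs import GCSDeleteBucketOperator

    delete_bucket = GCSDeleteBucketOperator(
        task_id="delete_bucket",
        bucket_name="scratch-bucket",  # placeholder
        force=False,                   # fail instead of deleting a non-empty bucket
    )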
- :type gcp_conn_id: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or chained list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -937,7 +875,6 @@ class GCSDeleteBucketOperator(BaseOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
"""
template_fields: Sequence[str] = (
@@ -983,31 +920,22 @@ class GCSSynchronizeBucketsOperator(BaseOperator):
:ref:`howto/operator:GCSSynchronizeBuckets`
:param source_bucket: The name of the bucket containing the source objects.
- :type source_bucket: str
:param destination_bucket: The name of the bucket containing the destination objects.
- :type destination_bucket: str
:param source_object: The root sync directory in the source bucket.
- :type source_object: Optional[str]
:param destination_object: The root sync directory in the destination bucket.
- :type destination_object: Optional[str]
:param recursive: If True, subdirectories will be considered
- :type recursive: bool
:param allow_overwrite: if True, the files will be overwritten if a mismatched file is found.
    By default, overwriting files is not allowed
- :type allow_overwrite: bool
:param delete_extra_files: if True, deletes additional files from the source that are not found
    in the destination. By default extra files are not deleted.
    .. note:: This option can delete data quickly if you specify the wrong source/destination combination.
- :type delete_extra_files: bool
:param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
- :type gcp_conn_id: str
:param delegate_to: The account to impersonate using domain-wide delegation of authority,
    if any. For this to work, the service account making the request must have
    domain-wide delegation enabled.
- :type delegate_to: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or chained list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -1016,7 +944,6 @@ class GCSSynchronizeBucketsOperator(BaseOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
"""
template_fields: Sequence[str] = (
diff --git a/airflow/providers/google/cloud/operators/kubernetes_engine.py b/airflow/providers/google/cloud/operators/kubernetes_engine.py
index bd0ab1b051a11..61725621e22d6 100644
--- a/airflow/providers/google/cloud/operators/kubernetes_engine.py
+++ b/airflow/providers/google/cloud/operators/kubernetes_engine.py
@@ -60,16 +60,11 @@ class GKEDeleteClusterOperator(BaseOperator):
:ref:`howto/operator:GKEDeleteClusterOperator`
:param project_id: The Google Developers Console [project ID or project number]
- :type project_id: str
:param name: The name of the resource to delete, in this case cluster name
- :type name: str
:param location: The name of the Google Compute Engine zone or region in which the cluster resides.
- :type location: str
:param gcp_conn_id: The connection ID to use connecting to Google Cloud.
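The synchronization flags documented above combine as in this sketch (bucket and directory names hypothetical)::

    from airflow.providers.google.cloud.operators.gcs import GCSSynchronizeBucketsOperator

    sync_buckets = GCSSynchronizeBucketsOperator(
        task_id="sync_buckets",
        source_bucket="primary-bucket",
        destination_bucket="backup-bucket",
        source_object="reports/",       # root sync directory in the source bucket
        destination_object="reports/",  # root sync directory in the destination bucket
        recursive=True,                 # include subdirectories
        allow_overwrite=False,          # leave mismatched destination files untouched
        delete_extra_files=False,       # never delete extra files
    )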
- :type gcp_conn_id: str
:param api_version: The api version to use
- :type api_version: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or chained list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -78,7 +73,6 @@ class GKEDeleteClusterOperator(BaseOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
"""
template_fields: Sequence[str] = (
@@ -161,17 +155,12 @@ class GKECreateClusterOperator(BaseOperator):
:ref:`howto/operator:GKECreateClusterOperator`
:param project_id: The Google Developers Console [project ID or project number]
- :type project_id: str
:param location: The name of the Google Compute Engine zone or region in which the cluster resides.
- :type location: str
:param body: The Cluster definition to create, can be protobuf or python dict, if dict it must
    match protobuf message Cluster
- :type body: dict or google.cloud.container_v1.types.Cluster
:param gcp_conn_id: The connection ID to use connecting to Google Cloud.
- :type gcp_conn_id: str
:param api_version: The api version to use
- :type api_version: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or chained list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -180,7 +169,6 @@ class GKECreateClusterOperator(BaseOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
"""
template_fields: Sequence[str] = (
@@ -279,17 +267,12 @@ class GKEStartPodOperator(KubernetesPodOperator):
:param location: The name of the Google Kubernetes Engine zone or region in which the
    cluster resides, e.g. 'us-central1-a'
- :type location: str
:param cluster_name: The name of the Google Kubernetes Engine cluster the pod should be spawned in
- :type cluster_name: str
:param use_internal_ip: Use the internal IP address as the endpoint.
- :type use_internal_ip: bool
:param project_id: The Google Developers Console project id
- :type project_id: str
:param gcp_conn_id: The google cloud connection id to use. This allows for users to specify a service account.
- :type gcp_conn_id: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -298,14 +281,11 @@ class GKEStartPodOperator(KubernetesPodOperator):
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
:param regional: The location param is region name.
- :type regional: bool
:param is_delete_operator_pod: What to do when the pod reaches its final
    state, or the execution is interrupted. If True, delete the pod; if False, leave the pod.
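A sketch of GKEStartPodOperator as documented above; ``name``, ``namespace``, and ``image`` come from the KubernetesPodOperator base class (not shown in this hunk), and the project, zone, and cluster values are placeholders::

    from airflow.providers.google.cloud.operators.kubernetes_engine import GKEStartPodOperator

    start_pod = GKEStartPodOperator(
        task_id="run_pod",
        project_id="my-gcp-project",
        location="us-central1-a",     # a zone here; a region requires regional=True
        cluster_name="my-cluster",
        name="airflow-test-pod",      # inherited from KubernetesPodOperator
        namespace="default",          # inherited from KubernetesPodOperator
        image="perl",                 # inherited from KubernetesPodOperator
        is_delete_operator_pod=True,  # delete the pod once it reaches a final state
    )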
Current default is False, but this will be changed in the next major release of this provider. - :type is_delete_operator_pod: bool """ template_fields: Sequence[str] = tuple( diff --git a/airflow/providers/google/cloud/operators/life_sciences.py b/airflow/providers/google/cloud/operators/life_sciences.py index 103e506b7fbd4..a3fc1b3ff5ecd 100644 --- a/airflow/providers/google/cloud/operators/life_sciences.py +++ b/airflow/providers/google/cloud/operators/life_sciences.py @@ -36,16 +36,11 @@ class LifeSciencesRunPipelineOperator(BaseOperator): :ref:`howto/operator:LifeSciencesRunPipelineOperator` :param body: The request body - :type body: dict :param location: The location of the project - :type location: str :param project_id: ID of the Google Cloud project if None then default project_id is used. - :type project_id: str :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (for example v2beta). - :type api_version: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -54,7 +49,6 @@ class LifeSciencesRunPipelineOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/operators/mlengine.py b/airflow/providers/google/cloud/operators/mlengine.py index ecd813d24c674..ae038dc9255ce 100644 --- a/airflow/providers/google/cloud/operators/mlengine.py +++ b/airflow/providers/google/cloud/operators/mlengine.py @@ -41,7 +41,6 @@ def _normalize_mlengine_job_id(job_id: str) -> str: character. :param job_id: A job_id str that may have invalid characters. - :type job_id: str: :return: A valid job_id representation. :rtype: str """ @@ -101,58 +100,43 @@ class MLEngineStartBatchPredictionJobOperator(BaseOperator): :param job_id: A unique id for the prediction job on Google Cloud ML Engine. (templated) - :type job_id: str :param data_format: The format of the input data. It will default to 'DATA_FORMAT_UNSPECIFIED' if is not provided or is not one of ["TEXT", "TF_RECORD", "TF_RECORD_GZIP"]. - :type data_format: str :param input_paths: A list of GCS paths of input data for batch prediction. Accepting wildcard operator ``*``, but only at the end. (templated) - :type input_paths: list[str] :param output_path: The GCS path where the prediction results are written to. (templated) - :type output_path: str :param region: The Google Compute Engine region to run the prediction job in. (templated) - :type region: str :param model_name: The Google Cloud ML Engine model to use for prediction. If version_name is not provided, the default version of this model will be used. Should not be None if version_name is provided. Should be None if uri is provided. (templated) - :type model_name: str :param version_name: The Google Cloud ML Engine model version to use for prediction. Should be None if uri is provided. (templated) - :type version_name: str :param uri: The GCS path of the saved model to use for prediction. Should be None if model_name is provided. It should be a GCS path pointing to a tensorflow SavedModel. 
(templated) - :type uri: str :param max_worker_count: The maximum number of workers to be used for parallel processing. Defaults to 10 if not specified. Should be a string representing the worker count ("10" instead of 10, "50" instead of 50, etc.) - :type max_worker_count: str :param runtime_version: The Google Cloud ML Engine runtime version to use for batch prediction. - :type runtime_version: str :param signature_name: The name of the signature defined in the SavedModel to use for this job. - :type signature_name: str :param project_id: The Google Cloud project name where the prediction job is submitted. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :param gcp_conn_id: The connection ID used for connection to Google Cloud Platform. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param labels: a dictionary containing labels for the job; passed to BigQuery - :type labels: Dict[str, str] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -161,7 +145,6 @@ class MLEngineStartBatchPredictionJobOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :raises: ``ValueError``: if a unique model/version origin cannot be determined. @@ -309,22 +292,17 @@ class MLEngineManageModelOperator(BaseOperator): If the `operation` is `get`, the `model` parameter should contain the `name` of the model. - :type model: dict :param operation: The operation to perform. Available operations are: * ``create``: Creates a new model as provided by the `model` parameter. * ``get``: Gets a particular model where the name is specified in `model`. - :type operation: str :param project_id: The Google Cloud project name to which MLEngine model belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -333,7 +311,6 @@ class MLEngineManageModelOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
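Putting the model-origin rules above together, a sketch of MLEngineStartBatchPredictionJobOperator that predicts against a model name, so ``uri`` and ``version_name`` stay unset; paths and IDs are placeholders::

    from airflow.providers.google.cloud.operators.mlengine import MLEngineStartBatchPredictionJobOperator

    batch_predict = MLEngineStartBatchPredictionJobOperator(
        task_id="batch_predict",
        job_id="predict_{{ ds_nodash }}",         # templated unique job id
        project_id="my-gcp-project",
        region="us-central1",
        data_format="TEXT",
        input_paths=["gs://my-bucket/inputs/*"],  # wildcard allowed only at the end
        output_path="gs://my-bucket/outputs/",
        model_name="my_model",                    # the default version of this model is used
    )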
- :type impersonation_chain: Union[str, Sequence[str]]
"""
template_fields: Sequence[str] = (
@@ -394,17 +371,13 @@ class MLEngineCreateModelOperator(BaseOperator):
    The model should be provided by the `model` parameter.
:param model: A dictionary containing the information about the model.
- :type model: dict
:param project_id: The Google Cloud project name to which MLEngine model belongs.
    If set to None or missing, the default project_id from the Google Cloud connection is used. (templated)
- :type project_id: str
:param gcp_conn_id: The connection ID to use when fetching connection info.
- :type gcp_conn_id: str
:param delegate_to: The account to impersonate using domain-wide delegation of authority,
    if any. For this to work, the service account making the request must have
    domain-wide delegation enabled.
- :type delegate_to: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or chained list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -413,7 +386,6 @@
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
"""
template_fields: Sequence[str] = (
@@ -459,17 +431,13 @@ class MLEngineGetModelOperator(BaseOperator):
    The name of model should be specified in `model_name`.
:param model_name: The name of the model.
- :type model_name: str
:param project_id: The Google Cloud project name to which MLEngine model belongs.
    If set to None or missing, the default project_id from the Google Cloud connection is used. (templated)
- :type project_id: str
:param gcp_conn_id: The connection ID to use when fetching connection info.
- :type gcp_conn_id: str
:param delegate_to: The account to impersonate using domain-wide delegation of authority,
    if any. For this to work, the service account making the request must have
    domain-wide delegation enabled.
- :type delegate_to: str
:param impersonation_chain: Optional service account to impersonate using short-term
    credentials, or chained list of accounts required to get the access_token
    of the last account in the list, which will be impersonated in the request.
@@ -478,7 +446,6 @@
    If set as a sequence, the identities from the list must grant
    Service Account Token Creator IAM role to the directly preceding identity, with first
    account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]]
"""
template_fields: Sequence[str] = (
@@ -524,21 +491,16 @@ class MLEngineDeleteModelOperator(BaseOperator):
    The model should be provided by the `model_name` parameter.
:param model_name: The name of the model.
- :type model_name: str
:param delete_contents: (Optional) Whether to force the deletion even if the model is not empty.
    Will delete all versions (if any) of the model if set to True. The default value is False.
- :type delete_contents: bool
:param project_id: The Google Cloud project name to which MLEngine model belongs.
    If set to None or missing, the default project_id from the Google Cloud connection is used. (templated)
- :type project_id: str
:param gcp_conn_id: The connection ID to use when fetching connection info.
- :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -547,7 +509,6 @@ class MLEngineDeleteModelOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -598,19 +559,16 @@ class MLEngineManageVersionOperator(BaseOperator): :param model_name: The name of the Google Cloud ML Engine model that the version belongs to. (templated) - :type model_name: str :param version_name: A name to use for the version being operated upon. If not None and the `version` argument is None or does not have a value for the `name` key, then this will be populated in the payload for the `name` key. (templated) - :type version_name: str :param version: A dictionary containing the information about the version. If the `operation` is `create`, `version` should contain all the information about this version such as name, and deploymentUrl. If the `operation` is `get` or `delete`, the `version` parameter should contain the `name` of the version. If it is None, the only `operation` possible would be `list`. (templated) - :type version: dict :param operation: The operation to perform. Available operations are: * ``create``: Creates a new version in the model specified by `model_name`, @@ -629,17 +587,13 @@ class MLEngineManageVersionOperator(BaseOperator): model specified by `model_name`). The name of the version should be specified in the `version` parameter. - :type operation: str :param project_id: The Google Cloud project name to which MLEngine model belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -648,7 +602,6 @@ class MLEngineManageVersionOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -731,19 +684,14 @@ class MLEngineCreateVersionOperator(BaseOperator): information to create that version :param model_name: The name of the Google Cloud ML Engine model that the version belongs to. 
(templated) - :type model_name: str :param version: A dictionary containing the information about the version. (templated) - :type version: dict :param project_id: The Google Cloud project name to which MLEngine model belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -752,7 +700,6 @@ class MLEngineCreateVersionOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -814,19 +761,14 @@ class MLEngineSetDefaultVersionOperator(BaseOperator): specified in the `version_name` parameter. :param model_name: The name of the Google Cloud ML Engine model that the version belongs to. (templated) - :type model_name: str :param version_name: A name to use for the version being operated upon. (templated) - :type version_name: str :param project_id: The Google Cloud project name to which MLEngine model belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -835,7 +777,6 @@ class MLEngineSetDefaultVersionOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -897,17 +838,13 @@ class MLEngineListVersionsOperator(BaseOperator): :param model_name: The name of the Google Cloud ML Engine model that the version belongs to. (templated) - :type model_name: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param project_id: The Google Cloud project name to which MLEngine model belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. 
- :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -916,7 +853,6 @@ class MLEngineListVersionsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -974,18 +910,13 @@ class MLEngineDeleteVersionOperator(BaseOperator): :param model_name: The name of the Google Cloud ML Engine model that the version belongs to. (templated) - :type model_name: str :param version_name: A name to use for the version being operated upon. (templated) - :type version_name: str :param project_id: The Google Cloud project name to which MLEngine model belongs. - :type project_id: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -994,7 +925,6 @@ class MLEngineDeleteVersionOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1070,68 +1000,50 @@ class MLEngineStartTrainingJobOperator(BaseOperator): :param job_id: A unique templated id for the submitted Google MLEngine training job. (templated) - :type job_id: str :param region: The Google Compute Engine region to run the MLEngine training job in (templated). - :type region: str :param package_uris: A list of Python package locations for the training job, which should include the main training program and any additional dependencies. This is mutually exclusive with a custom image specified via master_config. (templated) - :type package_uris: List[str] :param training_python_module: The name of the Python module to run within the training job after installing the packages. This is mutually exclusive with a custom image specified via master_config. (templated) - :type training_python_module: str :param training_args: A list of command-line arguments to pass to the training program. (templated) - :type training_args: List[str] :param scale_tier: Resource tier for MLEngine training job. (templated) - :type scale_tier: str :param master_type: The type of virtual machine to use for the master worker. It must be set whenever scale_tier is CUSTOM. (templated) - :type master_type: str :param master_config: The configuration for the master worker. If this is provided, master_type must be set as well. If a custom image is specified, this is mutually exclusive with package_uris and training_python_module. 
(templated) - :type master_config: dict :param runtime_version: The Google Cloud ML runtime version to use for training. (templated) - :type runtime_version: str :param python_version: The version of Python used in training. (templated) - :type python_version: str :param job_dir: A Google Cloud Storage path in which to store training outputs and other data needed for training. (templated) - :type job_dir: str :param service_account: Optional service account to use when running the training application. (templated) The specified service account must have the `iam.serviceAccounts.actAs` role. The Google-managed Cloud ML Engine service account must have the `iam.serviceAccountAdmin` role for the specified service account. If set to None or missing, the Google-managed Cloud ML Engine service account will be used. - :type service_account: str :param project_id: The Google Cloud project name within which MLEngine training job should run. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param mode: Can be one of 'DRY_RUN'/'CLOUD'. In 'DRY_RUN' mode, no real training job will be launched, but the MLEngine training job request will be printed out. In 'CLOUD' mode, a real MLEngine training job creation request will be issued. - :type mode: str :param labels: a dictionary containing labels for the job; passed to the MLEngine training job - :type labels: Dict[str, str] :param hyperparameters: Optional HyperparameterSpec dictionary for hyperparameter tuning. For further reference, check: https://cloud.google.com/ai-platform/training/docs/reference/rest/v1/projects.jobs#HyperparameterSpec - :type hyperparameters: Dict :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1140,7 +1052,6 @@ class MLEngineStartTrainingJobOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1324,17 +1235,13 @@ class MLEngineTrainingCancelJobOperator(BaseOperator): :param job_id: A unique templated id for the submitted Google MLEngine training job. (templated) - :type job_id: str :param project_id: The Google Cloud project name within which MLEngine training job should run. If set to None or missing, the default project_id from the Google Cloud connection is used. (templated) - :type project_id: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled.
- :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1343,7 +1250,6 @@ class MLEngineTrainingCancelJobOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/operators/natural_language.py b/airflow/providers/google/cloud/operators/natural_language.py index 25cba93ee5321..0d10b9f0bcdb6 100644 --- a/airflow/providers/google/cloud/operators/natural_language.py +++ b/airflow/providers/google/cloud/operators/natural_language.py @@ -44,18 +44,13 @@ class CloudNaturalLanguageAnalyzeEntitiesOperator(BaseOperator): :param document: Input document. If a dict is provided, it must be of the same form as the protobuf message Document - :type document: dict or google.cloud.language_v1.types.Document :param encoding_type: The encoding type used by the API to calculate offsets. - :type encoding_type: google.cloud.language_v1.enums.EncodingType :param retry: A retry object used to retry requests. If None is specified, requests will not be retried. :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -64,7 +59,6 @@ class CloudNaturalLanguageAnalyzeEntitiesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START natural_language_analyze_entities_template_fields] @@ -122,18 +116,13 @@ class CloudNaturalLanguageAnalyzeEntitySentimentOperator(BaseOperator): :param document: Input document. If a dict is provided, it must be of the same form as the protobuf message Document - :type document: dict or google.cloud.language_v1.types.Document :param encoding_type: The encoding type used by the API to calculate offsets. - :type encoding_type: google.cloud.language_v1.enums.EncodingType :param retry: A retry object used to retry requests. If None is specified, requests will not be retried. :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. 
- :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -142,7 +131,6 @@ class CloudNaturalLanguageAnalyzeEntitySentimentOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.language_v1.types.AnalyzeEntitySentimentResponse """ @@ -205,18 +193,13 @@ class CloudNaturalLanguageAnalyzeSentimentOperator(BaseOperator): :param document: Input document. If a dict is provided, it must be of the same form as the protobuf message Document - :type document: dict or google.cloud.language_v1.types.Document :param encoding_type: The encoding type used by the API to calculate offsets. - :type encoding_type: google.cloud.language_v1.enums.EncodingType :param retry: A retry object used to retry requests. If None is specified, requests will not be retried. :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -225,7 +208,6 @@ class CloudNaturalLanguageAnalyzeSentimentOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.language_v1.types.AnalyzeSentimentResponse """ @@ -284,16 +266,12 @@ class CloudNaturalLanguageClassifyTextOperator(BaseOperator): :param document: Input document. If a dict is provided, it must be of the same form as the protobuf message Document - :type document: dict or google.cloud.language_v1.types.Document :param retry: A retry object used to retry requests. If None is specified, requests will not be retried. :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request.
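For reference, these Natural Language operators are typically wired up as follows. This is a minimal sketch, assuming google-cloud-language v2-style imports and placeholder sample text; a plain dict of the same shape as the ``Document`` protobuf is accepted as well:

.. code-block:: python

    from google.cloud.language_v1 import Document

    from airflow.providers.google.cloud.operators.natural_language import (
        CloudNaturalLanguageAnalyzeEntitiesOperator,
    )

    # ``document`` accepts either a Document protobuf or an equivalent dict.
    document = Document(
        content="Airflow is a platform to programmatically author workflows.",
        type_=Document.Type.PLAIN_TEXT,
    )

    analyze_entities = CloudNaturalLanguageAnalyzeEntitiesOperator(
        task_id="analyze_entities",
        document=document,
    )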
@@ -302,7 +280,6 @@ class CloudNaturalLanguageClassifyTextOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START natural_language_classify_text_template_fields] diff --git a/airflow/providers/google/cloud/operators/pubsub.py b/airflow/providers/google/cloud/operators/pubsub.py index ba09e450de5b8..567b6ee1d936d 100644 --- a/airflow/providers/google/cloud/operators/pubsub.py +++ b/airflow/providers/google/cloud/operators/pubsub.py @@ -15,7 +15,13 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""This module contains Google PubSub operators.""" +""" +This module contains Google PubSub operators. + +.. spelling:: + + MessageStoragePolicy +""" import warnings from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Tuple, Union @@ -71,44 +77,33 @@ class PubSubCreateTopicOperator(BaseOperator): :param project_id: Optional, the Google Cloud project ID where the topic will be created. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param topic: the topic to create. Do not include the full topic path. In other words, instead of ``projects/{project}/topics/{topic}``, provide only ``{topic}``. (templated) - :type topic: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param labels: Client-assigned labels; see https://cloud.google.com/pubsub/docs/labels - :type labels: Dict[str, str] :param message_storage_policy: Policy constraining the set of Google Cloud regions where messages published to the topic may be stored. If not present, then no constraints are in effect. - :type message_storage_policy: Union[Dict, google.cloud.pubsub_v1.types.MessageStoragePolicy] :param kms_key_name: The resource name of the Cloud KMS CryptoKey to be used to protect access to messages published on this topic. The expected format is ``projects/*/locations/*/keyRings/*/cryptoKeys/*``. - :type kms_key_name: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]]] :param project: (Deprecated) the Google Cloud project ID where the topic will be created - :type project: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
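The create-topic operator documented above takes the short topic name, not the full path. A minimal sketch, with a placeholder project ID (``gcp_conn_id`` falls back to the default Google Cloud connection):

.. code-block:: python

    from airflow.providers.google.cloud.operators.pubsub import PubSubCreateTopicOperator

    create_topic = PubSubCreateTopicOperator(
        task_id="create_topic",
        project_id="my-project",  # placeholder; omit to use the connection's default project
        topic="my-topic",  # short name only, not projects/{project}/topics/{topic}
    )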
@@ -117,7 +112,6 @@ class PubSubCreateTopicOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -242,88 +236,67 @@ class PubSubCreateSubscriptionOperator(BaseOperator): :param project_id: Optional, the Google Cloud project ID where the topic exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param topic: the topic to which the subscription will be attached. Do not include the full topic path. In other words, instead of ``projects/{project}/topics/{topic}``, provide only ``{topic}``. (templated) - :type topic: str :param subscription: the Pub/Sub subscription name. If empty, a random name will be generated using the uuid module - :type subscription: str :param subscription_project_id: the Google Cloud project ID where the subscription will be created. If empty, ``topic_project`` will be used. - :type subscription_project_id: str :param ack_deadline_secs: Number of seconds that a subscriber has to acknowledge each message pulled from the subscription - :type ack_deadline_secs: int :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param push_config: If push delivery is used with this subscription, this field is used to configure it. An empty ``pushConfig`` signifies that the subscriber will pull and ack messages using API methods. - :type push_config: Union[Dict, google.cloud.pubsub_v1.types.PushConfig] :param retain_acked_messages: Indicates whether to retain acknowledged messages. If true, then messages are not expunged from the subscription's backlog, even if they are acknowledged, until they fall out of the ``message_retention_duration`` window. This must be true if you would like to Seek to a timestamp. - :type retain_acked_messages: bool :param message_retention_duration: How long to retain unacknowledged messages in the subscription's backlog, from the moment a message is published. If ``retain_acked_messages`` is true, then this also configures the retention of acknowledged messages, and thus configures how far back in time a ``Seek`` can be done. Defaults to 7 days. Cannot be more than 7 days or less than 10 minutes. - :type message_retention_duration: Union[Dict, google.cloud.pubsub_v1.types.Duration] :param labels: Client-assigned labels; see https://cloud.google.com/pubsub/docs/labels - :type labels: Dict[str, str] :param enable_message_ordering: If true, messages published with the same ordering_key in PubsubMessage will be delivered to the subscribers in the order in which they are received by the Pub/Sub system. Otherwise, they may be delivered in any order. - :type enable_message_ordering: bool :param expiration_policy: A policy that specifies the conditions for this subscription’s expiration. A subscription is considered active as long as any connected subscriber is successfully consuming messages from the subscription or is issuing operations on the subscription. If expiration_policy is not set, a default policy with ttl of 31 days will be used.
The minimum allowed value for expiration_policy.ttl is 1 day. - :type expiration_policy: Union[Dict, google.cloud.pubsub_v1.types.ExpirationPolicy`] :param filter_: An expression written in the Cloud Pub/Sub filter language. If non-empty, then only PubsubMessages whose attributes field matches the filter are delivered on this subscription. If empty, then no messages are filtered out. - :type filter_: str :param dead_letter_policy: A policy that specifies the conditions for dead lettering messages in this subscription. If dead_letter_policy is not set, dead lettering is disabled. - :type dead_letter_policy: Union[Dict, google.cloud.pubsub_v1.types.DeadLetterPolicy] :param retry_policy: A policy that specifies how Pub/Sub retries message delivery for this subscription. If not set, the default retry policy is applied. This generally implies that messages will be retried as soon as possible for healthy subscribers. RetryPolicy will be triggered on NACKs or acknowledgement deadline exceeded events for a given message. - :type retry_policy: Union[Dict, google.cloud.pubsub_v1.types.RetryPolicy] :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]]] :param topic_project: (Deprecated) the Google Cloud project ID where the topic exists - :type topic_project: str :param subscription_project: (Deprecated) the Google Cloud project ID where the subscription will be created. If empty, ``topic_project`` will be used. - :type subscription_project: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -332,7 +305,6 @@ class PubSubCreateSubscriptionOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -478,33 +450,24 @@ class PubSubDeleteTopicOperator(BaseOperator): :param project_id: Optional, the Google Cloud project ID in which to work (templated). If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param topic: the topic to delete. Do not include the full topic path. In other words, instead of ``projects/{project}/topics/{topic}``, provide only ``{topic}``. (templated) - :type topic: str :param fail_if_not_exists: If True and the topic does not exist, fail the task - :type fail_if_not_exists: bool :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. 
- :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]]] :param project: (Deprecated) the Google Cloud project ID where the topic will be created - :type project: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -513,7 +476,6 @@ class PubSubDeleteTopicOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -610,33 +572,24 @@ class PubSubDeleteSubscriptionOperator(BaseOperator): :param project_id: Optional, the Google Cloud project ID in which to work (templated). If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param subscription: the subscription to delete. Do not include the full subscription path. In other words, instead of ``projects/{project}/subscription/{subscription}``, provide only ``{subscription}``. (templated) - :type subscription: str :param fail_if_not_exists: If True and the subscription does not exist, fail the task - :type fail_if_not_exists: bool :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]]] :param project: (Deprecated) the Google Cloud project ID where the topic will be created - :type project: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -645,7 +598,6 @@ class PubSubDeleteSubscriptionOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -738,12 +690,10 @@ class PubSubPublishMessageOperator(BaseOperator): :param project_id: Optional, the Google Cloud project ID in which to work (templated). If set to None or missing, the default project_id from the Google Cloud connection is used. 
- :type project_id: str :param topic: the topic to which to publish. Do not include the full topic path. In other words, instead of ``projects/{project}/topics/{topic}``, provide only ``{topic}``. (templated) - :type topic: str :param messages: a list of messages to be published to the topic. Each message is a dict with one or more of the following key-value mappings: @@ -752,16 +702,12 @@ class PubSubPublishMessageOperator(BaseOperator): Each message must contain at least a non-empty 'data' value or an attribute dict with at least one key (templated). See https://cloud.google.com/pubsub/docs/reference/rest/v1/PubsubMessage - :type messages: list :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param project: (Deprecated) the Google Cloud project ID where the topic will be created - :type project: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -770,7 +716,6 @@ class PubSubPublishMessageOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -846,29 +791,22 @@ class PubSubPullOperator(BaseOperator): variables in them. :param project: the Google Cloud project ID for the subscription (templated) - :type project: str :param subscription: the Pub/Sub subscription name. Do not include the full subscription path. - :type subscription: str :param max_messages: The maximum number of messages to retrieve per PubSub pull request - :type max_messages: int :param ack_messages: If True, each message will be acknowledged immediately rather than by any downstream tasks - :type ack_messages: bool :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param messages_callback: (Optional) Callback to process received messages. Its return value will be saved to XCom. If you are pulling large messages, you probably want to provide a custom callback. If not provided, the default implementation will convert `ReceivedMessage` objects into JSON-serializable dicts using the `google.protobuf.json_format.MessageToDict` function. - :type messages_callback: Optional[Callable[[List[ReceivedMessage], Dict[str, Any]], Any]] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request.
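The ``messages`` contract above is easiest to see in code. A minimal sketch, with placeholder project and topic names; each dict carries a non-empty ``'data'`` bytestring, an ``'attributes'`` dict with at least one key, or both:

.. code-block:: python

    from airflow.providers.google.cloud.operators.pubsub import PubSubPublishMessageOperator

    m1 = {"data": b"Hello, World!", "attributes": {"type": "greeting"}}  # data and attributes
    m2 = {"data": b"Knock, knock"}  # data only
    m3 = {"attributes": {"foo": ""}}  # attributes only

    publish = PubSubPublishMessageOperator(
        task_id="publish",
        project_id="my-project",  # placeholder
        topic="my-topic",
        messages=[m1, m2, m3],
    )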
@@ -877,7 +815,6 @@ class PubSubPullOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -946,7 +883,6 @@ def _default_message_callback( This default implementation converts `ReceivedMessage` objects into JSON-serializable dicts. :param pulled_messages: messages received from the topic. - :type pulled_messages: List[ReceivedMessage] :param context: same as in `execute` :return: value to be saved to XCom. """ diff --git a/airflow/providers/google/cloud/operators/spanner.py b/airflow/providers/google/cloud/operators/spanner.py index 12719078f4611..522cfd8303018 100644 --- a/airflow/providers/google/cloud/operators/spanner.py +++ b/airflow/providers/google/cloud/operators/spanner.py @@ -36,23 +36,17 @@ class SpannerDeployInstanceOperator(BaseOperator): :ref:`howto/operator:SpannerDeployInstanceOperator` :param instance_id: Cloud Spanner instance ID. - :type instance_id: str :param configuration_name: The name of the Cloud Spanner instance configuration defining how the instance will be created. Required for instances that do not yet exist. - :type configuration_name: str :param node_count: (Optional) The number of nodes allocated to the Cloud Spanner instance. - :type node_count: int :param display_name: (Optional) The display name for the Cloud Spanner instance in the Google Cloud Console. (Must be between 4 and 30 characters.) If this value is not set in the constructor, the name is the same as the instance ID. - :type display_name: str :param project_id: Optional, the ID of the project which owns the Cloud Spanner Database. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -61,7 +55,6 @@ class SpannerDeployInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_spanner_deploy_template_fields] @@ -133,12 +126,9 @@ class SpannerDeleteInstanceOperator(BaseOperator): :ref:`howto/operator:SpannerDeleteInstanceOperator` :param instance_id: The Cloud Spanner instance ID. - :type instance_id: str :param project_id: Optional, the ID of the project that owns the Cloud Spanner Database. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
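Per the ``configuration_name`` note above (required only for instances that do not yet exist), the deploy operator covers both first-time creation and later updates. A minimal sketch, with placeholder project, instance, and configuration names:

.. code-block:: python

    from airflow.providers.google.cloud.operators.spanner import SpannerDeployInstanceOperator

    spanner_instance_create = SpannerDeployInstanceOperator(
        task_id="spanner_instance_create",
        project_id="my-project",  # placeholder; omit to use the connection's default project
        instance_id="my-instance",
        configuration_name="projects/my-project/instanceConfigs/regional-europe-west1",
        node_count=1,
        display_name="MyCloudSpannerInstance",
    )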
@@ -147,7 +137,6 @@ class SpannerDeleteInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_spanner_delete_template_fields] @@ -206,17 +195,12 @@ class SpannerQueryDatabaseInstanceOperator(BaseOperator): :ref:`howto/operator:SpannerQueryDatabaseInstanceOperator` :param instance_id: The Cloud Spanner instance ID. - :type instance_id: str :param database_id: The Cloud Spanner database ID. - :type database_id: str :param query: The query or list of queries to be executed. Can be a path to a SQL file. - :type query: str or list :param project_id: Optional, the ID of the project that owns the Cloud Spanner Database. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -225,7 +209,6 @@ class SpannerQueryDatabaseInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_spanner_query_template_fields] @@ -300,7 +283,6 @@ def sanitize_queries(queries: List[str]) -> None: Drops the trailing empty query from the queries list, if present. :param queries: queries to sanitize - :type queries: List[str] :rtype: None """ if queries and queries[-1] == '': @@ -317,16 +299,11 @@ class SpannerDeployDatabaseInstanceOperator(BaseOperator): :ref:`howto/operator:SpannerDeployDatabaseInstanceOperator` :param instance_id: The Cloud Spanner instance ID. - :type instance_id: str :param database_id: The Cloud Spanner database ID. - :type database_id: str :param ddl_statements: The string list containing DDL for the new database. - :type ddl_statements: list[str] :param project_id: Optional, the ID of the project that owns the Cloud Spanner Database. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -335,7 +312,6 @@ class SpannerDeployDatabaseInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_spanner_database_deploy_template_fields] @@ -418,19 +394,13 @@ class SpannerUpdateDatabaseInstanceOperator(BaseOperator): :ref:`howto/operator:SpannerUpdateDatabaseInstanceOperator` :param instance_id: The Cloud Spanner instance ID.
- :type instance_id: str :param database_id: The Cloud Spanner database ID. - :type database_id: str :param ddl_statements: The string list containing DDL to apply to the database. - :type ddl_statements: list[str] :param project_id: Optional, the ID of the project that owns the Cloud Spanner Database. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param operation_id: (Optional) Unique per database operation id that can be specified to implement idempotency check. - :type operation_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -439,7 +409,6 @@ class SpannerUpdateDatabaseInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_spanner_database_update_template_fields] @@ -518,14 +487,10 @@ class SpannerDeleteDatabaseInstanceOperator(BaseOperator): :ref:`howto/operator:SpannerDeleteDatabaseInstanceOperator` :param instance_id: Cloud Spanner instance ID. - :type instance_id: str :param database_id: Cloud Spanner database ID. - :type database_id: str :param project_id: Optional, the ID of the project that owns the Cloud Spanner Database. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -534,7 +499,6 @@ class SpannerDeleteDatabaseInstanceOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_spanner_database_delete_template_fields] diff --git a/airflow/providers/google/cloud/operators/speech_to_text.py b/airflow/providers/google/cloud/operators/speech_to_text.py index 0c0616f0309e5..5bf077271b240 100644 --- a/airflow/providers/google/cloud/operators/speech_to_text.py +++ b/airflow/providers/google/cloud/operators/speech_to_text.py @@ -40,23 +40,17 @@ class CloudSpeechToTextRecognizeSpeechOperator(BaseOperator): :param config: information to the recognizer that specifies how to process the request. See more: https://googleapis.github.io/google-cloud-python/latest/speech/gapic/v1/types.html#google.cloud.speech_v1.types.RecognitionConfig - :type config: dict or google.cloud.speech_v1.types.RecognitionConfig :param audio: audio data to be recognized. 
See more: https://googleapis.github.io/google-cloud-python/latest/speech/gapic/v1/types.html#google.cloud.speech_v1.types.RecognitionAudio - :type audio: dict or google.cloud.speech_v1.types.RecognitionAudio :param project_id: Optional, Google Cloud Project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -65,7 +59,6 @@ class CloudSpeechToTextRecognizeSpeechOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_speech_to_text_synthesize_template_fields] diff --git a/airflow/providers/google/cloud/operators/stackdriver.py b/airflow/providers/google/cloud/operators/stackdriver.py index 2134771ed34cf..359b8a5c2e5a1 100644 --- a/airflow/providers/google/cloud/operators/stackdriver.py +++ b/airflow/providers/google/cloud/operators/stackdriver.py @@ -43,40 +43,30 @@ class StackdriverListAlertPoliciesOperator(BaseOperator): :param format_: (Optional) Desired output format of the result. The supported formats are "dict", "json" and None which returns python dictionary, stringified JSON and protobuf respectively. - :type format_: str :param filter_: If provided, this field specifies the criteria that must be met by alert policies to be included in the response. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type filter_: str :param order_by: A comma-separated list of fields by which to sort the result. Supports the same set of field references as the ``filter`` field. Entries can be prefixed with a minus sign to sort by the field in descending order. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type order_by: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per- resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. - :type page_size: int :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. 
- :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: str :param project_id: The project to fetch alerts from. - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -85,7 +75,6 @@ class StackdriverListAlertPoliciesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -165,25 +154,18 @@ class StackdriverEnableAlertPoliciesOperator(BaseOperator): :param filter_: If provided, this field specifies the criteria that must be met by alert policies to be enabled. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type filter_: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: str :param project_id: The project in which alert needs to be enabled. - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -192,7 +174,6 @@ class StackdriverEnableAlertPoliciesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ ui_color = "#e5ffcc" @@ -255,25 +236,18 @@ class StackdriverDisableAlertPoliciesOperator(BaseOperator): :param filter_: If provided, this field specifies the criteria that must be met by alert policies to be disabled. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type filter_: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. 
- :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: str :param project_id: The project in which alert needs to be disabled. - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -282,7 +256,6 @@ class StackdriverDisableAlertPoliciesOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ ui_color = "#e5ffcc" @@ -345,25 +318,18 @@ class StackdriverUpsertAlertOperator(BaseOperator): to be either created or updated. For more details, see https://cloud.google.com/monitoring/api/ref_v3/rest/v3/projects.alertPolicies#AlertPolicy. (templated) - :type alerts: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: str :param project_id: The project in which alert needs to be created/updated. - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -372,7 +338,6 @@ class StackdriverUpsertAlertOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -434,25 +399,18 @@ class StackdriverDeleteAlertOperator(BaseOperator): :param name: The alerting policy to delete. The format is: ``projects/[PROJECT_ID]/alertPolicies/[ALERT_POLICY_ID]``. - :type name: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. 
- :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: str :param project_id: The project from which alert needs to be deleted. - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -461,7 +419,6 @@ class StackdriverDeleteAlertOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -526,40 +483,30 @@ class StackdriverListNotificationChannelsOperator(BaseOperator): :param format_: (Optional) Desired output format of the result. The supported formats are "dict", "json" and None which returns python dictionary, stringified JSON and protobuf respectively. - :type format_: str :param filter_: If provided, this field specifies the criteria that must be met by notification channels to be included in the response. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type filter_: str :param order_by: A comma-separated list of fields by which to sort the result. Supports the same set of field references as the ``filter`` field. Entries can be prefixed with a minus sign to sort by the field in descending order. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type order_by: str :param page_size: The maximum number of resources contained in the underlying API response. If page streaming is performed per- resource, this parameter does not affect the return value. If page streaming is performed per-page, this determines the maximum number of resources in a page. - :type page_size: int :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: str :param project_id: The project to fetch notification channels from. - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
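The Stackdriver operators above share the same ``retry``/``timeout``/``metadata`` surface and differ mainly in the action taken. A minimal sketch of listing alert policies (the ``filter_`` value is an assumed example; see the sorting-and-filtering link above for the syntax):

.. code-block:: python

    from airflow.providers.google.cloud.operators.stackdriver import (
        StackdriverListAlertPoliciesOperator,
    )

    list_alert_policies = StackdriverListAlertPoliciesOperator(
        task_id="list_alert_policies",
        format_="dict",  # one of "dict", "json", or None (protobuf)
        filter_='display_name="my-alert-policy"',  # assumed example filter
    )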
@@ -568,7 +515,6 @@ class StackdriverListNotificationChannelsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -649,25 +595,18 @@ class StackdriverEnableNotificationChannelsOperator(BaseOperator): :param filter_: If provided, this field specifies the criteria that must be met by notification channels to be enabled. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type filter_: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: str :param project_id: The project in which notification channels need to be enabled. - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -676,7 +615,6 @@ class StackdriverEnableNotificationChannelsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -741,25 +679,18 @@ class StackdriverDisableNotificationChannelsOperator(BaseOperator): :param filter_: If provided, this field specifies the criteria that must be met by notification channels to be disabled. For more details, see https://cloud.google.com/monitoring/api/v3/sorting-and-filtering. - :type filter_: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt.
- :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -768,7 +699,6 @@ class StackdriverDisableNotificationChannelsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -834,25 +764,18 @@ class StackdriverUpsertNotificationChannelOperator(BaseOperator): to be either created or updated. For more details, see https://cloud.google.com/monitoring/api/ref_v3/rest/v3/projects.notificationChannels. (templated) - :type channels: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: str :param project_id: The project in which notification channels need to be created/updated. - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -861,7 +784,6 @@ class StackdriverUpsertNotificationChannelOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -925,25 +847,18 @@ class StackdriverDeleteNotificationChannelOperator(BaseOperator): :param name: The notification channel to delete. The format is: ``projects/[PROJECT_ID]/notificationChannels/[CHANNEL_ID]``. - :type name: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will be retried using a default configuration. - :type retry: str :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud Platform. - :type gcp_conn_id: str :param project_id: The project from which the notification channel needs to be deleted. - :type project_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any.
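A sketch of the paired enable/disable operators covered above; the channel filter is a hypothetical placeholder, and in a real pipeline these would sit inside a DAG context as in the previous example:

```python
from airflow.providers.google.cloud.operators.stackdriver import (
    StackdriverDisableNotificationChannelsOperator,
    StackdriverEnableNotificationChannelsOperator,
)

# Both operators act on every channel matching ``filter_``;
# the Slack filter below is a placeholder value.
enable_channels = StackdriverEnableNotificationChannelsOperator(
    task_id="enable-notification-channels",
    filter_='type="slack"',
)
disable_channels = StackdriverDisableNotificationChannelsOperator(
    task_id="disable-notification-channels",
    filter_='type="slack"',
)
```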
For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -952,7 +867,6 @@ class StackdriverDeleteNotificationChannelOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/operators/tasks.py b/airflow/providers/google/cloud/operators/tasks.py index e4a8c712f6a7f..9f89497fae671 100644 --- a/airflow/providers/google/cloud/operators/tasks.py +++ b/airflow/providers/google/cloud/operators/tasks.py @@ -47,28 +47,20 @@ class CloudTasksQueueCreateOperator(BaseOperator): :ref:`howto/operator:CloudTasksQueueCreateOperator` :param location: The location name in which the queue will be created. - :type location: str :param task_queue: The task queue to create. Queue's name cannot be the same as an existing queue. If a dict is provided, it must be of the same form as the protobuf message Queue. - :type task_queue: dict or google.cloud.tasks_v2.types.Queue :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param queue_name: (Optional) The queue's name. If provided, it will be used to construct the full queue path. - :type queue_name: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -77,7 +69,6 @@ class CloudTasksQueueCreateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.tasks_v2.types.Queue """ @@ -157,31 +148,22 @@ class CloudTasksQueueUpdateOperator(BaseOperator): :param task_queue: The task queue to update. This method creates the queue if it does not exist and updates the queue if it does exist. The queue's name must be specified. - :type task_queue: dict or google.cloud.tasks_v2.types.Queue :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. 
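A minimal sketch of CloudTasksQueueCreateOperator; per the docstring above, a plain dict of the same form as the protobuf ``Queue`` message is accepted, and the location and queue name are placeholders:

```python
from airflow.providers.google.cloud.operators.tasks import CloudTasksQueueCreateOperator

create_queue = CloudTasksQueueCreateOperator(
    task_id="create-queue",
    location="europe-west1",     # placeholder region
    queue_name="my-test-queue",  # placeholder queue id
    # Dict mirroring google.cloud.tasks_v2.types.Queue.
    task_queue={"stackdriver_logging_config": {"sampling_ratio": 0.5}},
)
```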
- :type project_id: str :param location: (Optional) The location name in which the queue will be updated. If provided, it will be used to construct the full queue path. - :type location: str :param queue_name: (Optional) The queue's name. If provided, it will be used to construct the full queue path. - :type queue_name: str :param update_mask: A mask used to specify which fields of the queue are being updated. If empty, then all fields will be updated. If a dict is provided, it must be of the same form as the protobuf message. - :type update_mask: dict or google.protobuf.field_mask_pb2.FieldMask :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -190,7 +172,6 @@ class CloudTasksQueueUpdateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.tasks_v2.types.Queue """ @@ -259,23 +240,16 @@ class CloudTasksQueueGetOperator(BaseOperator): :ref:`howto/operator:CloudTasksQueueGetOperator` :param location: The location name in which the queue was created. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -284,7 +258,6 @@ class CloudTasksQueueGetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.tasks_v2.types.Queue """ @@ -345,26 +318,18 @@ class CloudTasksQueuesListOperator(BaseOperator): :ref:`howto/operator:CloudTasksQueuesListOperator` :param location: The location name in which the queues were created. - :type location: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param results_filter: (Optional) Filter used to specify a subset of queues. - :type results_filter: str :param page_size: (Optional) The maximum number of resources contained in the underlying API response. - :type page_size: int :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -373,7 +338,6 @@ class CloudTasksQueuesListOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: list[google.cloud.tasks_v2.types.Queue] """ @@ -436,23 +400,16 @@ class CloudTasksQueueDeleteOperator(BaseOperator): :ref:`howto/operator:CloudTasksQueueDeleteOperator` :param location: The location name in which the queue will be deleted. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
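A sketch of CloudTasksQueuesListOperator using the paging parameter documented above; the location is a placeholder:

```python
from airflow.providers.google.cloud.operators.tasks import CloudTasksQueuesListOperator

list_queues = CloudTasksQueuesListOperator(
    task_id="list-queues",
    location="europe-west1",  # placeholder region
    page_size=25,             # cap resources per underlying API page
)
```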
@@ -461,7 +418,6 @@ class CloudTasksQueueDeleteOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -519,23 +475,16 @@ class CloudTasksQueuePurgeOperator(BaseOperator): :ref:`howto/operator:CloudTasksQueuePurgeOperator` :param location: The location name in which the queue will be purged. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -544,7 +493,6 @@ class CloudTasksQueuePurgeOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: list[google.cloud.tasks_v2.types.Queue] """ @@ -605,23 +553,16 @@ class CloudTasksQueuePauseOperator(BaseOperator): :ref:`howto/operator:CloudTasksQueuePauseOperator` :param location: The location name in which the queue will be paused. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
@@ -630,7 +571,6 @@ class CloudTasksQueuePauseOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: list[google.cloud.tasks_v2.types.Queue] """ @@ -691,23 +631,16 @@ class CloudTasksQueueResumeOperator(BaseOperator): :ref:`howto/operator:CloudTasksQueueResumeOperator` :param location: The location name in which the queue will be resumed. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -716,7 +649,6 @@ class CloudTasksQueueResumeOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: list[google.cloud.tasks_v2.types.Queue] """ @@ -777,32 +709,22 @@ class CloudTasksTaskCreateOperator(BaseOperator): :ref:`howto/operator:CloudTasksTaskCreateOperator` :param location: The location name in which the task will be created. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param task: The task to add. If a dict is provided, it must be of the same form as the protobuf message Task. - :type task: dict or google.cloud.tasks_v2.types.Task :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param task_name: (Optional) The task's name. If provided, it will be used to construct the full task path. - :type task_name: str :param response_view: (Optional) This field specifies which subset of the Task will be returned. - :type response_view: google.cloud.tasks_v2.enums.Task.View :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. 
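A sketch chaining the pause and resume operators documented above; the location and queue name are placeholders:

```python
from airflow.providers.google.cloud.operators.tasks import (
    CloudTasksQueuePauseOperator,
    CloudTasksQueueResumeOperator,
)

pause_queue = CloudTasksQueuePauseOperator(
    task_id="pause-queue",
    location="europe-west1",     # placeholder region
    queue_name="my-test-queue",  # placeholder queue id
)
resume_queue = CloudTasksQueueResumeOperator(
    task_id="resume-queue",
    location="europe-west1",
    queue_name="my-test-queue",
)
# Dispatching stops, then restarts once the resume task runs.
pause_queue >> resume_queue
```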
- :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -811,7 +733,6 @@ class CloudTasksTaskCreateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.tasks_v2.types.Task """ @@ -883,28 +804,19 @@ class CloudTasksTaskGetOperator(BaseOperator): :ref:`howto/operator:CloudTasksTaskGetOperator` :param location: The location name in which the task was created. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param task_name: The task's name. - :type task_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param response_view: (Optional) This field specifies which subset of the Task will be returned. - :type response_view: google.cloud.tasks_v2.enums.Task.View :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -913,7 +825,6 @@ class CloudTasksTaskGetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.tasks_v2.types.Task """ @@ -981,29 +892,20 @@ class CloudTasksTasksListOperator(BaseOperator): :ref:`howto/operator:CloudTasksTasksListOperator` :param location: The location name in which the tasks were created. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param response_view: (Optional) This field specifies which subset of the Task will be returned. - :type response_view: google.cloud.tasks_v2.enums.Task.View :param page_size: (Optional) The maximum number of resources contained in the underlying API response. - :type page_size: int :param retry: (Optional) A retry object used to retry requests. 
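A sketch of CloudTasksTaskCreateOperator; the ``task`` argument may be a dict of the same form as the protobuf ``Task`` message, and the App Engine handler path is a hypothetical placeholder:

```python
from google.cloud.tasks_v2.types import HttpMethod

from airflow.providers.google.cloud.operators.tasks import CloudTasksTaskCreateOperator

create_task = CloudTasksTaskCreateOperator(
    task_id="create-task",
    location="europe-west1",     # placeholder region
    queue_name="my-test-queue",  # placeholder queue id
    task={
        "app_engine_http_request": {
            "http_method": HttpMethod.POST,
            "relative_uri": "/example_task_handler",  # placeholder handler
            "body": b"Hello",
        }
    },
)
```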
If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1012,7 +914,6 @@ class CloudTasksTasksListOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: list[google.cloud.tasks_v2.types.Task] """ @@ -1079,25 +980,17 @@ class CloudTasksTaskDeleteOperator(BaseOperator): :ref:`howto/operator:CloudTasksTaskDeleteOperator` :param location: The location name in which the task will be deleted. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param task_name: The task's name. - :type task_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1106,7 +999,6 @@ class CloudTasksTaskDeleteOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -1168,28 +1060,19 @@ class CloudTasksTaskRunOperator(BaseOperator): :ref:`howto/operator:CloudTasksTaskRunOperator` :param location: The location name in which the task was created. - :type location: str :param queue_name: The queue's name. - :type queue_name: str :param task_name: The task's name. - :type task_name: str :param project_id: (Optional) The ID of the Google Cloud project that owns the Cloud Tasks. If set to None or missing, the default project_id from the Google Cloud connection is used. 
- :type project_id: str :param response_view: (Optional) This field specifies which subset of the Task will be returned. - :type response_view: google.cloud.tasks_v2.Task.View :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1198,7 +1081,6 @@ class CloudTasksTaskRunOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :rtype: google.cloud.tasks_v2.types.Task """ diff --git a/airflow/providers/google/cloud/operators/text_to_speech.py b/airflow/providers/google/cloud/operators/text_to_speech.py index df2196ffc09e0..4fbb547ff7aaf 100644 --- a/airflow/providers/google/cloud/operators/text_to_speech.py +++ b/airflow/providers/google/cloud/operators/text_to_speech.py @@ -42,30 +42,21 @@ class CloudTextToSpeechSynthesizeOperator(BaseOperator): :param input_data: text input to be synthesized. See more: https://googleapis.github.io/google-cloud-python/latest/texttospeech/gapic/v1/types.html#google.cloud.texttospeech_v1.types.SynthesisInput - :type input_data: dict or google.cloud.texttospeech_v1.types.SynthesisInput :param voice: configuration of voice to be used in synthesis. See more: https://googleapis.github.io/google-cloud-python/latest/texttospeech/gapic/v1/types.html#google.cloud.texttospeech_v1.types.VoiceSelectionParams - :type voice: dict or google.cloud.texttospeech_v1.types.VoiceSelectionParams :param audio_config: configuration of the synthesized audio. See more: https://googleapis.github.io/google-cloud-python/latest/texttospeech/gapic/v1/types.html#google.cloud.texttospeech_v1.types.AudioConfig - :type audio_config: dict or google.cloud.texttospeech_v1.types.AudioConfig :param target_bucket_name: name of the GCS bucket in which output file should be stored - :type target_bucket_name: str :param target_filename: filename of the output file. - :type target_filename: str :param project_id: Optional, Google Cloud Project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. 
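A sketch of CloudTasksTaskRunOperator forcing immediate dispatch of an existing task; all resource IDs are placeholders:

```python
from airflow.providers.google.cloud.operators.tasks import CloudTasksTaskRunOperator

run_task = CloudTasksTaskRunOperator(
    task_id="run-task",
    location="europe-west1",     # placeholder region
    queue_name="my-test-queue",  # placeholder queue id
    task_name="my-task",         # placeholder task id
)
```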
- :type timeout: float :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -74,7 +65,6 @@ class CloudTextToSpeechSynthesizeOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_text_to_speech_synthesize_template_fields] diff --git a/airflow/providers/google/cloud/operators/translate.py b/airflow/providers/google/cloud/operators/translate.py index 1aa344324ea40..40bd4dda6e981 100644 --- a/airflow/providers/google/cloud/operators/translate.py +++ b/airflow/providers/google/cloud/operators/translate.py @@ -52,27 +52,21 @@ class CloudTranslateTextOperator(BaseOperator): If only a single value is passed, then only a single dictionary is set as XCom return value. - :type values: str or list :param values: String or list of strings to translate. - :type target_language: str :param target_language: The language to translate results into. This is required by the API and defaults to the target language of the current instance. - :type format_: str or None :param format_: (Optional) One of ``text`` or ``html``, to specify if the input text is plain text or HTML. - :type source_language: str or None :param source_language: (Optional) The language of the text to be translated. - :type model: str or None :param model: (Optional) The model used to translate the text, such as ``'base'`` or ``'nmt'``. - :type impersonation_chain: Union[str, Sequence[str]] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. diff --git a/airflow/providers/google/cloud/operators/translate_speech.py b/airflow/providers/google/cloud/operators/translate_speech.py index 0001be2289854..7e1f7caa724c5 100644 --- a/airflow/providers/google/cloud/operators/translate_speech.py +++ b/airflow/providers/google/cloud/operators/translate_speech.py @@ -64,37 +64,29 @@ class CloudTranslateSpeechOperator(BaseOperator): :param audio: audio data to be recognized. See more: https://googleapis.github.io/google-cloud-python/latest/speech/gapic/v1/types.html#google.cloud.speech_v1.types.RecognitionAudio - :type audio: dict or google.cloud.speech_v1.types.RecognitionAudio :param config: information to the recognizer that specifies how to process the request. See more: https://googleapis.github.io/google-cloud-python/latest/speech/gapic/v1/types.html#google.cloud.speech_v1.types.RecognitionConfig - :type config: dict or google.cloud.speech_v1.types.RecognitionConfig :param target_language: The language to translate results into. This is required by the API and defaults to the target language of the current instance. Check the list of available languages here: https://cloud.google.com/translate/docs/languages - :type target_language: str :param format_: (Optional) One of ``text`` or ``html``, to specify if the input text is plain text or HTML. - :type format_: str or None :param source_language: (Optional) The language of the text to be translated. 
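A minimal sketch of CloudTextToSpeechSynthesizeOperator; dicts mirroring the corresponding protobuf messages are accepted for the three configuration arguments, and the output bucket is a placeholder:

```python
from airflow.providers.google.cloud.operators.text_to_speech import (
    CloudTextToSpeechSynthesizeOperator,
)

synthesize_speech = CloudTextToSpeechSynthesizeOperator(
    task_id="synthesize-speech",
    input_data={"text": "Sample text for demonstration purposes"},
    voice={"language_code": "en-US", "ssml_gender": "FEMALE"},
    audio_config={"audio_encoding": "LINEAR16"},
    target_bucket_name="my-output-bucket",  # placeholder bucket
    target_filename="test.wav",
)
```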
- :type source_language: str or None :param model: (Optional) The model used to translate the text, such as ``'base'`` or ``'nmt'``. - :type model: str or None :param project_id: Optional, Google Cloud Project ID where the Compute Engine Instance exists. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to 'google_cloud_default'. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token @@ -104,7 +96,6 @@ class CloudTranslateSpeechOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ diff --git a/airflow/providers/google/cloud/operators/video_intelligence.py b/airflow/providers/google/cloud/operators/video_intelligence.py index ffff87d63cb07..6f2cd22062646 100644 --- a/airflow/providers/google/cloud/operators/video_intelligence.py +++ b/airflow/providers/google/cloud/operators/video_intelligence.py @@ -40,30 +40,22 @@ class CloudVideoIntelligenceDetectVideoLabelsOperator(BaseOperator): :param input_uri: Input video location. Currently, only Google Cloud Storage URIs are supported, which must be specified in the following format: ``gs://bucket-id/object-id``. - :type input_uri: str :param input_content: The video data bytes. If unset, the input video(s) should be specified via ``input_uri``. If set, ``input_uri`` should be unset. - :type input_content: bytes :param output_uri: Optional, location where the output (in JSON format) should be stored. Currently, only Google Cloud Storage URIs are supported, which must be specified in the following format: ``gs://bucket-id/object-id``. - :type output_uri: str :param video_context: Optional, Additional video context and/or feature-specific parameters. - :type video_context: dict or google.cloud.videointelligence_v1.types.VideoContext :param location: Optional, cloud region where annotation should take place. Supported cloud regions: us-east1, us-west1, europe-west1, asia-east1. If no region is specified, a region will be determined based on video file location. - :type location: str :param retry: Retry object used to determine when/if to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: Optional, The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to ``google_cloud_default``. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
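A minimal sketch of CloudTranslateTextOperator; the translated strings land in XCom as described above:

```python
from airflow.providers.google.cloud.operators.translate import CloudTranslateTextOperator

translate_text = CloudTranslateTextOperator(
    task_id="translate",
    values=["zażółć gęślą jaźń"],  # any string(s) to translate
    target_language="en",
    format_="text",
    source_language=None,  # let the API detect the source language
    model="base",
)
```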
@@ -72,7 +64,6 @@ class CloudVideoIntelligenceDetectVideoLabelsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_video_intelligence_detect_labels_template_fields] @@ -139,30 +130,22 @@ class CloudVideoIntelligenceDetectVideoExplicitContentOperator(BaseOperator): :param input_uri: Input video location. Currently, only Google Cloud Storage URIs are supported, which must be specified in the following format: ``gs://bucket-id/object-id``. - :type input_uri: str :param input_content: The video data bytes. If unset, the input video(s) should be specified via ``input_uri``. If set, ``input_uri`` should be unset. - :type input_content: bytes :param output_uri: Optional, location where the output (in JSON format) should be stored. Currently, only Google Cloud Storage URIs are supported, which must be specified in the following format: ``gs://bucket-id/object-id``. - :type output_uri: str :param video_context: Optional, Additional video context and/or feature-specific parameters. - :type video_context: dict or google.cloud.videointelligence_v1.types.VideoContext :param location: Optional, cloud region where annotation should take place. Supported cloud regions: us-east1, us-west1, europe-west1, asia-east1. If no region is specified, a region will be determined based on video file location. - :type location: str :param retry: Retry object used to determine when/if to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: Optional, The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to ``google_cloud_default``. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -171,7 +154,6 @@ class CloudVideoIntelligenceDetectVideoExplicitContentOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_video_intelligence_detect_explicit_content_template_fields] @@ -238,30 +220,22 @@ class CloudVideoIntelligenceDetectVideoShotsOperator(BaseOperator): :param input_uri: Input video location. Currently, only Google Cloud Storage URIs are supported, which must be specified in the following format: ``gs://bucket-id/object-id``. - :type input_uri: str :param input_content: The video data bytes. If unset, the input video(s) should be specified via ``input_uri``. If set, ``input_uri`` should be unset. - :type input_content: bytes :param output_uri: Optional, location where the output (in JSON format) should be stored. Currently, only Google Cloud Storage URIs are supported, which must be specified in the following format: ``gs://bucket-id/object-id``.
- :type output_uri: str :param video_context: Optional, Additional video context and/or feature-specific parameters. - :type video_context: dict or google.cloud.videointelligence_v1.types.VideoContext :param location: Optional, cloud region where annotation should take place. Supported cloud regions: us-east1, us-west1, europe-west1, asia-east1. If no region is specified, a region will be determined based on video file location. - :type location: str :param retry: Retry object used to determine when/if to retry requests. If None is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: Optional, The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param gcp_conn_id: Optional, The connection ID used to connect to Google Cloud. Defaults to ``google_cloud_default``. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -270,7 +244,6 @@ class CloudVideoIntelligenceDetectVideoShotsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_video_intelligence_detect_video_shots_template_fields] diff --git a/airflow/providers/google/cloud/operators/vision.py b/airflow/providers/google/cloud/operators/vision.py index caaed367624a2..3f34917e7a038 100644 --- a/airflow/providers/google/cloud/operators/vision.py +++ b/airflow/providers/google/cloud/operators/vision.py @@ -51,29 +51,21 @@ class CloudVisionCreateProductSetOperator(BaseOperator): :param product_set: (Required) The ProductSet to create. If a dict is provided, it must be of the same form as the protobuf message `ProductSet`. - :type product_set: dict or google.cloud.vision_v1.types.ProductSet :param location: (Required) The region where the ProductSet should be created. Valid regions (as of 2019-02-05) are: us-east1, us-west1, europe-west1, asia-east1 - :type location: str :param project_id: (Optional) The project in which the ProductSet should be created. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param product_set_id: (Optional) A user-supplied resource id for this ProductSet. If set, the server will attempt to use this value as the resource id. If it is already in use, an error is returned with code ALREADY_EXISTS. Must be at most 128 characters long. It cannot contain the character /. - :type product_set_id: str :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. 
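A sketch of CloudVideoIntelligenceDetectVideoLabelsOperator annotating a video already stored in GCS; the bucket and object names are placeholders:

```python
from airflow.providers.google.cloud.operators.video_intelligence import (
    CloudVideoIntelligenceDetectVideoLabelsOperator,
)

detect_video_labels = CloudVideoIntelligenceDetectVideoLabelsOperator(
    task_id="detect-video-labels",
    input_uri="gs://my-bucket/my-video.mp4",  # placeholder GCS URI
    output_uri=None,     # return annotations inline instead of writing to GCS
    video_context=None,
    timeout=5,
)
```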
- :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -82,7 +74,6 @@ class CloudVisionCreateProductSetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_productset_create_template_fields] @@ -153,23 +144,16 @@ class CloudVisionGetProductSetOperator(BaseOperator): :param location: (Required) The region where the ProductSet is located. Valid regions (as of 2019-02-05) are: us-east1, us-west1, europe-west1, asia-east1 - :type location: str :param product_set_id: (Required) The resource id of this ProductSet. - :type product_set_id: str :param project_id: (Optional) The project in which the ProductSet is located. If set to None or missing, the default `project_id` from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -178,7 +162,6 @@ class CloudVisionGetProductSetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_productset_get_template_fields] @@ -251,30 +234,21 @@ class CloudVisionUpdateProductSetOperator(BaseOperator): :param product_set: (Required) The ProductSet resource which replaces the one on the server. If a dict is provided, it must be of the same form as the protobuf message `ProductSet`. - :type product_set: dict or google.cloud.vision_v1.types.ProductSet :param location: (Optional) The region where the ProductSet is located. Valid regions (as of 2019-02-05) are: us-east1, us-west1, europe-west1, asia-east1 - :type location: str :param product_set_id: (Optional) The resource id of this ProductSet. - :type product_set_id: str :param project_id: (Optional) The project in which the ProductSet should be created. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param update_mask: (Optional) The `FieldMask` that specifies which fields to update. If update_mask isn’t specified, all mutable fields are to be updated. Valid mask path is display_name. If a dict is provided, it must be of the same form as the protobuf message `FieldMask`. 
- :type update_mask: dict or google.cloud.vision_v1.types.FieldMask :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -283,7 +257,6 @@ class CloudVisionUpdateProductSetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_productset_update_template_fields] @@ -352,23 +325,16 @@ class CloudVisionDeleteProductSetOperator(BaseOperator): :param location: (Required) The region where the ProductSet is located. Valid regions (as of 2019-02-05) are: us-east1, us-west1, europe-west1, asia-east1 - :type location: str :param product_set_id: (Required) The resource id of this ProductSet. - :type product_set_id: str :param project_id: (Optional) The project in which the ProductSet should be created. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -377,7 +343,6 @@ class CloudVisionDeleteProductSetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_productset_delete_template_fields] @@ -444,29 +409,21 @@ class CloudVisionCreateProductOperator(BaseOperator): :param location: (Required) The region where the Product should be created. Valid regions (as of 2019-02-05) are: us-east1, us-west1, europe-west1, asia-east1 - :type location: str :param product: (Required) The product to create. If a dict is provided, it must be of the same form as the protobuf message `Product`. 
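A minimal sketch of CloudVisionCreateProductSetOperator; a dict of the same form as the ``ProductSet`` message is accepted, and the location and display name are placeholders:

```python
from airflow.providers.google.cloud.operators.vision import CloudVisionCreateProductSetOperator

product_set_create = CloudVisionCreateProductSetOperator(
    task_id="product-set-create",
    location="europe-west1",  # placeholder region
    product_set={"display_name": "My Product Set"},  # placeholder name
)
```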
- :type product: dict or google.cloud.vision_v1.types.Product :param project_id: (Optional) The project in which the Product should be created. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param product_id: (Optional) A user-supplied resource id for this Product. If set, the server will attempt to use this value as the resource id. If it is already in use, an error is returned with code ALREADY_EXISTS. Must be at most 128 characters long. It cannot contain the character /. - :type product_id: str :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -475,7 +432,6 @@ class CloudVisionCreateProductOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_product_create_template_fields] @@ -549,23 +505,16 @@ class CloudVisionGetProductOperator(BaseOperator): :param location: (Required) The region where the Product is located. Valid regions (as of 2019-02-05) are: us-east1, us-west1, europe-west1, asia-east1 - :type location: str :param product_id: (Required) The resource id of this Product. - :type product_id: str :param project_id: (Optional) The project in which the Product is located. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -574,7 +523,6 @@ class CloudVisionGetProductOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
- :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_product_get_template_fields] @@ -657,31 +605,22 @@ class CloudVisionUpdateProductOperator(BaseOperator): :param product: (Required) The Product resource which replaces the one on the server. product.name is immutable. If a dict is provided, it must be of the same form as the protobuf message `Product`. - :type product: dict or google.cloud.vision_v1.types.ProductSet :param location: (Optional) The region where the Product is located. Valid regions (as of 2019-02-05) are: us-east1, us-west1, europe-west1, asia-east1 - :type location: str :param product_id: (Optional) The resource id of this Product. - :type product_id: str :param project_id: (Optional) The project in which the Product is located. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param update_mask: (Optional) The `FieldMask` that specifies which fields to update. If update_mask isn’t specified, all mutable fields are to be updated. Valid mask paths include product_labels, display_name, and description. If a dict is provided, it must be of the same form as the protobuf message `FieldMask`. - :type update_mask: dict or google.cloud.vision_v1.types.FieldMask :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -690,7 +629,6 @@ class CloudVisionUpdateProductOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_product_update_template_fields] @@ -764,23 +702,16 @@ class CloudVisionDeleteProductOperator(BaseOperator): :param location: (Required) The region where the Product is located. Valid regions (as of 2019-02-05) are: us-east1, us-west1, europe-west1, asia-east1 - :type location: str :param product_id: (Required) The resource id of this Product. - :type product_id: str :param project_id: (Optional) The project in which the Product is located. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. 
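A sketch of CloudVisionCreateProductOperator; the values are placeholders, and ``product_category`` is set because the Vision API requires it:

```python
from airflow.providers.google.cloud.operators.vision import CloudVisionCreateProductOperator

product_create = CloudVisionCreateProductOperator(
    task_id="product-create",
    location="europe-west1",  # placeholder region
    product={"display_name": "My Product 1", "product_category": "toys"},
)
```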
- :type metadata: sequence[tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -789,7 +720,6 @@ class CloudVisionDeleteProductOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_product_delete_template_fields] @@ -851,17 +781,12 @@ class CloudVisionImageAnnotateOperator(BaseOperator): :param request: (Required) Annotation request for image or a batch. If a dict is provided, it must be of the same form as the protobuf message :class:`google.cloud.vision_v1.types.AnnotateImageRequest` - :type request: list[dict or google.cloud.vision_v1.types.AnnotateImageRequest] for batch or - dict or google.cloud.vision_v1.types.AnnotateImageRequest for single image. :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -870,7 +795,6 @@ class CloudVisionImageAnnotateOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_annotate_image_template_fields] @@ -924,33 +848,24 @@ class CloudVisionCreateReferenceImageOperator(BaseOperator): :param location: (Required) The region where the Product is located. Valid regions (as of 2019-02-05) are: us-east1, us-west1, europe-west1, asia-east1 - :type location: str :param reference_image: (Required) The reference image to create. If an image ID is specified, it is ignored. If a dict is provided, it must be of the same form as the protobuf message :class:`google.cloud.vision_v1.types.ReferenceImage` - :type reference_image: dict or google.cloud.vision_v1.types.ReferenceImage :param reference_image_id: (Optional) A user-supplied resource id for the ReferenceImage to be added. If set, the server will attempt to use this value as the resource id. If it is already in use, an error is returned with code ALREADY_EXISTS. Must be at most 128 characters long. It cannot contain the character `/`. - :type reference_image_id: str :param product_id: (Optional) The resource id of this Product. - :type product_id: str :param project_id: (Optional) The project in which the Product is located. If set to None or missing, the default project_id from the Google Cloud connection is used.
- :type project_id: str :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -959,7 +874,6 @@ class CloudVisionCreateReferenceImageOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_reference_image_create_template_fields] @@ -1035,28 +949,20 @@ class CloudVisionDeleteReferenceImageOperator(BaseOperator): :param location: (Required) The region where the Product is located. Valid regions (as of 2019-02-05) are: us-east1, us-west1, europe-west1, asia-east1 - :type location: str :param reference_image_id: (Optional) A user-supplied resource id for the ReferenceImage to be added. If set, the server will attempt to use this value as the resource id. If it is already in use, an error is returned with code ALREADY_EXISTS. Must be at most 128 characters long. It cannot contain the character `/`. - :type reference_image_id: str :param product_id: (Optional) The resource id of this Product. - :type product_id: str :param project_id: (Optional) The project in which the Product is located. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1065,7 +971,6 @@ class CloudVisionDeleteReferenceImageOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
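A minimal sketch of the reference-image operator documented above, assuming a hypothetical bucket and product id; the dict form of ReferenceImage mirrors the protobuf message:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.vision import CloudVisionCreateReferenceImageOperator

with DAG(dag_id="example_vision_reference_image", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    create_reference_image = CloudVisionCreateReferenceImageOperator(
        task_id="create_reference_image",
        location="europe-west1",
        product_id="my-product-id",
        reference_image={"uri": "gs://my-bucket/products/front.jpg"},
        reference_image_id="front-view",  # optional; must not contain '/'
    )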
- :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_reference_image_create_template_fields] @@ -1135,26 +1040,19 @@ class CloudVisionAddProductToProductSetOperator(BaseOperator): :ref:`howto/operator:CloudVisionAddProductToProductSetOperator` :param product_set_id: (Required) The resource id for the ProductSet to modify. - :type product_set_id: str :param product_id: (Required) The resource id of this Product. - :type product_id: str :param location: (Required) The region where the ProductSet is located. Valid regions (as of 2019-02-05) are: us-east1, us-west1, europe-west1, asia-east1 - :type: str :param project_id: (Optional) The project in which the Product is located. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1163,7 +1061,6 @@ class CloudVisionAddProductToProductSetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_add_product_to_product_set_template_fields] @@ -1227,26 +1124,19 @@ class CloudVisionRemoveProductFromProductSetOperator(BaseOperator): :ref:`howto/operator:CloudVisionRemoveProductFromProductSetOperator` :param product_set_id: (Required) The resource id for the ProductSet to modify. - :type product_set_id: str :param product_id: (Required) The resource id of this Product. - :type product_id: str :param location: (Required) The region where the ProductSet is located. Valid regions (as of 2019-02-05) are: us-east1, us-west1, europe-west1, asia-east1 - :type: str :param project_id: (Optional) The project in which the Product is located. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. - :type metadata: sequence[tuple[str, str]] :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud.
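The add/remove pair documented above takes the same arguments; a minimal sketch for the add side, with placeholder ids and the project_id left to the connection default:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.vision import CloudVisionAddProductToProductSetOperator

with DAG(dag_id="example_vision_product_set", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    add_product = CloudVisionAddProductToProductSetOperator(
        task_id="add_product_to_set",
        location="europe-west1",
        product_set_id="my-product-set",
        product_id="my-product-id",
        # project_id omitted: the default from the Google Cloud connection is used
    )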
- :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1255,7 +1145,6 @@ class CloudVisionRemoveProductFromProductSetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_remove_product_from_product_set_template_fields] @@ -1320,25 +1209,17 @@ class CloudVisionDetectTextOperator(BaseOperator): :param image: (Required) The image to analyze. See more: https://googleapis.github.io/google-cloud-python/latest/vision/gapic/v1/types.html#google.cloud.vision_v1.types.Image - :type image: dict or google.cloud.vision_v1.types.Image :param max_results: (Optional) Number of results to return. - :type max_results: int :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: Number of seconds before timing out. - :type timeout: float :param language_hints: List of languages to use for TEXT_DETECTION. In most cases, an empty value yields the best results since it enables automatic language detection. For languages based on the Latin alphabet, setting language_hints is not needed. - :type language_hints: str or list[str] :param web_detection_params: Parameters for web detection. - :type web_detection_params: dict :param additional_properties: Additional properties to be set on the AnnotateImageRequest. See more: :class:`google.cloud.vision_v1.types.AnnotateImageRequest` - :type additional_properties: dict :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1347,7 +1228,6 @@ class CloudVisionDetectTextOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_detect_text_set_template_fields] @@ -1411,25 +1291,17 @@ class CloudVisionTextDetectOperator(BaseOperator): :param image: (Required) The image to analyze. See more: https://googleapis.github.io/google-cloud-python/latest/vision/gapic/v1/types.html#google.cloud.vision_v1.types.Image - :type image: dict or google.cloud.vision_v1.types.Image :param max_results: Number of results to return. - :type max_results: int :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: Number of seconds before timing out. - :type timeout: float :param language_hints: List of languages to use for TEXT_DETECTION. In most cases, an empty value yields the best results since it enables automatic language detection. For languages based on the Latin alphabet, setting language_hints is not needed. 
- :type language_hints: str or list[str] :param web_detection_params: Parameters for web detection. - :type web_detection_params: dict :param additional_properties: Additional properties to be set on the AnnotateImageRequest. See more: https://googleapis.github.io/google-cloud-python/latest/vision/gapic/v1/types.html#google.cloud.vision_v1.types.AnnotateImageRequest - :type additional_properties: dict :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1438,7 +1310,6 @@ class CloudVisionTextDetectOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_document_detect_text_set_template_fields] @@ -1501,19 +1372,13 @@ class CloudVisionDetectImageLabelsOperator(BaseOperator): :param image: (Required) The image to analyze. See more: https://googleapis.github.io/google-cloud-python/latest/vision/gapic/v1/types.html#google.cloud.vision_v1.types.Image - :type image: dict or google.cloud.vision_v1.types.Image :param max_results: Number of results to return. - :type max_results: int :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: Number of seconds before timing out. - :type timeout: float :param additional_properties: Additional properties to be set on the AnnotateImageRequest. See more: https://googleapis.github.io/google-cloud-python/latest/vision/gapic/v1/types.html#google.cloud.vision_v1.types.AnnotateImageRequest - :type additional_properties: dict :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1522,7 +1387,6 @@ class CloudVisionDetectImageLabelsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_detect_labels_template_fields] @@ -1579,19 +1443,13 @@ class CloudVisionDetectImageSafeSearchOperator(BaseOperator): :param image: (Required) The image to analyze. See more: https://googleapis.github.io/google-cloud-python/latest/vision/gapic/v1/types.html#google.cloud.vision_v1.types.Image - :type image: dict or google.cloud.vision_v1.types.Image :param max_results: Number of results to return. - :type max_results: int :param retry: (Optional) A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: Number of seconds before timing out. - :type timeout: float :param additional_properties: Additional properties to be set on the AnnotateImageRequest. 
See more: https://googleapis.github.io/google-cloud-python/latest/vision/gapic/v1/types.html#google.cloud.vision_v1.types.AnnotateImageRequest - :type additional_properties: dict :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -1600,7 +1458,6 @@ class CloudVisionDetectImageSafeSearchOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START vision_detect_safe_search_template_fields] diff --git a/airflow/providers/google/cloud/operators/workflows.py b/airflow/providers/google/cloud/operators/workflows.py index bac589ba49f48..fb4bf25921901 100644 --- a/airflow/providers/google/cloud/operators/workflows.py +++ b/airflow/providers/google/cloud/operators/workflows.py @@ -47,21 +47,14 @@ class WorkflowsCreateWorkflowOperator(BaseOperator): :ref:`howto/operator:WorkflowsCreateWorkflowOperator` :param workflow: Required. Workflow to be created. - :type workflow: Dict :param workflow_id: Required. The ID of the workflow to be created. - :type workflow_id: str :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ template_fields: Sequence[str] = ("location", "workflow", "workflow_id") @@ -155,22 +148,15 @@ class WorkflowsUpdateWorkflowOperator(BaseOperator): :ref:`howto/operator:WorkflowsUpdateWorkflowOperator` :param workflow_id: Required. The ID of the workflow to be updated. - :type workflow_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param update_mask: List of fields to be updated. If not present, the entire workflow will be updated. - :type update_mask: FieldMask :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ template_fields: Sequence[str] = ("workflow_id", "update_mask") @@ -236,19 +222,13 @@ class WorkflowsDeleteWorkflowOperator(BaseOperator): :ref:`howto/operator:WorkflowsDeleteWorkflowOperator` :param workflow_id: Required. The ID of the workflow to be deleted.
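A minimal sketch of the create operator documented above; the workflow body is the dict form of the Workflow message, and the YAML source, ids and project are illustrative assumptions:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.workflows import WorkflowsCreateWorkflowOperator

WORKFLOW = {
    "description": "Demo workflow",
    # source_contents holds the workflow definition as YAML source
    "source_contents": "main:\n  steps:\n    - done:\n        return: 'Hello'\n",
}

with DAG(dag_id="example_workflows", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    create_workflow = WorkflowsCreateWorkflowOperator(
        task_id="create_workflow",
        workflow=WORKFLOW,
        workflow_id="demo-workflow",
        project_id="my-project",
        location="europe-west1",
    )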
- :type workflow_id: str :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ template_fields: Sequence[str] = ("location", "workflow_id") @@ -301,24 +281,17 @@ class WorkflowsListWorkflowsOperator(BaseOperator): :ref:`howto/operator:WorkflowsListWorkflowsOperator` :param filter_: Filter to restrict results to specific workflows. - :type filter_: str :param order_by: Comma-separated list of fields that specifies the order of the results. Default sorting order for a field is ascending. To specify descending order for a field, append a "desc" suffix. If not specified, the results will be returned in an unspecified order. - :type order_by: str :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ template_fields: Sequence[str] = ("location", "order_by", "filter_") @@ -373,19 +346,13 @@ class WorkflowsGetWorkflowOperator(BaseOperator): :ref:`howto/operator:WorkflowsGetWorkflowOperator` :param workflow_id: Required. The ID of the workflow to be retrieved. - :type workflow_id: str :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ template_fields: Sequence[str] = ("location", "workflow_id") @@ -438,21 +405,14 @@ class WorkflowsCreateExecutionOperator(BaseOperator): :ref:`howto/operator:WorkflowsCreateExecutionOperator` :param execution: Required. Execution to be created. - :type execution: Dict :param workflow_id: Required. The ID of the workflow. - :type workflow_id: str :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests.
If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ template_fields: Sequence[str] = ("location", "workflow_id", "execution") @@ -510,21 +470,14 @@ class WorkflowsCancelExecutionOperator(BaseOperator): :ref:`howto/operator:WorkflowsCancelExecutionOperator` :param workflow_id: Required. The ID of the workflow. - :type workflow_id: str :param execution_id: Required. The ID of the execution. - :type execution_id: str :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ template_fields: Sequence[str] = ("location", "workflow_id", "execution_id") @@ -583,22 +536,15 @@ class WorkflowsListExecutionsOperator(BaseOperator): :ref:`howto/operator:WorkflowsListExecutionsOperator` :param workflow_id: Required. The ID of the workflow whose executions should be listed. - :type workflow_id: str :param start_date_filter: If passed, only executions older than this date will be returned. By default, the operator returns executions from the last 60 minutes - :type start_date_filter: datetime :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ template_fields: Sequence[str] = ("location", "workflow_id") @@ -653,21 +599,14 @@ class WorkflowsGetExecutionOperator(BaseOperator): :ref:`howto/operator:WorkflowsGetExecutionOperator` :param workflow_id: Required. The ID of the workflow. - :type workflow_id: str :param execution_id: Required. The ID of the execution. - :type execution_id: str :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param location: Required. The GCP region in which to handle the request. - :type location: str :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt.
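To show how the execution operators documented above are used, here is a hedged sketch that starts an execution of an existing workflow; the empty argument string and all ids are placeholders:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.workflows import WorkflowsCreateExecutionOperator

with DAG(dag_id="example_workflow_execution", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    create_execution = WorkflowsCreateExecutionOperator(
        task_id="create_execution",
        execution={"argument": ""},  # dict form of the Execution message
        workflow_id="demo-workflow",
        project_id="my-project",
        location="europe-west1",
    )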
- :type timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ template_fields: Sequence[str] = ("location", "workflow_id", "execution_id") diff --git a/airflow/providers/google/cloud/secrets/secret_manager.py b/airflow/providers/google/cloud/secrets/secret_manager.py index 6b767d0b38a6e..5e38a3f578997 100644 --- a/airflow/providers/google/cloud/secrets/secret_manager.py +++ b/airflow/providers/google/cloud/secrets/secret_manager.py @@ -56,26 +56,18 @@ class CloudSecretManagerBackend(BaseSecretsBackend, LoggingMixin): :param connections_prefix: Specifies the prefix of the secret to read to get Connections. If set to None (null), requests for connections will not be sent to GCP Secrets Manager - :type connections_prefix: str :param variables_prefix: Specifies the prefix of the secret to read to get Variables. If set to None (null), requests for variables will not be sent to GCP Secrets Manager - :type variables_prefix: str :param config_prefix: Specifies the prefix of the secret to read to get Airflow Configurations containing secrets. If set to None (null), requests for configurations will not be sent to GCP Secrets Manager - :type config_prefix: str :param gcp_key_path: Path to Google Cloud Service Account key file (JSON). Mutually exclusive with gcp_keyfile_dict. Use default credentials in the current environment if not provided. - :type gcp_key_path: str :param gcp_keyfile_dict: Dictionary of keyfile parameters. Mutually exclusive with gcp_key_path. - :type gcp_keyfile_dict: dict :param gcp_scopes: Comma-separated string containing OAuth2 scopes - :type gcp_scopes: str :param project_id: Project ID to read the secrets from. If not passed, the project ID from credentials will be used. - :type project_id: str :param sep: Separator used to concatenate connections_prefix and conn_id. Default: "-" - :type sep: str """ def __init__( @@ -134,7 +126,6 @@ def get_conn_uri(self, conn_id: str) -> Optional[str]: Get secret value from the SecretManager. :param conn_id: connection id - :type conn_id: str """ if self.connections_prefix is None: return None @@ -170,9 +161,7 @@ def _get_secret(self, path_prefix: str, secret_id: str) -> Optional[str]: Get secret value from the SecretManager based on prefix. :param path_prefix: Prefix for the Path to get Secret - :type path_prefix: str :param secret_id: Secret Key - :type secret_id: str """ secret_id = self.build_path(path_prefix, secret_id, self.sep) return self.client.get_secret(secret_id=secret_id, project_id=self.project_id) diff --git a/airflow/providers/google/cloud/sensors/bigquery.py b/airflow/providers/google/cloud/sensors/bigquery.py index 17f67e2896aa0..13bc84b9d45f3 100644 --- a/airflow/providers/google/cloud/sensors/bigquery.py +++ b/airflow/providers/google/cloud/sensors/bigquery.py @@ -32,19 +32,14 @@ class BigQueryTableExistenceSensor(BaseSensorOperator): :param project_id: The Google cloud project in which to look for the table. The connection supplied to the hook must provide access to the specified project. - :type project_id: str :param dataset_id: The name of the dataset in which to look for the table. - :type dataset_id: str :param table_id: The name of the table to check the existence of. - :type table_id: str :param bigquery_conn_id: The connection ID to use when connecting to Google BigQuery. - :type bigquery_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any.
For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -53,7 +48,6 @@ class BigQueryTableExistenceSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -104,21 +98,15 @@ class BigQueryTablePartitionExistenceSensor(BaseSensorOperator): :param project_id: The Google cloud project in which to look for the table. The connection supplied to the hook must provide access to the specified project. - :type project_id: str :param dataset_id: The name of the dataset in which to look for the table. - :type dataset_id: str :param table_id: The name of the table to check the existence of. - :type table_id: str :param partition_id: The name of the partition to check the existence of. - :type partition_id: str :param bigquery_conn_id: The connection ID to use when connecting to Google BigQuery. - :type bigquery_conn_id: str :param delegate_to: The account to impersonate, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -127,7 +115,6 @@ class BigQueryTablePartitionExistenceSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/sensors/bigquery_dts.py b/airflow/providers/google/cloud/sensors/bigquery_dts.py index d45ce72d2f671..5c85d3e6067c0 100644 --- a/airflow/providers/google/cloud/sensors/bigquery_dts.py +++ b/airflow/providers/google/cloud/sensors/bigquery_dts.py @@ -39,23 +39,16 @@ class BigQueryDataTransferServiceTransferRunSensor(BaseSensorOperator): :param expected_statuses: The expected state of the operation. See: https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferOperations#Status - :type expected_statuses: Union[Set[str], str] :param run_id: ID of the transfer run. - :type run_id: str :param transfer_config_id: ID of transfer config to be used. - :type transfer_config_id: str :param project_id: The BigQuery project id where the transfer configuration should be created. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param retry: A retry object used to retry requests. If `None` is specified, requests will not be retried. - :type retry: Optional[google.api_core.retry.Retry] :param request_timeout: The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt.
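A minimal sketch of the table-existence sensor documented above, with placeholder project, dataset and table names; poke_interval and timeout are standard BaseSensorOperator arguments:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.sensors.bigquery import BigQueryTableExistenceSensor

with DAG(dag_id="example_bq_table_sensor", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    wait_for_table = BigQueryTableExistenceSensor(
        task_id="wait_for_table",
        project_id="my-project",
        dataset_id="my_dataset",
        table_id="my_table",
        poke_interval=60,  # seconds between checks
        timeout=60 * 60,   # give up after an hour
    )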
- :type request_timeout: Optional[float] :param metadata: Additional metadata that is provided to the method. - :type metadata: Optional[Sequence[Tuple[str, str]]] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -64,7 +57,6 @@ class BigQueryDataTransferServiceTransferRunSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :return: An ``google.cloud.bigquery_datatransfer_v1.types.TransferRun`` instance. """ diff --git a/airflow/providers/google/cloud/sensors/bigtable.py b/airflow/providers/google/cloud/sensors/bigtable.py index f0a22e9882935..9401b1423d6e8 100644 --- a/airflow/providers/google/cloud/sensors/bigtable.py +++ b/airflow/providers/google/cloud/sensors/bigtable.py @@ -42,11 +42,8 @@ class BigtableTableReplicationCompletedSensor(BaseSensorOperator, BigtableValida For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:BigtableTableReplicationCompletedSensor` - :type instance_id: str :param instance_id: The ID of the Cloud Bigtable instance. - :type table_id: str :param table_id: The ID of the table to check replication status. - :type project_id: str :param project_id: Optional, the ID of the Google Cloud project. :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token @@ -56,7 +53,6 @@ class BigtableTableReplicationCompletedSensor(BaseSensorOperator, BigtableValida If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ REQUIRED_ATTRIBUTES = ('instance_id', 'table_id') diff --git a/airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py b/airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py index c99d7ebdedd53..5cb63a37f6a9b 100644 --- a/airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +++ b/airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py @@ -40,17 +40,13 @@ class CloudDataTransferServiceJobStatusSensor(BaseSensorOperator): :ref:`howto/operator:CloudDataTransferServiceJobStatusSensor` :param job_name: The name of the transfer job - :type job_name: str :param expected_statuses: The expected state of the operation. See: https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferOperations#Status - :type expected_statuses: set[str] or string :param project_id: (Optional) the ID of the project that owns the Transfer Job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param gcp_conn_id: The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
@@ -59,7 +55,6 @@ class CloudDataTransferServiceJobStatusSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ # [START gcp_transfer_job_sensor_template_fields] diff --git a/airflow/providers/google/cloud/sensors/dataflow.py b/airflow/providers/google/cloud/sensors/dataflow.py index bea45aa139e05..b423f0fa3ecb3 100644 --- a/airflow/providers/google/cloud/sensors/dataflow.py +++ b/airflow/providers/google/cloud/sensors/dataflow.py @@ -39,24 +39,18 @@ class DataflowJobStatusSensor(BaseSensorOperator): :ref:`howto/operator:DataflowJobStatusSensor` :param job_id: ID of the job to be checked. - :type job_id: str :param expected_statuses: The expected state of the operation. See: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState - :type expected_statuses: Union[Set[str], str] :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: The location of the Dataflow job (for example europe-west1). See: https://cloud.google.com/dataflow/docs/concepts/regional-endpoints - :type location: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. See: https://developers.google.com/identity/protocols/oauth2/service-account#delegatingauthority - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -65,7 +59,6 @@ class DataflowJobStatusSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ('job_id',) @@ -132,25 +125,19 @@ class DataflowJobMetricsSensor(BaseSensorOperator): :ref:`howto/operator:DataflowJobMetricsSensor` :param job_id: ID of the job to be checked. - :type job_id: str :param callback: callback which is called with list of read job metrics See: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/MetricUpdate - :type callback: callable :param fail_on_terminal_state: If set to true sensor will raise Exception when job is in terminal state - :type fail_on_terminal_state: bool :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: The location of the Dataflow job (for example europe-west1). See: https://cloud.google.com/dataflow/docs/concepts/regional-endpoints :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. 
For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -159,7 +146,6 @@ class DataflowJobMetricsSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ('job_id',) @@ -225,25 +211,18 @@ class DataflowJobMessagesSensor(BaseSensorOperator): :ref:`howto/operator:DataflowJobMessagesSensor` :param job_id: ID of the job to be checked. - :type job_id: str :param callback: callback which is called with the list of read job messages See: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/MetricUpdate - :type callback: callable :param fail_on_terminal_state: If set to true sensor will raise Exception when job is in terminal state - :type fail_on_terminal_state: bool :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: Job location. - :type location: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -252,7 +231,6 @@ class DataflowJobMessagesSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ('job_id',) @@ -318,25 +296,18 @@ class DataflowJobAutoScalingEventsSensor(BaseSensorOperator): :ref:`howto/operator:DataflowJobAutoScalingEventsSensor` :param job_id: ID of the job to be checked. - :type job_id: str :param callback: callback which is called with the list of read autoscaling events See: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/MetricUpdate - :type callback: callable :param fail_on_terminal_state: If set to true sensor will raise Exception when job is in terminal state - :type fail_on_terminal_state: bool :param project_id: Optional, the Google Cloud project ID in which to start a job. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str :param location: Job location. - :type location: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled.
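A hedged sketch of the Dataflow status sensor documented above; job_id is templated, so it can be pulled from a hypothetical upstream task via XCom, and the expected status string follows the Dataflow JobState naming:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.sensors.dataflow import DataflowJobStatusSensor

with DAG(dag_id="example_dataflow_sensor", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    wait_for_job = DataflowJobStatusSensor(
        task_id="wait_for_dataflow_job",
        # 'start_dataflow_job' is a hypothetical upstream task that pushed the job id
        job_id="{{ ti.xcom_pull(task_ids='start_dataflow_job') }}",
        expected_statuses={"JOB_STATE_DONE"},
        location="europe-west1",
        poke_interval=60,
    )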
- :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -345,7 +316,6 @@ class DataflowJobAutoScalingEventsSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ('job_id',) diff --git a/airflow/providers/google/cloud/sensors/datafusion.py b/airflow/providers/google/cloud/sensors/datafusion.py index b57ff15bd18d4..63776e28e8c23 100644 --- a/airflow/providers/google/cloud/sensors/datafusion.py +++ b/airflow/providers/google/cloud/sensors/datafusion.py @@ -31,29 +31,19 @@ class CloudDataFusionPipelineStateSensor(BaseSensorOperator): Check the status of the pipeline in the Google Cloud Data Fusion :param pipeline_name: Your pipeline name. - :type pipeline_name: str :param pipeline_id: Your pipeline ID. - :type pipeline_name: str :param expected_statuses: State that is expected - :type expected_statuses: set[str] :param failure_statuses: State that will terminate the sensor with an exception - :type failure_statuses: set[str] :param instance_name: The name of the instance. - :type instance_name: str :param location: The Cloud Data Fusion location in which to handle the request. - :type location: str :param project_id: The ID of the Google Cloud project that the instance belongs to. - :type project_id: str :param namespace: If your pipeline belongs to a Basic edition instance, the namespace ID is always default. If your pipeline belongs to an Enterprise edition instance, you can create a namespace. - :type namespace: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -62,7 +52,6 @@ class CloudDataFusionPipelineStateSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ diff --git a/airflow/providers/google/cloud/sensors/dataproc.py b/airflow/providers/google/cloud/sensors/dataproc.py index e222c4c04fdd3..632373382602d 100644 --- a/airflow/providers/google/cloud/sensors/dataproc.py +++ b/airflow/providers/google/cloud/sensors/dataproc.py @@ -38,17 +38,11 @@ class DataprocJobSensor(BaseSensorOperator): :param project_id: The ID of the google cloud project in which to create the cluster. (templated) - :type project_id: str :param dataproc_job_id: The Dataproc job ID to poll. (templated) - :type dataproc_job_id: str :param region: Required. The Cloud Dataproc region in which to handle the request. 
(templated) - :type region: str :param location: (To be deprecated). The Cloud Dataproc region in which to handle the request. (templated) - :type location: str :param gcp_conn_id: The connection ID to use connecting to Google Cloud Platform. - :type gcp_conn_id: str :param wait_timeout: How many seconds to wait for the job to be ready. - :type wait_timeout: int """ template_fields: Sequence[str] = ('project_id', 'region', 'dataproc_job_id') diff --git a/airflow/providers/google/cloud/sensors/gcs.py b/airflow/providers/google/cloud/sensors/gcs.py index a71041423fea5..bfc48dedee0c1 100644 --- a/airflow/providers/google/cloud/sensors/gcs.py +++ b/airflow/providers/google/cloud/sensors/gcs.py @@ -36,17 +36,13 @@ class GCSObjectExistenceSensor(BaseSensorOperator): Checks for the existence of a file in Google Cloud Storage. :param bucket: The Google Cloud Storage bucket where the object is. - :type bucket: str :param object: The name of the object to check in the Google cloud storage bucket. - :type object: str :param google_cloud_conn_id: The connection ID to use when connecting to Google Cloud Storage. - :type google_cloud_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -55,7 +51,6 @@ class GCSObjectExistenceSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -111,21 +106,16 @@ class GCSObjectUpdateSensor(BaseSensorOperator): Checks if an object is updated in Google Cloud Storage. :param bucket: The Google Cloud Storage bucket where the object is. - :type bucket: str :param object: The name of the object to download in the Google cloud storage bucket. - :type object: str :param ts_func: Callback for defining the update condition. The default callback returns execution_date + schedule_interval. The callback takes the context as parameter. - :type ts_func: function :param google_cloud_conn_id: The connection ID to use when connecting to Google Cloud Storage. - :type google_cloud_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -134,7 +124,6 @@ class GCSObjectUpdateSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated).
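A minimal sketch of the object-existence sensor documented above; bucket and object are placeholders, and because both fields are templated the object name can embed macros such as ds:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.sensors.gcs import GCSObjectExistenceSensor

with DAG(dag_id="example_gcs_sensor", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    wait_for_file = GCSObjectExistenceSensor(
        task_id="wait_for_file",
        bucket="my-bucket",
        object="data/{{ ds }}/report.csv",  # templated, rendered per run
        google_cloud_conn_id="google_cloud_default",
    )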
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -182,17 +171,13 @@ class GCSObjectsWithPrefixExistenceSensor(BaseSensorOperator): through XCom for downstream tasks. :param bucket: The Google Cloud Storage bucket where the object is. - :type bucket: str :param prefix: The name of the prefix to check in the Google cloud storage bucket. - :type prefix: str :param google_cloud_conn_id: The connection ID to use when connecting to Google Cloud Storage. - :type google_cloud_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -201,7 +186,6 @@ class GCSObjectsWithPrefixExistenceSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -279,30 +263,23 @@ class GCSUploadSessionCompleteSensor(BaseSensorOperator): :param bucket: The Google Cloud Storage bucket where the objects are expected. - :type bucket: str :param prefix: The name of the prefix to check in the Google cloud storage bucket. :param inactivity_period: The total seconds of inactivity to designate an upload session is over. Note, this mechanism is not real time and this operator may not return until a poke_interval after this period has passed with no additional objects sensed. - :type inactivity_period: float :param min_objects: The minimum number of objects needed for upload session to be considered valid. - :type min_objects: int :param previous_objects: The set of object ids found during the last poke. - :type previous_objects: set[str] :param allow_delete: Should this sensor consider objects being deleted between pokes valid behavior. If true a warning message will be logged when this happens. If false an error will be raised. - :type allow_delete: bool :param google_cloud_conn_id: The connection ID to use when connecting to Google Cloud Storage. - :type google_cloud_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -311,7 +288,6 @@ class GCSUploadSessionCompleteSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -367,7 +343,6 @@ def is_bucket_updated(self, current_objects: Set[str]) -> bool: has passed and updates the state of the sensor accordingly.
:param current_objects: set of object ids in bucket during last poke. - :type current_objects: set[str] """ current_num_objects = len(current_objects) if current_objects > self.previous_objects: diff --git a/airflow/providers/google/cloud/sensors/pubsub.py b/airflow/providers/google/cloud/sensors/pubsub.py index 4e4d7c2599fc8..5a37f99ae0aa1 100644 --- a/airflow/providers/google/cloud/sensors/pubsub.py +++ b/airflow/providers/google/cloud/sensors/pubsub.py @@ -53,13 +53,10 @@ class PubSubPullSensor(BaseSensorOperator): variables in them. :param project: the Google Cloud project ID for the subscription (templated) - :type project: str :param subscription: the Pub/Sub subscription name. Do not include the full subscription path. - :type subscription: str :param max_messages: The maximum number of messages to retrieve per PubSub pull request - :type max_messages: int :param return_immediately: (Deprecated) This is an underlying PubSub API implementation detail. It has no real effect on Sensor behaviour other than some internal wait time before retrying @@ -69,23 +66,18 @@ class PubSubPullSensor(BaseSensorOperator): If you want a non-blocking task that does not wait for messages, please use :class:`~airflow.providers.google.cloud.operators.pubsub.PubSubPullOperator` instead. - :type return_immediately: bool :param ack_messages: If True, each message will be acknowledged immediately rather than by any downstream tasks - :type ack_messages: bool :param gcp_conn_id: The connection ID to use connecting to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param messages_callback: (Optional) Callback to process received messages. Its return value will be saved to XCom. If you are pulling large messages, you probably want to provide a custom callback. If not provided, the default implementation will convert `ReceivedMessage` objects into JSON-serializable dicts using the `google.protobuf.json_format.MessageToDict` function. - :type messages_callback: Optional[Callable[[List[ReceivedMessage], Dict[str, Any]], Any]] :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -94,7 +86,6 @@ class PubSubPullSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -196,7 +187,6 @@ def _default_message_callback( This default implementation converts `ReceivedMessage` objects into JSON-serializable dicts. :param pulled_messages: messages received from the topic. - :type pulled_messages: List[ReceivedMessage] :param context: same as in `execute` :return: value to be saved to XCom.
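A hedged sketch of the pull sensor documented above with a custom messages_callback; the project and subscription names are placeholders, and the callback simply decodes the message payloads before they are saved to XCom:

from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.sensors.pubsub import PubSubPullSensor


def decode_payloads(pulled_messages, context):
    # The return value replaces the default MessageToDict output in XCom.
    return [m.message.data.decode("utf-8") for m in pulled_messages]


with DAG(dag_id="example_pubsub_sensor", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    pull_messages = PubSubPullSensor(
        task_id="pull_messages",
        project="my-project",
        subscription="my-subscription",
        max_messages=5,
        ack_messages=True,  # acknowledge immediately instead of in a downstream task
        messages_callback=decode_payloads,
    )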
""" diff --git a/airflow/providers/google/cloud/sensors/workflows.py b/airflow/providers/google/cloud/sensors/workflows.py index dcd5869e871a5..a9e392dc9bf6e 100644 --- a/airflow/providers/google/cloud/sensors/workflows.py +++ b/airflow/providers/google/cloud/sensors/workflows.py @@ -33,27 +33,18 @@ class WorkflowExecutionSensor(BaseSensorOperator): Checks state of an execution for the given ``workflow_id`` and ``execution_id``. :param workflow_id: Required. The ID of the workflow. - :type workflow_id: str :param execution_id: Required. The ID of the execution. - :type execution_id: str :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :type project_id: str :param location: Required. The Cloud Dataproc region in which to handle the request. - :type location: str :param success_states: Execution states to be considered as successful, by default it's only ``SUCCEEDED`` state - :type success_states: List[Execution.State] :param failure_states: Execution states to be considered as failures, by default they are ``FAILED`` and ``CANCELLED`` states. - :type failure_states: List[Execution.State] :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be retried. - :type retry: google.api_core.retry.Retry :param request_timeout: The amount of time, in seconds, to wait for the request to complete. Note that if ``retry`` is specified, the timeout applies to each individual attempt. - :type request_timeout: float :param metadata: Additional metadata that is provided to the method. - :type metadata: Sequence[Tuple[str, str]] """ template_fields: Sequence[str] = ("location", "workflow_id", "execution_id") diff --git a/airflow/providers/google/cloud/transfers/adls_to_gcs.py b/airflow/providers/google/cloud/transfers/adls_to_gcs.py index 72bfdefc6029c..fe3ab9664ddc5 100644 --- a/airflow/providers/google/cloud/transfers/adls_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/adls_to_gcs.py @@ -37,26 +37,18 @@ class ADLSToGCSOperator(ADLSListOperator): Synchronizes an Azure Data Lake Storage path with a GCS bucket :param src_adls: The Azure Data Lake path to find the objects (templated) - :type src_adls: str :param dest_gcs: The Google Cloud Storage bucket and prefix to store the objects. (templated) - :type dest_gcs: str :param replace: If true, replaces same-named files in GCS - :type replace: bool :param gzip: Option to compress file for upload - :type gzip: bool :param azure_data_lake_conn_id: The connection ID to use when connecting to Azure Data Lake Storage. - :type azure_data_lake_conn_id: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param google_cloud_storage_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type google_cloud_storage_conn_id: str :param delegate_to: Google account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
@@ -65,7 +57,6 @@ class ADLSToGCSOperator(ADLSListOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] **Examples**: The following Operator would copy a single file named diff --git a/airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py b/airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py index acf4562f82236..f107eb686e656 100644 --- a/airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py @@ -34,29 +34,20 @@ class AzureFileShareToGCSOperator(BaseOperator): possibly filtered by a prefix, with a Google Cloud Storage destination path. :param share_name: The Azure FileShare share where to find the objects. (templated) - :type share_name: str :param directory_name: (Optional) Path to the Azure FileShare directory whose content is to be transferred. Defaults to the root directory (templated) - :type directory_name: str :param prefix: Prefix string which filters objects whose names begin with such prefix. (templated) - :type prefix: str :param azure_fileshare_conn_id: The source WASB connection - :type azure_fileshare_conn_id: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param dest_gcs: The destination Google Cloud Storage bucket and prefix where you want to store the files. (templated) - :type dest_gcs: str :param delegate_to: Google account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param replace: Whether you want to replace existing destination files or not. - :type replace: bool :param gzip: Option to compress file for upload - :type gzip: bool :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -65,7 +56,6 @@ class AzureFileShareToGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Optional[Union[str, Sequence[str]]] Note that ``share_name``, ``directory_name``, ``prefix``, ``delimiter`` and ``dest_gcs`` are templated, so you can use variables in them if you wish. diff --git a/airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py b/airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py index dca5b49a9bd5c..d439062cf81b6 100644 --- a/airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +++ b/airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py @@ -39,35 +39,25 @@ class BigQueryToBigQueryOperator(BaseOperator): source data. If ``<project>`` is not included, project will be the project defined in the connection json. Use a list if there are multiple source tables. (templated) - :type source_project_dataset_tables: list|string :param destination_project_dataset_table: The destination BigQuery table. Format is: ``(project:|project.)<dataset>.<table>`` (templated)
- :type destination_project_dataset_table: str :param write_disposition: The write disposition if the table already exists. - :type write_disposition: str :param create_disposition: The create disposition if the table doesn't exist. - :type create_disposition: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param bigquery_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type bigquery_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param labels: a dictionary containing labels for the job/query, passed to BigQuery - :type labels: dict :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). **Example**: :: encryption_configuration = { "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" } - :type encryption_configuration: dict :param location: The location used for the operation. - :type location: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -76,7 +66,6 @@ class BigQueryToBigQueryOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/bigquery_to_gcs.py b/airflow/providers/google/cloud/transfers/bigquery_to_gcs.py index 80bf348931baa..2515f47004790 100644 --- a/airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/bigquery_to_gcs.py @@ -40,34 +40,23 @@ class BigQueryToGCSOperator(BaseOperator): ``(<project>.|<project>:)<dataset>.<table>``
BigQuery table to use as the source data. If ``<project>`` is not included, project will be the project defined in the connection json. (templated) - :type source_project_dataset_table: str :param destination_cloud_storage_uris: The destination Google Cloud Storage URI (e.g. gs://some-bucket/some-file.txt). (templated) Follows convention defined here: https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple - :type destination_cloud_storage_uris: List[str] :param compression: Type of compression to use. - :type compression: str :param export_format: File format to export. - :type export_format: str :param field_delimiter: The delimiter to use when extracting to a CSV. - :type field_delimiter: str :param print_header: Whether to print a header for a CSV file extract. - :type print_header: bool :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param bigquery_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type bigquery_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param labels: a dictionary containing labels for the job/query, passed to BigQuery - :type labels: dict :param location: The location used for the operation. - :type location: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -76,7 +65,6 @@ class BigQueryToGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/bigquery_to_mssql.py b/airflow/providers/google/cloud/transfers/bigquery_to_mssql.py index 8a16ed38e913f..ca63ff0d99b50 100644 --- a/airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +++ b/airflow/providers/google/cloud/transfers/bigquery_to_mssql.py @@ -53,26 +53,17 @@ class BigQueryToMsSqlOperator(BaseOperator): :param source_project_dataset_table: A dotted ``<project>.<dataset>.<table>``: the BigQuery table of origin
- :type source_project_dataset_table: str :param selected_fields: List of fields to return (comma-separated). If unspecified, all fields are returned. - :type selected_fields: List[str] | str :param gcp_conn_id: reference to a specific Google Cloud hook. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param mssql_conn_id: reference to a specific mssql hook - :type mssql_conn_id: str :param database: name of the database which overwrites the one defined in the connection - :type database: str :param replace: Whether to replace instead of insert - :type replace: bool :param batch_size: The number of rows to take in each batch - :type batch_size: int :param location: The location used for the operation. - :type location: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -81,7 +72,6 @@ class BigQueryToMsSqlOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: str | Sequence[str] """ template_fields: Sequence[str] = ('source_project_dataset_table', 'mssql_table', 'impersonation_chain') diff --git a/airflow/providers/google/cloud/transfers/bigquery_to_mysql.py b/airflow/providers/google/cloud/transfers/bigquery_to_mysql.py index 438863ca313fd..6fbfceaf38d59 100644 --- a/airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +++ b/airflow/providers/google/cloud/transfers/bigquery_to_mysql.py @@ -52,26 +52,17 @@ class BigQueryToMySqlOperator(BaseOperator): ) :param dataset_table: A dotted ``<dataset>.<table>``: the BigQuery table of origin
- :type dataset_table: str :param selected_fields: List of fields to return (comma-separated). If unspecified, all fields are returned. - :type selected_fields: List[str] | str :param gcp_conn_id: reference to a specific Google Cloud hook. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param mysql_conn_id: Reference to :ref:`mysql connection id <howto/connection:mysql>`. - :type mysql_conn_id: str :param database: name of the database which overwrites the one defined in the connection - :type database: str :param replace: Whether to replace instead of insert - :type replace: bool :param batch_size: The number of rows to take in each batch - :type batch_size: int :param location: The location used for the operation. - :type location: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -80,7 +71,6 @@ class BigQueryToMySqlOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: str | Sequence[str] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/cassandra_to_gcs.py b/airflow/providers/google/cloud/transfers/cassandra_to_gcs.py index 34aeb03958c94..2c4bff7217f2f 100644 --- a/airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/cassandra_to_gcs.py @@ -50,37 +50,27 @@ class CassandraToGCSOperator(BaseOperator): Note: Arrays of arrays are not supported. :param cql: The CQL to execute on the Cassandra table. - :type cql: str :param bucket: The bucket to upload to. - :type bucket: str :param filename: The filename to use as the object name when uploading to Google Cloud Storage. A {} should be specified in the filename to allow the operator to inject file numbers in cases where the file is split due to size. - :type filename: str :param schema_filename: If set, the filename to use as the object name when uploading a .json file containing the BigQuery schema fields for the table that was dumped from Cassandra. - :type schema_filename: str :param approx_max_file_size_bytes: This operator supports the ability to split large table dumps into multiple files (see notes in the filename param docs above). This param allows developers to specify the file size of the splits. Check https://cloud.google.com/storage/quotas to see the maximum allowed file size for a single object. - :type approx_max_file_size_bytes: long :param cassandra_conn_id: Reference to a specific Cassandra hook. - :type cassandra_conn_id: str :param gzip: Option to compress file for upload - :type gzip: bool :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param google_cloud_storage_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type google_cloud_storage_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any.
For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -89,11 +79,9 @@ class CassandraToGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :param query_timeout: (Optional) The amount of time, in seconds, used to execute the Cassandra query. If not set, the timeout value will be set in Session.execute() by the Cassandra driver. If set to None, there is no timeout. - :type query_timeout: float | None """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py b/airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py index 0f9ef4409f9e6..db5ec43df790f 100644 --- a/airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py @@ -59,33 +59,23 @@ class FacebookAdsReportToGcsOperator(BaseOperator): :ref:`howto/operator:FacebookAdsReportToGcsOperator` :param bucket_name: The GCS bucket to upload to - :type bucket_name: str :param object_name: GCS path to save the object. Must be the full file path (ex. `path/to/file.txt`) - :type object_name: str :param gcp_conn_id: Airflow Google Cloud connection ID - :type gcp_conn_id: str :param facebook_conn_id: Airflow Facebook Ads connection ID - :type facebook_conn_id: str :param api_version: The version of the Facebook API. Defaults to None. If it is None, it will use the Facebook business SDK default version. - :type api_version: str :param fields: List of fields that are obtained from Facebook. Found in the AdsInsights.Field class. https://developers.facebook.com/docs/marketing-api/insights/parameters/v6.0 - :type fields: List[str] :param params: Parameters that determine the query for Facebook. This keyword is deprecated; please use the `parameters` keyword to pass the parameters. https://developers.facebook.com/docs/marketing-api/insights/parameters/v6.0 - :type params: Dict[str, Any] :param parameters: Parameters that determine the query for Facebook. https://developers.facebook.com/docs/marketing-api/insights/parameters/v6.0 - :type parameters: Dict[str, Any] :param gzip: Option to compress local file or file data for upload - :type gzip: bool :param upload_as_account: Option to export the file with account_id. This parameter only works if the Account Id is set as an array in the Facebook connection. If set to True, each account's data will be exported to a separate file that has a prefix of account_id. If set to False, a single file will be exported for all account_id. - :type upload_as_account: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request.
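A rough usage sketch for the FacebookAdsReportToGcsOperator parameters described above; the bucket, object name, fields and query parameters are illustrative values, not taken from this diff::

    from airflow.providers.google.cloud.transfers.facebook_ads_to_gcs import (
        FacebookAdsReportToGcsOperator,
    )

    run_report = FacebookAdsReportToGcsOperator(
        task_id="facebook_ads_to_gcs",
        bucket_name="my-bucket",  # hypothetical destination bucket
        object_name="reports/{{ ds }}/insights.csv",  # full object path
        fields=["campaign_name", "clicks", "impressions"],
        parameters={"level": "ad", "date_preset": "yesterday"},
        gcp_conn_id="google_cloud_default",
        facebook_conn_id="facebook_default",
    )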
@@ -94,7 +84,6 @@ class FacebookAdsReportToGcsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py b/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py index c22f931f8c3e4..b5b91d5e43b91 100644 --- a/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +++ b/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py @@ -42,59 +42,43 @@ class GCSToBigQueryOperator(BaseOperator): :ref:`howto/operator:GCSToBigQueryOperator` :param bucket: The bucket to load from. (templated) - :type bucket: str :param source_objects: String or List of Google Cloud Storage URIs to load from. (templated) If source_format is 'DATASTORE_BACKUP', the list must only contain a single URI. - :type source_objects: str, list[str] :param destination_project_dataset_table: The dotted ``(<project>.|<project>:)<dataset>.<table>``
BigQuery table to load data into. If ``<project>`` is not included, project will be the project defined in the connection json. (templated) - :type destination_project_dataset_table: str :param schema_fields: If set, the schema field list as defined here: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load Should not be set when source_format is 'DATASTORE_BACKUP'. Parameter must be defined if 'schema_object' is null and autodetect is False. - :type schema_fields: list :param schema_object: If set, a GCS object path pointing to a .json file that contains the schema for the table. (templated) Parameter must be defined if 'schema_fields' is null and autodetect is False. - :type schema_object: str :param source_format: File format to export. - :type source_format: str :param compression: [Optional] The compression type of the data source. Possible values include GZIP and NONE. The default value is NONE. This setting is ignored for Google Cloud Bigtable, Google Cloud Datastore backups and Avro formats. - :type compression: str :param create_disposition: The create disposition if the table doesn't exist. - :type create_disposition: str :param skip_leading_rows: Number of rows to skip when loading from a CSV. - :type skip_leading_rows: int :param write_disposition: The write disposition if the table already exists. - :type write_disposition: str :param field_delimiter: The delimiter to use when loading from a CSV. - :type field_delimiter: str :param max_bad_records: The maximum number of bad records that BigQuery can ignore when running the job. - :type max_bad_records: int :param quote_character: The value that is used to quote data sections in a CSV file. - :type quote_character: str :param ignore_unknown_values: [Optional] Indicates if BigQuery should allow extra values that are not represented in the table schema. If true, the extra values are ignored. If false, records with extra columns are treated as bad records, and if there are too many bad records, an invalid error is returned in the job result. - :type ignore_unknown_values: bool :param allow_quoted_newlines: Whether to allow quoted newlines (true) or not (false). - :type allow_quoted_newlines: bool :param allow_jagged_rows: Accept rows that are missing trailing optional columns. The missing values are treated as nulls. If false, records with missing trailing columns are treated as bad records, and if there are too many bad records, an invalid error is returned in the job result. Only applicable to CSV, ignored for other formats. - :type allow_jagged_rows: bool :param encoding: The character encoding of the data. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).csvOptions.encoding https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#externalDataConfiguration.csvOptions.encoding @@ -104,50 +88,38 @@ class GCSToBigQueryOperator(BaseOperator): execute() command, which in turn gets stored in XCom for future operators to use. This can be helpful with incremental loads--during future executions, you can pick up from the max ID. - :type max_id_key: str :param bigquery_conn_id: (Optional) The connection ID used to connect to Google Cloud and interact with the BigQuery service. - :type bigquery_conn_id: str :param google_cloud_storage_conn_id: (Optional) The connection ID used to connect to Google Cloud and interact with the Google Cloud Storage service.
- :type google_cloud_storage_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param schema_update_options: Allows the schema of the destination table to be updated as a side effect of the load job. - :type schema_update_options: list :param src_fmt_configs: configure optional fields specific to the source format - :type src_fmt_configs: dict :param external_table: Flag to specify if the destination table should be a BigQuery external table. Default value is False. - :type external_table: bool :param time_partitioning: configure optional time partitioning fields i.e. partition by field, type and expiration as per API specifications. Note that 'field' is not available in conjunction with dataset.table$partition. - :type time_partitioning: dict :param cluster_fields: Request that the result of this load be stored sorted by one or more columns. BigQuery supports clustering for both partitioned and non-partitioned tables. The order of columns given determines the sort order. Not applicable for external tables. - :type cluster_fields: list[str] :param autodetect: [Optional] Indicates if we should automatically infer the options and schema for CSV and JSON sources. (Default: ``True``). Parameter must be set to True if 'schema_fields' and 'schema_object' are undefined. It is suggested to set this to True if the table is created outside of Airflow. - :type autodetect: bool :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). **Example**: :: encryption_configuration = { "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" } - :type encryption_configuration: dict :param location: [Optional] The geographic location of the job. Required except for US and EU. See details at https://cloud.google.com/bigquery/docs/locations#specifying_your_location - :type location: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -156,11 +128,8 @@ class GCSToBigQueryOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :param labels: [Optional] Labels for the BigQuery table. - :type labels: dict :param description: [Optional] Description for the BigQuery table. - :type description: str """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/gcs_to_gcs.py b/airflow/providers/google/cloud/transfers/gcs_to_gcs.py index 63066cb16e587..7e6498cefd5ca 100644 --- a/airflow/providers/google/cloud/transfers/gcs_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/gcs_to_gcs.py @@ -39,21 +39,17 @@ class GCSToGCSOperator(BaseOperator): :param source_bucket: The source Google Cloud Storage bucket where the object is. (templated) - :type source_bucket: str :param source_object: The source name of the object to copy in the Google cloud storage bucket. (templated) You can use only one wildcard for objects (filenames) within your bucket.
The wildcard can appear inside the object name or at the end of the object name. Appending a wildcard to the bucket name is unsupported. - :type source_object: str :param source_objects: A list of source names of the objects to copy in the Google cloud storage bucket. (templated) - :type source_objects: List[str] :param destination_bucket: The destination Google Cloud Storage bucket where the object should be. If the destination_bucket is None, it defaults to source_bucket. (templated) - :type destination_bucket: str :param destination_object: The destination name of the object in the destination Google Cloud Storage bucket. (templated) If a wildcard is supplied in the source_object argument, this is the @@ -65,37 +61,27 @@ class GCSToGCSOperator(BaseOperator): the destination_object as e.g. ``blah/foo``, in which case the copied file will be named ``blah/foo/baz``. The same thing applies to source objects inside source_objects. - :type destination_object: str :param move_object: When move object is True, the object is moved instead of copied to the new location. This is the equivalent of a mv command as opposed to a cp command. - :type move_object: bool :param replace: Whether you want to replace existing destination files or not. - :type replace: bool :param delimiter: This is used to restrict the result to only the 'files' in a given 'folder'. If source_objects = ['foo/bah/'] and delimiter = '.avro', then only the 'files' in the folder 'foo/bah/' with '.avro' delimiter will be copied to the destination object. - :type delimiter: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param google_cloud_storage_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type google_cloud_storage_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param last_modified_time: When specified, the objects will be copied or moved only if they were modified after last_modified_time. If tzinfo has not been set, UTC will be assumed. - :type last_modified_time: datetime.datetime :param maximum_modified_time: When specified, the objects will be copied or moved only if they were modified before maximum_modified_time. If tzinfo has not been set, UTC will be assumed. - :type maximum_modified_time: datetime.datetime :param is_older_than: When specified, the objects will be copied if they are older than the specified time in seconds. - :type is_older_than: int :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -104,7 +90,6 @@ class GCSToGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]] :Example: diff --git a/airflow/providers/google/cloud/transfers/gcs_to_local.py b/airflow/providers/google/cloud/transfers/gcs_to_local.py index c2d72855eb575..762cedfbb0bc1 100644 --- a/airflow/providers/google/cloud/transfers/gcs_to_local.py +++ b/airflow/providers/google/cloud/transfers/gcs_to_local.py @@ -41,28 +41,21 @@ class GCSToLocalFilesystemOperator(BaseOperator): :param bucket: The Google Cloud Storage bucket where the object is. Must not contain 'gs://' prefix. (templated) - :type bucket: str :param object_name: The name of the object to download in the Google cloud storage bucket. (templated) - :type object_name: str :param filename: The file path, including filename, on the local file system (where the operator is being executed) that the file should be downloaded to. (templated) If no filename is passed, the downloaded data will not be stored on the local file system. - :type filename: str :param store_to_xcom_key: If this param is set, the operator will push the contents of the downloaded file to XCom with the key set in this parameter. If not set, the downloaded data will not be pushed to XCom. (templated) - :type store_to_xcom_key: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param google_cloud_storage_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type google_cloud_storage_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -71,10 +64,8 @@ class GCSToLocalFilesystemOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] :param file_encoding: Optional encoding used to decode file_bytes into a serializable string that is suitable for storing to XCom. (templated). - :type file_encoding: str """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/gcs_to_sftp.py b/airflow/providers/google/cloud/transfers/gcs_to_sftp.py index cce5b3380e9ca..1ad74cc3c1d01 100644 --- a/airflow/providers/google/cloud/transfers/gcs_to_sftp.py +++ b/airflow/providers/google/cloud/transfers/gcs_to_sftp.py @@ -66,33 +66,25 @@ class GCSToSFTPOperator(BaseOperator): :param source_bucket: The source Google Cloud Storage bucket where the object is. (templated) - :type source_bucket: str :param source_object: The source name of the object to copy in the Google cloud storage bucket. (templated) You can use only one wildcard for objects (filenames) within your bucket. The wildcard can appear inside the object name or at the end of the object name. Appending a wildcard to the bucket name is unsupported. - :type source_object: str :param destination_path: The sftp remote path. This is the specified directory path for uploading to the SFTP server.
- :type destination_path: str :param keep_directory_structure: (Optional) When set to False, the path of the file on the bucket is recreated within the path passed in destination_path. - :type keep_directory_structure: bool :param move_object: When move object is True, the object is moved instead of copied to the new location. This is the equivalent of a mv command as opposed to a cp command. - :type move_object: bool :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param sftp_conn_id: The sftp connection id. The name or identifier for establishing a connection to the SFTP server. - :type sftp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -101,7 +93,6 @@ class GCSToSFTPOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/gdrive_to_gcs.py b/airflow/providers/google/cloud/transfers/gdrive_to_gcs.py index f90b123170c4f..82a3090731292 100644 --- a/airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/gdrive_to_gcs.py @@ -36,26 +36,17 @@ class GoogleDriveToGCSOperator(BaseOperator): :param bucket_name: The destination Google cloud storage bucket where the file should be written to - :type bucket_name: str :param object_name: The Google Cloud Storage object name for the object created by the operator. For example: ``path/to/my/file/file.txt``. - :type object_name: str :param destination_bucket: Same as bucket_name, but kept for backward compatibility - :type destination_bucket: str :param destination_object: Same as object_name, but kept for backward compatibility - :type destination_object: str :param folder_id: The folder id of the folder in which the Google Drive file resides - :type folder_id: str :param file_name: The name of the file residing in Google Drive - :type file_name: str :param drive_id: Optional. The id of the shared Google Drive in which the file resides. - :type drive_id: str :param gcp_conn_id: The GCP connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -64,7 +55,6 @@ class GoogleDriveToGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated).
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/gdrive_to_local.py b/airflow/providers/google/cloud/transfers/gdrive_to_local.py index 06f64385a2fe0..c61f2db99e116 100644 --- a/airflow/providers/google/cloud/transfers/gdrive_to_local.py +++ b/airflow/providers/google/cloud/transfers/gdrive_to_local.py @@ -33,17 +33,12 @@ class GoogleDriveToLocalOperator(BaseOperator): :ref:`howto/operator:GoogleDriveToLocalOperator` :param output_file: Path to downloaded file - :type output_file: str :param folder_id: The folder id of the folder in which the Google Drive file resides - :type folder_id: str :param file_name: The name of the file residing in Google Drive - :type file_name: str :param drive_id: Optional. The id of the shared Google Drive in which the file resides. - :type drive_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -52,7 +47,6 @@ class GoogleDriveToLocalOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/local_to_gcs.py b/airflow/providers/google/cloud/transfers/local_to_gcs.py index 45ebb44bb7031..4f0993763ae03 100644 --- a/airflow/providers/google/cloud/transfers/local_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/local_to_gcs.py @@ -39,24 +39,16 @@ class LocalFilesystemToGCSOperator(BaseOperator): :param src: Path to the local file, or list of local files. Path can be either absolute (e.g. /path/to/file.ext) or relative (e.g. ../../foo/*/*.csv). (templated) - :type src: str or list :param dst: Destination path within the specified bucket on GCS (e.g. /path/to/file.ext). If multiple files are being uploaded, specify the object prefix with a trailing slash (e.g. /path/to/directory/) (templated) - :type dst: str :param bucket: The bucket to upload to. (templated) - :type bucket: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param google_cloud_storage_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type google_cloud_storage_conn_id: str :param mime_type: The mime-type string - :type mime_type: str :param delegate_to: The account to impersonate, if any - :type delegate_to: str :param gzip: Allows for file to be compressed and uploaded as gzip - :type gzip: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request.
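The src/dst contract above (a single file maps to a full object name, while a list or glob of files requires a directory-style prefix) in a short sketch; the bucket and paths are hypothetical::

    from airflow.providers.google.cloud.transfers.local_to_gcs import (
        LocalFilesystemToGCSOperator,
    )

    # One file: dst is the complete object name.
    upload_one = LocalFilesystemToGCSOperator(
        task_id="upload_one",
        src="/tmp/report.csv",
        dst="reports/report.csv",
        bucket="my-bucket",
    )

    # Several files via glob: dst must be a prefix ending in a slash.
    upload_many = LocalFilesystemToGCSOperator(
        task_id="upload_many",
        src="/tmp/exports/*.csv",
        dst="exports/",
        bucket="my-bucket",
        gzip=True,  # compress each file before upload
    )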
@@ -65,7 +57,6 @@ class LocalFilesystemToGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/mssql_to_gcs.py b/airflow/providers/google/cloud/transfers/mssql_to_gcs.py index 66add3cea74dd..79f6184058dd2 100644 --- a/airflow/providers/google/cloud/transfers/mssql_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/mssql_to_gcs.py @@ -29,7 +29,6 @@ class MSSQLToGCSOperator(BaseSQLToGCSOperator): in JSON or CSV format. :param mssql_conn_id: Reference to a specific MSSQL hook. - :type mssql_conn_id: str **Example**: The following operator will export data from the Customers table diff --git a/airflow/providers/google/cloud/transfers/mysql_to_gcs.py b/airflow/providers/google/cloud/transfers/mysql_to_gcs.py index ff656a4d91b4e..57176a2826088 100644 --- a/airflow/providers/google/cloud/transfers/mysql_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/mysql_to_gcs.py @@ -36,11 +36,9 @@ class MySQLToGCSOperator(BaseSQLToGCSOperator): :ref:`howto/operator:MySQLToGCSOperator` :param mysql_conn_id: Reference to :ref:`mysql connection id <howto/connection:mysql>`. - :type mysql_conn_id: str :param ensure_utc: Ensure TIMESTAMP columns are exported as UTC. If set to `False`, TIMESTAMP columns will be exported using the MySQL server's default timezone. - :type ensure_utc: bool """ ui_color = '#a0e08c' @@ -111,9 +109,7 @@ def convert_type(self, value, schema_type: str): https://cloud.google.com/bigquery/data-types :param value: MySQLdb column value - :type value: Any :param schema_type: BigQuery data type - :type schema_type: str """ if value is None: return value diff --git a/airflow/providers/google/cloud/transfers/oracle_to_gcs.py b/airflow/providers/google/cloud/transfers/oracle_to_gcs.py index 462047be5bebf..bebe2a14b24e1 100644 --- a/airflow/providers/google/cloud/transfers/oracle_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/oracle_to_gcs.py @@ -37,11 +37,9 @@ class OracleToGCSOperator(BaseSQLToGCSOperator): :param oracle_conn_id: Reference to a specific :ref:`Oracle hook <howto/connection:oracle>`. - :type oracle_conn_id: str :param ensure_utc: Ensure TIMESTAMP columns are exported as UTC. If set to `False`, TIMESTAMP columns will be exported using the Oracle server's default timezone. - :type ensure_utc: bool """ ui_color = '#a0e08c' @@ -102,9 +100,7 @@ def convert_type(self, value, schema_type): https://cloud.google.com/bigquery/data-types :param value: Oracle db column value - :type value: Any :param schema_type: BigQuery data type - :type schema_type: str """ if value is None: return value diff --git a/airflow/providers/google/cloud/transfers/postgres_to_gcs.py b/airflow/providers/google/cloud/transfers/postgres_to_gcs.py index b5606c6697e4a..d7c777757c33c 100644 --- a/airflow/providers/google/cloud/transfers/postgres_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/postgres_to_gcs.py @@ -69,12 +69,9 @@ class PostgresToGCSOperator(BaseSQLToGCSOperator): Copy data from Postgres to Google Cloud Storage in JSON or CSV format. :param postgres_conn_id: Reference to a specific Postgres hook. - :type postgres_conn_id: str :param use_server_side_cursor: If server-side cursor should be used for querying postgres.
For detailed info, check https://www.psycopg.org/docs/usage.html#server-side-cursors - :type use_server_side_cursor: bool :param cursor_itersize: How many records are fetched at a time in case of server-side cursor. - :type cursor_itersize: int """ ui_color = '#a0e08c' diff --git a/airflow/providers/google/cloud/transfers/presto_to_gcs.py b/airflow/providers/google/cloud/transfers/presto_to_gcs.py index 1903b6b2ff96a..19928220d25fe 100644 --- a/airflow/providers/google/cloud/transfers/presto_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/presto_to_gcs.py @@ -144,7 +144,6 @@ class PrestoToGCSOperator(BaseSQLToGCSOperator): """Copy data from PrestoDB to Google Cloud Storage in JSON or CSV format. :param presto_conn_id: Reference to a specific Presto hook. - :type presto_conn_id: str """ ui_color = "#a0e08c" @@ -201,8 +200,6 @@ def convert_type(self, value, schema_type): Do nothing. Presto uses JSON on the transport layer, so types are simple. :param value: Presto column value - :type value: Any :param schema_type: BigQuery data type - :type schema_type: str """ return value diff --git a/airflow/providers/google/cloud/transfers/s3_to_gcs.py b/airflow/providers/google/cloud/transfers/s3_to_gcs.py index fa8c8656dae05..9f45152096a0f 100644 --- a/airflow/providers/google/cloud/transfers/s3_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/s3_to_gcs.py @@ -38,14 +38,10 @@ class S3ToGCSOperator(S3ListOperator): :ref:`howto/operator:S3ToGCSOperator` :param bucket: The S3 bucket where to find the objects. (templated) - :type bucket: str :param prefix: Prefix string which filters objects whose names begin with such prefix. (templated) - :type prefix: str :param delimiter: the delimiter marks key hierarchy. (templated) - :type delimiter: str :param aws_conn_id: The source S3 connection - :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. You can provide the following values: @@ -56,24 +52,17 @@ class S3ToGCSOperator(S3ListOperator): - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to use. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. - :type verify: bool or str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param dest_gcs_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type dest_gcs_conn_id: str :param dest_gcs: The destination Google Cloud Storage bucket and prefix where you want to store the files. (templated) - :type dest_gcs: str :param delegate_to: Google account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param replace: Whether you want to replace existing destination files or not. - :type replace: bool :param gzip: Option to compress file for upload - :type gzip: bool :param google_impersonation_chain: Optional Google service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request.
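A rough usage sketch of S3ToGCSOperator as documented above; bucket names, prefix and connection IDs are illustrative assumptions, not values from this diff::

    from airflow.providers.google.cloud.transfers.s3_to_gcs import S3ToGCSOperator

    s3_to_gcs = S3ToGCSOperator(
        task_id="s3_to_gcs",
        bucket="my-s3-bucket",  # source S3 bucket
        prefix="data/",  # only copy keys that start with this prefix
        dest_gcs="gs://my-gcs-bucket/data/",  # destination bucket and prefix
        aws_conn_id="aws_default",
        gcp_conn_id="google_cloud_default",
        replace=False,  # skip objects already present at the destination
        gzip=True,  # compress files while uploading
    )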
@@ -82,7 +71,6 @@ class S3ToGCSOperator(S3ListOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type google_impersonation_chain: Union[str, Sequence[str]] **Example**: diff --git a/airflow/providers/google/cloud/transfers/salesforce_to_gcs.py b/airflow/providers/google/cloud/transfers/salesforce_to_gcs.py index 3fb2251abd732..858a7e4d55933 100644 --- a/airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/salesforce_to_gcs.py @@ -36,31 +36,20 @@ class SalesforceToGcsOperator(BaseOperator): :ref:`howto/operator:SalesforceToGcsOperator` :param query: The query to make to Salesforce. - :type query: str :param bucket_name: The bucket to upload to. - :type bucket_name: str :param object_name: The object name to set when uploading the file. - :type object_name: str :param salesforce_conn_id: the name of the connection that has the parameters we need to connect to Salesforce. - :type salesforce_conn_id: str :param include_deleted: True if the query should include deleted records. - :type include_deleted: bool :param query_params: Additional optional arguments - :type query_params: dict :param export_format: Desired format of files to be exported. - :type export_format: str :param coerce_to_timestamp: True if you want all datetime fields to be converted into Unix timestamps. False if you want them to be left in the same format as they were in Salesforce. Leaving the value as False will result in datetimes being strings. Default: False - :type coerce_to_timestamp: bool :param record_time_added: True if you want to add a Unix timestamp field to the resulting data that marks when the data was fetched from Salesforce. Default: False - :type record_time_added: bool :param gzip: Option to compress local file or file data for upload - :type gzip: bool :param gcp_conn_id: the name of the connection that has the parameters we need to connect to GCS. - :type gcp_conn_id: str """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/sftp_to_gcs.py b/airflow/providers/google/cloud/transfers/sftp_to_gcs.py index 55ead28c08c90..8f750af2b5810 100644 --- a/airflow/providers/google/cloud/transfers/sftp_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/sftp_to_gcs.py @@ -44,33 +44,24 @@ class SFTPToGCSOperator(BaseOperator): for downloading the single file or multiple files from the SFTP server. You can use only one wildcard within your path. The wildcard can appear inside the path or at the end of the path. - :type source_path: str :param destination_bucket: The bucket to upload to. - :type destination_bucket: str :param destination_path: The destination name of the object in the destination Google Cloud Storage bucket. If destination_path is not provided file/files will be placed in the main bucket path. If a wildcard is supplied in the destination_path argument, this is the prefix that will be prepended to the final destination objects' paths. - :type destination_path: str :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param sftp_conn_id: The sftp connection id. The name or identifier for establishing a connection to the SFTP server. - :type sftp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. 
For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param mime_type: The mime-type string - :type mime_type: str :param gzip: Allows for file to be compressed and uploaded as gzip - :type gzip: bool :param move_object: When move object is True, the object is moved instead of copied to the new location. This is the equivalent of a mv command as opposed to a cp command. - :type move_object: bool :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -79,7 +70,6 @@ class SFTPToGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/sheets_to_gcs.py b/airflow/providers/google/cloud/transfers/sheets_to_gcs.py index f75986e670a70..45b8081f2dad2 100644 --- a/airflow/providers/google/cloud/transfers/sheets_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/sheets_to_gcs.py @@ -36,22 +36,16 @@ class GoogleSheetsToGCSOperator(BaseOperator): :ref:`howto/operator:GoogleSheetsToGCSOperator` :param spreadsheet_id: The Google Sheet ID to interact with. - :type spreadsheet_id: str :param sheet_filter: Defaults to None; if provided, should be an array of the sheet titles to pull from. - :type sheet_filter: List[str] :param destination_bucket: The destination Google cloud storage bucket where the report should be written to. (templated) - :type destination_bucket: str :param destination_path: The Google cloud storage URI array for the object created by the operator. For example: ``path/to/my/files``. - :type destination_path: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -60,7 +54,6 @@ class GoogleSheetsToGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/sql_to_gcs.py b/airflow/providers/google/cloud/transfers/sql_to_gcs.py index 8961e0eb74338..d6a39202ac6c8 100644 --- a/airflow/providers/google/cloud/transfers/sql_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/sql_to_gcs.py @@ -38,48 +38,34 @@ class BaseSQLToGCSOperator(BaseOperator): Copy data from SQL to Google Cloud Storage in JSON or CSV format. :param sql: The SQL to execute. - :type sql: str :param bucket: The bucket to upload to.
- :type bucket: str :param filename: The filename to use as the object name when uploading to Google Cloud Storage. A ``{}`` should be specified in the filename to allow the operator to inject file numbers in cases where the file is split due to size. - :type filename: str :param schema_filename: If set, the filename to use as the object name when uploading a .json file containing the BigQuery schema fields for the table that was dumped from the database. - :type schema_filename: str :param approx_max_file_size_bytes: This operator supports the ability to split large table dumps into multiple files (see notes in the filename param docs above). This param allows developers to specify the file size of the splits. Check https://cloud.google.com/storage/quotas to see the maximum allowed file size for a single object. - :type approx_max_file_size_bytes: long :param export_format: Desired format of files to be exported. - :type export_format: str :param field_delimiter: The delimiter to be used for CSV files. - :type field_delimiter: str :param null_marker: The null marker to be used for CSV files. - :type null_marker: str :param gzip: Option to compress file for upload (does not apply to schemas). - :type gzip: bool :param schema: The schema to use, if any. Should be a list of dict or a str. Pass a string if using a Jinja template; otherwise, pass a list of dict. Examples can be seen at: https://cloud.google.com/bigquery/docs/schemas#specifying_a_json_schema_file - :type schema: str or list :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param google_cloud_storage_conn_id: (Deprecated) The connection ID used to connect to Google Cloud. This parameter has been deprecated. You should pass the gcp_conn_id parameter instead. - :type google_cloud_storage_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param parameters: a parameters dict that is substituted at query runtime. - :type parameters: dict :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -88,7 +74,6 @@ class BaseSQLToGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/cloud/transfers/trino_to_gcs.py b/airflow/providers/google/cloud/transfers/trino_to_gcs.py index bccf49346e803..a4635e88139d8 100644 --- a/airflow/providers/google/cloud/transfers/trino_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/trino_to_gcs.py @@ -144,7 +144,6 @@ class TrinoToGCSOperator(BaseSQLToGCSOperator): """Copy data from TrinoDB to Google Cloud Storage in JSON or CSV format. :param trino_conn_id: Reference to a specific Trino hook. - :type trino_conn_id: str """ ui_color = "#a0e08c" @@ -201,8 +200,6 @@ def convert_type(self, value, schema_type): Do nothing. Trino uses JSON on the transport layer, so types are simple.
:param value: Trino column value - :type value: Any :param schema_type: BigQuery data type - :type schema_type: str """ return value diff --git a/airflow/providers/google/cloud/utils/credentials_provider.py b/airflow/providers/google/cloud/utils/credentials_provider.py index 60fb2c13b4a82..907e8e1cdd307 100644 --- a/airflow/providers/google/cloud/utils/credentials_provider.py +++ b/airflow/providers/google/cloud/utils/credentials_provider.py @@ -53,11 +53,8 @@ def build_gcp_conn( scopes and project id. :param key_file_path: Path to service key. - :type key_file_path: Optional[str] :param scopes: Required OAuth scopes. - :type scopes: Optional[List[str]] :param project_id: The Google Cloud project id to be used for the connection. - :type project_id: Optional[str] :return: String representing Airflow connection. """ conn = "google-cloud-platform://?{}" @@ -86,9 +83,7 @@ def provide_gcp_credentials(key_file_path: Optional[str] = None, key_file_dict: file in ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable. :param key_file_path: Path to file with Google Cloud Service Account .json file. - :type key_file_path: str :param key_file_dict: Dictionary with credentials. - :type key_file_dict: Dict __ https://cloud.google.com/docs/authentication/production """ @@ -123,11 +118,8 @@ def provide_gcp_connection( required scopes and project id. :param key_file_path: Path to file with Google Cloud Service Account .json file. - :type key_file_path: str :param scopes: OAuth scopes for the connection - :type scopes: Sequence :param project_id: The id of Google Cloud project for the connection. - :type project_id: str """ if key_file_path and key_file_path.endswith(".p12"): raise AirflowException("Legacy P12 key file are not supported, use a JSON key file.") @@ -152,11 +144,8 @@ def provide_gcp_conn_and_credentials( - temporary value of :envvar:`AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT` connection :param key_file_path: Path to file with Google Cloud Service Account .json file. - :type key_file_path: str :param scopes: OAuth scopes for the connection - :type scopes: Sequence :param project_id: The id of Google Cloud project for the connection. - :type project_id: str __ https://cloud.google.com/docs/authentication/production """ @@ -180,27 +169,21 @@ class _CredentialProvider(LoggingMixin): occur. If neither of them are provided, return default credentials for the current environment :param key_path: Path to Google Cloud Service Account key file (JSON). - :type key_path: str :param keyfile_dict: A dict representing Cloud Service Account as in the Credential JSON file - :type keyfile_dict: Dict[str, str] :param scopes: OAuth scopes for the connection - :type scopes: Collection[str] :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param disable_logging: If true, disable all log messages, which allows you to use this class to configure Logger. :param target_principal: The service account to directly impersonate using short-term credentials, if any. For this to work, the target_principal account must grant the originating account the Service Account Token Creator IAM role. - :type target_principal: str :param delegates: optional chained list of accounts required to get the access_token of target_principal. 
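The credential helpers above are context managers: they set the relevant environment variables only for the duration of the block. A minimal usage sketch, with a hypothetical key path and project id:

```python
# Minimal sketch of the context managers documented above; the key path,
# scopes and project id are placeholder assumptions.
from airflow.providers.google.cloud.utils.credentials_provider import (
    provide_gcp_conn_and_credentials,
    provide_gcp_credentials,
)

# Temporarily point GOOGLE_APPLICATION_CREDENTIALS at a service-account key.
with provide_gcp_credentials(key_file_path="/files/sa-key.json"):
    ...  # code here resolves credentials via standard ADC

# Additionally set AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT for the block's duration.
with provide_gcp_conn_and_credentials(
    key_file_path="/files/sa-key.json",
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
    project_id="example-project",
):
    ...  # hooks created here see the temporary connection
```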
If set, the sequence of identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account and target_principal granting the role to the last account from the list. - :type delegates: Sequence[str] """ def __init__( @@ -345,7 +328,6 @@ def _get_scopes(scopes: Optional[str] = None) -> Sequence[str]: Otherwise, default scope will be returned. :param scopes: A comma-separated string containing OAuth2 scopes - :type scopes: Optional[str] :return: Returns the scope defined in the connection configuration, or the default scope :rtype: Sequence[str] """ @@ -362,7 +344,6 @@ def _get_target_principal_and_delegates( :param impersonation_chain: the service account to impersonate or a chained list leading to this account - :type impersonation_chain: Optional[Union[str, Sequence[str]]] :return: Returns the tuple of target_principal and delegates :rtype: Tuple[Optional[str], Optional[Sequence[str]]] @@ -381,7 +362,6 @@ def _get_project_id_from_service_account_email(service_account_email: str) -> st Extracts project_id from service account's email address. :param service_account_email: email of the service account. - :type service_account_email: str :return: Returns the project_id of the provided service account. :rtype: str diff --git a/airflow/providers/google/cloud/utils/field_sanitizer.py b/airflow/providers/google/cloud/utils/field_sanitizer.py index 3065b24f01184..6d2814b7e67c2 100644 --- a/airflow/providers/google/cloud/utils/field_sanitizer.py +++ b/airflow/providers/google/cloud/utils/field_sanitizer.py @@ -113,7 +113,6 @@ class GcpBodyFieldSanitizer(LoggingMixin): """Sanitizes the body according to specification. :param sanitize_specs: array of strings that specifies which fields to remove - :type sanitize_specs: list[str] """ diff --git a/airflow/providers/google/cloud/utils/field_validator.py b/airflow/providers/google/cloud/utils/field_validator.py index a02dcc4cf5d2b..974c3b4559bbc 100644 --- a/airflow/providers/google/cloud/utils/field_validator.py +++ b/airflow/providers/google/cloud/utils/field_validator.py @@ -183,9 +183,7 @@ class GcpBodyFieldValidator(LoggingMixin): for some examples and explanations of how to create specification. :param validation_specs: dictionary describing validation specification - :type validation_specs: list[dict] :param api_version: Version of the api used (for example v1) - :type api_version: str """ @@ -313,14 +311,10 @@ def _validate_field(self, validation_spec, dictionary_to_validate, parent=None, Validates if field is OK. :param validation_spec: specification of the field - :type validation_spec: dict :param dictionary_to_validate: dictionary where the field should be present - :type dictionary_to_validate: dict :param parent: full path of parent field - :type parent: str :param force_optional: forces the field to be optional (all union fields have force_optional set to True) - :type force_optional: bool :return: True if the field is present """ field_name = validation_spec['name'] @@ -424,7 +418,6 @@ def validate(self, body_to_validate: dict) -> None: body not conforming to the specification respectively. 
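The ``impersonation_chain`` convention described above (and in ``_get_target_principal_and_delegates``) compresses into a few lines of code. A pure-Python sketch of the mapping the docstrings describe, under a hypothetical name since the provider's own helper is private:

```python
from typing import Optional, Sequence, Tuple, Union

def split_impersonation_chain(
    impersonation_chain: Optional[Union[str, Sequence[str]]],
) -> Tuple[Optional[str], Optional[Sequence[str]]]:
    """Sketch of the (target_principal, delegates) split described above:
    a plain string is the principal itself; in a sequence the last entry is
    the principal and every earlier entry is a delegate."""
    if not impersonation_chain:
        return None, None
    if isinstance(impersonation_chain, str):
        return impersonation_chain, None
    *delegates, target_principal = impersonation_chain
    return target_principal, delegates

assert split_impersonation_chain("sa@proj.iam.gserviceaccount.com") == (
    "sa@proj.iam.gserviceaccount.com",
    None,
)
assert split_impersonation_chain(["a@x", "b@x", "c@x"]) == ("c@x", ["a@x", "b@x"])
```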
:param body_to_validate: body that must follow the specification - :type body_to_validate: dict :return: None """ try: diff --git a/airflow/providers/google/cloud/utils/mlengine_operator_utils.py b/airflow/providers/google/cloud/utils/mlengine_operator_utils.py index 8713e9df40eda..3de91247f2fbd 100644 --- a/airflow/providers/google/cloud/utils/mlengine_operator_utils.py +++ b/airflow/providers/google/cloud/utils/mlengine_operator_utils.py @@ -128,16 +128,12 @@ def validate_err_and_count(summary): :param task_prefix: a prefix for the tasks. Only alphanumeric characters and hyphen are allowed (no underscores), since this will be used as dataflow job name, which doesn't allow other characters. - :type task_prefix: str :param data_format: either of 'TEXT', 'TF_RECORD', 'TF_RECORD_GZIP' - :type data_format: str :param input_paths: a list of input paths to be sent to BatchPrediction. - :type input_paths: list[str] :param prediction_path: GCS path to put the prediction results in. - :type prediction_path: str :param metric_fn_and_keys: a tuple of metric_fn and metric_keys: @@ -145,57 +141,46 @@ def validate_err_and_count(summary): and returns a tuple of metric(s) that it calculates. - metric_keys is a list of strings to denote the key of each metric. - :type metric_fn_and_keys: tuple of a function and a list[str] :param validate_fn: a function to validate whether the averaged metric(s) is good enough to push the model. - :type validate_fn: function :param batch_prediction_job_id: the id to use for the Cloud ML Batch prediction job. Passed directly to the MLEngineBatchPredictionOperator as the job_id argument. - :type batch_prediction_job_id: str :param project_id: the Google Cloud project id in which to execute Cloud ML Batch Prediction and Dataflow jobs. If None, then the `dag`'s `default_args['project_id']` will be used. - :type project_id: str :param region: the Google Cloud region in which to execute Cloud ML Batch Prediction and Dataflow jobs. If None, then the `dag`'s `default_args['region']` will be used. - :type region: str :param dataflow_options: options to run Dataflow jobs. If None, then the `dag`'s `default_args['dataflow_default_options']` will be used. - :type dataflow_options: dictionary :param model_uri: GCS path of the model exported by Tensorflow using ``tensorflow.estimator.export_savedmodel()``. It cannot be used with model_name or version_name below. See MLEngineBatchPredictionOperator for more detail. - :type model_uri: str :param model_name: Used to indicate a model to use for prediction. Can be used in combination with version_name, but cannot be used together with model_uri. See MLEngineBatchPredictionOperator for more detail. If None, then the `dag`'s `default_args['model_name']` will be used. - :type model_name: str :param version_name: Used to indicate a model version to use for prediction, in combination with model_name. Cannot be used together with model_uri. See MLEngineBatchPredictionOperator for more detail. If None, then the `dag`'s `default_args['version_name']` will be used. - :type version_name: str :param dag: The `DAG` to use for all Operators. - :type dag: airflow.models.DAG :param py_interpreter: Python version of the beam pipeline. If None, this defaults to the python3. 
To track python versions supported by beam and related issues check: https://issues.apache.org/jira/browse/BEAM-1251 - :type py_interpreter: str :returns: a tuple of three operators, (prediction, summary, validation) :rtype: tuple(DataFlowPythonOperator, DataFlowPythonOperator, diff --git a/airflow/providers/google/common/hooks/base_google.py b/airflow/providers/google/common/hooks/base_google.py index 0e2c6b17ad567..a9ee05c1400ff 100644 --- a/airflow/providers/google/common/hooks/base_google.py +++ b/airflow/providers/google/common/hooks/base_google.py @@ -146,11 +146,9 @@ class GoogleBaseHook(BaseHook): JSON data provided in the UI: Specify 'Keyfile JSON'. :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -159,7 +157,6 @@ class GoogleBaseHook(BaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ conn_name_attr = 'gcp_conn_id' @@ -551,11 +548,8 @@ def download_content_from_request(file_handle, request: dict, chunk_size: int) - :param file_handle: io.Base or file object. The stream in which to write the downloaded bytes. - :type file_handle: io.Base or file object :param request: googleapiclient.http.HttpRequest, the media request to perform in chunks. - :type request: Dict :param chunk_size: int, File will be downloaded in chunks of this many bytes. - :type chunk_size: int """ downloader = MediaIoBaseDownload(file_handle, request, chunksize=chunk_size) done = False diff --git a/airflow/providers/google/common/hooks/discovery_api.py b/airflow/providers/google/common/hooks/discovery_api.py index 7b84e0fd5fcd6..bad4c7945f27e 100644 --- a/airflow/providers/google/common/hooks/discovery_api.py +++ b/airflow/providers/google/common/hooks/discovery_api.py @@ -30,15 +30,11 @@ class GoogleDiscoveryApiHook(GoogleBaseHook): :param api_service_name: The name of the api service that is needed to get the data for example 'youtube'. - :type api_service_name: str :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
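The ``metric_fn_and_keys`` / ``validate_fn`` contract in the ``create_evaluate_ops`` docstring above is easier to read as code. A sketch of plausible callables, assuming prediction instances that carry ``input_label`` and ``probability`` keys; the metric, threshold and minimum count are illustrative:

```python
# Sketch of the callables create_evaluate_ops expects; field names and
# thresholds are illustrative assumptions.
import math

def metric_fn(inst: dict):
    """Per-instance metrics: squared error plus a constant 1 for counting."""
    label = float(inst["input_label"])      # assumed field name
    prob = float(inst["probability"])       # assumed field name
    squared_err = math.pow(prob - label, 2)
    return (squared_err, 1.0)

metric_fn_and_keys = (metric_fn, ["err", "count"])

def validate_fn(summary: dict) -> str:
    """Raise when the averaged metrics are not good enough to push the model."""
    if summary["err"] > 0.2:                # illustrative threshold
        raise ValueError(f"Too high err>0.2; summary={summary}")
    if summary["count"] < 1000:             # illustrative minimum volume
        raise ValueError(f"Too few instances; summary={summary}")
    return "ok"
```

Passed to ``create_evaluate_ops``, these drive the (prediction, summary, validation) task triple the function returns.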
@@ -47,7 +43,6 @@ class GoogleDiscoveryApiHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ _conn = None # type: Optional[Resource] @@ -97,13 +92,9 @@ def query(self, endpoint: str, data: dict, paginate: bool = False, num_retries: .. seealso:: https://developers.google.com/apis-explorer for more information on what methods are available. - :type endpoint: str :param data: The data (endpoint params) needed for the specific request to given endpoint. - :type data: dict :param paginate: If set to True, it will collect all pages of data. - :type paginate: bool :param num_retries: Define the number of retries for the requests being made if it fails. - :type num_retries: int :return: the API response from the passed endpoint. :rtype: dict """ diff --git a/airflow/providers/google/common/utils/id_token_credentials.py b/airflow/providers/google/common/utils/id_token_credentials.py index 9d7a8c67f513d..2183285828a32 100644 --- a/airflow/providers/google/common/utils/id_token_credentials.py +++ b/airflow/providers/google/common/utils/id_token_credentials.py @@ -66,7 +66,6 @@ def _load_credentials_from_file( The credentials file must be a service account key or a stored authorized user credential. :param filename: The full path to the credentials file. - :type filename: str :return: Loaded credentials :rtype: google.auth.credentials.Credentials :raise google.auth.exceptions.DefaultCredentialsError: if the file is in the wrong format or is missing. @@ -182,11 +181,9 @@ def get_default_id_token_credentials( /identity/protocols/application-default-credentials :param target_audience: The intended audience for these credentials. - :type target_audience: Sequence[str] :param request: An object used to make HTTP requests. This is used to detect whether the application is running on Compute Engine. If not specified, then it will use the standard library http client to make requests. - :type request: google.auth.transport.Request :return: the current environment's credentials. :rtype: google.auth.credentials.Credentials :raises ~google.auth.exceptions.DefaultCredentialsError: diff --git a/airflow/providers/google/firebase/hooks/firestore.py b/airflow/providers/google/firebase/hooks/firestore.py index 59b9ad968dada..8ba0e6101dc0f 100644 --- a/airflow/providers/google/firebase/hooks/firestore.py +++ b/airflow/providers/google/firebase/hooks/firestore.py @@ -37,13 +37,10 @@ class CloudFirestoreHook(GoogleBaseHook): keyword arguments rather than positional. :param api_version: API version used (for example v1 or v1beta1). - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
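A short sketch of fetching ID token credentials through the helper documented above; the target audience URL is a placeholder (it is typically the URL of the IAP-protected or Cloud Run service being called):

```python
# Sketch of get_default_id_token_credentials usage; the audience URL is a
# placeholder assumption.
import google.auth.transport.requests

from airflow.providers.google.common.utils.id_token_credentials import (
    get_default_id_token_credentials,
)

request = google.auth.transport.requests.Request()
credentials = get_default_id_token_credentials(
    target_audience="https://example-service.a.run.app",
)
credentials.refresh(request)  # populates credentials.token with an ID token
print(credentials.token[:16], "...")
```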
@@ -52,7 +49,6 @@ class CloudFirestoreHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ _conn = None # type: Optional[Any] @@ -97,14 +93,11 @@ def export_documents( Starts an export with the specified configuration. :param database_id: The Database ID. - :type database_id: str :param body: The request body. See: https://firebase.google.com/docs/firestore/reference/rest/v1beta1/projects.databases/exportDocuments - :type body: dict :param project_id: Optional, Google Cloud Project project_id where the database belongs. If set to None or missing, the default project_id from the Google Cloud connection is used. - :type project_id: str """ service = self.get_conn() @@ -125,7 +118,6 @@ def _wait_for_operation_to_complete(self, operation_name: str) -> None: asynchronous call. :param operation_name: The name of the operation. - :type operation_name: str :return: The response returned by the operation. :rtype: dict :exception: AirflowException in case error is returned. diff --git a/airflow/providers/google/firebase/operators/firestore.py b/airflow/providers/google/firebase/operators/firestore.py index eb622f7fdd4b3..77227ca275a9b 100644 --- a/airflow/providers/google/firebase/operators/firestore.py +++ b/airflow/providers/google/firebase/operators/firestore.py @@ -35,18 +35,13 @@ class CloudFirestoreExportDatabaseOperator(BaseOperator): :ref:`howto/operator:CloudFirestoreExportDatabaseOperator` :param database_id: The Database ID. - :type database_id: str :param body: The request body. See: https://firebase.google.com/docs/firestore/reference/rest/v1beta1/projects.databases/exportDocuments - :type body: dict :param project_id: ID of the Google Cloud project; if None, then the default project_id is used. - :type project_id: str :param gcp_conn_id: The connection ID to use to connect to Google Cloud. - :type gcp_conn_id: str :param api_version: API version used (for example v1 or v1beta1). - :type api_version: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -55,7 +50,6 @@ class CloudFirestoreExportDatabaseOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/leveldb/hooks/leveldb.py b/airflow/providers/google/leveldb/hooks/leveldb.py index fdb6b28c24395..2ca78f01f9a85 100644 --- a/airflow/providers/google/leveldb/hooks/leveldb.py +++ b/airflow/providers/google/leveldb/hooks/leveldb.py @@ -52,11 +52,8 @@ def get_conn(self, name: str = '/tmp/testdb/', create_if_missing: bool = False, Creates `Plyvel DB `__ :param name: path to create database, e.g. `/tmp/testdb/` - :type name: str :param create_if_missing: whether a new database should be created if needed - :type create_if_missing: bool :param kwargs: other options of creation plyvel.DB. See more in the link above.
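A usage sketch for the Firestore export operator above. The ``body`` follows the ``exportDocuments`` request payload linked in the docstring; the project, bucket and collection names are placeholders:

```python
# Sketch of CloudFirestoreExportDatabaseOperator usage; project id, bucket
# and collection names are placeholder assumptions.
from datetime import datetime

from airflow import DAG
from airflow.providers.google.firebase.operators.firestore import (
    CloudFirestoreExportDatabaseOperator,
)

with DAG(
    dag_id="firestore_export_example",
    start_date=datetime(2022, 1, 1),
    schedule_interval=None,
) as dag:
    export_database = CloudFirestoreExportDatabaseOperator(
        task_id="export_database",
        project_id="example-project",
        database_id="(default)",
        body={
            # Fields follow the exportDocuments request body linked above.
            "outputUriPrefix": "gs://example-bucket/exports",
            "collectionIds": ["users", "orders"],
        },
    )
```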
- :type kwargs: Dict[str, Any] :returns: DB :rtype: plyvel.DB """ @@ -85,15 +82,10 @@ def run( :param command: command of plyvel(python wrap for leveldb) for DB object e.g. ``"put"``, ``"get"``, ``"delete"``, ``"write_batch"``. - :type command: str :param key: key for command(put,get,delete) execution(, e.g. ``b'key'``, ``b'another-key'``) - :type key: bytes :param value: value for command(put) execution(bytes, e.g. ``b'value'``, ``b'another-value'``) - :type value: Optional[bytes] :param keys: keys for command(write_batch) execution(List[bytes], e.g. ``[b'key', b'another-key'])`` - :type keys: Optional[List[bytes]] :param values: values for command(write_batch) execution e.g. ``[b'value'``, ``b'another-value']`` - :type values: Optional[List[bytes]] :returns: value from get or None :rtype: Optional[bytes] """ @@ -119,9 +111,7 @@ def put(self, key: bytes, value: bytes): Put a single value into a leveldb db by key :param key: key for put execution, e.g. ``b'key'``, ``b'another-key'`` - :type key: bytes :param value: value for put execution e.g. ``b'value'``, ``b'another-value'`` - :type value: bytes """ if not self.db: raise Exception(DB_NOT_INITIALIZED_BEFORE) @@ -132,7 +122,6 @@ def get(self, key: bytes) -> bytes: Get a single value into a leveldb db by key :param key: key for get execution, e.g. ``b'key'``, ``b'another-key'`` - :type key: bytes :returns: value of key from db.get :rtype: bytes """ @@ -145,7 +134,6 @@ def delete(self, key: bytes): Delete a single value in a leveldb db by key. :param key: key for delete execution, e.g. ``b'key'``, ``b'another-key'`` - :type key: bytes """ if not self.db: raise Exception(DB_NOT_INITIALIZED_BEFORE) @@ -156,9 +144,7 @@ def write_batch(self, keys: List[bytes], values: List[bytes]): Write batch of values in a leveldb db by keys :param keys: keys for write_batch execution e.g. ``[b'key', b'another-key']`` - :type keys: List[bytes] :param values: values for write_batch execution e.g. ``[b'value', b'another-value']`` - :type values: List[bytes] """ if not self.db: raise Exception(DB_NOT_INITIALIZED_BEFORE) diff --git a/airflow/providers/google/leveldb/operators/leveldb.py b/airflow/providers/google/leveldb/operators/leveldb.py index 772ade1ac73e7..8f9ae6c2727af 100644 --- a/airflow/providers/google/leveldb/operators/leveldb.py +++ b/airflow/providers/google/leveldb/operators/leveldb.py @@ -33,22 +33,14 @@ class LevelDBOperator(BaseOperator): :param command: command of plyvel(python wrap for leveldb) for DB object e.g. ``"put"``, ``"get"``, ``"delete"``, ``"write_batch"``. - :type command: str :param key: key for command(put,get,delete) execution(, e.g. ``b'key'``, ``b'another-key'``) - :type key: bytes :param value: value for command(put) execution(bytes, e.g. ``b'value'``, ``b'another-value'``) - :type value: Optional[bytes] :param keys: keys for command(write_batch) execution(List[bytes], e.g. ``[b'key', b'another-key'])`` - :type keys: Optional[List[bytes]] :param values: values for command(write_batch) execution e.g. ``[b'value'``, ``b'another-value']`` - :type values: Optional[List[bytes]] :param leveldb_conn_id: - :type leveldb_conn_id: str :param create_if_missing: whether a new database should be created if needed - :type create_if_missing: bool :param create_db_extra_options: extra options of creation LevelDBOperator. 
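The hook's command surface above (``put``, ``get``, ``delete``, ``write_batch``) in a minimal sketch; the connection ID and database path are illustrative:

```python
# Sketch of LevelDBHook usage per the methods documented above; the
# connection ID and db path are placeholder assumptions.
from airflow.providers.google.leveldb.hooks.leveldb import LevelDBHook

hook = LevelDBHook(leveldb_conn_id="leveldb_default")
hook.get_conn(name="/tmp/testdb/", create_if_missing=True)

hook.put(b"key", b"value")                          # single write
assert hook.get(b"key") == b"value"                 # single read
hook.write_batch([b"k1", b"k2"], [b"v1", b"v2"])    # batched writes
hook.delete(b"key")                                 # single delete
```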
See more in the link below `Plyvel DB `__ - :type create_db_extra_options: Optional[Dict[str, Any]] """ def __init__( diff --git a/airflow/providers/google/marketing_platform/hooks/analytics.py b/airflow/providers/google/marketing_platform/hooks/analytics.py index 26a6b6c79902d..975c4b0d80053 100644 --- a/airflow/providers/google/marketing_platform/hooks/analytics.py +++ b/airflow/providers/google/marketing_platform/hooks/analytics.py @@ -73,11 +73,8 @@ def get_ad_words_link( Returns a web property-Google Ads link to which the user has access. :param account_id: ID of the account which the given web property belongs to. - :type account_id: string :param web_property_id: Web property-Google Ads link UA-string. - :type web_property_id: string :param web_property_ad_words_link_id: to retrieve the Google Ads link for. - :type web_property_ad_words_link_id: string :returns: web property-Google Ads :rtype: Dict @@ -101,9 +98,7 @@ def list_ad_words_links(self, account_id: str, web_property_id: str) -> List[Dic Lists webProperty-Google Ads links for a given web property. :param account_id: ID of the account which the given web property belongs to. - :type account_id: str :param web_property_id: Web property UA-string to retrieve the Google Ads links for. - :type web_property_id: str :returns: list of entity Google Ads links. :rtype: list @@ -127,16 +122,11 @@ def upload_data( Uploads file to GA via the Data Import API :param file_location: The path and name of the file to upload. - :type file_location: str :param account_id: The GA account Id to which the data upload belongs. - :type account_id: str :param web_property_id: UA-string associated with the upload. - :type web_property_id: str :param custom_data_source_id: Custom Data Source Id to which this data import belongs. - :type custom_data_source_id: str :param resumable_upload: flag to upload the file in a resumable fashion, using a series of at least two requests. - :type resumable_upload: bool """ media = MediaFileUpload( file_location, @@ -169,13 +159,9 @@ def delete_upload_data( Deletes the uploaded data for a given account/property/dataset :param account_id: The GA account Id to which the data upload belongs. - :type account_id: str :param web_property_id: UA-string associated with the upload. - :type web_property_id: str :param custom_data_source_id: Custom Data Source Id to which this data import belongs. - :type custom_data_source_id: str :param delete_request_body: Dict of customDataImportUids to delete. - :type delete_request_body: dict """ self.log.info( "Deleting previous uploads to GA file for accountId:%s, " @@ -197,11 +183,8 @@ def list_uploads(self, account_id, web_property_id, custom_data_source_id) -> Li Get list of data upload from GA :param account_id: The GA account Id to which the data upload belongs. - :type account_id: str :param web_property_id: UA-string associated with the upload. - :type web_property_id: str :param custom_data_source_id: Custom Data Source Id to which this data import belongs. 
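A sketch of the GA data-import flow exposed by the hook methods above; every ID below is a placeholder:

```python
# Sketch of GoogleAnalyticsHook.upload_data / list_uploads; all IDs and the
# file path are placeholder assumptions.
from airflow.providers.google.marketing_platform.hooks.analytics import (
    GoogleAnalyticsHook,
)

hook = GoogleAnalyticsHook(api_version="v3", gcp_conn_id="google_cloud_default")

# Push a CSV into a GA custom data source.
hook.upload_data(
    file_location="/files/ga_payload.csv",
    account_id="123456",
    web_property_id="UA-123456-1",
    custom_data_source_id="abcd1234",
    resumable_upload=True,  # series of at least two requests, per the docstring
)

# Inspect what has been uploaded so far.
uploads = hook.list_uploads(
    account_id="123456",
    web_property_id="UA-123456-1",
    custom_data_source_id="abcd1234",
)
print(len(uploads), "uploads")
```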
- :type custom_data_source_id: str """ self.log.info( "Getting list of uploads for accountId:%s, webPropertyId:%s and customDataSourceId:%s ", diff --git a/airflow/providers/google/marketing_platform/hooks/campaign_manager.py b/airflow/providers/google/marketing_platform/hooks/campaign_manager.py index 5f6d25d33d089..e529546318988 100644 --- a/airflow/providers/google/marketing_platform/hooks/campaign_manager.py +++ b/airflow/providers/google/marketing_platform/hooks/campaign_manager.py @@ -61,9 +61,7 @@ def delete_report(self, profile_id: str, report_id: str) -> Any: Deletes a report by its ID. :param profile_id: The DFA user profile ID. - :type profile_id: str :param report_id: The ID of the report. - :type report_id: str """ response = ( self.get_conn() @@ -78,9 +76,7 @@ def insert_report(self, profile_id: str, report: Dict[str, Any]) -> Any: Creates a report. :param profile_id: The DFA user profile ID. - :type profile_id: str :param report: The report resource to be inserted. - :type report: Dict[str, Any] """ response = ( self.get_conn() @@ -102,15 +98,10 @@ def list_reports( Retrieves list of reports. :param profile_id: The DFA user profile ID. - :type profile_id: str :param max_results: Maximum number of results to return. - :type max_results: Optional[int] :param scope: The scope that defines which results are returned. - :type scope: Optional[str] :param sort_field: The field by which to sort the list. - :type sort_field: Optional[str] :param sort_order: Order of sorted results. - :type sort_order: Optional[str] """ reports: List[dict] = [] conn = self.get_conn() @@ -133,12 +124,9 @@ def patch_report(self, profile_id: str, report_id: str, update_mask: dict) -> An Updates a report. This method supports patch semantics. :param profile_id: The DFA user profile ID. - :type profile_id: str :param report_id: The ID of the report. - :type report_id: str :param update_mask: The relevant portions of a report resource, according to the rules of patch semantics. - :type update_mask: Dict """ response = ( self.get_conn() @@ -153,11 +141,8 @@ def run_report(self, profile_id: str, report_id: str, synchronous: Optional[bool Runs a report. :param profile_id: The DFA profile ID. - :type profile_id: str :param report_id: The ID of the report. - :type report_id: str :param synchronous: If set and true, tries to run the report synchronously. - :type synchronous: Optional[bool] """ response = ( self.get_conn() @@ -172,9 +157,7 @@ def update_report(self, profile_id: str, report_id: str) -> Any: Updates a report. :param profile_id: The DFA user profile ID. - :type profile_id: str :param report_id: The ID of the report. - :type report_id: str """ response = ( self.get_conn() @@ -189,11 +172,8 @@ def get_report(self, file_id: str, profile_id: str, report_id: str) -> Any: Retrieves a report file. :param profile_id: The DFA user profile ID. - :type profile_id: str :param report_id: The ID of the report. - :type report_id: str :param file_id: The ID of the report file. - :type file_id: str """ response = ( self.get_conn() @@ -209,11 +189,8 @@ def get_report_file(self, file_id: str, profile_id: str, report_id: str) -> http Retrieves a media part of report file. :param profile_id: The DFA user profile ID. - :type profile_id: str :param report_id: The ID of the report. - :type report_id: str :param file_id: The ID of the report file. - :type file_id: str :return: googleapiclient.http.HttpRequest """ request = ( @@ -256,21 +233,15 @@ def conversions_batch_insert( Inserts conversions. 
:param profile_id: User profile ID associated with this request. - :type profile_id: str :param conversions: Conversions to insert; should be of type Conversion: https://developers.google.com/doubleclick-advertisers/v3.3/conversions#resource - :type conversions: List[Dict[str, Any]] :param encryption_entity_type: The encryption entity type. This should match the encryption configuration for ad serving or Data Transfer. - :type encryption_entity_type: str :param encryption_entity_id: The encryption entity ID. This should match the encryption configuration for ad serving or Data Transfer. - :type encryption_entity_id: int :param encryption_source: Describes whether the encrypted cookie was received from ad serving (the %m macro) or from Data Transfer. - :type encryption_source: str :param max_failed_inserts: The maximum number of conversions that failed to be inserted - :type max_failed_inserts: int """ response = ( self.get_conn() @@ -306,21 +277,15 @@ def conversions_batch_update( Updates existing conversions. :param profile_id: User profile ID associated with this request. - :type profile_id: str :param conversions: Conversions to update; should be of type Conversion: https://developers.google.com/doubleclick-advertisers/v3.3/conversions#resource - :type conversions: List[Dict[str, Any]] :param encryption_entity_type: The encryption entity type. This should match the encryption configuration for ad serving or Data Transfer. - :type encryption_entity_type: str :param encryption_entity_id: The encryption entity ID. This should match the encryption configuration for ad serving or Data Transfer. - :type encryption_entity_id: int :param encryption_source: Describes whether the encrypted cookie was received from ad serving (the %m macro) or from Data Transfer. - :type encryption_source: str :param max_failed_updates: The maximum number of conversions that failed to be updated - :type max_failed_updates: int """ response = ( self.get_conn() diff --git a/airflow/providers/google/marketing_platform/hooks/display_video.py b/airflow/providers/google/marketing_platform/hooks/display_video.py index cca66c526c5f0..bd7333ecb224a 100644 --- a/airflow/providers/google/marketing_platform/hooks/display_video.py +++ b/airflow/providers/google/marketing_platform/hooks/display_video.py @@ -81,10 +81,8 @@ def erf_uri(partner_id, entity_type) -> List[str]: https://developers.google.com/bid-manager/guides/entity-read/overview :param partner_id: The numeric ID of your Partner. - :type partner_id: int :param entity_type: The type of file: Partner, Advertiser, InsertionOrder, LineItem, Creative, Pixel, InventorySource, UserList, UniversalChannel, and summary. - :type entity_type: str """ return [f"gdbm-{partner_id}/entity/{{{{ ds_nodash }}}}.*.{entity_type}.json"] @@ -93,7 +91,6 @@ def create_query(self, query: Dict[str, Any]) -> dict: Creates a query. :param query: Query object to be passed to request body. - :type query: Dict[str, Any] """ response = self.get_conn().queries().createquery(body=query).execute(num_retries=self.num_retries) return response @@ -103,7 +100,6 @@ def delete_query(self, query_id: str) -> None: Deletes a stored query as well as the associated stored reports. :param query_id: Query ID to delete. - :type query_id: str """ (self.get_conn().queries().deletequery(queryId=query_id).execute(num_retries=self.num_retries)) @@ -112,7 +108,6 @@ def get_query(self, query_id: str) -> dict: Retrieves a stored query. :param query_id: Query ID to retrieve.
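The conversion batch methods earlier in this hunk take the Conversion resource linked above plus the encryption arguments. A hedged sketch of a batch insert; every ID and value below is illustrative:

```python
# Sketch of GoogleCampaignManagerHook.conversions_batch_insert; all IDs,
# the gclid and the timestamp are placeholder assumptions.
from airflow.providers.google.marketing_platform.hooks.campaign_manager import (
    GoogleCampaignManagerHook,
)

hook = GoogleCampaignManagerHook(api_version="v3.3", gcp_conn_id="google_cloud_default")

conversion = {
    # Shape follows the Conversion resource linked above.
    "kind": "dfareporting#conversion",
    "floodlightActivityId": 1234567,
    "floodlightConfigurationId": 7654321,
    "gclid": "abc123",
    "ordinal": "0",
    "quantity": 1,
    "value": 9.99,
    "timestampMicros": 1641034800000000,
}

response = hook.conversions_batch_insert(
    profile_id="8675309",
    conversions=[conversion],
    encryption_entity_type="DCM_ACCOUNT",
    encryption_entity_id=1234567,
    encryption_source="AD_SERVING",
    max_failed_inserts=0,
)
print(response.get("hasFailures"))
```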
- :type query_id: str """ response = self.get_conn().queries().getquery(queryId=query_id).execute(num_retries=self.num_retries) return response @@ -129,9 +124,7 @@ def run_query(self, query_id: str, params: Optional[Dict[str, Any]]) -> None: Runs a stored query to generate a report. :param query_id: Query ID to run. - :type query_id: str :param params: Parameters for the report. - :type params: Dict[str, Any] """ ( self.get_conn() @@ -145,7 +138,6 @@ def upload_line_items(self, line_items: Any) -> List[Dict[str, Any]]: Uploads line items in CSV format. :param line_items: downloaded data from GCS and passed to the body request - :type line_items: Any :return: response body. :rtype: List[Dict[str, Any]] """ @@ -170,7 +162,6 @@ def download_line_items(self, request_body: Dict[str, Any]) -> List[Any]: :param request_body: dictionary with parameters that should be passed in. More information about it can be found here: https://developers.google.com/bid-manager/v1.1/lineitems/downloadlineitems - :type request_body: Dict[str, Any] """ response = ( self.get_conn() @@ -185,7 +176,6 @@ def create_sdf_download_operation(self, body_request: Dict[str, Any]) -> Dict[st Creates an SDF Download Task and returns an Operation. :param body_request: Body request. - :type body_request: Dict[str, Any] More information about the body request can be found here: https://developers.google.com/display-video/api/reference/rest/v1/sdfdownloadtasks/create @@ -203,7 +193,6 @@ def get_sdf_download_operation(self, operation_name: str): Gets the latest state of an asynchronous SDF download task operation. :param operation_name: The name of the operation resource. - :type operation_name: str """ result = ( self.get_conn_to_display_video() @@ -219,7 +208,6 @@ def download_media(self, resource_name: str): Downloads media. :param resource_name: Name of the media that is being downloaded. - :type resource_name: str """ request = self.get_conn_to_display_video().media().download_media(resourceName=resource_name) return request diff --git a/airflow/providers/google/marketing_platform/hooks/search_ads.py b/airflow/providers/google/marketing_platform/hooks/search_ads.py index 57b8bca1c503f..8fb382b20aed9 100644 --- a/airflow/providers/google/marketing_platform/hooks/search_ads.py +++ b/airflow/providers/google/marketing_platform/hooks/search_ads.py @@ -59,7 +59,6 @@ def insert_report(self, report: Dict[str, Any]) -> Any: Inserts a report request into the reporting system. :param report: Report to be generated. - :type report: Dict[str, Any] """ response = self.get_conn().reports().request(body=report).execute(num_retries=self.num_retries) return response @@ -69,7 +68,6 @@ def get(self, report_id: str) -> Any: Polls for the status of a report request. :param report_id: ID of the report request being polled. - :type report_id: str """ response = self.get_conn().reports().get(reportId=report_id).execute(num_retries=self.num_retries) return response @@ -79,9 +77,7 @@ def get_file(self, report_fragment: int, report_id: str) -> Any: Downloads a report file encoded in UTF-8. :param report_fragment: The index of the report fragment to download. - :type report_fragment: int :param report_id: ID of the report.
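A sketch chaining the DV360 query methods above: create a stored query, then run it to generate a report. The body fields follow the bid-manager queries resource and are illustrative:

```python
# Sketch of GoogleDisplayVideo360Hook create_query/run_query; the query body
# and connection ID are placeholder assumptions.
from airflow.providers.google.marketing_platform.hooks.display_video import (
    GoogleDisplayVideo360Hook,
)

hook = GoogleDisplayVideo360Hook(api_version="v1.1", gcp_conn_id="google_cloud_default")

query = hook.create_query(
    query={
        "kind": "doubleclickbidmanager#query",
        "metadata": {
            "title": "example_report",
            "format": "CSV",
            "dataRange": "LAST_7_DAYS",
        },
        "params": {
            "type": "TYPE_GENERAL",
            "groupBys": ["FILTER_DATE"],
            "metrics": ["METRIC_IMPRESSIONS"],
        },
    }
)
hook.run_query(query_id=query["queryId"], params={"dataRange": "LAST_7_DAYS"})
```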
- :type report_id: str """ response = ( self.get_conn() diff --git a/airflow/providers/google/marketing_platform/operators/analytics.py b/airflow/providers/google/marketing_platform/operators/analytics.py index de7fcfc62da48..8d5d90c986577 100644 --- a/airflow/providers/google/marketing_platform/operators/analytics.py +++ b/airflow/providers/google/marketing_platform/operators/analytics.py @@ -43,9 +43,7 @@ class GoogleAnalyticsListAccountsOperator(BaseOperator): :ref:`howto/operator:GoogleAnalyticsListAccountsOperator` :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -54,7 +52,6 @@ class GoogleAnalyticsListAccountsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -100,11 +97,8 @@ class GoogleAnalyticsGetAdsLinkOperator(BaseOperator): :ref:`howto/operator:GoogleAnalyticsGetAdsLinkOperator` :param account_id: ID of the account which the given web property belongs to. - :type account_id: str :param web_property_ad_words_link_id: Web property-Google Ads link ID. - :type web_property_ad_words_link_id: str :param web_property_id: Web property ID to retrieve the Google Ads link for. - :type web_property_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -113,7 +107,6 @@ class GoogleAnalyticsGetAdsLinkOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -172,9 +165,7 @@ class GoogleAnalyticsRetrieveAdsLinksListOperator(BaseOperator): :ref:`howto/operator:GoogleAnalyticsRetrieveAdsLinksListOperator` :param account_id: ID of the account which the given web property belongs to. - :type account_id: str :param web_property_id: Web property UA-string to retrieve the Google Ads links for. - :type web_property_id: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -183,7 +174,6 @@ class GoogleAnalyticsRetrieveAdsLinksListOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -230,28 +220,19 @@ class GoogleAnalyticsDataImportUploadOperator(BaseOperator): Takes a file from Cloud Storage and uploads it to GA via the data import API. :param storage_bucket: The Google cloud storage bucket where the file is stored. - :type storage_bucket: str :param storage_name_object: The name of the object in the desired Google cloud storage bucket. (templated) If the destination points to an existing folder, the file will be taken from the specified folder. - :type storage_name_object: str :param account_id: The GA account Id (long) to which the data upload belongs. - :type account_id: str :param web_property_id: The web property UA-string associated with the upload. - :type web_property_id: str :param custom_data_source_id: The id to which the data import belongs - :type custom_data_source_id: str :param resumable_upload: flag to upload the file in a resumable fashion, using a series of at least two requests. - :type resumable_upload: bool :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -260,7 +241,6 @@ class GoogleAnalyticsDataImportUploadOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -336,19 +316,13 @@ class GoogleAnalyticsDeletePreviousDataUploadsOperator(BaseOperator): Deletes previous GA uploads to leave the latest file to control the size of the Data Set Quota. :param account_id: The GA account Id (long) to which the data upload belongs. - :type account_id: str :param web_property_id: The web property UA-string associated with the upload. - :type web_property_id: str :param custom_data_source_id: The id to which the data import belongs. - :type custom_data_source_id: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request.
@@ -357,7 +331,6 @@ class GoogleAnalyticsDeletePreviousDataUploadsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ("impersonation_chain",) @@ -415,22 +388,17 @@ class GoogleAnalyticsModifyFileHeadersDataImportOperator(BaseOperator): match the custom dimension ID in GA, i.e. clientId : dimensionX. :param storage_bucket: The Google cloud storage bucket where the file is stored. - :type storage_bucket: str :param storage_name_object: The name of the object in the desired Google cloud storage bucket. (templated) If the destination points to an existing folder, the file will be taken from the specified folder. - :type storage_name_object: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param custom_dimension_header_mapping: Dictionary to handle when uploading custom dimensions which have generic IDs, i.e. 'dimensionX', which are set by GA. Dictionary maps the current CSV header to the GA ID which will be the new header for the CSV to upload to GA, e.g. clientId : dimension1. - :type custom_dimension_header_mapping: dict :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -439,7 +407,6 @@ class GoogleAnalyticsModifyFileHeadersDataImportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/marketing_platform/operators/campaign_manager.py b/airflow/providers/google/marketing_platform/operators/campaign_manager.py index 89032f9313359..45730a30d26c4 100644 --- a/airflow/providers/google/marketing_platform/operators/campaign_manager.py +++ b/airflow/providers/google/marketing_platform/operators/campaign_manager.py @@ -45,19 +45,13 @@ class GoogleCampaignManagerDeleteReportOperator(BaseOperator): :ref:`howto/operator:GoogleCampaignManagerDeleteReportOperator` :param profile_id: The DFA user profile ID. - :type profile_id: str :param report_name: The name of the report to delete. - :type report_name: str :param report_id: The ID of the report. - :type report_id: str :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled.
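The ``custom_dimension_header_mapping`` behaviour above in miniature: a pure-Python sketch of how a mapping like ``{'clientId': 'dimension1'}`` rewrites a CSV header row before upload. This sketches the transform the docstring describes, not the operator's internals; the mapping values and CSV content are illustrative:

```python
# Sketch of the CSV header rewrite described above; the mapping and data
# are illustrative assumptions.
import csv
import io

custom_dimension_header_mapping = {"clientId": "dimension1", "campaign": "dimension2"}

raw_csv = "clientId,campaign,sessions\nabc,summer,3\n"

reader = csv.reader(io.StringIO(raw_csv))
headers = next(reader)
# Replace any mapped header with its GA custom-dimension ID.
new_headers = [custom_dimension_header_mapping.get(h, h) for h in headers]

out = io.StringIO()
writer = csv.writer(out)
writer.writerow(new_headers)
writer.writerows(reader)
print(out.getvalue())  # dimension1,dimension2,sessions ...
```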
- :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -66,7 +60,6 @@ class GoogleCampaignManagerDeleteReportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -139,27 +132,17 @@ class GoogleCampaignManagerDownloadReportOperator(BaseOperator): :ref:`howto/operator:GoogleCampaignManagerDownloadReportOperator` :param profile_id: The DFA user profile ID. - :type profile_id: str :param report_id: The ID of the report. - :type report_id: str :param file_id: The ID of the report file. - :type file_id: str :param bucket_name: The bucket to upload to. - :type bucket_name: str :param report_name: The report name to set when uploading the local file. - :type report_name: str :param gzip: Option to compress local file or file data for upload - :type gzip: bool :param chunk_size: File will be downloaded in chunks of this many bytes. - :type chunk_size: int :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -168,7 +151,6 @@ class GoogleCampaignManagerDownloadReportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -282,17 +264,12 @@ class GoogleCampaignManagerInsertReportOperator(BaseOperator): :ref:`howto/operator:GoogleCampaignManagerInsertReportOperator` :param profile_id: The DFA user profile ID. - :type profile_id: str :param report: Report to be created. - :type report: Dict[str, Any] :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. 
@@ -301,7 +278,6 @@ class GoogleCampaignManagerInsertReportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -368,19 +344,13 @@ class GoogleCampaignManagerRunReportOperator(BaseOperator): :ref:`howto/operator:GoogleCampaignManagerRunReportOperator` :param profile_id: The DFA profile ID. - :type profile_id: str :param report_id: The ID of the report. - :type report_id: str :param synchronous: If set and true, tries to run the report synchronously. - :type synchronous: bool :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -389,7 +359,6 @@ class GoogleCampaignManagerRunReportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -455,29 +424,20 @@ class GoogleCampaignManagerBatchInsertConversionsOperator(BaseOperator): :ref:`howto/operator:GoogleCampaignManagerBatchInsertConversionsOperator` :param profile_id: User profile ID associated with this request. - :type profile_id: str :param conversions: Conversions to insert; should be of type Conversion: https://developers.google.com/doubleclick-advertisers/v3.3/conversions#resource - :type conversions: List[Dict[str, Any]] :param encryption_entity_type: The encryption entity type. This should match the encryption configuration for ad serving or Data Transfer. - :type encryption_entity_type: str :param encryption_entity_id: The encryption entity ID. This should match the encryption configuration for ad serving or Data Transfer. - :type encryption_entity_id: int :param encryption_source: Describes whether the encrypted cookie was received from ad serving (the %m macro) or from Data Transfer. - :type encryption_source: str :param max_failed_inserts: The maximum number of conversions that failed to be inserted - :type max_failed_inserts: int :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled.
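In a DAG, the insert and run report operators above are typically chained. A sketch with placeholder IDs and report body; the XCom wiring shown (the insert task exposing the new report ID under a ``report_id`` key) is an assumption about how the ID is passed along, not something stated in this diff:

```python
# Sketch of chaining the Campaign Manager report operators; profile ID,
# report body, and the XCom key are placeholder assumptions.
from datetime import datetime

from airflow import DAG
from airflow.providers.google.marketing_platform.operators.campaign_manager import (
    GoogleCampaignManagerInsertReportOperator,
    GoogleCampaignManagerRunReportOperator,
)

REPORT = {"type": "STANDARD", "name": "example_report"}  # illustrative body

with DAG(
    dag_id="cm_report_example",
    start_date=datetime(2022, 1, 1),
    schedule_interval=None,
) as dag:
    insert_report = GoogleCampaignManagerInsertReportOperator(
        task_id="insert_report",
        profile_id="8675309",
        report=REPORT,
        api_version="v3.3",
    )
    run_report = GoogleCampaignManagerRunReportOperator(
        task_id="run_report",
        profile_id="8675309",
        # Assumes the insert task pushes the new report ID to XCom.
        report_id="{{ task_instance.xcom_pull('insert_report', key='report_id') }}",
        api_version="v3.3",
    )
    insert_report >> run_report
```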
- :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -486,7 +446,6 @@ class GoogleCampaignManagerBatchInsertConversionsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -556,29 +515,20 @@ class GoogleCampaignManagerBatchUpdateConversionsOperator(BaseOperator): :ref:`howto/operator:GoogleCampaignManagerBatchUpdateConversionsOperator` :param profile_id: User profile ID associated with this request. - :type profile_id: str :param conversions: Conversions to update; should be of type Conversion: https://developers.google.com/doubleclick-advertisers/v3.3/conversions#resource - :type conversions: List[Dict[str, Any]] :param encryption_entity_type: The encryption entity type. This should match the encryption configuration for ad serving or Data Transfer. - :type encryption_entity_type: str :param encryption_entity_id: The encryption entity ID. This should match the encryption configuration for ad serving or Data Transfer. - :type encryption_entity_id: int :param encryption_source: Describes whether the encrypted cookie was received from ad serving (the %m macro) or from Data Transfer. - :type encryption_source: str :param max_failed_updates: The maximum number of conversions that failed to be updated - :type max_failed_updates: int :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -587,7 +537,6 @@ class GoogleCampaignManagerBatchUpdateConversionsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/marketing_platform/operators/display_video.py b/airflow/providers/google/marketing_platform/operators/display_video.py index a7a1e7f35d676..ee3ede2b2d3a0 100644 --- a/airflow/providers/google/marketing_platform/operators/display_video.py +++ b/airflow/providers/google/marketing_platform/operators/display_video.py @@ -48,15 +48,11 @@ class GoogleDisplayVideo360CreateReportOperator(BaseOperator): :param body: Report object passed to the request's body as described here: https://developers.google.com/bid-manager/v1/queries#resource - :type body: Dict[str, Any] :param api_version: The version of the api that will be requested for example 'v3'.
- :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -65,7 +61,6 @@ class GoogleDisplayVideo360CreateReportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -125,17 +120,12 @@ class GoogleDisplayVideo360DeleteReportOperator(BaseOperator): `https://developers.google.com/bid-manager/v1/queries/deletequery` :param report_id: Report ID to delete. - :type report_id: str :param report_name: Name of the report to delete. - :type report_name: str :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -144,7 +134,6 @@ class GoogleDisplayVideo360DeleteReportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -211,23 +200,15 @@ class GoogleDisplayVideo360DownloadReportOperator(BaseOperator): `https://developers.google.com/bid-manager/v1/queries/getquery` :param report_id: Report ID to retrieve. - :type report_id: str :param bucket_name: The bucket to upload to. - :type bucket_name: str :param report_name: The report name to set when uploading the local file. - :type report_name: str :param chunk_size: File will be downloaded in chunks of this many bytes. - :type chunk_size: int :param gzip: Option to compress local file or file data for upload - :type gzip: bool :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. 
- :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -236,7 +217,6 @@ class GoogleDisplayVideo360DownloadReportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -342,22 +322,16 @@ class GoogleDisplayVideo360RunReportOperator(BaseOperator): `https://developers.google.com/bid-manager/v1/queries/runquery` :param report_id: Report ID to run. - :type report_id: str :param params: Parameters for running a report as described here: https://developers.google.com/bid-manager/v1/queries/runquery. Note that this keyword is deprecated; use the `parameters` keyword instead. - :type params: Dict[str, Any] :param parameters: Parameters for running a report as described here: https://developers.google.com/bid-manager/v1/queries/runquery - :type parameters: Dict[str, Any] :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -366,7 +340,6 @@ class GoogleDisplayVideo360RunReportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -436,7 +409,6 @@ class GoogleDisplayVideo360DownloadLineItemsOperator(BaseOperator): :param request_body: dictionary with parameters that should be passed into the request body. More information can be found here: https://developers.google.com/bid-manager/v1.1/lineitems/downloadlineitems - :type request_body: Dict[str, Any], """ template_fields: Sequence[str] = ( @@ -512,15 +484,10 @@ class GoogleDisplayVideo360UploadLineItemsOperator(BaseOperator): `https://developers.google.com/bid-manager/v1.1/lineitems/uploadlineitems` :param request_body: request to upload line items. - :type request_body: Dict[str, Any] :param bucket_name: The bucket from which the form data is downloaded. - :type bucket_name: str :param object_name: The object to fetch. - :type object_name: str, :param filename: The filename to fetch. - :type filename: str, :param dry_run: Return the upload status without actually persisting the line items. - :type dry_run: str, """ template_fields: Sequence[str] = ( @@ -587,23 +554,15 @@ class GoogleDisplayVideo360CreateSDFDownloadTaskOperator(BaseOperator): `https://developers.google.com/display-video/api/reference/rest` :param version: The SDF version of the downloaded file.
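A minimal sketch of GoogleDisplayVideo360DownloadReportOperator as documented above, assuming a hypothetical report ID and GCS bucket:

```python
from airflow.providers.google.marketing_platform.operators.display_video import (
    GoogleDisplayVideo360DownloadReportOperator,
)

# Hypothetical IDs and bucket. The finished report is written to GCS,
# gzip-compressed because gzip=True.
save_report = GoogleDisplayVideo360DownloadReportOperator(
    task_id="save_report",
    report_id="my_report_id",
    bucket_name="my-gcs-bucket",
    report_name="report.csv",
    gzip=True,
)
```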
- :type version: str :param partner_id: The ID of the partner to download SDF for. - :type partner_id: str :param advertiser_id: The ID of the advertiser to download SDF for. - :type advertiser_id: str :param parent_entity_filter: Filters on selected file types. - :type parent_entity_filter: Dict[str, Any] :param id_filter: Filters on entities by their entity IDs. - :type id_filter: Dict[str, Any] :param inventory_source_filter: Filters on Inventory Sources by their IDs. - :type inventory_source_filter: Dict[str, Any] :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -612,7 +571,6 @@ class GoogleDisplayVideo360CreateSDFDownloadTaskOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -668,23 +626,15 @@ class GoogleDisplayVideo360SDFtoGCSOperator(BaseOperator): `https://developers.google.com/display-video/api/reference/rest` :param version: The SDF version of the downloaded file. - :type version: str :param partner_id: The ID of the partner to download SDF for. - :type partner_id: str :param advertiser_id: The ID of the advertiser to download SDF for. - :type advertiser_id: str :param parent_entity_filter: Filters on selected file types. - :type parent_entity_filter: Dict[str, Any] :param id_filter: Filters on entities by their entity IDs. - :type id_filter: Dict[str, Any] :param inventory_source_filter: Filters on Inventory Sources by their IDs. - :type inventory_source_filter: Dict[str, Any] :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -693,7 +643,6 @@ class GoogleDisplayVideo360SDFtoGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/marketing_platform/operators/search_ads.py b/airflow/providers/google/marketing_platform/operators/search_ads.py index 4afe23e3081bc..674dc448f6d7a 100644 --- a/airflow/providers/google/marketing_platform/operators/search_ads.py +++ b/airflow/providers/google/marketing_platform/operators/search_ads.py @@ -42,15 +42,11 @@ class GoogleSearchAdsInsertReportOperator(BaseOperator): :ref:`howto/operator:GoogleSearchAdsInsertReportOperator` :param report: Report to be generated - :type report: Dict[str, Any] :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -59,7 +55,6 @@ class GoogleSearchAdsInsertReportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -119,22 +114,15 @@ class GoogleSearchAdsDownloadReportOperator(BaseOperator): :ref:`howto/operator:GoogleSearchAdsGetfileReportOperator` :param report_id: ID of the report. - :type report_id: str :param bucket_name: The bucket to upload to. - :type bucket_name: str :param report_name: The report name to set when uploading the local file. If not provided then report_id is used. - :type report_name: str :param gzip: Option to compress local file or file data for upload - :type gzip: bool :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -143,7 +131,6 @@ class GoogleSearchAdsDownloadReportOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). 
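A minimal sketch of GoogleSearchAdsInsertReportOperator as documented above; the report body below is a hypothetical illustration of the Search Ads reporting request format, not a value from this change:

```python
from airflow.providers.google.marketing_platform.operators.search_ads import (
    GoogleSearchAdsInsertReportOperator,
)

# Hypothetical agency/advertiser IDs and report definition.
insert_report = GoogleSearchAdsInsertReportOperator(
    task_id="insert_report",
    report={
        "reportScope": {"agencyId": "my_agency_id", "advertiserId": "my_advertiser_id"},
        "reportType": "account",
        "columns": [{"columnName": "agency"}, {"columnName": "impr"}],
        "downloadFormat": "csv",
        "statisticsCurrency": "usd",
    },
)
```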
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/marketing_platform/sensors/campaign_manager.py b/airflow/providers/google/marketing_platform/sensors/campaign_manager.py index 652c4fac46d4b..705fe22cf4e64 100644 --- a/airflow/providers/google/marketing_platform/sensors/campaign_manager.py +++ b/airflow/providers/google/marketing_platform/sensors/campaign_manager.py @@ -38,19 +38,13 @@ class GoogleCampaignManagerReportSensor(BaseSensorOperator): :ref:`howto/operator:GoogleCampaignManagerReportSensor` :param profile_id: The DFA user profile ID. - :type profile_id: str :param report_id: The ID of the report. - :type report_id: str :param file_id: The ID of the report file. - :type file_id: str :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -59,7 +53,6 @@ class GoogleCampaignManagerReportSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/marketing_platform/sensors/display_video.py b/airflow/providers/google/marketing_platform/sensors/display_video.py index 82a7c66309bd7..5af14fc1da5ea 100644 --- a/airflow/providers/google/marketing_platform/sensors/display_video.py +++ b/airflow/providers/google/marketing_platform/sensors/display_video.py @@ -35,15 +35,11 @@ class GoogleDisplayVideo360ReportSensor(BaseSensorOperator): :ref:`howto/operator:GoogleDisplayVideo360ReportSensor` :param report_id: Report ID to poll. - :type report_id: str :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -52,7 +48,6 @@ class GoogleDisplayVideo360ReportSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated).
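The sensor documented above typically pairs with the run-report operator; a minimal sketch (IDs are hypothetical, and the file_id is pulled from the XCom that GoogleCampaignManagerRunReportOperator pushes under the "file_id" key):

```python
from airflow.providers.google.marketing_platform.sensors.campaign_manager import (
    GoogleCampaignManagerReportSensor,
)

# Hypothetical IDs; pokes the API every minute until the report file is ready.
wait_for_report = GoogleCampaignManagerReportSensor(
    task_id="wait_for_report",
    profile_id="my_profile_id",
    report_id="my_report_id",
    file_id="{{ task_instance.xcom_pull('run_report', key='file_id') }}",
    poke_interval=60,
)
```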
- :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( @@ -101,15 +96,11 @@ class GoogleDisplayVideo360GetSDFDownloadOperationSensor(BaseSensorOperator): :ref:`howto/operator:GoogleDisplayVideo360GetSDFDownloadOperationSensor` :param operation_name: The name of the operation resource - :type operation_name: Dict[str, Any] :param api_version: The version of the api that will be requested for example 'v1'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -118,7 +109,6 @@ class GoogleDisplayVideo360GetSDFDownloadOperationSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ diff --git a/airflow/providers/google/marketing_platform/sensors/search_ads.py b/airflow/providers/google/marketing_platform/sensors/search_ads.py index 53002be970985..9c5e7c2b3a1cc 100644 --- a/airflow/providers/google/marketing_platform/sensors/search_ads.py +++ b/airflow/providers/google/marketing_platform/sensors/search_ads.py @@ -38,15 +38,11 @@ class GoogleSearchAdsReportSensor(BaseSensorOperator): :ref:`howto/operator:GoogleSearchAdsReportSensor` :param report_id: ID of the report request being polled. - :type report_id: str :param api_version: The version of the api that will be requested for example 'v3'. - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -55,7 +51,6 @@ class GoogleSearchAdsReportSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/suite/hooks/calendar.py b/airflow/providers/google/suite/hooks/calendar.py index 7a8cc4a12393a..567cc8ade0581 100644 --- a/airflow/providers/google/suite/hooks/calendar.py +++ b/airflow/providers/google/suite/hooks/calendar.py @@ -34,13 +34,10 @@ class GoogleCalendarHook(GoogleBaseHook): https://developers.google.com/calendar/api/v3/reference :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param api_version: API Version. 
For example v3 - :type api_version: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -49,7 +46,6 @@ class GoogleCalendarHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -106,46 +102,30 @@ def get_events( https://developers.google.com/calendar/api/v3/reference/events/list :param calendar_id: The Google Calendar ID to interact with - :type calendar_id: str :param i_cal_uid: Optional. Specifies event ID in the ``iCalendar`` format in the response. - :type i_cal_uid: str :param max_attendees: Optional. If there are more than the specified number of attendees, only the participant is returned. - :type max_attendees: int :param max_results: Optional. Maximum number of events returned on one result page. Incomplete pages can be detected by a non-empty ``nextPageToken`` field in the response. By default the value is 250 events. The page size can never be larger than 2500 events - :type max_results: int :param order_by: Optional. Acceptable values are ``"startTime"`` or ``"updated"`` - :type order_by: str :param private_extended_property: Optional. Extended properties constraint specified as ``propertyName=value``. Matches only private properties. This parameter might be repeated multiple times to return events that match all given constraints. - :type private_extended_property: str :param q: Optional. Free text search. - :type q: str :param shared_extended_property: Optional. Extended properties constraint specified as ``propertyName=value``. Matches only shared properties. This parameter might be repeated multiple times to return events that match all given constraints. - :type shared_extended_property: str :param show_deleted: Optional. False by default - :type show_deleted: bool :param show_hidden_invitation: Optional. False by default - :type show_hidden_invitation: bool :param single_events: Optional. False by default - :type single_events: bool :param sync_token: Optional. Token obtained from the ``nextSyncToken`` field returned on the last page of results from the previous request - :type sync_token: str :param time_max: Optional. Upper bound (exclusive) for an event's start time to filter by. Default is no filter - :type time_max: datetime :param time_min: Optional. Lower bound (exclusive) for an event's end time to filter by. Default is no filter - :type time_min: datetime :param time_zone: Optional. Time zone used in response. Default is the calendar's time zone. - :type time_zone: str :param updated_min: Optional. Lower bound for an event's last modification time - :type updated_min: datetime :rtype: List """ service = self.get_conn() @@ -196,23 +176,14 @@ def create_event( https://developers.google.com/calendar/api/v3/reference/events/insert :param calendar_id: The Google Calendar ID to interact with - :type calendar_id: str :param conference_data_version: Optional. Version number of conference data supported by the API client.
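A minimal sketch of the get_events call documented above ("primary" addresses the calendar of the authenticated account; the kwargs mirror the events.list query parameters):

```python
from airflow.providers.google.suite.hooks.calendar import GoogleCalendarHook

hook = GoogleCalendarHook(api_version="v3", gcp_conn_id="google_cloud_default")
# order_by="startTime" is only valid when single_events=True.
events = hook.get_events(
    calendar_id="primary",
    max_results=50,
    order_by="startTime",
    single_events=True,
)
```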
- :type conference_data_version: int :param max_attendees: Optional. If there are more than the specified number of attendees, only the participant is returned. - :type max_attendees: int :param send_notifications: Optional. Default is False - :type send_notifications: bool :param send_updates: Optional. Default is "false". Acceptable values are ``"all"``, ``"none"``, ``"externalOnly"`` - :type send_updates: str - :type supports_attachments: Optional. Default is False + :param supports_attachments: Optional. Default is False - :type supports_attachments: bool - :type event: Required. Request body of Events resource. Start and End are required + :param event: Required. Request body of Events resource. Start and End are required https://developers.google.com/calendar/api/v3/reference/events#resource - :type event: dict :rtype: Dict """ if "start" not in event or "end" not in event: diff --git a/airflow/providers/google/suite/hooks/drive.py b/airflow/providers/google/suite/hooks/drive.py index 5f08427e7bac9..bf5608e955457 100644 --- a/airflow/providers/google/suite/hooks/drive.py +++ b/airflow/providers/google/suite/hooks/drive.py @@ -29,13 +29,10 @@ class GoogleDriveHook(GoogleBaseHook): Hook for the Google Drive APIs. :param api_version: API version used (for example v3). - :type api_version: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -44,7 +41,6 @@ class GoogleDriveHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ _conn = None  # type: Optional[Resource] @@ -125,7 +121,6 @@ def get_media_request(self, file_id: str) -> HttpRequest: Returns a get_media http request to a Google Drive object. :param file_id: The Google Drive file id - :type file_id: str :return: request :rtype: HttpRequest """ @@ -138,11 +133,8 @@ def exists(self, folder_id: str, file_name: str, drive_id: Optional[str] = None) Checks to see if a file exists within a Google Drive folder :param folder_id: The id of the Google Drive folder in which the file resides - :type folder_id: str :param file_name: The name of a file in Google Drive - :type file_name: str :param drive_id: Optional. The id of the shared Google Drive in which the file resides. - :type drive_id: str :return: True if the file exists, False otherwise :rtype: bool """ @@ -153,11 +145,8 @@ def get_file_id(self, folder_id: str, file_name: str, drive_id: Optional[str] = Returns the file id of a Google Drive file :param folder_id: The id of the Google Drive folder in which the file resides - :type folder_id: str :param file_name: The name of a file in Google Drive - :type file_name: str :param drive_id: Optional. The id of the shared Google Drive in which the file resides. - :type drive_id: str :return: Google Drive file id if the file exists, otherwise None :rtype: str if file exists else None """ @@ -196,9 +185,7 @@ def upload_file(self, local_location: str, remote_location: str) -> str: Uploads a file that is available locally to a Google Drive service.
:param local_location: The path where the file is available. - :type local_location: str :param remote_location: The path where the file will be sent - :type remote_location: str :return: File ID :rtype: str """ @@ -224,9 +211,7 @@ def download_file(self, file_id: str, file_handle: IO, chunk_size: int = 1048576 Download a file from Google Drive. :param file_id: the id of the file - :type file_id: str :param file_handle: file handle used to write the content to - :type file_handle: io.TextIOWrapper """ request = self.get_media_request(file_id=file_id) self.download_content_from_request(file_handle=file_handle, request=request, chunk_size=chunk_size) diff --git a/airflow/providers/google/suite/hooks/sheets.py b/airflow/providers/google/suite/hooks/sheets.py index 5a3e0d5c07388..3e95407b25f4b 100644 --- a/airflow/providers/google/suite/hooks/sheets.py +++ b/airflow/providers/google/suite/hooks/sheets.py @@ -33,13 +33,10 @@ class GSheetsHook(GoogleBaseHook): https://developers.google.com/sheets/api/guides/values :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param api_version: API Version - :type api_version: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -48,7 +45,6 @@ class GSheetsHook(GoogleBaseHook): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( @@ -94,18 +90,13 @@ def get_values( https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets.values/get :param spreadsheet_id: The Google Sheet ID to interact with - :type spreadsheet_id: str :param range_: The A1 notation of the values to retrieve. - :type range_: str :param major_dimension: Indicates which dimension an operation should apply to. DIMENSION_UNSPECIFIED, ROWS, or COLUMNS - :type major_dimension: str :param value_render_option: Determines how values should be rendered in the output. FORMATTED_VALUE, UNFORMATTED_VALUE, or FORMULA - :type value_render_option: str :param date_time_render_option: Determines how dates should be rendered in the output. SERIAL_NUMBER or FORMATTED_STRING - :type date_time_render_option: str :return: An array of sheet values from the specified sheet. :rtype: List """ @@ -139,18 +130,13 @@ def batch_get_values( https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets.values/batchGet :param spreadsheet_id: The Google Sheet ID to interact with - :type spreadsheet_id: str :param ranges: The A1 notation of the values to retrieve. - :type ranges: List :param major_dimension: Indicates which dimension an operation should apply to. DIMENSION_UNSPECIFIED, ROWS, or COLUMNS - :type major_dimension: str :param value_render_option: Determines how values should be rendered in the output. FORMATTED_VALUE, UNFORMATTED_VALUE, or FORMULA - :type value_render_option: str :param date_time_render_option: Determines how dates should be rendered in the output.
SERIAL_NUMBER or FORMATTED_STRING - :type date_time_render_option: str :return: Google Sheets API response. :rtype: Dict """ @@ -187,26 +173,18 @@ def update_values( https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets.values/update :param spreadsheet_id: The Google Sheet ID to interact with. - :type spreadsheet_id: str :param range_: The A1 notation of the values to retrieve. - :type range_: str :param values: Data within a range of the spreadsheet. - :type values: List :param major_dimension: Indicates which dimension an operation should apply to. DIMENSION_UNSPECIFIED, ROWS, or COLUMNS - :type major_dimension: str :param value_input_option: Determines how input data should be interpreted. RAW or USER_ENTERED - :type value_input_option: str :param include_values_in_response: Determines if the update response should include the values of the cells that were updated. - :type include_values_in_response: bool :param value_render_option: Determines how values should be rendered in the output. FORMATTED_VALUE, UNFORMATTED_VALUE, or FORMULA - :type value_render_option: str :param date_time_render_option: Determines how dates should be rendered in the output. SERIAL_NUMBER or FORMATTED_STRING - :type date_time_render_option: str :return: Google Sheets API response. :rtype: Dict """ @@ -246,26 +224,18 @@ def batch_update_values( https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets.values/batchUpdate :param spreadsheet_id: The Google Sheet ID to interact with - :type spreadsheet_id: str :param ranges: The A1 notation of the values to retrieve. - :type ranges: List :param values: Data within a range of the spreadsheet. - :type values: List :param major_dimension: Indicates which dimension an operation should apply to. DIMENSION_UNSPECIFIED, ROWS, or COLUMNS - :type major_dimension: str :param value_input_option: Determines how input data should be interpreted. RAW or USER_ENTERED - :type value_input_option: str :param include_values_in_response: Determines if the update response should include the values of the cells that were updated. - :type include_values_in_response: bool :param value_render_option: Determines how values should be rendered in the output. FORMATTED_VALUE, UNFORMATTED_VALUE, or FORMULA - :type value_render_option: str :param date_time_render_option: Determines how dates should be rendered in the output. SERIAL_NUMBER or FORMATTED_STRING - :type date_time_render_option: str :return: Google Sheets API response. :rtype: Dict """ @@ -313,29 +283,20 @@ def append_values( https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets.values/append :param spreadsheet_id: The Google Sheet ID to interact with - :type spreadsheet_id: str :param range_: The A1 notation of the values to retrieve. - :type range_: str :param values: Data within a range of the spreadsheet. - :type values: List :param major_dimension: Indicates which dimension an operation should apply to. DIMENSION_UNSPECIFIED, ROWS, or COLUMNS - :type major_dimension: str :param value_input_option: Determines how input data should be interpreted. RAW or USER_ENTERED - :type value_input_option: str :param insert_data_option: Determines how existing data is changed when new data is input. OVERWRITE or INSERT_ROWS - :type insert_data_option: str :param include_values_in_response: Determines if the update response should include the values of the cells that were updated. 
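A minimal sketch of the GSheetsHook read/write methods documented above (the spreadsheet ID is a hypothetical placeholder; ranges use A1 notation):

```python
from airflow.providers.google.suite.hooks.sheets import GSheetsHook

hook = GSheetsHook(gcp_conn_id="google_cloud_default")
# Read a block of cells, then write a summary cell back.
rows = hook.get_values(spreadsheet_id="my_spreadsheet_id", range_="Sheet1!A1:C10")
hook.update_values(
    spreadsheet_id="my_spreadsheet_id",
    range_="Sheet1!E1",
    values=[["row_count", len(rows)]],
    value_input_option="USER_ENTERED",
)
```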
- :type include_values_in_response: bool :param value_render_option: Determines how values should be rendered in the output. FORMATTED_VALUE, UNFORMATTED_VALUE, or FORMULA - :type value_render_option: str :param date_time_render_option: Determines how dates should be rendered in the output. SERIAL_NUMBER or FORMATTED_STRING - :type date_time_render_option: str :return: Google Sheets API response. :rtype: Dict """ @@ -366,9 +327,7 @@ def clear(self, spreadsheet_id: str, range_: str) -> dict: https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets.values/clear :param spreadsheet_id: The Google Sheet ID to interact with - :type spreadsheet_id: str :param range_: The A1 notation of the values to retrieve. - :type range_: str :return: Google Sheets API response. :rtype: Dict """ @@ -389,9 +348,7 @@ def batch_clear(self, spreadsheet_id: str, ranges: list) -> dict: https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets.values/batchClear :param spreadsheet_id: The Google Sheet ID to interact with - :type spreadsheet_id: str :param ranges: The A1 notation of the values to retrieve. - :type ranges: List :return: Google Sheets API response. :rtype: Dict """ @@ -412,7 +369,6 @@ def get_spreadsheet(self, spreadsheet_id: str): Retrieves spreadsheet matching the given id. :param spreadsheet_id: The spreadsheet id. - :type spreadsheet_id: str :return: A spreadsheet that matches the sheet filter. """ response = ( @@ -428,9 +384,7 @@ def get_sheet_titles(self, spreadsheet_id: str, sheet_filter: Optional[List[str] Retrieves the sheet titles from a spreadsheet matching the given id and sheet filter. :param spreadsheet_id: The spreadsheet id. - :type spreadsheet_id: str :param sheet_filter: List of sheet titles to retrieve from the spreadsheet. - :type sheet_filter: List[str] :return: A list of sheet titles from the specified sheet that match the sheet filter. """ @@ -452,7 +406,6 @@ def create_spreadsheet(self, spreadsheet: Dict[str, Any]) -> Dict[str, Any]: :param spreadsheet: an instance of Spreadsheet https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets#Spreadsheet - :type spreadsheet: Dict[str, Any] :return: A spreadsheet object. """ self.log.info("Creating spreadsheet: %s", spreadsheet['properties']['title']) diff --git a/airflow/providers/google/suite/operators/sheets.py b/airflow/providers/google/suite/operators/sheets.py index f3548ca17eac0..48f93af2839e9 100644 --- a/airflow/providers/google/suite/operators/sheets.py +++ b/airflow/providers/google/suite/operators/sheets.py @@ -31,13 +31,10 @@ class GoogleSheetsCreateSpreadsheetOperator(BaseOperator): :param spreadsheet: an instance of Spreadsheet https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets#Spreadsheet - :type spreadsheet: Dict[str, Any] :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request.
@@ -46,7 +43,6 @@ class GoogleSheetsCreateSpreadsheetOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/suite/sensors/drive.py b/airflow/providers/google/suite/sensors/drive.py index 7fbc6c19ba3f5..5729deb316153 100644 --- a/airflow/providers/google/suite/sensors/drive.py +++ b/airflow/providers/google/suite/sensors/drive.py @@ -31,18 +31,13 @@ class GoogleDriveFileExistenceSensor(BaseSensorOperator): Checks for the existence of a file in Google Drive. :param folder_id: The Google drive folder where the file is. - :type folder_id: str :param file_name: The name of the file to check in Google Drive - :type file_name: str :param drive_id: Optional. The id of the shared Google Drive in which the file resides. - :type drive_id: str :param gcp_conn_id: The connection ID to use when connecting to Google Drive. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -51,7 +46,6 @@ class GoogleDriveFileExistenceSensor(BaseSensorOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/suite/transfers/gcs_to_gdrive.py b/airflow/providers/google/suite/transfers/gcs_to_gdrive.py index 72c5fc89b54a5..f58119e1612d5 100644 --- a/airflow/providers/google/suite/transfers/gcs_to_gdrive.py +++ b/airflow/providers/google/suite/transfers/gcs_to_gdrive.py @@ -47,13 +47,11 @@ class GCSToGoogleDriveOperator(BaseOperator): :ref:`howto/operator:GCSToGoogleDriveOperator` :param source_bucket: The source Google Cloud Storage bucket where the object is. (templated) - :type source_bucket: str :param source_object: The source name of the object to copy in the Google cloud storage bucket. (templated) You can use only one wildcard for objects (filenames) within your bucket. The wildcard can appear inside the object name or at the end of the object name. Appending a wildcard to the bucket name is unsupported. - :type source_object: str :param destination_object: The destination name of the object in the destination Google Drive service. (templated) If a wildcard is supplied in the source_object argument, this is the prefix that will be prepended @@ -63,16 +61,12 @@ class GCSToGoogleDriveOperator(BaseOperator): For example, with prefix ``foo/*`` and destination_object ``blah/``, the file ``foo/baz`` will be copied to ``blah/baz``; to retain the prefix write the destination_object as e.g. ``blah/foo``, in which case the copied file will be named ``blah/foo/baz``.
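A minimal sketch of GoogleDriveFileExistenceSensor as documented above (folder ID and file name are hypothetical placeholders):

```python
from airflow.providers.google.suite.sensors.drive import GoogleDriveFileExistenceSensor

# Waits until the named file appears in the given Drive folder.
wait_for_file = GoogleDriveFileExistenceSensor(
    task_id="wait_for_file",
    folder_id="my_drive_folder_id",
    file_name="report.csv",
    poke_interval=60,
)
```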
- :type destination_object: str :param move_object: When move object is True, the object is moved instead of copied to the new location. This is the equivalent of a mv command as opposed to a cp command. - :type move_object: bool :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -81,7 +75,6 @@ class GCSToGoogleDriveOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/suite/transfers/gcs_to_sheets.py b/airflow/providers/google/suite/transfers/gcs_to_sheets.py index 733c729f291f5..591ae77ca9545 100644 --- a/airflow/providers/google/suite/transfers/gcs_to_sheets.py +++ b/airflow/providers/google/suite/transfers/gcs_to_sheets.py @@ -33,19 +33,13 @@ class GCSToGoogleSheetsOperator(BaseOperator): :ref:`howto/operator:GCSToGoogleSheets` :param spreadsheet_id: The Google Sheet ID to interact with. - :type spreadsheet_id: str :param bucket_name: Name of the GCS bucket. - :type bucket_name: str :param object_name: Path to the .csv file on the GCS bucket. - :type object_name: str :param spreadsheet_range: The A1 notation of the values to retrieve. - :type spreadsheet_range: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -54,7 +48,6 @@ class GCSToGoogleSheetsOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/google/suite/transfers/sql_to_sheets.py b/airflow/providers/google/suite/transfers/sql_to_sheets.py index 2a680a652052a..6f972d076debe 100644 --- a/airflow/providers/google/suite/transfers/sql_to_sheets.py +++ b/airflow/providers/google/suite/transfers/sql_to_sheets.py @@ -30,23 +30,15 @@ class SQLToGoogleSheetsOperator(BaseSQLOperator): Copy data from SQL results to provided Google Spreadsheet. :param sql: The SQL to execute. - :type sql: str :param spreadsheet_id: The Google Sheet ID to interact with. - :type spreadsheet_id: str :param conn_id: The connection ID used to connect to the database.
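A minimal sketch of GCSToGoogleSheetsOperator as documented above (IDs and names are hypothetical placeholders):

```python
from airflow.providers.google.suite.transfers.gcs_to_sheets import GCSToGoogleSheetsOperator

# Loads a CSV object from GCS into the given sheet range.
upload_sheet = GCSToGoogleSheetsOperator(
    task_id="upload_sheet",
    spreadsheet_id="my_spreadsheet_id",
    bucket_name="my-gcs-bucket",
    object_name="data/report.csv",
    spreadsheet_range="Sheet1",
)
```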
- :type sql_conn_id: str :param parameters: The parameters to render the SQL query with. - :type parameters: dict or iterable :param database: name of the database which overwrites the one defined in the connection - :type database: str :param spreadsheet_range: The A1 notation of the values to retrieve. - :type spreadsheet_range: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -55,7 +47,6 @@ class SQLToGoogleSheetsOperator(BaseSQLOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - :type impersonation_chain: Union[str, Sequence[str]] """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/grpc/hooks/grpc.py b/airflow/providers/grpc/hooks/grpc.py index e7e0bc7645ced..1b575d73f4bdf 100644 --- a/airflow/providers/grpc/hooks/grpc.py +++ b/airflow/providers/grpc/hooks/grpc.py @@ -35,16 +35,14 @@ class GrpcHook(BaseHook): General interaction with gRPC servers. :param grpc_conn_id: The connection ID to use when fetching connection info. - :type grpc_conn_id: str :param interceptors: a list of gRPC interceptor objects which would be applied to the connected gRPC channel. None by default. - :type interceptors: a list of gRPC interceptors based on or extends the four + Each interceptor should be based on or extend one of the four official gRPC interceptors, e.g. UnaryUnaryClientInterceptor, UnaryStreamClientInterceptor, StreamUnaryClientInterceptor, StreamStreamClientInterceptor. :param custom_connection_func: The customized connection function to return gRPC channel. - :type custom_connection_func: python callable objects that accept the connection as - its only arg. Could be partial or lambda. + A callable that accepts the connection as its only arg.
""" conn_name_attr = 'grpc_conn_id' diff --git a/airflow/providers/grpc/operators/grpc.py b/airflow/providers/grpc/operators/grpc.py index 3120e256dc809..5cca48b4fff31 100644 --- a/airflow/providers/grpc/operators/grpc.py +++ b/airflow/providers/grpc/operators/grpc.py @@ -30,26 +30,17 @@ class GrpcOperator(BaseOperator): Calls a gRPC endpoint to execute an action :param stub_class: The stub client to use for this gRPC call - :type stub_class: gRPC stub class generated from proto file :param call_func: The client function name to call the gRPC endpoint - :type call_func: gRPC client function name for the endpoint generated from proto file, str :param grpc_conn_id: The connection to run the operator against - :type grpc_conn_id: str :param data: The data to pass to the rpc call - :type data: A dict with key value pairs as kwargs of the call_func :param interceptors: A list of gRPC interceptor objects to be used on the channel - :type interceptors: A list of gRPC interceptor objects, has to be initialized - :param custom_connection_func: The customized connection function to return channel object - :type custom_connection_func: A python function that returns channel object, take in - a connection object, can be a partial function + :param custom_connection_func: The customized connection function to return channel object. + A callable that accepts the connection as its only arg. :param streaming: A flag to indicate if the call is a streaming call - :type streaming: boolean - :param response_callback: The callback function to process the response from gRPC call - :type response_callback: A python function that process the response from gRPC call, + :param response_callback: The callback function to process the response from gRPC call, takes in response object and context object, context object can be used to perform push xcom or other after task actions :param log_response: A flag to indicate if we need to log the response - :type log_response: boolean """ template_fields: Sequence[str] = ('stub_class', 'call_func', 'data') diff --git a/airflow/providers/hashicorp/_internal_client/vault_client.py b/airflow/providers/hashicorp/_internal_client/vault_client.py index 40fc7061f7ac2..9eecf26f87283 100644 --- a/airflow/providers/hashicorp/_internal_client/vault_client.py +++ b/airflow/providers/hashicorp/_internal_client/vault_client.py @@ -56,59 +56,37 @@ class _VaultClient(LoggingMixin): with VaultHook using standard Airflow Connection definition. :param url: Base URL for the Vault instance being addressed. - :type url: str :param auth_type: Authentication Type for Vault. Default is ``token``. Available values are in ('approle', 'aws_iam', 'azure', 'github', 'gcp', 'kubernetes', 'ldap', 'radius', 'token', 'userpass') - :type auth_type: str :param auth_mount_point: It can be used to define mount_point for authentication chosen Default depends on the authentication method used. - :type auth_mount_point: str :param mount_point: The "path" the secret engine was mounted on. Default is "secret". Note that this mount_point is not used for authentication if authentication is done via a different engine. For authentication mount_points see, auth_mount_point. - :type mount_point: str :param kv_engine_version: Selects the version of the engine to run (``1`` or ``2``, default: ``2``). - :type kv_engine_version: int :param token: Authentication token to include in requests sent to Vault (for ``token`` and ``github`` auth_type). 
- :type token: str :param token_path: path to file containing authentication token to include in requests sent to Vault (for ``token`` and ``github`` auth_type). - :type token_path: str :param username: Username for Authentication (for ``ldap`` and ``userpass`` auth_types). - :type username: str :param password: Password for Authentication (for ``ldap`` and ``userpass`` auth_types). - :type password: str :param key_id: Key ID for Authentication (for ``aws_iam`` and ``azure`` auth_type). - :type key_id: str :param secret_id: Secret ID for Authentication (for ``approle``, ``aws_iam`` and ``azure`` auth_types). - :type secret_id: str :param role_id: Role ID for Authentication (for ``approle``, ``aws_iam`` auth_types). - :type role_id: str :param kubernetes_role: Role for Authentication (for ``kubernetes`` auth_type). - :type kubernetes_role: str :param kubernetes_jwt_path: Path for kubernetes jwt token (for ``kubernetes`` auth_type, default: ``/var/run/secrets/kubernetes.io/serviceaccount/token``). - :type kubernetes_jwt_path: str :param gcp_key_path: Path to Google Cloud Service Account key file (JSON) (for ``gcp`` auth_type). Mutually exclusive with gcp_keyfile_dict - :type gcp_key_path: str :param gcp_keyfile_dict: Dictionary of keyfile parameters. (for ``gcp`` auth_type). Mutually exclusive with gcp_key_path - :type gcp_keyfile_dict: dict :param gcp_scopes: Comma-separated string containing OAuth2 scopes (for ``gcp`` auth_type). - :type gcp_scopes: str :param azure_tenant_id: The tenant id for the Azure Active Directory (for ``azure`` auth_type). - :type azure_tenant_id: str :param azure_resource: The configured URL for the application registered in Azure Active Directory (for ``azure`` auth_type). - :type azure_resource: str :param radius_host: Host for radius (for ``radius`` auth_type). - :type radius_host: str :param radius_secret: Secret for radius (for ``radius`` auth_type). - :type radius_secret: str :param radius_port: Port for radius (for ``radius`` auth_type). - :type radius_port: int """ def __init__( @@ -356,10 +334,8 @@ def get_secret(self, secret_path: str, secret_version: Optional[int] = None) -> Get secret value from the KV engine. :param secret_path: The path of the secret. - :type secret_path: str :param secret_version: Specifies the version of Secret to return. If not set, the latest version is returned. (Can only be used in case of version 2 of KV). - :type secret_version: int See https://hvac.readthedocs.io/en/stable/usage/secrets_engines/kv_v1.html and https://hvac.readthedocs.io/en/stable/usage/secrets_engines/kv_v2.html for details. @@ -389,7 +365,6 @@ def get_secret_metadata(self, secret_path: str) -> Optional[dict]: Reads secret metadata (including versions) from the engine. It is only valid for KV version 2. :param secret_path: The path of the secret. - :type secret_path: str :rtype: dict :return: secret metadata. This is a Dict containing metadata for the secret. @@ -415,10 +390,8 @@ def get_secret_including_metadata( See https://hvac.readthedocs.io/en/stable/usage/secrets_engines/kv_v2.html for details. :param secret_path: The path of the secret. - :type secret_path: str :param secret_version: Specifies the version of Secret to return. If not set, the latest version is returned. (Can only be used in case of version 2 of KV). - :type secret_version: int :rtype: dict :return: The key info. This is a Dict with "data" mapping keeping secret and "metadata" mapping keeping metadata of the secret.
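The public VaultHook (next file in this diff) wraps the internal client documented above; a minimal sketch with a hypothetical connection ID and secret paths:

```python
from airflow.providers.hashicorp.hooks.vault import VaultHook

hook = VaultHook(vault_conn_id="vault_default")
# Read a secret, then write one back (KV engine paths are placeholders).
smtp_conn = hook.get_secret(secret_path="connections/smtp_default")
hook.create_or_update_secret(
    secret_path="variables/my_var",
    secret={"value": "some-value"},
)
```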
@@ -445,18 +418,14 @@ def create_or_update_secret( Creates or updates secret. :param secret_path: The path of the secret. - :type secret_path: str :param secret: Secret to create or update for the path specified - :type secret: dict :param method: Optional parameter to explicitly request a POST (create) or PUT (update) request to the selected kv secret engine. If no argument is provided for this parameter, hvac attempts to intelligently determine which method is appropriate. Only valid for KV engine version 1 - :type method: str :param cas: Set the "cas" value to use a Check-And-Set operation. If not set the write will be allowed. If set to 0 a write will only be allowed if the key doesn't exist. If the index is non-zero the write will only be allowed if the key's current version matches the version specified in the cas parameter. Only valid for KV engine version 2. - :type cas: int :rtype: requests.Response :return: The response of the create_or_update_secret request. diff --git a/airflow/providers/hashicorp/hooks/vault.py b/airflow/providers/hashicorp/hooks/vault.py index 36b28f00b2466..ce351ff2c52cb 100644 --- a/airflow/providers/hashicorp/hooks/vault.py +++ b/airflow/providers/hashicorp/hooks/vault.py @@ -74,40 +74,26 @@ class VaultHook(BaseHook): * radius: password -> radius_secret :param vault_conn_id: The id of the connection to use - :type vault_conn_id: str :param auth_type: Authentication Type for the Vault. Default is ``token``. Available values are: ('approle', 'github', 'gcp', 'kubernetes', 'ldap', 'token', 'userpass') - :type auth_type: str :param auth_mount_point: It can be used to define mount_point for authentication chosen Default depends on the authentication method used. - :type auth_mount_point: str :param kv_engine_version: Select the version of the engine to run (``1`` or ``2``). Defaults to version defined in connection or ``2`` if not defined in connection. - :type kv_engine_version: int :param role_id: Role ID for ``aws_iam`` Authentication. - :type role_id: str :param kubernetes_role: Role for Authentication (for ``kubernetes`` auth_type) - :type kubernetes_role: str :param kubernetes_jwt_path: Path for kubernetes jwt token (for ``kubernetes`` auth_type, default: ``/var/run/secrets/kubernetes.io/serviceaccount/token``) - :type kubernetes_jwt_path: str :param token_path: path to file containing authentication token to include in requests sent to Vault (for ``token`` and ``github`` auth_type). - :type token_path: str :param gcp_key_path: Path to Google Cloud Service Account key file (JSON) (for ``gcp`` auth_type) Mutually exclusive with gcp_keyfile_dict - :type gcp_key_path: str :param gcp_scopes: Comma-separated string containing OAuth2 scopes (for ``gcp`` auth_type) - :type gcp_scopes: str :param azure_tenant_id: The tenant id for the Azure Active Directory (for ``azure`` auth_type) - :type azure_tenant_id: str :param azure_resource: The configured URL for the application registered in Azure Active Directory (for ``azure`` auth_type) - :type azure_resource: str :param radius_host: Host for radius (for ``radius`` auth_type) - :type radius_host: str :param radius_port: Port for radius (for ``radius`` auth_type) - :type radius_port: int """ @@ -297,15 +283,12 @@ def get_secret(self, secret_path: str, secret_version: Optional[int] = None) -> Get secret value from the engine. 
:param secret_path: Path of the secret - :type secret_path: str :param secret_version: Optional version of key to read - can only be used in case of version 2 of KV - :type secret_version: int See https://hvac.readthedocs.io/en/stable/usage/secrets_engines/kv_v1.html and https://hvac.readthedocs.io/en/stable/usage/secrets_engines/kv_v2.html for details. - :param secret_path: Path of the secret - :type secret_path: str :rtype: dict :return: secret stored in the vault as a dictionary """ @@ -316,7 +299,6 @@ def get_secret_metadata(self, secret_path: str) -> Optional[dict]: Reads secret metadata (including versions) from the engine. It is only valid for KV version 2. :param secret_path: Path to read from - :type secret_path: str :rtype: dict :return: secret metadata. This is a Dict containing metadata for the secret. @@ -334,9 +316,7 @@ def get_secret_including_metadata( See https://hvac.readthedocs.io/en/stable/usage/secrets_engines/kv_v2.html for details. :param secret_path: Path of the secret - :type secret_path: str :param secret_version: Optional version of key to read - can only be used in case of version 2 of KV - :type secret_version: int :rtype: dict :return: key info. This is a Dict with a "data" mapping holding the secret and a "metadata" mapping holding the secret's metadata. @@ -353,18 +333,14 @@ def create_or_update_secret( Creates or updates secret. :param secret_path: Path of the secret to create or update - :type secret_path: str :param secret: Secret to create or update for the path specified - :type secret: dict :param method: Optional parameter to explicitly request a POST (create) or PUT (update) request to the selected kv secret engine. If no argument is provided for this parameter, hvac attempts to intelligently determine which method is appropriate. Only valid for KV engine version 1 - :type method: str :param cas: Set the "cas" value to use a Check-And-Set operation. If not set the write will be allowed. If set to 0 a write will only be allowed if the key doesn't exist. If the index is non-zero the write will only be allowed if the key's current version matches the version specified in the cas parameter. Only valid for KV engine version 2. - :type cas: int :rtype: requests.Response :return: The response of the create_or_update_secret request. diff --git a/airflow/providers/hashicorp/secrets/vault.py b/airflow/providers/hashicorp/secrets/vault.py index 86c5df01a3cba..ec5f19c60391a 100644 --- a/airflow/providers/hashicorp/secrets/vault.py +++ b/airflow/providers/hashicorp/secrets/vault.py @@ -45,67 +45,42 @@ class VaultBackend(BaseSecretsBackend, LoggingMixin): :param connections_path: Specifies the path of the secret to read to get Connections. (default: 'connections'). If set to None (null), requests for connections will not be sent to Vault. - :type connections_path: str :param variables_path: Specifies the path of the secret to read to get Variable. (default: 'variables'). If set to None (null), requests for variables will not be sent to Vault. - :type variables_path: str :param config_path: Specifies the path of the secret to read Airflow Configurations (default: 'config'). If set to None (null), requests for configurations will not be sent to Vault. - :type config_path: str :param url: Base URL for the Vault instance being addressed. - :type url: str :param auth_type: Authentication Type for Vault. Default is ``token``.
Available values are: ('approle', 'aws_iam', 'azure', 'github', 'gcp', 'kubernetes', 'ldap', 'radius', 'token', 'userpass') - :type auth_type: str :param auth_mount_point: It can be used to define mount_point for authentication chosen. Default depends on the authentication method used. - :type auth_mount_point: str :param mount_point: The "path" the secret engine was mounted on. Default is "secret". Note that this mount_point is not used for authentication if authentication is done via a different engine. For authentication mount_points, see auth_mount_point. - :type mount_point: str :param kv_engine_version: Select the version of the engine to run (``1`` or ``2``, default: ``2``). - :type kv_engine_version: int :param token: Authentication token to include in requests sent to Vault. (for ``token`` and ``github`` auth_type) - :type token: str :param token_path: path to file containing authentication token to include in requests sent to Vault (for ``token`` and ``github`` auth_type). - :type token_path: str :param username: Username for Authentication (for ``ldap`` and ``userpass`` auth_type). - :type username: str :param password: Password for Authentication (for ``ldap`` and ``userpass`` auth_type). - :type password: str :param key_id: Key ID for Authentication (for ``aws_iam`` and ``azure`` auth_type). - :type key_id: str :param secret_id: Secret ID for Authentication (for ``approle``, ``aws_iam`` and ``azure`` auth_types). - :type secret_id: str :param role_id: Role ID for Authentication (for ``approle``, ``aws_iam`` auth_types). - :type role_id: str :param kubernetes_role: Role for Authentication (for ``kubernetes`` auth_type). - :type kubernetes_role: str :param kubernetes_jwt_path: Path for kubernetes jwt token (for ``kubernetes`` auth_type, default: ``/var/run/secrets/kubernetes.io/serviceaccount/token``). - :type kubernetes_jwt_path: str :param gcp_key_path: Path to Google Cloud Service Account key file (JSON) (for ``gcp`` auth_type). Mutually exclusive with gcp_keyfile_dict. - :type gcp_key_path: str :param gcp_keyfile_dict: Dictionary of keyfile parameters. (for ``gcp`` auth_type). Mutually exclusive with gcp_key_path. - :type gcp_keyfile_dict: dict :param gcp_scopes: Comma-separated string containing OAuth2 scopes (for ``gcp`` auth_type). - :type gcp_scopes: str :param azure_tenant_id: The tenant id for the Azure Active Directory (for ``azure`` auth_type). - :type azure_tenant_id: str :param azure_resource: The configured URL for the application registered in Azure Active Directory (for ``azure`` auth_type). - :type azure_resource: str :param radius_host: Host for radius (for ``radius`` auth_type). - :type radius_host: str :param radius_secret: Secret for radius (for ``radius`` auth_type). - :type radius_secret: str :param radius_port: Port for radius (for ``radius`` auth_type). - :type radius_port: str """ def __init__( @@ -182,7 +157,6 @@ def get_response(self, conn_id: str) -> Optional[dict]: """ Get data from Vault - :type conn_id: str :rtype: dict :return: The data from the Vault path if exists """ @@ -197,7 +171,6 @@ def get_conn_uri(self, conn_id: str) -> Optional[str]: Get secret value from Vault. Store the secret in the form of URI :param conn_id: The connection id - :type conn_id: str :rtype: str :return: The connection uri retrieved from the secret """ @@ -215,7 +188,6 @@ def get_connection(self, conn_id: str) -> 'Optional[Connection]': Get connection from Vault as secret. Prioritize conn_uri if it exists; if not, fall back to normal Connection creation.
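To make the parameter list above concrete, here is a hedged sketch of wiring the backend up by hand. In a real deployment these keyword arguments are normally supplied via ``backend_kwargs`` in the ``[secrets]`` section of ``airflow.cfg``; the connection id and secret layout below are illustrative assumptions only:

```python
from airflow.providers.hashicorp.secrets.vault import VaultBackend

# Token auth against a local Vault, with connections stored under the
# default "connections" path of the "secret" mount, i.e. the secret for
# conn_id "smtp_default" lives at secret/connections/smtp_default.
backend = VaultBackend(
    url="http://127.0.0.1:8200",
    auth_type="token",
    token="hvs.example-token",
    connections_path="connections",
    mount_point="secret",
    kv_engine_version=2,
)

# Reads secret/connections/smtp_default; by convention the secret holds
# the connection serialized as a URI under a "conn_uri" key.
uri = backend.get_conn_uri(conn_id="smtp_default")
```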
- :type conn_id: str :rtype: Connection :return: A Connection object constructed from Vault data """ @@ -238,7 +210,6 @@ def get_variable(self, key: str) -> Optional[str]: Get Airflow Variable :param key: Variable Key - :type key: str :rtype: str :return: Variable Value retrieved from the vault """ @@ -254,7 +225,6 @@ def get_config(self, key: str) -> Optional[str]: Get Airflow Configuration :param key: Configuration Option Key - :type key: str :rtype: str :return: Configuration Option Value retrieved from the vault """ diff --git a/airflow/providers/http/hooks/http.py b/airflow/providers/http/hooks/http.py index f66d40d04f90b..9ea9c39932fd4 100644 --- a/airflow/providers/http/hooks/http.py +++ b/airflow/providers/http/hooks/http.py @@ -30,13 +30,10 @@ class HttpHook(BaseHook): Interact with HTTP servers. :param method: the API method to be called - :type method: str :param http_conn_id: :ref:`http connection` that has the base API url i.e https://www.google.com/ and optional authentication credentials. Default headers can also be specified in the Extra field in json format. - :type http_conn_id: str :param auth_type: The auth type for the service - :type auth_type: AuthBase of python requests lib """ conn_name_attr = 'http_conn_id' @@ -64,7 +61,6 @@ def get_conn(self, headers: Optional[Dict[Any, Any]] = None) -> requests.Session Returns http session for use with requests :param headers: additional headers to be passed through as a dictionary - :type headers: dict """ session = requests.Session() @@ -105,15 +101,11 @@ def run( Performs the request :param endpoint: the endpoint to be called i.e. resource/v1/query? - :type endpoint: str :param data: payload to be uploaded or request parameters - :type data: dict :param headers: additional headers to be passed through as a dictionary - :type headers: dict :param extra_options: additional options to be used when executing the request i.e. {'check_response': False} to avoid checking raising exceptions on non 2XX or 3XX status codes - :type extra_options: dict :param request_kwargs: Additional kwargs to pass when creating a request. For example, ``run(json=obj)`` is passed as ``requests.Request(json=obj)`` """ @@ -146,7 +138,6 @@ def check_response(self, response: requests.Response) -> None: status codes :param response: A requests response object - :type response: requests.response """ try: response.raise_for_status() @@ -166,13 +157,10 @@ def run_and_check( checking for the result :param session: the session to be used to execute the request - :type session: requests.Session :param prepped_request: the prepared request generated in run() - :type prepped_request: session.prepare_request :param extra_options: additional options to be used when executing the request i.e. ``{'check_response': False}`` to avoid checking raising exceptions on non 2XX or 3XX status codes - :type extra_options: dict """ extra_options = extra_options or {} @@ -210,7 +198,6 @@ def run_with_advanced_retry(self, _retry_args: Dict[Any, Any], *args: Any, **kwa :param _retry_args: Arguments which define the retry behaviour. See Tenacity documentation at https://github.com/jd/tenacity - :type _retry_args: dict .. 
code-block:: python diff --git a/airflow/providers/http/operators/http.py b/airflow/providers/http/operators/http.py index 96f856d94b1e1..eedd8632dd360 100644 --- a/airflow/providers/http/operators/http.py +++ b/airflow/providers/http/operators/http.py @@ -37,35 +37,25 @@ class SimpleHttpOperator(BaseOperator): :param http_conn_id: The :ref:`http connection` to run the operator against - :type http_conn_id: str :param endpoint: The relative part of the full url. (templated) - :type endpoint: str :param method: The HTTP method to use, default = "POST" - :type method: str :param data: The data to pass. POST-data in POST/PUT and params in the URL for a GET request. (templated) :type data: For POST/PUT, depends on the content-type parameter, for GET a dictionary of key/value string pairs :param headers: The HTTP headers to be added to the GET request - :type headers: a dictionary of string key/value pairs :param response_check: A check against the 'requests' response object. The callable takes the response object as the first positional argument and optionally any number of keyword arguments available in the context dictionary. It should return True for 'pass' and False otherwise. - :type response_check: A lambda or defined function. :param response_filter: A function allowing you to manipulate the response text. e.g response_filter=lambda response: json.loads(response.text). The callable takes the response object as the first positional argument and optionally any number of keyword arguments available in the context dictionary. - :type response_filter: A lambda or defined function. :param extra_options: Extra options for the 'requests' library, see the 'requests' documentation (options to modify timeout, ssl, etc.) - :type extra_options: A dictionary of options, where key is string and value - depends on the option that's being modified. :param log_response: Log the response (default: False) - :type log_response: bool :param auth_type: The auth type for the service - :type auth_type: AuthBase of python requests lib """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/http/sensors/http.py b/airflow/providers/http/sensors/http.py index 41f7d8dd61427..640c55d79978b 100644 --- a/airflow/providers/http/sensors/http.py +++ b/airflow/providers/http/sensors/http.py @@ -54,24 +54,16 @@ def response_check(response, task_instance): :param http_conn_id: The :ref:`http connection` to run the sensor against - :type http_conn_id: str :param method: The HTTP request method to use - :type method: str :param endpoint: The relative part of the full url - :type endpoint: str :param request_params: The parameters to be added to the GET url - :type request_params: a dictionary of string key/value pairs :param headers: The HTTP headers to be added to the GET request - :type headers: a dictionary of string key/value pairs :param response_check: A check against the 'requests' response object. The callable takes the response object as the first positional argument and optionally any number of keyword arguments available in the context dictionary. It should return True for 'pass' and False otherwise. - :type response_check: A lambda or defined function. :param extra_options: Extra options for the 'requests' library, see the 'requests' documentation (options to modify timeout, ssl, etc.) - :type extra_options: A dictionary of options, where key is string and value - depends on the option that's being modified. 
""" template_fields: Sequence[str] = ('endpoint', 'request_params', 'headers') diff --git a/airflow/providers/imap/hooks/imap.py b/airflow/providers/imap/hooks/imap.py index ce13389777f0d..56499ddac3fa4 100644 --- a/airflow/providers/imap/hooks/imap.py +++ b/airflow/providers/imap/hooks/imap.py @@ -41,7 +41,6 @@ class ImapHook(BaseHook): :param imap_conn_id: The :ref:`imap connection id ` that contains the information used to authenticate the client. - :type imap_conn_id: str """ conn_name_attr = 'imap_conn_id' @@ -98,14 +97,10 @@ def has_mail_attachment( Checks the mail folder for mails containing attachments with the given name. :param name: The name of the attachment that will be searched for. - :type name: str :param check_regex: Checks the name for a regular expression. - :type check_regex: bool :param mail_folder: The mail folder where to look at. - :type mail_folder: str :param mail_filter: If set other than 'All' only specific mails will be checked. See :py:meth:`imaplib.IMAP4.search` for details. - :type mail_filter: str :returns: True if there is an attachment with the given name and False if not. :rtype: bool """ @@ -128,22 +123,16 @@ def retrieve_mail_attachments( Retrieves mail's attachments in the mail folder by its name. :param name: The name of the attachment that will be downloaded. - :type name: str :param check_regex: Checks the name for a regular expression. - :type check_regex: bool :param latest_only: If set to True it will only retrieve the first matched attachment. - :type latest_only: bool :param mail_folder: The mail folder where to look at. - :type mail_folder: str :param mail_filter: If set other than 'All' only specific mails will be checked. See :py:meth:`imaplib.IMAP4.search` for details. - :type mail_filter: str :param not_found_mode: Specify what should happen if no attachment has been found. Supported values are 'raise', 'warn' and 'ignore'. If it is set to 'raise' it will raise an exception, if set to 'warn' it will only print a warning and if set to 'ignore' it won't notify you at all. - :type not_found_mode: str :returns: a list of tuple each containing the attachment filename and its payload. :rtype: a list of tuple """ @@ -171,25 +160,18 @@ def download_mail_attachments( Downloads mail's attachments in the mail folder by its name to the local directory. :param name: The name of the attachment that will be downloaded. - :type name: str :param local_output_directory: The output directory on the local machine where the files will be downloaded to. - :type local_output_directory: str :param check_regex: Checks the name for a regular expression. - :type check_regex: bool :param latest_only: If set to True it will only download the first matched attachment. - :type latest_only: bool :param mail_folder: The mail folder where to look at. - :type mail_folder: str :param mail_filter: If set other than 'All' only specific mails will be checked. See :py:meth:`imaplib.IMAP4.search` for details. - :type mail_filter: str :param not_found_mode: Specify what should happen if no attachment has been found. Supported values are 'raise', 'warn' and 'ignore'. If it is set to 'raise' it will raise an exception, if set to 'warn' it will only print a warning and if set to 'ignore' it won't notify you at all. - :type not_found_mode: str """ mail_attachments = self._retrieve_mails_attachments_by_name( name, check_regex, latest_only, mail_folder, mail_filter @@ -292,7 +274,6 @@ class Mail(LoggingMixin): This class simplifies working with mails returned by the imaplib client. 
:param mail_body: The mail body of a mail received from imaplib client. - :type mail_body: str """ def __init__(self, mail_body: str) -> None: @@ -315,11 +296,8 @@ def get_attachments_by_name( Gets all attachments by name for the mail. :param name: The name of the attachment to look for. - :type name: str :param check_regex: Checks the name for a regular expression. - :type check_regex: bool :param find_first: If set to True it will only find the first match and then quit. - :type find_first: bool :returns: a list of tuples each containing name and payload where the attachment's name matches the given name. :rtype: list(tuple) @@ -351,7 +329,6 @@ class MailPart: This class is a wrapper for a Mail object's part and gives it more features. :param part: The mail part in a Mail object. - :type part: any """ def __init__(self, part: Any) -> None: @@ -371,7 +348,6 @@ def has_matching_name(self, name: str) -> Optional[Tuple[Any, Any]]: Checks if the given name matches the part's name. :param name: The name to look for. - :type name: str :returns: True if it matches the name (including regular expression). :rtype: tuple """ @@ -382,7 +358,6 @@ def has_equal_name(self, name: str) -> bool: Checks if the given name is equal to the part's name. :param name: The name to look for. - :type name: str :returns: True if it is equal to the given name. :rtype: bool """ diff --git a/airflow/providers/imap/sensors/imap_attachment.py b/airflow/providers/imap/sensors/imap_attachment.py index 5e048bfb1e79f..a18b09f36ca54 100644 --- a/airflow/providers/imap/sensors/imap_attachment.py +++ b/airflow/providers/imap/sensors/imap_attachment.py @@ -30,18 +30,13 @@ class ImapAttachmentSensor(BaseSensorOperator): Waits for a specific attachment on a mail server. :param attachment_name: The name of the attachment that will be checked. - :type attachment_name: str :param check_regex: If set to True the attachment's name will be parsed as a regular expression. Through this you can match a broader set of attachments than just those exactly equal to the attachment name. - :type check_regex: bool :param mail_folder: The mail folder in which to search for the attachment. - :type mail_folder: str :param mail_filter: If set other than 'All' only specific mails will be checked. See :py:meth:`imaplib.IMAP4.search` for details. - :type mail_filter: str :param imap_conn_id: The :ref:`imap connection id ` to run the sensor against. - :type imap_conn_id: str """ template_fields: Sequence[str] = ('attachment_name', 'mail_filter') @@ -69,7 +64,6 @@ def poke(self, context: 'Context') -> bool: Pokes for a mail attachment on the mail server. :param context: The context that is being provided when poking. - :type context: dict :return: True if attachment with the given name is present and False if not. :rtype: bool """ diff --git a/airflow/providers/influxdb/hooks/influxdb.py b/airflow/providers/influxdb/hooks/influxdb.py index a33a2b1b8c1f3..c70369d87e9ab 100644 --- a/airflow/providers/influxdb/hooks/influxdb.py +++ b/airflow/providers/influxdb/hooks/influxdb.py @@ -16,7 +16,13 @@ # specific language governing permissions and limitations # under the License. -"""This module allows to connect to a InfluxDB database.""" +""" +This module allows connecting to an InfluxDB database. + +.. spelling:: + + FluxTable +""" from typing import Dict, List @@ -37,7 +43,6 @@ class InfluxDBHook(BaseHook): Performs a connection to InfluxDB and retrieves the client. :param influxdb_conn_id: Reference to :ref:`Influxdb connection id `.
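The attachment methods above compose naturally. A hedged sketch, assuming an ``imap_default`` connection and relying on the hook's context-manager support to log in and out:

```python
from airflow.providers.imap.hooks.imap import ImapHook

with ImapHook(imap_conn_id="imap_default") as imap_hook:
    # Treat the name as a regex so any *.csv attachment matches.
    if imap_hook.has_mail_attachment(name=r".*\.csv", check_regex=True, mail_folder="INBOX"):
        # Download only the first match into a local directory.
        imap_hook.download_mail_attachments(
            name=r".*\.csv",
            local_output_directory="/tmp/attachments",
            check_regex=True,
            latest_only=True,
            mail_folder="INBOX",
        )
```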
- :type influxdb_conn_id: str """ conn_name_attr = 'influxdb_conn_id' diff --git a/airflow/providers/influxdb/operators/influxdb.py b/airflow/providers/influxdb/operators/influxdb.py index b5dc0205eff8c..6222b22202061 100644 --- a/airflow/providers/influxdb/operators/influxdb.py +++ b/airflow/providers/influxdb/operators/influxdb.py @@ -34,9 +34,7 @@ class InfluxDBOperator(BaseOperator): :param sql: the sql code to be executed. Can receive a str representing a sql statement - :type sql: str :param influxdb_conn_id: Reference to :ref:`Influxdb connection id `. - :type influxdb_conn_id: str """ template_fields: Sequence[str] = ('sql',) diff --git a/airflow/providers/jdbc/hooks/jdbc.py b/airflow/providers/jdbc/hooks/jdbc.py index 1f610d693bae8..54a3345e6ed5e 100644 --- a/airflow/providers/jdbc/hooks/jdbc.py +++ b/airflow/providers/jdbc/hooks/jdbc.py @@ -82,9 +82,7 @@ def set_autocommit(self, conn: jaydebeapi.Connection, autocommit: bool) -> None: Enable or disable autocommit for the given connection. :param conn: The connection. - :type conn: connection object :param autocommit: The connection's autocommit setting. - :type autocommit: bool """ conn.jconn.setAutoCommit(autocommit) @@ -95,7 +93,6 @@ def get_autocommit(self, conn: jaydebeapi.Connection) -> bool: Return False if conn.autocommit is not set or set to False :param conn: The connection. - :type conn: connection object :return: connection autocommit setting. :rtype: bool """ diff --git a/airflow/providers/jdbc/operators/jdbc.py b/airflow/providers/jdbc/operators/jdbc.py index 6881dde6b856b..440f74c24e077 100644 --- a/airflow/providers/jdbc/operators/jdbc.py +++ b/airflow/providers/jdbc/operators/jdbc.py @@ -34,17 +34,13 @@ class JdbcOperator(BaseOperator): For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:JdbcOperator` - :param sql: the sql code to be executed. (templated) - :type sql: Can receive a str representing a sql statement, - a list of str (sql statements), or reference to a template file. - Template reference are recognized by str ending in '.sql' + :param sql: the SQL code to be executed as a single string, or + a list of str (sql statements), or a reference to a template file. + Template references are recognized by str ending in '.sql' :param jdbc_conn_id: reference to a predefined database - :type jdbc_conn_id: str :param autocommit: if True, each command is automatically committed. (default value: False) - :type autocommit: bool :param parameters: (optional) the parameters to render the SQL query with. - :type parameters: dict or iterable """ template_fields: Sequence[str] = ('sql',) diff --git a/airflow/providers/jenkins/operators/jenkins_job_trigger.py b/airflow/providers/jenkins/operators/jenkins_job_trigger.py index 90c121e266603..3f3bafa27bbbf 100644 --- a/airflow/providers/jenkins/operators/jenkins_job_trigger.py +++ b/airflow/providers/jenkins/operators/jenkins_job_trigger.py @@ -79,20 +79,14 @@ class JenkinsJobTriggerOperator(BaseOperator): You'll also need to configure a Jenkins connection in the connections screen. :param jenkins_connection_id: The jenkins connection to use for this job - :type jenkins_connection_id: str :param job_name: The name of the job to trigger - :type job_name: str :param parameters: The parameters block provided to jenkins for use in the API call when triggering a build. 
(templated) - :type parameters: str, Dict, or List :param sleep_time: How long the operator will sleep between each status request for the job (min 1, default 10) - :type sleep_time: int :param max_try_before_job_appears: The maximum number of requests to make while waiting for the job to appear on the jenkins server (default 10) - :type max_try_before_job_appears: int :param allowed_jenkins_states: Iterable of allowed result jenkins states, default is ``['SUCCESS']`` - :type allowed_jenkins_states: Optional[Iterable[str]] """ template_fields: Sequence[str] = ('parameters',) diff --git a/airflow/providers/jira/hooks/jira.py b/airflow/providers/jira/hooks/jira.py index 4f936fe5bba0f..27ef4b0c3916d 100644 --- a/airflow/providers/jira/hooks/jira.py +++ b/airflow/providers/jira/hooks/jira.py @@ -30,7 +30,6 @@ class JiraHook(BaseHook): Jira interaction hook, a wrapper around the JIRA Python SDK. :param jira_conn_id: reference to a pre-defined Jira Connection - :type jira_conn_id: str """ default_conn_name = 'jira_default' diff --git a/airflow/providers/jira/operators/jira.py b/airflow/providers/jira/operators/jira.py index 9e610ce254b6e..e9a45bdd45525 100644 --- a/airflow/providers/jira/operators/jira.py +++ b/airflow/providers/jira/operators/jira.py @@ -32,16 +32,11 @@ class JiraOperator(BaseOperator): This operator is designed to use Jira Python SDK: http://jira.readthedocs.io :param jira_conn_id: reference to a pre-defined Jira Connection - :type jira_conn_id: str :param jira_method: method name from Jira Python SDK to be called - :type jira_method: str :param jira_method_args: required method parameters for the jira_method. (templated) - :type jira_method_args: dict :param result_processor: function to further process the response from Jira - :type result_processor: function :param get_jira_resource_method: function or operator to get jira resource on which the provided jira_method will be executed - :type get_jira_resource_method: function """ template_fields: Sequence[str] = ("jira_method_args",) diff --git a/airflow/providers/jira/sensors/jira.py b/airflow/providers/jira/sensors/jira.py index c7cfc3f21a131..44bf35696d72e 100644 --- a/airflow/providers/jira/sensors/jira.py +++ b/airflow/providers/jira/sensors/jira.py @@ -31,13 +31,9 @@ class JiraSensor(BaseSensorOperator): Monitors a jira ticket for any change. :param jira_conn_id: reference to a pre-defined Jira Connection - :type jira_conn_id: str :param method_name: method name from jira-python-sdk to be executed - :type method_name: str :param method_params: parameters for the method method_name - :type method_params: dict :param result_processor: function that returns a boolean and acts as the sensor response - :type result_processor: function """ def __init__( @@ -73,15 +69,10 @@ class JiraTicketSensor(JiraSensor): Monitors a jira ticket for a given change in terms of a function.
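Since ``jira_method`` is just a method name on the underlying SDK client, a typical invocation looks like the sketch below; ``add_comment`` is a real ``jira.JIRA`` method, while the connection id and issue key are placeholders:

```python
from airflow.providers.jira.operators.jira import JiraOperator

# jira_method_args are expanded as keyword arguments of the named SDK
# method, here jira.JIRA.add_comment(issue=..., body=...).
add_comment = JiraOperator(
    task_id="add_comment_to_ticket",
    jira_conn_id="jira_default",
    jira_method="add_comment",
    jira_method_args={"issue": "TEST-1", "body": "Processed by Airflow."},
)
```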
:param jira_conn_id: reference to a pre-defined Jira Connection - :type jira_conn_id: str :param ticket_id: id of the ticket to be monitored - :type ticket_id: str :param field: field of the ticket to be monitored - :type field: str :param expected_value: expected value of the field - :type expected_value: str :param result_processor: function that returns a boolean and acts as the sensor response - :type result_processor: function """ template_fields: Sequence[str] = ("ticket_id",) diff --git a/airflow/providers/microsoft/azure/hooks/adx.py b/airflow/providers/microsoft/azure/hooks/adx.py index 63e91a9bd06e8..6e8172fa3ae98 100644 --- a/airflow/providers/microsoft/azure/hooks/adx.py +++ b/airflow/providers/microsoft/azure/hooks/adx.py @@ -17,7 +17,14 @@ # under the License. # -"""This module contains Azure Data Explorer hook""" +""" +This module contains the Azure Data Explorer hook. + +.. spelling:: + + KustoResponseDataSetV + kusto +""" from typing import Any, Dict, Optional from azure.kusto.data.exceptions import KustoServiceError @@ -60,7 +67,6 @@ class AzureDataExplorerHook(BaseHook): :param azure_data_explorer_conn_id: Reference to the :ref:`Azure Data Explorer connection`. - :type azure_data_explorer_conn_id: str """ conn_name_attr = 'azure_data_explorer_conn_id' @@ -165,12 +171,9 @@ def run_query(self, query: str, database: str, options: Optional[Dict] = None) - If the query is unsuccessful, an AirflowException is raised. :param query: KQL query to run - :type query: str :param database: Database to run the query on. - :type database: str :param options: Optional query options. See: https://docs.microsoft.com/en-us/azure/kusto/api/netfx/request-properties#list-of-clientrequestproperties - :type options: dict :return: dict """ properties = ClientRequestProperties() diff --git a/airflow/providers/microsoft/azure/hooks/base_azure.py b/airflow/providers/microsoft/azure/hooks/base_azure.py index ea79a84d0ee68..c0ba06bf88672 100644 --- a/airflow/providers/microsoft/azure/hooks/base_azure.py +++ b/airflow/providers/microsoft/azure/hooks/base_azure.py @@ -30,7 +30,6 @@ class AzureBaseHook(BaseHook): authenticate the client library used for upstream azure hooks. :param sdk_client: The SDKClient to use. - :type sdk_client: Optional[str] :param conn_id: The :ref:`Azure connection id` which refers to the information to connect to the service. - :type: str diff --git a/airflow/providers/microsoft/azure/hooks/batch.py b/airflow/providers/microsoft/azure/hooks/batch.py index d60ab0579bb61..e2ea258061ab3 100644 --- a/airflow/providers/microsoft/azure/hooks/batch.py +++ b/airflow/providers/microsoft/azure/hooks/batch.py @@ -35,7 +35,6 @@ class AzureBatchHook(BaseHook): :param azure_batch_conn_id: :ref:`Azure Batch connection id` of a service principal which will be used to start the container instance. - :type azure_batch_conn_id: str """ conn_name_attr = 'azure_batch_conn_id' @@ -114,45 +113,33 @@ def configure_pool( Configures a pool :param pool_id: A string that uniquely identifies the Pool within the Account - :type pool_id: str :param vm_size: The size of virtual machines in the Pool. - :type vm_size: str :param display_name: The display name for the Pool - :type display_name: str :param target_dedicated_nodes: The desired number of dedicated Compute Nodes in the Pool.
- :type target_dedicated_nodes: Optional[int] :param use_latest_image_and_sku: Whether to use the latest verified vm image and sku - :type use_latest_image_and_sku: bool :param vm_publisher: The publisher of the Azure Virtual Machines Marketplace Image. For example, Canonical or MicrosoftWindowsServer. - :type vm_publisher: Optional[str] :param vm_offer: The offer type of the Azure Virtual Machines Marketplace Image. For example, UbuntuServer or WindowsServer. - :type vm_offer: Optional[str] :param sku_starts_with: The start name of the sku to search - :type sku_starts_with: Optional[str] :param vm_sku: The name of the virtual machine sku to use - :type vm_sku: Optional[str] :param vm_version: The version of the virtual machine - :param vm_version: str :param vm_node_agent_sku_id: The node agent sku id of the virtual machine - :type vm_node_agent_sku_id: Optional[str] :param os_family: The Azure Guest OS family to be installed on the virtual machines in the Pool. - :type os_family: Optional[str] :param os_version: The OS family version - :type os_version: Optional[str] """ if use_latest_image_and_sku: @@ -211,7 +198,6 @@ def create_pool(self, pool: PoolAddParameter) -> None: Creates a pool if not already existing :param pool: the pool object to create - :type pool: batch_models.PoolAddParameter """ try: @@ -235,12 +221,9 @@ def _get_latest_verified_image_vm_and_sku( :param publisher: The publisher of the Azure Virtual Machines Marketplace Image. For example, Canonical or MicrosoftWindowsServer. - :type publisher: str :param offer: The offer type of the Azure Virtual Machines Marketplace Image. For example, UbuntuServer or WindowsServer. - :type offer: str :param sku_starts_with: The start name of the sku to search - :type sku_starts_with: str """ options = batch_models.AccountListSupportedImagesOptions(filter="verificationType eq 'verified'") images = self.connection.account.list_supported_images(account_list_supported_images_options=options) @@ -262,9 +245,7 @@ def wait_for_all_node_state(self, pool_id: str, node_state: Set) -> list: Wait for all nodes in a pool to reach given states :param pool_id: A string that identifies the pool - :type pool_id: str :param node_state: A set of batch_models.ComputeNodeState - :type node_state: set """ self.log.info('waiting for all nodes in pool %s to reach one of: %s', pool_id, node_state) while True: @@ -292,11 +273,8 @@ def configure_job( Configures a job for use in the pool :param job_id: A string that uniquely identifies the job within the account - :type job_id: str :param pool_id: A string that identifies the pool - :type pool_id: str :param display_name: The display name for the job - :type display_name: str """ job = batch_models.JobAddParameter( id=job_id, @@ -311,7 +289,6 @@ def create_job(self, job: JobAddParameter) -> None: Creates a job in the pool :param job: The job object to create - :type job: batch_models.JobAddParameter """ try: self.connection.job.add(job) @@ -334,16 +311,12 @@ def configure_task( Creates a task :param task_id: A string that identifies the task to create - :type task_id: str :param command_line: The command line of the Task. - :type command_line: str :param display_name: A display name for the Task - :type display_name: str :param container_settings: The settings for the container under which the Task runs. If the Pool that will run this Task has containerConfiguration set, this must be set as well. If the Pool that will run this Task doesn't have containerConfiguration set, this must not be set.
- :type container_settings: batch_models.TaskContainerSettings """ task = batch_models.TaskAddParameter( id=task_id, @@ -360,9 +333,7 @@ def add_single_task_to_job(self, job_id: str, task: TaskAddParameter) -> None: Add a single task to given job if it doesn't exist :param job_id: A string that identifies the given job - :type job_id: str :param task: The task to add - :type task: batch_models.TaskAddParameter """ try: @@ -378,9 +349,7 @@ def wait_for_job_tasks_to_complete(self, job_id: str, timeout: int) -> None: Wait for tasks in a particular job to complete :param job_id: A string that identifies the job - :type job_id: str :param timeout: The amount of time to wait before timing out in minutes - :type timeout: int """ timeout_time = timezone.utcnow() + timedelta(minutes=timeout) while timezone.utcnow() < timeout_time: diff --git a/airflow/providers/microsoft/azure/hooks/container_instance.py b/airflow/providers/microsoft/azure/hooks/container_instance.py index b30b648e44dfd..9b0cd5d17264f 100644 --- a/airflow/providers/microsoft/azure/hooks/container_instance.py +++ b/airflow/providers/microsoft/azure/hooks/container_instance.py @@ -38,7 +38,6 @@ class AzureContainerInstanceHook(AzureBaseHook): :param conn_id: :ref:`Azure connection id` of a service principal which will be used to start the container instance. - :type azure_conn_id: str """ conn_name_attr = 'azure_conn_id' @@ -55,11 +54,8 @@ def create_or_update(self, resource_group: str, name: str, container_group: Cont Create a new container group :param resource_group: the name of the resource group - :type resource_group: str :param name: the name of the container group - :type name: str :param container_group: the properties of the container group - :type container_group: azure.mgmt.containerinstance.models.ContainerGroup """ self.connection.container_groups.create_or_update(resource_group, name, container_group) @@ -68,9 +64,7 @@ def get_state_exitcode_details(self, resource_group: str, name: str) -> tuple: Get the state and exitcode of a container group :param resource_group: the name of the resource group - :type resource_group: str :param name: the name of the container group - :type name: str :return: A tuple with the state, exitcode, and details. If the exitcode is unknown 0 is returned. 
:rtype: tuple(state,exitcode,details) @@ -89,9 +83,7 @@ def get_messages(self, resource_group: str, name: str) -> list: Get the messages of a container group :param resource_group: the name of the resource group - :type resource_group: str :param name: the name of the container group - :type name: str :return: A list of the event messages :rtype: list[str] """ @@ -107,9 +99,7 @@ def get_state(self, resource_group: str, name: str) -> Any: Get the state of a container group :param resource_group: the name of the resource group - :type resource_group: str :param name: the name of the container group - :type name: str :return: ContainerGroup :rtype: ~azure.mgmt.containerinstance.models.ContainerGroup """ @@ -120,11 +110,8 @@ def get_logs(self, resource_group: str, name: str, tail: int = 1000) -> list: Get the tail from logs of a container group :param resource_group: the name of the resource group - :type resource_group: str :param name: the name of the container group - :type name: str :param tail: the size of the tail - :type tail: int :return: A list of log messages :rtype: list[str] """ @@ -136,9 +123,7 @@ def delete(self, resource_group: str, name: str) -> None: Delete a container group :param resource_group: the name of the resource group - :type resource_group: str :param name: the name of the container group - :type name: str """ self.connection.container_groups.delete(resource_group, name) @@ -147,9 +132,7 @@ def exists(self, resource_group: str, name: str) -> bool: Test if a container group exists :param resource_group: the name of the resource group - :type resource_group: str :param name: the name of the container group - :type name: str """ for container in self.connection.container_groups.list_by_resource_group(resource_group): if container.name == name: diff --git a/airflow/providers/microsoft/azure/hooks/container_registry.py b/airflow/providers/microsoft/azure/hooks/container_registry.py index f4c5d1adb40aa..f02fc795f0ab6 100644 --- a/airflow/providers/microsoft/azure/hooks/container_registry.py +++ b/airflow/providers/microsoft/azure/hooks/container_registry.py @@ -31,7 +31,6 @@ class AzureContainerRegistryHook(BaseHook): :param conn_id: :ref:`Azure Container Registry connection id` of a service principal which will be used to start the container instance - :type conn_id: str """ conn_name_attr = 'azure_container_registry_conn_id' diff --git a/airflow/providers/microsoft/azure/hooks/container_volume.py b/airflow/providers/microsoft/azure/hooks/container_volume.py index cd760abe80bbf..4054a47a8ac87 100644 --- a/airflow/providers/microsoft/azure/hooks/container_volume.py +++ b/airflow/providers/microsoft/azure/hooks/container_volume.py @@ -29,7 +29,6 @@ class AzureContainerVolumeHook(BaseHook): :param azure_container_volume_conn_id: Reference to the :ref:`Azure Container Volume connection id ` of an Azure account of which container volumes should be used. - :type azure_container_volume_conn_id: str """ conn_name_attr = "azure_container_volume_conn_id" diff --git a/airflow/providers/microsoft/azure/hooks/cosmos.py b/airflow/providers/microsoft/azure/hooks/cosmos.py index b75d75bfbfa06..f54b81a6971c2 100644 --- a/airflow/providers/microsoft/azure/hooks/cosmos.py +++ b/airflow/providers/microsoft/azure/hooks/cosmos.py @@ -43,7 +43,6 @@ class AzureCosmosDBHook(BaseHook): :param azure_cosmos_conn_id: Reference to the :ref:`Azure CosmosDB connection`. 
- :type azure_cosmos_conn_id: str """ conn_name_attr = 'azure_cosmos_conn_id' diff --git a/airflow/providers/microsoft/azure/hooks/data_factory.py b/airflow/providers/microsoft/azure/hooks/data_factory.py index 890812107d07b..2781258d3a9c4 100644 --- a/airflow/providers/microsoft/azure/hooks/data_factory.py +++ b/airflow/providers/microsoft/azure/hooks/data_factory.py @@ -14,6 +14,19 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +""" +.. spelling:: + + CreateRunResponse + DatasetResource + LinkedServiceResource + LROPoller + PipelineResource + PipelineRun + TriggerResource + datafactory + mgmt +""" import inspect import time from functools import wraps @@ -101,7 +114,6 @@ class AzureDataFactoryHook(BaseHook): A hook to interact with Azure Data Factory. :param azure_data_factory_conn_id: The :ref:`Azure Data Factory connection id`. - :type azure_data_factory_conn_id: str """ conn_type: str = 'azure_data_factory' diff --git a/airflow/providers/microsoft/azure/hooks/data_lake.py b/airflow/providers/microsoft/azure/hooks/data_lake.py index 8cbd001686d1e..bb293805cdbf5 100644 --- a/airflow/providers/microsoft/azure/hooks/data_lake.py +++ b/airflow/providers/microsoft/azure/hooks/data_lake.py @@ -41,7 +41,6 @@ class AzureDataLakeHook(BaseHook): {"tenant": "", "account_name": "ACCOUNT_NAME"}. :param azure_data_lake_conn_id: Reference to the :ref:`Azure Data Lake connection`. - :type azure_data_lake_conn_id: str """ conn_name_attr = 'azure_data_lake_conn_id' @@ -108,7 +107,6 @@ def check_for_file(self, file_path: str) -> bool: Check if a file exists on Azure Data Lake. :param file_path: Path and name of the file. - :type file_path: str :return: True if the file exists, False otherwise. :rtype: bool """ @@ -134,25 +132,19 @@ def upload_file( :param local_path: local path. Can be single file, directory (in which case, upload recursively) or glob pattern. Recursive glob patterns using `**` are not supported. - :type local_path: str :param remote_path: Remote path to upload to; if multiple files, this is the directory root to write within. - :type remote_path: str :param nthreads: Number of threads to use. If None, uses the number of cores. - :type nthreads: int :param overwrite: Whether to forcibly overwrite existing files/directories. If False and remote path is a directory, will quit regardless if any files would be overwritten or not. If True, only matching filenames are actually overwritten. - :type overwrite: bool :param buffersize: int [2**22] Number of bytes for internal buffer. This block cannot be bigger than a chunk and cannot be smaller than a block. - :type buffersize: int :param blocksize: int [2**22] Number of bytes for a block. Within each chunk, we write a smaller block for each API call. This block cannot be bigger than a chunk. - :type blocksize: int """ multithread.ADLUploader( self.get_conn(), @@ -182,25 +174,19 @@ def download_file( specific file, unless it is an existing directory, in which case a file is created within it. If downloading multiple files, this is the root directory to write within. Will create directories as required. - :type local_path: str :param remote_path: remote path/globstring to use to find remote files. Recursive glob patterns using `**` are not supported. - :type remote_path: str :param nthreads: Number of threads to use. If None, uses the number of cores. - :type nthreads: int :param overwrite: Whether to forcibly overwrite existing files/directories. 
If False and remote path is a directory, will quit regardless if any files would be overwritten or not. If True, only matching filenames are actually overwritten. - :type overwrite: bool :param buffersize: int [2**22] Number of bytes for internal buffer. This block cannot be bigger than a chunk and cannot be smaller than a block. - :type buffersize: int :param blocksize: int [2**22] Number of bytes for a block. Within each chunk, we write a smaller block for each API call. This block cannot be bigger than a chunk. - :type blocksize: int """ multithread.ADLDownloader( self.get_conn(), @@ -218,7 +204,6 @@ def list(self, path: str) -> list: List files in Azure Data Lake Storage :param path: full path/globstring to use to list files in ADLS - :type path: str """ if "*" in path: return self.get_conn().glob(path) @@ -230,11 +215,8 @@ def remove(self, path: str, recursive: bool = False, ignore_not_found: bool = Tr Remove files in Azure Data Lake Storage :param path: A directory or file to remove in ADLS - :type path: str :param recursive: Whether to loop into directories in the location and remove the files - :type recursive: bool :param ignore_not_found: Whether to raise error if file to delete is not found - :type ignore_not_found: bool """ try: self.get_conn().remove(path=path, recursive=recursive) diff --git a/airflow/providers/microsoft/azure/hooks/fileshare.py b/airflow/providers/microsoft/azure/hooks/fileshare.py index d6c4df21f9338..0157c2a788218 100644 --- a/airflow/providers/microsoft/azure/hooks/fileshare.py +++ b/airflow/providers/microsoft/azure/hooks/fileshare.py @@ -118,12 +118,9 @@ def check_for_directory(self, share_name: str, directory_name: str, **kwargs) -> Check if a directory exists on Azure File Share. :param share_name: Name of the share. - :type share_name: str :param directory_name: Name of the directory. - :type directory_name: str :param kwargs: Optional keyword arguments that `FileService.exists()` takes. - :type kwargs: object :return: True if the file exists, False otherwise. :rtype: bool """ @@ -134,14 +131,10 @@ def check_for_file(self, share_name: str, directory_name: str, file_name: str, * Check if a file exists on Azure File Share. :param share_name: Name of the share. - :type share_name: str :param directory_name: Name of the directory. - :type directory_name: str :param file_name: Name of the file. - :type file_name: str :param kwargs: Optional keyword arguments that `FileService.exists()` takes. - :type kwargs: object :return: True if the file exists, False otherwise. :rtype: bool """ @@ -154,12 +147,9 @@ def list_directories_and_files( Return the list of directories and files stored on a Azure File Share. :param share_name: Name of the share. - :type share_name: str :param directory_name: Name of the directory. - :type directory_name: str :param kwargs: Optional keyword arguments that `FileService.list_directories_and_files()` takes. - :type kwargs: object :return: A list of files and directories :rtype: list """ @@ -170,12 +160,9 @@ def list_files(self, share_name: str, directory_name: Optional[str] = None, **kw Return the list of files stored on a Azure File Share. :param share_name: Name of the share. - :type share_name: str :param directory_name: Name of the directory. - :type directory_name: str :param kwargs: Optional keyword arguments that `FileService.list_directories_and_files()` takes. 
- :type kwargs: object :return: A list of files :rtype: list """ @@ -190,10 +177,8 @@ def create_share(self, share_name: str, **kwargs) -> bool: Create new Azure File Share. :param share_name: Name of the share. - :type share_name: str :param kwargs: Optional keyword arguments that `FileService.create_share()` takes. - :type kwargs: object :return: True if share is created, False if share already exists. :rtype: bool """ @@ -204,10 +189,8 @@ def delete_share(self, share_name: str, **kwargs) -> bool: Delete existing Azure File Share. :param share_name: Name of the share. - :type share_name: str :param kwargs: Optional keyword arguments that `FileService.delete_share()` takes. - :type kwargs: object :return: True if share is deleted, False if share does not exist. :rtype: bool """ @@ -218,12 +201,9 @@ def create_directory(self, share_name: str, directory_name: str, **kwargs) -> li Create a new directory on a Azure File Share. :param share_name: Name of the share. - :type share_name: str :param directory_name: Name of the directory. - :type directory_name: str :param kwargs: Optional keyword arguments that `FileService.create_directory()` takes. - :type kwargs: object :return: A list of files and directories :rtype: list """ @@ -236,16 +216,11 @@ def get_file( Download a file from Azure File Share. :param file_path: Where to store the file. - :type file_path: str :param share_name: Name of the share. - :type share_name: str :param directory_name: Name of the directory. - :type directory_name: str :param file_name: Name of the file. - :type file_name: str :param kwargs: Optional keyword arguments that `FileService.get_file_to_path()` takes. - :type kwargs: object """ self.get_conn().get_file_to_path(share_name, directory_name, file_name, file_path, **kwargs) @@ -256,16 +231,11 @@ def get_file_to_stream( Download a file from Azure File Share. :param stream: A filehandle to store the file to. - :type stream: file-like object :param share_name: Name of the share. - :type share_name: str :param directory_name: Name of the directory. - :type directory_name: str :param file_name: Name of the file. - :type file_name: str :param kwargs: Optional keyword arguments that `FileService.get_file_to_stream()` takes. - :type kwargs: object """ self.get_conn().get_file_to_stream(share_name, directory_name, file_name, stream, **kwargs) @@ -276,16 +246,11 @@ def load_file( Upload a file to Azure File Share. :param file_path: Path to the file to load. - :type file_path: str :param share_name: Name of the share. - :type share_name: str :param directory_name: Name of the directory. - :type directory_name: str :param file_name: Name of the file. - :type file_name: str :param kwargs: Optional keyword arguments that `FileService.create_file_from_path()` takes. - :type kwargs: object """ self.get_conn().create_file_from_path(share_name, directory_name, file_name, file_path, **kwargs) @@ -296,16 +261,11 @@ def load_string( Upload a string to Azure File Share. :param string_data: String to load. - :type string_data: str :param share_name: Name of the share. - :type share_name: str :param directory_name: Name of the directory. - :type directory_name: str :param file_name: Name of the file. - :type file_name: str :param kwargs: Optional keyword arguments that `FileService.create_file_from_text()` takes. - :type kwargs: object """ self.get_conn().create_file_from_text(share_name, directory_name, file_name, string_data, **kwargs) @@ -316,18 +276,12 @@ def load_stream( Upload a stream to Azure File Share. 
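As a usage note for the file-share methods above, a minimal sketch; the connection id, share and paths are invented for illustration, and the keyword names follow the docstrings in this diff:

```python
from airflow.providers.microsoft.azure.hooks.fileshare import AzureFileShareHook

hook = AzureFileShareHook(azure_fileshare_conn_id="azure_fileshare_default")

# Create the share if it does not exist yet, upload a local file into a
# directory on it, then list what is stored there.
hook.create_share("my-share")
hook.load_file(
    file_path="/tmp/report.csv",
    share_name="my-share",
    directory_name="reports",
    file_name="report.csv",
)
print(hook.list_directories_and_files("my-share", "reports"))
```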
:param stream: Opened file/stream to upload as the file content. - :type stream: file-like :param share_name: Name of the share. - :type share_name: str :param directory_name: Name of the directory. - :type directory_name: str :param file_name: Name of the file. - :type file_name: str :param count: Size of the stream in bytes - :type count: int :param kwargs: Optional keyword arguments that `FileService.create_file_from_stream()` takes. - :type kwargs: object """ self.get_conn().create_file_from_stream( share_name, directory_name, file_name, stream, count, **kwargs diff --git a/airflow/providers/microsoft/azure/hooks/wasb.py b/airflow/providers/microsoft/azure/hooks/wasb.py index b9c031e12e60f..9f7ce5a19f1a2 100644 --- a/airflow/providers/microsoft/azure/hooks/wasb.py +++ b/airflow/providers/microsoft/azure/hooks/wasb.py @@ -50,9 +50,7 @@ class WasbHook(BaseHook): when using Azure compute infrastructure). :param wasb_conn_id: Reference to the :ref:`wasb connection `. - :type wasb_conn_id: str :param public_read: Whether anonymous public read access should be used. Default is False - :type public_read: bool """ conn_name_attr = 'wasb_conn_id' @@ -158,7 +156,6 @@ def _get_container_client(self, container_name: str) -> ContainerClient: Instantiates a container client :param container_name: The name of the container - :type container_name: str :return: ContainerClient """ return self.connection.get_container_client(container_name) @@ -168,9 +165,7 @@ def _get_blob_client(self, container_name: str, blob_name: str) -> BlobClient: Instantiates a blob client :param container_name: The name of the blob container - :type container_name: str :param blob_name: The name of the blob. It need not already exist - :type blob_name: str """ container_client = self._get_container_client(container_name) return container_client.get_blob_client(blob_name) @@ -180,11 +175,8 @@ def check_for_blob(self, container_name: str, blob_name: str, **kwargs) -> bool: Check if a blob exists on Azure Blob Storage. :param container_name: Name of the container. - :type container_name: str :param blob_name: Name of the blob. - :type blob_name: str :param kwargs: Optional keyword arguments that ``BlobClient.get_blob_properties`` takes. - :type kwargs: object :return: True if the blob exists, False otherwise. :rtype: bool """ @@ -199,11 +191,8 @@ def check_for_prefix(self, container_name: str, prefix: str, **kwargs): Check if a prefix exists on Azure Blob storage. :param container_name: Name of the container. - :type container_name: str :param prefix: Prefix of the blob. - :type prefix: str :param kwargs: Optional keyword arguments that ``ContainerClient.walk_blobs`` takes - :type kwargs: object :return: True if blobs matching the prefix exist, False otherwise. :rtype: bool """ @@ -222,16 +211,12 @@ def get_blobs_list( List blobs in a given container :param container_name: The name of the container - :type container_name: str :param prefix: Filters the results to return only blobs whose names begin with the specified prefix. - :type prefix: str :param include: Specifies one or more additional datasets to include in the response. Options include: ``snapshots``, ``metadata``, ``uncommittedblobs``, ``copy``, ``deleted``.
- :type include: List[str] :param delimiter: filters objects based on the delimiter (for e.g '.csv') - :type delimiter: str """ container = self._get_container_client(container_name) blob_list = [] @@ -245,13 +230,9 @@ def load_file(self, file_path: str, container_name: str, blob_name: str, **kwarg Upload a file to Azure Blob Storage. :param file_path: Path to the file to load. - :type file_path: str :param container_name: Name of the container. - :type container_name: str :param blob_name: Name of the blob. - :type blob_name: str :param kwargs: Optional keyword arguments that ``BlobClient.upload_blob()`` takes. - :type kwargs: object """ with open(file_path, 'rb') as data: self.upload(container_name=container_name, blob_name=blob_name, data=data, **kwargs) @@ -261,13 +242,9 @@ def load_string(self, string_data: str, container_name: str, blob_name: str, **k Upload a string to Azure Blob Storage. :param string_data: String to load. - :type string_data: str :param container_name: Name of the container. - :type container_name: str :param blob_name: Name of the blob. - :type blob_name: str :param kwargs: Optional keyword arguments that ``BlobClient.upload()`` takes. - :type kwargs: object """ # Reorder the argument order from airflow.providers.amazon.aws.hooks.s3.load_string. self.upload(container_name, blob_name, string_data, **kwargs) @@ -277,13 +254,9 @@ def get_file(self, file_path: str, container_name: str, blob_name: str, **kwargs Download a file from Azure Blob Storage. :param file_path: Path to the file to download. - :type file_path: str :param container_name: Name of the container. - :type container_name: str :param blob_name: Name of the blob. - :type blob_name: str :param kwargs: Optional keyword arguments that `BlobClient.download_blob()` takes. - :type kwargs: object """ with open(file_path, "wb") as fileblob: stream = self.download(container_name=container_name, blob_name=blob_name, **kwargs) @@ -294,11 +267,8 @@ def read_file(self, container_name: str, blob_name: str, **kwargs): Read a file from Azure Blob Storage and return as a string. :param container_name: Name of the container. - :type container_name: str :param blob_name: Name of the blob. - :type blob_name: str :param kwargs: Optional keyword arguments that `BlobClient.download_blob` takes. - :type kwargs: object """ return self.download(container_name, blob_name, **kwargs).content_as_text() @@ -315,16 +285,12 @@ def upload( Creates a new blob from a data source with automatic chunking. :param container_name: The name of the container to upload data - :type container_name: str :param blob_name: The name of the blob to upload. This need not exist in the container - :type blob_name: str :param data: The blob data to upload :param blob_type: The type of the blob. This can be either ``BlockBlob``, ``PageBlob`` or ``AppendBlob``. The default value is ``BlockBlob``. - :type blob_type: storage.BlobType :param length: Number of bytes to read from the stream. This is optional, but should be supplied for optimal performance. - :type length: int """ container_client = self.create_container(container_name) blob_client = container_client.get_blob_client(blob_name) @@ -337,14 +303,10 @@ def download( Downloads a blob to the StorageStreamDownloader :param container_name: The name of the container containing the blob - :type container_name: str :param blob_name: The name of the blob to download - :type blob_name: str :param offset: Start of byte range to use for downloading a section of the blob. Must be set if length is provided. 
- :type offset: int :param length: Number of bytes to read from the stream. - :type length: int """ blob_client = self._get_blob_client(container_name, blob_name) return blob_client.download_blob(offset=offset, length=length, **kwargs) @@ -354,7 +316,6 @@ def create_container(self, container_name: str) -> ContainerClient: Create container object if not already existing :param container_name: The name of the container to create - :type container_name: str """ container_client = self._get_container_client(container_name) try: @@ -374,7 +335,6 @@ def delete_container(self, container_name: str) -> None: Delete a container object :param container_name: The name of the container - :type container_name: str """ try: self.log.debug('Attempting to delete container: %s', container_name) @@ -391,10 +351,8 @@ def delete_blobs(self, container_name: str, *blobs, **kwargs) -> None: Marks the specified blobs or snapshots for deletion. :param container_name: The name of the container containing the blobs - :type container_name: str :param blobs: The blobs to delete. This can be a single blob, or multiple values can be supplied, where each value is either the name of the blob (str) or BlobProperties. - :type blobs: Union[str, BlobProperties] """ self._get_container_client(container_name).delete_blobs(*blobs, **kwargs) self.log.info("Deleted blobs: %s", blobs) @@ -412,16 +370,11 @@ def delete_file( Delete a file from Azure Blob Storage. :param container_name: Name of the container. - :type container_name: str :param blob_name: Name of the blob. - :type blob_name: str :param is_prefix: If blob_name is a prefix, delete all matching files - :type is_prefix: bool :param ignore_if_missing: if True, then return success even if the blob does not exist. - :type ignore_if_missing: bool :param kwargs: Optional keyword arguments that ``ContainerClient.delete_blobs()`` takes. - :type kwargs: object """ if is_prefix: blobs_to_delete = self.get_blobs_list( diff --git a/airflow/providers/microsoft/azure/log/wasb_task_handler.py b/airflow/providers/microsoft/azure/log/wasb_task_handler.py index 9174705b7a073..9ec0cdf646fc4 100644 --- a/airflow/providers/microsoft/azure/log/wasb_task_handler.py +++ b/airflow/providers/microsoft/azure/log/wasb_task_handler.py @@ -153,10 +153,8 @@ def wasb_read(self, remote_log_location: str, return_error: bool = False): logs are found or there is an error. :param remote_log_location: the log's location in remote storage - :type remote_log_location: str (path) :param return_error: if True, returns a string error message if an error occurs. Otherwise returns '' when an error occurs. - :type return_error: bool """ try: return self.hook.read_file(self.wasb_container, remote_log_location) @@ -174,12 +172,9 @@ def wasb_write(self, log: str, remote_log_location: str, append: bool = True) -> was created. :param log: the log to write to the remote_log_location - :type log: str :param remote_log_location: the log's location in remote storage - :type remote_log_location: str (path) :param append: if False, any existing log file is overwritten. If True, the new log is appended to any existing logs. 
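The blob-storage methods above pair up the same way. A short sketch against an assumed ``wasb_default`` connection; the container and blob names are placeholders:

```python
from airflow.providers.microsoft.azure.hooks.wasb import WasbHook

hook = WasbHook(wasb_conn_id="wasb_default")

# Upload a string, confirm the blob exists, read it back, then clean up.
hook.load_string("hello", container_name="mycontainer", blob_name="greetings/hello.txt")
assert hook.check_for_blob("mycontainer", "greetings/hello.txt")
print(hook.read_file("mycontainer", "greetings/hello.txt"))
hook.delete_file("mycontainer", "greetings/hello.txt", ignore_if_missing=True)
```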
- :type append: bool """ if append and self.wasb_log_exists(remote_log_location): old_log = self.wasb_read(remote_log_location) diff --git a/airflow/providers/microsoft/azure/operators/adls.py b/airflow/providers/microsoft/azure/operators/adls.py index 54cfd6efae165..9664a107e7a97 100644 --- a/airflow/providers/microsoft/azure/operators/adls.py +++ b/airflow/providers/microsoft/azure/operators/adls.py @@ -33,13 +33,9 @@ class ADLSDeleteOperator(BaseOperator): :ref:`howto/operator:ADLSDeleteOperator` :param path: A directory or file to remove - :type path: str :param recursive: Whether to loop into directories in the location and remove the files - :type recursive: bool :param ignore_not_found: Whether to raise error if file to delete is not found - :type ignore_not_found: bool :param azure_data_lake_conn_id: Reference to the :ref:`Azure Data Lake connection`. - :type azure_data_lake_conn_id: str """ template_fields: Sequence[str] = ('path',) @@ -74,9 +70,7 @@ class ADLSListOperator(BaseOperator): :param path: The Azure Data Lake path to find the objects. Supports glob strings (templated) - :type path: str :param azure_data_lake_conn_id: Reference to the :ref:`Azure Data Lake connection`. - :type azure_data_lake_conn_id: str **Example**: The following Operator would list all the Parquet files from ``folder/output/`` diff --git a/airflow/providers/microsoft/azure/operators/adx.py b/airflow/providers/microsoft/azure/operators/adx.py index 89c038d391e28..91b130ff0a85f 100644 --- a/airflow/providers/microsoft/azure/operators/adx.py +++ b/airflow/providers/microsoft/azure/operators/adx.py @@ -35,15 +35,11 @@ class AzureDataExplorerQueryOperator(BaseOperator): Operator for querying Azure Data Explorer (Kusto). :param query: KQL query to run (templated). - :type query: str :param database: Database to run the query on (templated). - :type database: str :param options: Optional query options. See: https://docs.microsoft.com/en-us/azure/kusto/api/netfx/request-properties#list-of-clientrequestproperties - :type options: dict :param azure_data_explorer_conn_id: Reference to the :ref:`Azure Data Explorer connection`. - :type azure_data_explorer_conn_id: str """ ui_color = '#00a1f2' diff --git a/airflow/providers/microsoft/azure/operators/batch.py b/airflow/providers/microsoft/azure/operators/batch.py index 1eecbd21a720c..b1e3ee5cd3c8c 100644 --- a/airflow/providers/microsoft/azure/operators/batch.py +++ b/airflow/providers/microsoft/azure/operators/batch.py @@ -33,95 +33,62 @@ class AzureBatchOperator(BaseOperator): Executes a job on Azure Batch Service :param batch_pool_id: A string that uniquely identifies the Pool within the Account. - :type batch_pool_id: str :param batch_pool_vm_size: The size of virtual machines in the Pool - :type batch_pool_vm_size: str :param batch_job_id: A string that uniquely identifies the Job within the Account. - :type batch_job_id: str :param batch_task_command_line: The command line of the Task - :type batch_task_command_line: str :param batch_task_id: A string that uniquely identifies the task within the Job. - :type batch_task_id: str :param batch_pool_display_name: The display name for the Pool. The display name need not be unique - :type batch_pool_display_name: Optional[str] :param batch_job_display_name: The display name for the Job. The display name need not be unique - :type batch_job_display_name: Optional[str] :param batch_job_manager_task: Details of a Job Manager Task to be launched when the Job is started. 
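A minimal sketch of the ``ADLSListOperator`` example described in its docstring above (the path is illustrative; the default ``azure_data_lake_default`` connection is assumed)::

    from airflow.providers.microsoft.azure.operators.adls import ADLSListOperator

    list_parquet = ADLSListOperator(
        task_id="list_parquet",
        path="folder/output/*.parquet",
        azure_data_lake_conn_id="azure_data_lake_default",
    )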
- :type batch_job_manager_task: Optional[batch_models.JobManagerTask] :param batch_job_preparation_task: The Job Preparation Task. If set, the Batch service will run the Job Preparation Task on a Node before starting any Tasks of that Job on that Compute Node. Required if batch_job_release_task is set. - :type batch_job_preparation_task: Optional[batch_models.JobPreparationTask] :param batch_job_release_task: The Job Release Task. Used to undo changes to Compute Nodes made by the Job Preparation Task - :type batch_job_release_task: Optional[batch_models.JobReleaseTask] :param batch_task_display_name: The display name for the task. The display name need not be unique - :type batch_task_display_name: Optional[str] :param batch_task_container_settings: The settings for the container under which the Task runs - :type batch_task_container_settings: Optional[batch_models.TaskContainerSettings] :param batch_start_task: A Task specified to run on each Compute Node as it joins the Pool. The Task runs when the Compute Node is added to the Pool or when the Compute Node is restarted. - :type batch_start_task: Optional[batch_models.StartTask] :param batch_max_retries: The number of times to retry this batch operation before it's considered a failed operation. Default is 3 - :type batch_max_retries: int :param batch_task_resource_files: A list of files that the Batch service will download to the Compute Node before running the command line. - :type batch_task_resource_files: Optional[List[batch_models.ResourceFile]] :param batch_task_output_files: A list of files that the Batch service will upload from the Compute Node after running the command line. - :type batch_task_output_files: Optional[List[batch_models.OutputFile]] :param batch_task_user_identity: The user identity under which the Task runs. If omitted, the Task runs as a non-administrative user unique to the Task. - :type batch_task_user_identity: Optional[batch_models.UserIdentity] :param target_low_priority_nodes: The desired number of low-priority Compute Nodes in the Pool. This property must not be specified if enable_auto_scale is set to true. - :type target_low_priority_nodes: Optional[int] :param target_dedicated_nodes: The desired number of dedicated Compute Nodes in the Pool. This property must not be specified if enable_auto_scale is set to true. - :type target_dedicated_nodes: Optional[int] :param enable_auto_scale: Whether the Pool size should automatically adjust over time. Default is false - :type enable_auto_scale: bool :param auto_scale_formula: A formula for the desired number of Compute Nodes in the Pool. This property must not be specified if enableAutoScale is set to false. It is required if enableAutoScale is set to true. - :type auto_scale_formula: Optional[str] :param azure_batch_conn_id: The :ref:`Azure Batch connection id` - :type azure_batch_conn_id: str :param use_latest_verified_vm_image_and_sku: Whether to use the latest verified virtual machine image and sku in the batch account. Default is false. - :type use_latest_verified_vm_image_and_sku: bool :param vm_publisher: The publisher of the Azure Virtual Machines Marketplace Image. For example, Canonical or MicrosoftWindowsServer. Required if use_latest_verified_vm_image_and_sku is set to True - :type vm_publisher: Optional[str] :param vm_offer: The offer type of the Azure Virtual Machines Marketplace Image. For example, UbuntuServer or WindowsServer.
Required if use_latest_verified_vm_image_and_sku is set to True - :type vm_offer: Optional[str] :param sku_starts_with: The starting string of the Virtual Machine SKU. Required if use_latest_verified_vm_image_and_sku is set to True - :type sku_starts_with: Optional[str] :param vm_sku: The name of the virtual machine sku to use - :type vm_sku: Optional[str] :param vm_version: The version of the virtual machine - :type vm_version: Optional[str] :param vm_node_agent_sku_id: The node agent sku id of the virtual machine - :type vm_node_agent_sku_id: Optional[str] :param os_family: The Azure Guest OS family to be installed on the virtual machines in the Pool. - :type os_family: Optional[str] :param os_version: The OS family version - :type os_version: Optional[str] :param timeout: The amount of time to wait for the job to complete in minutes. Default is 25 - :type timeout: int :param should_delete_job: Whether to delete job after execution. Default is False - :type should_delete_job: bool :param should_delete_pool: Whether to delete pool after execution of jobs. Default is False - :type should_delete_pool: bool """ template_fields: Sequence[str] = ( @@ -348,9 +315,7 @@ def clean_up(self, pool_id: Optional[str] = None, job_id: Optional[str] = None) Delete the given pool and job in the batch account :param pool_id: The id of the pool to delete - :type pool_id: str :param job_id: The id of the job to delete - :type job_id: str """ if job_id: diff --git a/airflow/providers/microsoft/azure/operators/container_instances.py b/airflow/providers/microsoft/azure/operators/container_instances.py index c9524f4cc045f..b234fc68c443c 100644 --- a/airflow/providers/microsoft/azure/operators/container_instances.py +++ b/airflow/providers/microsoft/azure/operators/container_instances.py @@ -62,51 +62,32 @@ class AzureContainerInstancesOperator(BaseOperator): :param ci_conn_id: connection id of a service principal which will be used to start the container instance - :type ci_conn_id: str :param registry_conn_id: connection id of a user which can log in to a private docker registry. For Azure use :ref:`Azure connection id` - :type azure_conn_id: str If None, we assume a public registry - :type registry_conn_id: Optional[str] :param resource_group: name of the resource group wherein this container instance should be started - :type resource_group: str :param name: name of this container instance. Please note this name has to be unique in order to run containers in parallel. - :type name: str :param image: the docker image to be used - :type image: str :param region: the region wherein this container instance should be started - :type region: str :param environment_variables: key,value pairs containing environment variables which will be passed to the running container - :type environment_variables: Optional[dict] :param secured_variables: names of environmental variables that should not be exposed outside the container (typically passwords). - :type secured_variables: Optional[str] :param volumes: list of ``Volume`` tuples to be mounted to the container. Currently only Azure Fileshares are supported. - :type volumes: list[] :param memory_in_gb: the amount of memory to allocate to this container - :type memory_in_gb: double :param cpu: the number of cpus to allocate to this container - :type cpu: double :param gpu: GPU Resource for the container.
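A minimal sketch of the ``AzureBatchOperator`` documented above, taking the latest-verified-image path (all ids, sizes and image values are illustrative assumptions)::

    from airflow.providers.microsoft.azure.operators.batch import AzureBatchOperator

    batch_task = AzureBatchOperator(
        task_id="batch_task",
        batch_pool_id="example-pool",
        batch_pool_vm_size="Standard_A1",
        batch_job_id="example-job",
        batch_task_id="example-task",
        batch_task_command_line="/bin/bash -c 'echo hello'",
        # Resolve the VM image from the latest verified image/sku in the account.
        use_latest_verified_vm_image_and_sku=True,
        vm_publisher="Canonical",
        vm_offer="UbuntuServer",
        sku_starts_with="18.04",
        target_dedicated_nodes=1,
    )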
- :type gpu: azure.mgmt.containerinstance.models.GpuResource :param command: the command to run inside the container - :type command: Optional[List[str]] :param container_timeout: max time allowed for the execution of the container instance. - :type container_timeout: datetime.timedelta :param tags: azure tags as dict of str:str - :type tags: Optional[dict[str, str]] :param os_type: The operating system type required by the containers in the container group. Possible values include: 'Windows', 'Linux' - :type os_type: str :param restart_policy: Restart policy for all containers within the container group. Possible values include: 'Always', 'OnFailure', 'Never' - :type restart_policy: str :param ip_address: The IP address type of the container group. - :type ip_address: IpAddress **Example**:: diff --git a/airflow/providers/microsoft/azure/operators/cosmos.py b/airflow/providers/microsoft/azure/operators/cosmos.py index e42891674938b..ef3638c6d7d85 100644 --- a/airflow/providers/microsoft/azure/operators/cosmos.py +++ b/airflow/providers/microsoft/azure/operators/cosmos.py @@ -30,14 +30,10 @@ class AzureCosmosInsertDocumentOperator(BaseOperator): It will create both the database and collection if they do not already exist :param database_name: The name of the database. (templated) - :type database_name: str :param collection_name: The name of the collection. (templated) - :type collection_name: str :param document: The document to insert - :type document: dict :param azure_cosmos_conn_id: Reference to the :ref:`Azure CosmosDB connection`. - :type azure_cosmos_conn_id: str """ template_fields: Sequence[str] = ('database_name', 'collection_name') diff --git a/airflow/providers/microsoft/azure/operators/data_factory.py b/airflow/providers/microsoft/azure/operators/data_factory.py index 61ba697e0b445..0df599b53c306 100644 --- a/airflow/providers/microsoft/azure/operators/data_factory.py +++ b/airflow/providers/microsoft/azure/operators/data_factory.py @@ -67,43 +67,31 @@ class AzureDataFactoryRunPipelineOperator(BaseOperator): :ref:`howto/operator:AzureDataFactoryRunPipelineOperator` :param azure_data_factory_conn_id: The connection identifier for connecting to Azure Data Factory. - :type azure_data_factory_conn_id: str :param pipeline_name: The name of the pipeline to execute. - :type pipeline_name: str :param wait_for_termination: Flag to wait on a pipeline run's termination. By default, this feature is enabled but could be disabled to perform an asynchronous wait for a long-running pipeline execution using the ``AzureDataFactoryPipelineRunSensor``. - :type wait_for_termination: bool :param resource_group_name: The resource group name. If a value is not passed in to the operator, the ``AzureDataFactoryHook`` will attempt to use the resource group name provided in the corresponding connection. - :type resource_group_name: str :param factory_name: The data factory name. If a value is not passed in to the operator, the ``AzureDataFactoryHook`` will attempt to use the factory name provided in the corresponding connection. - :type factory_name: str :param reference_pipeline_run_id: The pipeline run identifier. If this run ID is specified the parameters of the specified run will be used to create a new run. - :type reference_pipeline_run_id: str :param is_recovery: Recovery mode flag. If recovery mode is set to `True`, the specified referenced pipeline run and the new run will be grouped under the same ``groupId``.
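A minimal sketch of the ``AzureDataFactoryRunPipelineOperator`` documented above (the connection id, pipeline name and parameters are illustrative assumptions)::

    from airflow.providers.microsoft.azure.operators.data_factory import AzureDataFactoryRunPipelineOperator

    run_pipeline = AzureDataFactoryRunPipelineOperator(
        task_id="run_pipeline",
        azure_data_factory_conn_id="azure_data_factory_default",
        pipeline_name="my_pipeline",
        parameters={"source": "blob"},
        # Poll every 30 seconds, give up after 10 minutes.
        timeout=600,
        check_interval=30,
    )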
- :type is_recovery: bool :param start_activity_name: In recovery mode, the rerun will start from this activity. If not specified, all activities will run. - :type start_activity_name: str :param start_from_failure: In recovery mode, if set to true, the rerun will start from failed activities. The property will be used only if ``start_activity_name`` is not specified. - :type start_from_failure: bool :param parameters: Parameters of the pipeline run. These parameters are referenced in a pipeline via ``@pipeline().parameters.parameterName`` and will be used only if the ``reference_pipeline_run_id`` is not specified. - :type parameters: Dict[str, Any] :param timeout: Time in seconds to wait for a pipeline to reach a terminal status for non-asynchronous waits. Used only if ``wait_for_termination`` is True. - :type timeout: int :param check_interval: Time in seconds to check on a pipeline run's status for non-asynchronous waits. Used only if ``wait_for_termination`` is True. - :type check_interval: int """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/microsoft/azure/operators/wasb_delete_blob.py b/airflow/providers/microsoft/azure/operators/wasb_delete_blob.py index 9a818352a2ded..1242c59593d33 100644 --- a/airflow/providers/microsoft/azure/operators/wasb_delete_blob.py +++ b/airflow/providers/microsoft/azure/operators/wasb_delete_blob.py @@ -30,18 +30,13 @@ class WasbDeleteBlobOperator(BaseOperator): Deletes blob(s) on Azure Blob Storage. :param container_name: Name of the container. (templated) - :type container_name: str :param blob_name: Name of the blob. (templated) - :type blob_name: str :param wasb_conn_id: Reference to the :ref:`wasb connection `. - :type wasb_conn_id: str :param check_options: Optional keyword arguments that `WasbHook.check_for_blob()` takes. :param is_prefix: If blob_name is a prefix, delete all files matching prefix. - :type is_prefix: bool :param ignore_if_missing: if True, then return success even if the blob does not exist. - :type ignore_if_missing: bool """ template_fields: Sequence[str] = ('container_name', 'blob_name') diff --git a/airflow/providers/microsoft/azure/secrets/key_vault.py b/airflow/providers/microsoft/azure/secrets/key_vault.py index 9ded6e1c13324..699bb47f4bcb3 100644 --- a/airflow/providers/microsoft/azure/secrets/key_vault.py +++ b/airflow/providers/microsoft/azure/secrets/key_vault.py @@ -59,17 +59,12 @@ class AzureKeyVaultBackend(BaseSecretsBackend, LoggingMixin): :param connections_prefix: Specifies the prefix of the secret to read to get Connections. If set to None (null), requests for connections will not be sent to Azure Key Vault - :type connections_prefix: str :param variables_prefix: Specifies the prefix of the secret to read to get Variables. If set to None (null), requests for variables will not be sent to Azure Key Vault - :type variables_prefix: str :param config_prefix: Specifies the prefix of the secret to read to get Configurations. If set to None (null), requests for configurations will not be sent to Azure Key Vault - :type config_prefix: str :param vault_url: The URL of an Azure Key Vault to use - :type vault_url: str :param sep: separator used to concatenate secret_prefix and secret_id.
Default: "-" - :type sep: str """ def __init__( @@ -110,7 +105,6 @@ def get_conn_uri(self, conn_id: str) -> Optional[str]: Get an Airflow Connection URI from an Azure Key Vault secret :param conn_id: The Airflow connection id to retrieve - :type conn_id: str """ if self.connections_prefix is None: return None @@ -122,7 +116,6 @@ def get_variable(self, key: str) -> Optional[str]: Get an Airflow Variable from an Azure Key Vault secret. :param key: Variable Key - :type key: str :return: Variable Value """ if self.variables_prefix is None: @@ -150,11 +143,8 @@ def build_path(path_prefix: str, secret_id: str, sep: str = '-') -> str: environment variables, so ``connection_default`` becomes ``connection-default``. :param path_prefix: The path prefix of the secret to retrieve - :type path_prefix: str :param secret_id: Name of the secret - :type secret_id: str :param sep: Separator used to concatenate path_prefix and secret_id - :type sep: str """ path = f'{path_prefix}{sep}{secret_id}' return path.replace('_', sep) @@ -164,9 +154,7 @@ def _get_secret(self, path_prefix: str, secret_id: str) -> Optional[str]: Get an Azure Key Vault secret value :param path_prefix: Prefix for the Path to get Secret - :type path_prefix: str :param secret_id: Secret Key - :type secret_id: str """ name = self.build_path(path_prefix, secret_id, self.sep) try: diff --git a/airflow/providers/microsoft/azure/sensors/cosmos.py b/airflow/providers/microsoft/azure/sensors/cosmos.py index 3a1ae59b6caf8..98c67193d7913 100644 --- a/airflow/providers/microsoft/azure/sensors/cosmos.py +++ b/airflow/providers/microsoft/azure/sensors/cosmos.py @@ -39,14 +39,10 @@ class AzureCosmosDocumentSensor(BaseSensorOperator): task_id="azure_cosmos_sensor") :param database_name: Target CosmosDB database_name. - :type database_name: str :param collection_name: Target CosmosDB collection_name. - :type collection_name: str :param document_id: The ID of the target document. - :type document_id: str :param azure_cosmos_conn_id: Reference to the :ref:`Azure CosmosDB connection`. - :type azure_cosmos_conn_id: str """ template_fields: Sequence[str] = ('database_name', 'collection_name', 'document_id') diff --git a/airflow/providers/microsoft/azure/sensors/data_factory.py b/airflow/providers/microsoft/azure/sensors/data_factory.py index 31ce16ce960cc..ab328986777a8 100644 --- a/airflow/providers/microsoft/azure/sensors/data_factory.py +++ b/airflow/providers/microsoft/azure/sensors/data_factory.py @@ -33,13 +33,9 @@ class AzureDataFactoryPipelineRunStatusSensor(BaseSensorOperator): Checks the status of a pipeline run. :param azure_data_factory_conn_id: The connection identifier for connecting to Azure Data Factory. - :type azure_data_factory_conn_id: str :param run_id: The pipeline run identifier. - :type run_id: str :param resource_group_name: The resource group name. - :type resource_group_name: str :param factory_name: The data factory name. - :type factory_name: str """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/microsoft/azure/sensors/wasb.py b/airflow/providers/microsoft/azure/sensors/wasb.py index 2480c053ed9da..5deda098d4e51 100644 --- a/airflow/providers/microsoft/azure/sensors/wasb.py +++ b/airflow/providers/microsoft/azure/sensors/wasb.py @@ -30,14 +30,10 @@ class WasbBlobSensor(BaseSensorOperator): Waits for a blob to arrive on Azure Blob Storage. :param container_name: Name of the container. - :type container_name: str :param blob_name: Name of the blob. 
- :type blob_name: str :param wasb_conn_id: Reference to the :ref:`wasb connection `. - :type wasb_conn_id: str :param check_options: Optional keyword arguments that `WasbHook.check_for_blob()` takes. - :type check_options: dict """ template_fields: Sequence[str] = ('container_name', 'blob_name') @@ -70,14 +66,10 @@ class WasbPrefixSensor(BaseSensorOperator): Waits for blobs matching a prefix to arrive on Azure Blob Storage. :param container_name: Name of the container. - :type container_name: str :param prefix: Prefix of the blob. - :type prefix: str :param wasb_conn_id: Reference to the wasb connection. - :type wasb_conn_id: str :param check_options: Optional keyword arguments that `WasbHook.check_for_prefix()` takes. - :type check_options: dict """ template_fields: Sequence[str] = ('container_name', 'prefix') diff --git a/airflow/providers/microsoft/azure/transfers/azure_blob_to_gcs.py b/airflow/providers/microsoft/azure/transfers/azure_blob_to_gcs.py index 8dd5e79355ae9..370bdfd146a3c 100644 --- a/airflow/providers/microsoft/azure/transfers/azure_blob_to_gcs.py +++ b/airflow/providers/microsoft/azure/transfers/azure_blob_to_gcs.py @@ -36,27 +36,17 @@ class AzureBlobStorageToGCSOperator(BaseOperator): :ref:`howto/operator:AzureBlobStorageToGCSOperator` :param wasb_conn_id: Reference to the wasb connection. - :type wasb_conn_id: str :param gcp_conn_id: The connection ID to use when fetching connection info. - :type gcp_conn_id: str :param blob_name: Name of the blob - :type blob_name: str :param file_path: Path to the file to download - :type file_path: str :param container_name: Name of the container - :type container_name: str :param bucket_name: The bucket to upload to - :type bucket_name: str :param object_name: The object name to set when uploading the file - :type object_name: str :param filename: The local file path to the file to be uploaded - :type filename: str :param gzip: Option to compress local file or file data for upload - :type gzip: bool :param delegate_to: The account to impersonate using domain-wide delegation of authority, if any. For this to work, the service account making the request must have domain-wide delegation enabled. - :type delegate_to: str :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token of the last account in the list, which will be impersonated in the request. @@ -65,7 +55,6 @@ class AzureBlobStorageToGCSOperator(BaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account. - :type impersonation_chain: Union[str, Sequence[str]] """ def __init__( diff --git a/airflow/providers/microsoft/azure/transfers/local_to_adls.py b/airflow/providers/microsoft/azure/transfers/local_to_adls.py index 29184b731d9db..dd7e76e135606 100644 --- a/airflow/providers/microsoft/azure/transfers/local_to_adls.py +++ b/airflow/providers/microsoft/azure/transfers/local_to_adls.py @@ -36,29 +36,21 @@ class LocalFilesystemToADLSOperator(BaseOperator): :param local_path: local path. Can be single file, directory (in which case, upload recursively) or glob pattern. 
Recursive glob patterns using `**` are not supported - :type local_path: str :param remote_path: Remote path to upload to; if multiple files, this is the directory root to write within - :type remote_path: str :param nthreads: Number of threads to use. If None, uses the number of cores. - :type nthreads: int :param overwrite: Whether to forcibly overwrite existing files/directories. If False and remote path is a directory, will quit regardless of whether any files would be overwritten. If True, only matching filenames are actually overwritten - :type overwrite: bool :param buffersize: int [2**22] Number of bytes for internal buffer. This block cannot be bigger than a chunk and cannot be smaller than a block - :type buffersize: int :param blocksize: int [2**22] Number of bytes for a block. Within each chunk, we write a smaller block for each API call. This block cannot be bigger than a chunk - :type blocksize: int :param extra_upload_options: Extra upload options to add to the hook upload method - :type extra_upload_options: dict :param azure_data_lake_conn_id: Reference to the Azure Data Lake connection - :type azure_data_lake_conn_id: str """ template_fields: Sequence[str] = ("local_path", "remote_path") diff --git a/airflow/providers/microsoft/azure/transfers/local_to_wasb.py b/airflow/providers/microsoft/azure/transfers/local_to_wasb.py index d795b98e028d7..417458a31eb67 100644 --- a/airflow/providers/microsoft/azure/transfers/local_to_wasb.py +++ b/airflow/providers/microsoft/azure/transfers/local_to_wasb.py @@ -30,16 +30,11 @@ class LocalFilesystemToWasbOperator(BaseOperator): Uploads a file to Azure Blob Storage. :param file_path: Path to the file to load. (templated) - :type file_path: str :param container_name: Name of the container. (templated) - :type container_name: str :param blob_name: Name of the blob. (templated) - :type blob_name: str :param wasb_conn_id: Reference to the wasb connection. - :type wasb_conn_id: str :param load_options: Optional keyword arguments that `WasbHook.load_file()` takes. - :type load_options: Optional[dict] """ template_fields: Sequence[str] = ('file_path', 'container_name', 'blob_name') diff --git a/airflow/providers/microsoft/azure/transfers/oracle_to_azure_data_lake.py b/airflow/providers/microsoft/azure/transfers/oracle_to_azure_data_lake.py index 37db7472c1a85..150ac6c0f2bbb 100644 --- a/airflow/providers/microsoft/azure/transfers/oracle_to_azure_data_lake.py +++ b/airflow/providers/microsoft/azure/transfers/oracle_to_azure_data_lake.py @@ -37,25 +37,15 @@ class OracleToAzureDataLakeOperator(BaseOperator): :param filename: file name to be used for the csv file. - :type filename: str :param azure_data_lake_conn_id: destination azure data lake connection. - :type azure_data_lake_conn_id: str :param azure_data_lake_path: destination path in azure data lake to put the file. - :type azure_data_lake_path: str :param oracle_conn_id: :ref:`Source Oracle connection `. - :type oracle_conn_id: str :param sql: SQL query to execute against the Oracle database. (templated) - :type sql: str :param sql_params: Parameters to use in sql query. (templated) - :type sql_params: Optional[dict] :param delimiter: field delimiter in the file. - :type delimiter: str :param encoding: encoding type for the file. - :type encoding: str :param quotechar: Character to use in quoting. - :type quotechar: str :param quoting: Quoting strategy. See unicodecsv quoting for more information.
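A minimal sketch of the ``LocalFilesystemToWasbOperator`` documented above (paths and names are illustrative; a ``wasb_default`` connection is assumed)::

    from airflow.providers.microsoft.azure.transfers.local_to_wasb import LocalFilesystemToWasbOperator

    upload_report = LocalFilesystemToWasbOperator(
        task_id="upload_report",
        file_path="/tmp/report.csv",
        container_name="reports",
        blob_name="report.csv",
        wasb_conn_id="wasb_default",
    )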
- :type quoting: str """ template_fields: Sequence[str] = ('filename', 'sql', 'sql_params') diff --git a/airflow/providers/microsoft/azure/transfers/sftp_to_wasb.py b/airflow/providers/microsoft/azure/transfers/sftp_to_wasb.py index 530145de2fce4..47f39c8796cfc 100644 --- a/airflow/providers/microsoft/azure/transfers/sftp_to_wasb.py +++ b/airflow/providers/microsoft/azure/transfers/sftp_to_wasb.py @@ -52,19 +52,13 @@ class SFTPToWasbOperator(BaseOperator): for downloading the single file or multiple files from the SFTP server. You can use only one wildcard within your path. The wildcard can appear inside the path or at the end of the path. - :type sftp_source_path: str :param container_name: Name of the container. - :type container_name: str :param blob_prefix: Prefix to name a blob. - :type blob_prefix: str :param sftp_conn_id: The sftp connection id. The name or identifier for establishing a connection to the SFTP server. - :type sftp_conn_id: str :param wasb_conn_id: Reference to the wasb connection. - :type wasb_conn_id: str :param load_options: Optional keyword arguments that ``WasbHook.load_file()`` takes. - :type load_options: dict :param move_object: When move object is True, the object is moved instead of copied to the new location. This is the equivalent of a mv command as opposed to a cp command. @@ -73,7 +67,6 @@ class SFTPToWasbOperator(BaseOperator): When wasb_overwrite_object is True, it will overwrite the existing data. If set to False, the operation might fail with ResourceExistsError in case a blob object already exists. - :type move_object: bool """ template_fields: Sequence[str] = ("sftp_source_path", "container_name", "blob_prefix") diff --git a/airflow/providers/microsoft/mssql/operators/mssql.py b/airflow/providers/microsoft/mssql/operators/mssql.py index e2df59c92e325..dd3847e1614e9 100644 --- a/airflow/providers/microsoft/mssql/operators/mssql.py +++ b/airflow/providers/microsoft/mssql/operators/mssql.py @@ -39,18 +39,12 @@ class MsSqlOperator(BaseOperator): If conn_type is ``'odbc'``, then :py:class:`~airflow.providers.odbc.hooks.odbc.OdbcHook` is used. Otherwise, :py:class:`~airflow.providers.microsoft.mssql.hooks.mssql.MsSqlHook` is used. - :param sql: the sql code to be executed - :type sql: str or string pointing to a template file with .sql - extension. (templated) + :param sql: the sql code to be executed (templated) :param mssql_conn_id: reference to a specific mssql database - :type mssql_conn_id: str :param parameters: (optional) the parameters to render the SQL query with. - :type parameters: dict or iterable :param autocommit: if True, each command is automatically committed. (default value: False) - :type autocommit: bool :param database: name of the database which overwrites the one defined in the connection - :type database: str """ template_fields: Sequence[str] = ('sql',) diff --git a/airflow/providers/microsoft/psrp/operators/psrp.py b/airflow/providers/microsoft/psrp/operators/psrp.py index 6a2f97206c5b9..bedf4b2423cda 100644 --- a/airflow/providers/microsoft/psrp/operators/psrp.py +++ b/airflow/providers/microsoft/psrp/operators/psrp.py @@ -30,11 +30,8 @@ class PSRPOperator(BaseOperator): """PowerShell Remoting Protocol operator. :param psrp_conn_id: connection id - :type psrp_conn_id: str :param command: command to execute on remote host. (templated) - :type command: str :param powershell: powershell to execute on remote host.
(templated) - :type powershell: str """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/microsoft/winrm/hooks/winrm.py b/airflow/providers/microsoft/winrm/hooks/winrm.py index 10895d5c355e2..0f8158279df4d 100644 --- a/airflow/providers/microsoft/winrm/hooks/winrm.py +++ b/airflow/providers/microsoft/winrm/hooks/winrm.py @@ -42,56 +42,36 @@ class WinRMHook(BaseHook): :param ssh_conn_id: connection id from airflow Connections from where all the required parameters can be fetched like username and password. Though the priority is given to the param passed during init - :type ssh_conn_id: str :param endpoint: When not set, endpoint will be constructed like this: 'http://{remote_host}:{remote_port}/wsman' - :type endpoint: str :param remote_host: Remote host to connect to. Ignored if `endpoint` is set. - :type remote_host: str :param remote_port: Remote port to connect to. Ignored if `endpoint` is set. - :type remote_port: int :param transport: transport type, one of 'plaintext' (default), 'kerberos', 'ssl', 'ntlm', 'credssp' - :type transport: str :param username: username to connect to the remote_host - :type username: str :param password: password of the username to connect to the remote_host - :type password: str :param service: the service name, default is HTTP - :type service: str :param keytab: the path to a keytab file if you are using one - :type keytab: str :param ca_trust_path: Certification Authority trust path - :type ca_trust_path: str :param cert_pem: client authentication certificate file path in PEM format - :type cert_pem: str :param cert_key_pem: client authentication certificate key file path in PEM format - :type cert_key_pem: str :param server_cert_validation: whether server certificate should be validated on Python versions that support it; one of 'validate' (default), 'ignore' - :type server_cert_validation: str :param kerberos_delegation: if True, TGT is sent to target server to allow multiple hops - :type kerberos_delegation: bool :param read_timeout_sec: maximum seconds to wait before an HTTP connect/read times out (default 30). This value should be slightly higher than operation_timeout_sec, as the server can block *at least* that long. - :type read_timeout_sec: int :param operation_timeout_sec: maximum allowed time in seconds for any single wsman HTTP operation (default 20). Note that operation timeouts while receiving output (the only wsman operation that should take any significant time, and where these timeouts are expected) will be silently retried indefinitely. - :type operation_timeout_sec: int :param kerberos_hostname_override: the hostname to use for the kerberos exchange (defaults to the hostname in the endpoint URL) - :type kerberos_hostname_override: str :param message_encryption: Will encrypt the WinRM messages if set and the transport auth supports message encryption.
(Default 'auto') - :type message_encryption: str :param credssp_disable_tlsv1_2: Whether to disable TLSv1.2 support and work with older protocols like TLSv1.0, default is False - :type credssp_disable_tlsv1_2: bool :param send_cbt: Will send the channel bindings over an HTTPS channel (Default: True) - :type send_cbt: bool """ def __init__( diff --git a/airflow/providers/microsoft/winrm/operators/winrm.py b/airflow/providers/microsoft/winrm/operators/winrm.py index 609ba5da9e801..ea96c43a8cd71 100644 --- a/airflow/providers/microsoft/winrm/operators/winrm.py +++ b/airflow/providers/microsoft/winrm/operators/winrm.py @@ -42,20 +42,13 @@ class WinRMOperator(BaseOperator): WinRMOperator to execute commands on given remote host using the winrm_hook. :param winrm_hook: predefined ssh_hook to use for remote execution - :type winrm_hook: airflow.providers.microsoft.winrm.hooks.winrm.WinRMHook :param ssh_conn_id: connection id from airflow Connections - :type ssh_conn_id: str :param remote_host: remote host to connect - :type remote_host: str :param command: command to execute on remote host. (templated) - :type command: str :param ps_path: path to powershell, `powershell` for v5.1- and `pwsh` for v6+. If specified, it will execute the command as a powershell script. - :type ps_path: str :param output_encoding: the encoding used to decode stdout and stderr - :type output_encoding: str :param timeout: timeout for executing the command. - :type timeout: int """ template_fields: Sequence[str] = ('command',) diff --git a/airflow/providers/mongo/hooks/mongo.py b/airflow/providers/mongo/hooks/mongo.py index 90ecf4cbae4a9..8f273e4789b50 100644 --- a/airflow/providers/mongo/hooks/mongo.py +++ b/airflow/providers/mongo/hooks/mongo.py @@ -42,7 +42,6 @@ class MongoHook(BaseHook): :param mongo_conn_id: The :ref:`Mongo connection id ` to use when connecting to MongoDB. - :type mongo: str """ conn_name_attr = 'conn_id' @@ -184,14 +183,10 @@ def update_one( https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.update_one :param mongo_collection: The name of the collection to update. - :type mongo_collection: str :param filter_doc: A query that matches the documents to update. - :type filter_doc: dict :param update_doc: The modifications to apply. - :type update_doc: dict :param mongo_db: The name of the database to use. Can be omitted; then the database from the connection string is used. - :type mongo_db: str """ collection = self.get_collection(mongo_collection, mongo_db=mongo_db) @@ -211,14 +206,10 @@ def update_many( https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.update_many :param mongo_collection: The name of the collection to update. - :type mongo_collection: str :param filter_doc: A query that matches the documents to update. - :type filter_doc: dict :param update_doc: The modifications to apply. - :type update_doc: dict :param mongo_db: The name of the database to use. Can be omitted; then the database from the connection string is used. - :type mongo_db: str """ collection = self.get_collection(mongo_collection, mongo_db=mongo_db) @@ -242,15 +233,11 @@ def replace_one( document contain the ``_id`` field which is then used as filters. :param mongo_collection: The name of the collection to update. - :type mongo_collection: str :param doc: The new document. - :type doc: dict :param filter_doc: A query that matches the documents to replace. Can be omitted; then the _id field from doc will be used.
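A minimal sketch of the ``MongoHook.update_one`` call documented above (connection id, collection name and documents are illustrative assumptions)::

    from airflow.providers.mongo.hooks.mongo import MongoHook

    hook = MongoHook(conn_id="mongo_default")
    # Set a field on the matching document; filter/update docs follow pymongo conventions.
    hook.update_one(
        mongo_collection="orders",
        filter_doc={"order_id": 42},
        update_doc={"$set": {"status": "shipped"}},
    )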
- :type filter_doc: dict :param mongo_db: The name of the database to use. Can be omitted; then the database from the connection string is used. - :type mongo_db: str """ collection = self.get_collection(mongo_collection, mongo_db=mongo_db) @@ -281,22 +268,16 @@ def replace_many( used as filters. :param mongo_collection: The name of the collection to update. - :type mongo_collection: str :param docs: The new documents. - :type docs: list[dict] :param filter_docs: A list of queries that match the documents to replace. Can be omitted; then the _id fields from docs will be used. - :type filter_docs: list[dict] :param mongo_db: The name of the database to use. Can be omitted; then the database from the connection string is used. - :type mongo_db: str :param upsert: If ``True``, perform an insert if no documents match the filters for the replace operation. - :type upsert: bool :param collation: An instance of :class:`~pymongo.collation.Collation`. This option is only supported on MongoDB 3.4 and above. - :type collation: pymongo.collation.Collation """ collection = self.get_collection(mongo_collection, mongo_db=mongo_db) @@ -318,12 +299,9 @@ def delete_one( https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.delete_one :param mongo_collection: The name of the collection to delete from. - :type mongo_collection: str :param filter_doc: A query that matches the document to delete. - :type filter_doc: dict :param mongo_db: The name of the database to use. Can be omitted; then the database from the connection string is used. - :type mongo_db: str """ collection = self.get_collection(mongo_collection, mongo_db=mongo_db) @@ -338,12 +316,9 @@ def delete_many( https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.delete_many :param mongo_collection: The name of the collection to delete from. - :type mongo_collection: str :param filter_doc: A query that matches the documents to delete. - :type filter_doc: dict :param mongo_db: The name of the database to use. Can be omitted; then the database from the connection string is used. - :type mongo_db: str """ collection = self.get_collection(mongo_collection, mongo_db=mongo_db) diff --git a/airflow/providers/mongo/sensors/mongo.py b/airflow/providers/mongo/sensors/mongo.py index 059431ebb78ae..9d9a85268ec3c 100644 --- a/airflow/providers/mongo/sensors/mongo.py +++ b/airflow/providers/mongo/sensors/mongo.py @@ -36,14 +36,10 @@ class MongoSensor(BaseSensorOperator): ... task_id="mongo_sensor") :param collection: Target MongoDB collection. - :type collection: str :param query: The query to find the target document. - :type query: dict :param mongo_conn_id: The :ref:`Mongo connection id ` to use when connecting to MongoDB. - :type mongo_conn_id: str :param mongo_db: Target MongoDB name. - :type mongo_db: str """ template_fields: Sequence[str] = ('collection', 'query') diff --git a/airflow/providers/mysql/hooks/mysql.py b/airflow/providers/mysql/hooks/mysql.py index c1e247f981f24..47f8cb7275889 100644 --- a/airflow/providers/mysql/hooks/mysql.py +++ b/airflow/providers/mysql/hooks/mysql.py @@ -45,9 +45,7 @@ class MySqlHook(DbApiHook): extras example: ``{"iam":true, "aws_conn_id":"my_aws_conn"}`` :param schema: The MySQL database schema to connect to. - :type schema: Optional[str] :param connection: The :ref:`MySQL connection id ` used for MySQL credentials. 
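A minimal sketch of the ``MongoSensor`` usage shown in its docstring above (values are illustrative; a ``mongo_default`` connection is assumed)::

    from airflow.providers.mongo.sensors.mongo import MongoSensor

    wait_for_order = MongoSensor(
        task_id="wait_for_order",
        collection="orders",
        query={"order_id": "42", "status": "shipped"},
        mongo_conn_id="mongo_default",
        mongo_db="commerce",
    )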
- :type connection: Optional[Dict] """ conn_name_attr = 'mysql_conn_id' @@ -67,9 +65,7 @@ def set_autocommit(self, conn: MySQLConnectionTypes, autocommit: bool) -> None: than an `autocommit` property to set the autocommit setting :param conn: connection to set autocommit setting - :type MySQLConnectionTypes: connection object. :param autocommit: autocommit setting - :type bool: True to enable autocommit, False to disable autocommit :rtype: None """ if hasattr(conn.__class__, 'autocommit') and isinstance(conn.__class__.autocommit, property): @@ -83,7 +79,6 @@ def get_autocommit(self, conn: MySQLConnectionTypes) -> bool: rather than an `autocommit` property to get the autocommit setting :param conn: connection to get autocommit setting from. - :type MySQLConnectionTypes: connection object. :return: connection autocommit setting :rtype: bool """ @@ -217,9 +212,7 @@ def _serialize_cell(cell: object, conn: Optional[Connection] = None) -> object: when passing those separately to execute. Hence, this method does nothing. :param cell: The cell to insert into the table - :type cell: object :param conn: The database connection - :type conn: connection object :return: The same cell :rtype: object """ @@ -254,19 +247,15 @@ def bulk_load_custom( This depends on the mysql client library used. :param table: The table where the file will be loaded into. - :type table: str :param tmp_file: The file (name) that contains the data. - :type tmp_file: str :param duplicate_key_handling: Specify what should happen to duplicate data. You can choose either `IGNORE` or `REPLACE`. .. seealso:: https://dev.mysql.com/doc/refman/8.0/en/load-data.html#load-data-duplicate-key-handling - :type duplicate_key_handling: str :param extra_options: More sql options to specify exactly how to load the data. .. seealso:: https://dev.mysql.com/doc/refman/8.0/en/load-data.html - :type extra_options: str """ conn = self.get_conn() cursor = conn.cursor() diff --git a/airflow/providers/mysql/operators/mysql.py b/airflow/providers/mysql/operators/mysql.py index 3fa7617a75901..2240c27a7284b 100644 --- a/airflow/providers/mysql/operators/mysql.py +++ b/airflow/providers/mysql/operators/mysql.py @@ -37,18 +37,13 @@ class MySqlOperator(BaseOperator): sql statement, a list of str (sql statements), or reference to a template file. Template references are recognized by str ending in '.sql' (templated) - :type sql: str or list[str] :param mysql_conn_id: Reference to :ref:`mysql connection id `. - :type mysql_conn_id: str :param parameters: (optional) the parameters to render the SQL query with. Template references are recognized by str ending in '.json' (templated) - :type parameters: dict or iterable :param autocommit: if True, each command is automatically committed. (default value: False) - :type autocommit: bool :param database: name of the database which overwrites the one defined in the connection - :type database: str """ template_fields: Sequence[str] = ('sql', 'parameters') diff --git a/airflow/providers/mysql/transfers/presto_to_mysql.py b/airflow/providers/mysql/transfers/presto_to_mysql.py index 57b8c438207f9..aef8069467fde 100644 --- a/airflow/providers/mysql/transfers/presto_to_mysql.py +++ b/airflow/providers/mysql/transfers/presto_to_mysql.py @@ -32,19 +32,14 @@ class PrestoToMySqlOperator(BaseOperator): be used for smallish amounts of data. :param sql: SQL query to execute against Presto. (templated) - :type sql: str :param mysql_table: target MySQL table, use dot notation to target a specific database.
(templated) - :type mysql_table: str :param mysql_conn_id: Reference to :ref:`mysql connection id `. - :type mysql_conn_id: str :param presto_conn_id: source presto connection - :type presto_conn_id: str :param mysql_preoperator: sql statement to run against mysql prior to import, typically used to truncate or delete in place of the data coming in, allowing the task to be idempotent (running the task twice won't double load data). (templated) - :type mysql_preoperator: str """ template_fields: Sequence[str] = ('sql', 'mysql_table', 'mysql_preoperator') diff --git a/airflow/providers/mysql/transfers/s3_to_mysql.py b/airflow/providers/mysql/transfers/s3_to_mysql.py index 344dd401ebe52..51ad95fcfb9d5 100644 --- a/airflow/providers/mysql/transfers/s3_to_mysql.py +++ b/airflow/providers/mysql/transfers/s3_to_mysql.py @@ -31,21 +31,15 @@ class S3ToMySqlOperator(BaseOperator): Loads a file from S3 into a MySQL table. :param s3_source_key: The path to the file (S3 key) that will be loaded into MySQL. - :type s3_source_key: str :param mysql_table: The MySQL table into which the data will be sent. - :type mysql_table: str :param mysql_duplicate_key_handling: Specify what should happen to duplicate data. You can choose either `IGNORE` or `REPLACE`. .. seealso:: https://dev.mysql.com/doc/refman/8.0/en/load-data.html#load-data-duplicate-key-handling - :type mysql_duplicate_key_handling: str :param mysql_extra_options: MySQL options to specify exactly how to load the data. - :type mysql_extra_options: Optional[str] :param aws_conn_id: The S3 connection that contains the credentials to the S3 Bucket. - :type aws_conn_id: str :param mysql_conn_id: Reference to :ref:`mysql connection id `. - :type mysql_conn_id: str """ template_fields: Sequence[str] = ( @@ -79,7 +73,6 @@ def execute(self, context: 'Context') -> None: Executes the transfer operation from S3 to MySQL. :param context: The context that is being provided when executing. - :type context: dict """ self.log.info('Loading %s to MySql table %s...', self.s3_source_key, self.mysql_table) diff --git a/airflow/providers/mysql/transfers/trino_to_mysql.py b/airflow/providers/mysql/transfers/trino_to_mysql.py index 081a3163a1518..d560d9c6912f7 100644 --- a/airflow/providers/mysql/transfers/trino_to_mysql.py +++ b/airflow/providers/mysql/transfers/trino_to_mysql.py @@ -32,19 +32,14 @@ class TrinoToMySqlOperator(BaseOperator): be used for smallish amounts of data. :param sql: SQL query to execute against Trino. (templated) - :type sql: str :param mysql_table: target MySQL table, use dot notation to target a specific database. (templated) - :type mysql_table: str :param mysql_conn_id: Reference to :ref:`mysql connection id `. - :type mysql_conn_id: str :param trino_conn_id: source trino connection - :type trino_conn_id: str :param mysql_preoperator: sql statement to run against mysql prior to import, typically used to truncate or delete in place of the data coming in, allowing the task to be idempotent (running the task twice won't double load data). (templated) - :type mysql_preoperator: str """ template_fields: Sequence[str] = ('sql', 'mysql_table', 'mysql_preoperator') diff --git a/airflow/providers/mysql/transfers/vertica_to_mysql.py b/airflow/providers/mysql/transfers/vertica_to_mysql.py index e273e5957453e..3392ec925a37d 100644 --- a/airflow/providers/mysql/transfers/vertica_to_mysql.py +++ b/airflow/providers/mysql/transfers/vertica_to_mysql.py @@ -36,28 +36,21 @@ class VerticaToMySqlOperator(BaseOperator): Moves data from Vertica to MySQL.
:param sql: SQL query to execute against the Vertica database. (templated) - :type sql: str :param vertica_conn_id: source Vertica connection - :type vertica_conn_id: str :param mysql_table: target MySQL table, use dot notation to target a specific database. (templated) - :type mysql_table: str :param mysql_conn_id: Reference to :ref:`mysql connection id `. - :type mysql_conn_id: str :param mysql_preoperator: sql statement to run against MySQL prior to import, typically used to truncate or delete in place of the data coming in, allowing the task to be idempotent (running the task twice won't double load data). (templated) - :type mysql_preoperator: str :param mysql_postoperator: sql statement to run against MySQL after the import, typically used to move data from staging to production and issue cleanup commands. (templated) - :type mysql_postoperator: str :param bulk_load: flag to use bulk_load option. This loads data into MySQL directly from a tab-delimited text file using the LOAD DATA LOCAL INFILE command. This option requires an extra connection parameter for the destination MySQL connection: {'local_infile': true}. - :type bulk_load: bool """ template_fields: Sequence[str] = ('sql', 'mysql_table', 'mysql_preoperator', 'mysql_postoperator') diff --git a/airflow/providers/neo4j/hooks/neo4j.py b/airflow/providers/neo4j/hooks/neo4j.py index 0e41bc7e0f517..399be2c63abeb 100644 --- a/airflow/providers/neo4j/hooks/neo4j.py +++ b/airflow/providers/neo4j/hooks/neo4j.py @@ -31,7 +31,6 @@ class Neo4jHook(BaseHook): Performs a connection to Neo4j and runs the query. :param neo4j_conn_id: Reference to :ref:`Neo4j connection id `. - :type neo4j_conn_id: str """ conn_name_attr = 'neo4j_conn_id' diff --git a/airflow/providers/neo4j/operators/neo4j.py b/airflow/providers/neo4j/operators/neo4j.py index e519bac8d0f53..b61f0734f0841 100644 --- a/airflow/providers/neo4j/operators/neo4j.py +++ b/airflow/providers/neo4j/operators/neo4j.py @@ -34,9 +34,7 @@ class Neo4jOperator(BaseOperator): :param sql: the sql code to be executed. Can receive a str representing a sql statement - :type sql: str :param neo4j_conn_id: Reference to :ref:`Neo4j connection id `. - :type neo4j_conn_id: str """ template_fields: Sequence[str] = ('sql',) diff --git a/airflow/providers/openfaas/hooks/openfaas.py b/airflow/providers/openfaas/hooks/openfaas.py index 23886f4a0b1b6..a5d40b600004b 100644 --- a/airflow/providers/openfaas/hooks/openfaas.py +++ b/airflow/providers/openfaas/hooks/openfaas.py @@ -31,10 +31,8 @@ class OpenFaasHook(BaseHook): Interact with OpenFaaS to query, deploy, invoke and update functions :param function_name: Name of the function. Defaults to None - :type function_name: str :param conn_id: openfaas connection to use. Defaults to open_faas_default; for example host: http://openfaas.faas.com, Connection Type: Http - :type conn_id: str """ GET_FUNCTION = "/system/function/" diff --git a/airflow/providers/opsgenie/hooks/opsgenie.py b/airflow/providers/opsgenie/hooks/opsgenie.py index 1e17747ed381f..28e7c7452ab15 100644 --- a/airflow/providers/opsgenie/hooks/opsgenie.py +++ b/airflow/providers/opsgenie/hooks/opsgenie.py @@ -43,7 +43,6 @@ class OpsgenieAlertHook(BaseHook): You can override these defaults in this hook.
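A minimal sketch of the ``Neo4jOperator`` documented above (the Cypher statement and connection id are illustrative assumptions)::

    from airflow.providers.neo4j.operators.neo4j import Neo4jOperator

    count_people = Neo4jOperator(
        task_id="count_people",
        neo4j_conn_id="neo4j_default",
        sql="MATCH (n:Person) RETURN count(n)",
    )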
:param opsgenie_conn_id: The name of the Opsgenie connection to use - :type opsgenie_conn_id: str """ @@ -86,7 +85,6 @@ def create_alert(self, payload: Optional[dict] = None) -> SuccessResponse: :param payload: Opsgenie API Create Alert payload values See https://docs.opsgenie.com/docs/alert-api#section-create-alert - :type payload: dict :return: api response :rtype: opsgenie_sdk.SuccessResponse """ @@ -111,15 +109,11 @@ def close_alert( Close an alert in Opsgenie :param identifier: Identifier of alert which could be alert id, tiny id or alert alias - :type identifier: str :param identifier_type: Type of the identifier that is provided as an in-line parameter. Possible values are 'id', 'alias' or 'tiny' - :type identifier_type: str :param payload: Request payload of closing alert action. see https://github.com/opsgenie/opsgenie-python-sdk/blob/master/docs/AlertApi.md#close_alert - :type payload: dict :param kwargs: params to pass to the function - :type kwargs: dict :return: SuccessResponse If the method is called asynchronously, returns the request thread. diff --git a/airflow/providers/opsgenie/operators/opsgenie.py b/airflow/providers/opsgenie/operators/opsgenie.py index c1ffbf01a356f..d072d82f41cca 100644 --- a/airflow/providers/opsgenie/operators/opsgenie.py +++ b/airflow/providers/opsgenie/operators/opsgenie.py @@ -40,37 +40,23 @@ class OpsgenieCreateAlertOperator(BaseOperator): :ref:`howto/operator:OpsgenieCreateAlertOperator` :param opsgenie_conn_id: The name of the Opsgenie connection to use - :type opsgenie_conn_id: str :param message: The Message of the Opsgenie alert (templated) - :type message: str :param alias: Client-defined identifier of the alert (templated) - :type alias: str :param description: Description field of the alert (templated) - :type description: str :param responders: Teams, users, escalations and schedules that the alert will be routed to send notifications. - :type responders: list[dict] :param visible_to: Teams and users that the alert will become visible to without sending any notification. - :type visible_to: list[dict] :param actions: Custom actions that will be available for the alert. - :type actions: list[str] :param tags: Tags of the alert. - :type tags: list[str] :param details: Map of key-value pairs to use as custom properties of the alert. - :type details: dict :param entity: Entity field of the alert that is generally used to specify which domain alert is related to. (templated) - :type entity: str :param source: Source field of the alert. Default value is IP address of the incoming request. - :type source: str :param priority: Priority level of the alert. Default value is P3. (templated) - :type priority: str :param user: Display name of the request owner. - :type user: str :param note: Additional note that will be added while creating the alert. (templated) - :type note: str """ template_fields: Sequence[str] = ('message', 'alias', 'description', 'entity', 'priority', 'note') @@ -162,20 +148,13 @@ class OpsgenieCloseAlertOperator(BaseOperator): :ref:`howto/operator:OpsgenieCloseAlertOperator` :param opsgenie_conn_id: The name of the Opsgenie connection to use - :type opsgenie_conn_id: str :param identifier: Identifier of alert which could be alert id, tiny id or alert alias - :type identifier: str :param identifier_type: Type of the identifier that is provided as an in-line parameter. 
Possible values are 'id', 'alias' or 'tiny' - :type identifier_type: str :param user: display name of the request owner - :type user: str :param note: additional note that will be added while creating the alert - :type note: str :param source: source field of the alert. Default value is IP address of the incoming request - :type source: str :param close_alert_kwargs: additional params to pass - :type close_alert_kwargs: dict """ def __init__( diff --git a/airflow/providers/oracle/hooks/oracle.py b/airflow/providers/oracle/hooks/oracle.py index d5f3a8595a132..95b5fdca9faee 100644 --- a/airflow/providers/oracle/hooks/oracle.py +++ b/airflow/providers/oracle/hooks/oracle.py @@ -42,7 +42,6 @@ class OracleHook(DbApiHook): :param oracle_conn_id: The :ref:`Oracle connection id ` used for Oracle credentials. - :type oracle_conn_id: str """ conn_name_attr = 'oracle_conn_id' @@ -170,17 +169,12 @@ def insert_rows( :param table: target Oracle table, use dot notation to target a specific database - :type table: str :param rows: the rows to insert into the table - :type rows: iterable of tuples :param target_fields: the names of the columns to fill in the table - :type target_fields: iterable of str :param commit_every: the maximum number of rows to insert in one transaction. Default 1000. Set greater than 0. Set 1 to insert each row in each single transaction - :type commit_every: int :param replace: Whether to replace instead of insert - :type replace: bool """ if target_fields: target_fields = ', '.join(target_fields) @@ -235,15 +229,11 @@ def bulk_insert_rows( :param table: target Oracle table, use dot notation to target a specific database - :type table: str :param rows: the rows to insert into the table - :type rows: iterable of tuples :param target_fields: the names of the columns to fill in the table, default None. If None, each row should have the same order as the table column names - :type target_fields: iterable of str Or None :param commit_every: the maximum number of rows to insert in one transaction. Default 5000. Set greater than 0. Set 1 to insert each row in each transaction - :type commit_every: int """ if not rows: raise ValueError("parameter rows could not be None or empty iterable") diff --git a/airflow/providers/oracle/operators/oracle.py b/airflow/providers/oracle/operators/oracle.py index b78b9eec211bf..27463a7692495 100644 --- a/airflow/providers/oracle/operators/oracle.py +++ b/airflow/providers/oracle/operators/oracle.py @@ -32,15 +32,11 @@ class OracleOperator(BaseOperator): a list of str (sql statements), or reference to a template file. Template references are recognized by str ending in '.sql' (templated) - :type sql: str or list[str] :param oracle_conn_id: The :ref:`Oracle connection id ` reference to a specific Oracle database. - :type oracle_conn_id: str :param parameters: (optional) the parameters to render the SQL query with. - :type parameters: dict or iterable :param autocommit: if True, each command is automatically committed. (default value: False) - :type autocommit: bool """ template_fields: Sequence[str] = ('sql',) @@ -74,12 +70,9 @@ class OracleStoredProcedureOperator(BaseOperator): Executes a stored procedure in a specific Oracle database. :param procedure: name of stored procedure to call (templated) - :type procedure: str :param oracle_conn_id: The :ref:`Oracle connection id ` reference to a specific Oracle database.
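A minimal sketch of the ``OracleHook.bulk_insert_rows`` call documented above (table, rows and connection id are illustrative assumptions)::

    from airflow.providers.oracle.hooks.oracle import OracleHook

    hook = OracleHook(oracle_conn_id="oracle_default")
    # Rows are tuples matching target_fields; commits happen every commit_every rows.
    hook.bulk_insert_rows(
        table="my_schema.items",
        rows=[(1, "alpha"), (2, "beta")],
        target_fields=["id", "name"],
        commit_every=5000,
    )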
- :type oracle_conn_id: str :param parameters: (optional) the parameters provided in the call - :type parameters: dict or iterable """ template_fields: Sequence[str] = ('procedure',) diff --git a/airflow/providers/oracle/transfers/oracle_to_oracle.py b/airflow/providers/oracle/transfers/oracle_to_oracle.py index 91e37f81f5881..9d16fa85f2061 100644 --- a/airflow/providers/oracle/transfers/oracle_to_oracle.py +++ b/airflow/providers/oracle/transfers/oracle_to_oracle.py @@ -30,18 +30,12 @@ class OracleToOracleOperator(BaseOperator): :param oracle_destination_conn_id: destination Oracle connection. - :type oracle_destination_conn_id: str :param destination_table: destination table to insert rows. - :type destination_table: str :param oracle_source_conn_id: :ref:`Source Oracle connection `. - :type oracle_source_conn_id: str :param source_sql: SQL query to execute against the source Oracle database. (templated) - :type source_sql: str :param source_sql_params: Parameters to use in sql query. (templated) - :type source_sql_params: dict :param rows_chunk: number of rows per chunk to commit. - :type rows_chunk: int """ template_fields: Sequence[str] = ('source_sql', 'source_sql_params') diff --git a/airflow/providers/pagerduty/hooks/pagerduty.py b/airflow/providers/pagerduty/hooks/pagerduty.py index 17e1edb77109d..b921e5c42f9af 100644 --- a/airflow/providers/pagerduty/hooks/pagerduty.py +++ b/airflow/providers/pagerduty/hooks/pagerduty.py @@ -109,42 +109,30 @@ def create_event( Create event for service integration. :param summary: Summary for the event - :type summary: str :param severity: Severity for the event, needs to be one of: info, warning, error, critical - :type severity: str :param source: Specific human-readable unique identifier, such as a hostname, for the system having the problem. - :type source: str :param action: Event action, needs to be one of: trigger, acknowledge, resolve. Default to trigger if not specified. - :type action: str :param routing_key: Integration key. If not specified, will try to read from connection's extra json blob. - :type routing_key: str :param dedup_key: A string which identifies the alert triggered for the given event. Required for the actions acknowledge and resolve. - :type dedup_key: str :param custom_details: Free-form details from the event. Can be a dictionary or a string. If a dictionary is passed it will show up in PagerDuty as a table. - :type custom_details: dict or str :param group: A cluster or grouping of sources. For example, sources “prod-datapipe-02” and “prod-datapipe-03” might both be part of “prod-datapipe” - :type group: str :param component: The part or component of the affected system that is broken. - :type component: str :param class_type: The class/type of the event. - :type class_type: str :param images: List of images to include. Each dictionary in the list accepts the following keys: `src`: The source (URL) of the image being attached to the incident. This image must be served via HTTPS. `href`: [Optional] URL to make the image a clickable link. `alt`: [Optional] Alternative text for the image. - :type images: list[dict] :param links: List of links to include. Each dictionary in the list accepts the following keys: `href`: URL of the link to be attached. `text`: [Optional] Plain text that describes the purpose of the link, and can be used as the link's text. - :type links: list[dict] :return: PagerDuty Events API v2 response. 
:rtype: dict """ diff --git a/airflow/providers/pagerduty/hooks/pagerduty_events.py b/airflow/providers/pagerduty/hooks/pagerduty_events.py index d66755fc50a19..08e6265ae77fb 100644 --- a/airflow/providers/pagerduty/hooks/pagerduty_events.py +++ b/airflow/providers/pagerduty/hooks/pagerduty_events.py @@ -87,39 +87,28 @@ def create_event( Create event for service integration. :param summary: Summary for the event - :type summary: str :param severity: Severity for the event, needs to be one of: info, warning, error, critical - :type severity: str :param source: Specific human-readable unique identifier, such as a hostname, for the system having the problem. - :type source: str :param action: Event action, needs to be one of: trigger, acknowledge, resolve. Default to trigger if not specified. - :type action: str :param dedup_key: A string which identifies the alert triggered for the given event. Required for the actions acknowledge and resolve. - :type dedup_key: str :param custom_details: Free-form details from the event. Can be a dictionary or a string. If a dictionary is passed it will show up in PagerDuty as a table. - :type custom_details: dict or str :param group: A cluster or grouping of sources. For example, sources “prod-datapipe-02” and “prod-datapipe-03” might both be part of “prod-datapipe” - :type group: str :param component: The part or component of the affected system that is broken. - :type component: str :param class_type: The class/type of the event. - :type class_type: str :param images: List of images to include. Each dictionary in the list accepts the following keys: `src`: The source (URL) of the image being attached to the incident. This image must be served via HTTPS. `href`: [Optional] URL to make the image a clickable link. `alt`: [Optional] Alternative text for the image. - :type images: list[dict] :param links: List of links to include. Each dictionary in the list accepts the following keys: `href`: URL of the link to be attached. `text`: [Optional] Plain text that describes the purpose of the link, and can be used as the link's text. - :type links: list[dict] :return: PagerDuty Events API v2 response. :rtype: dict """ diff --git a/airflow/providers/papermill/operators/papermill.py b/airflow/providers/papermill/operators/papermill.py index 4650599d6198a..36e8539b1e9c1 100644 --- a/airflow/providers/papermill/operators/papermill.py +++ b/airflow/providers/papermill/operators/papermill.py @@ -42,14 +42,10 @@ class PapermillOperator(BaseOperator): Executes a jupyter notebook through papermill that is annotated with parameters :param input_nb: input notebook (can also be a NoteBook or a File inlet) - :type input_nb: str :param output_nb: output notebook (can also be a NoteBook or File outlet) - :type output_nb: str :param parameters: the notebook parameters to set - :type parameters: dict :param kernel_name: (optional) name of kernel to execute the notebook against (ignores kernel name in the notebook document metadata) - :type kernel_name: str """ supports_lineage = True diff --git a/airflow/providers/plexus/operators/job.py b/airflow/providers/plexus/operators/job.py index 1a428374d84ba..d252ba8cab285 100644 --- a/airflow/providers/plexus/operators/job.py +++ b/airflow/providers/plexus/operators/job.py @@ -33,7 +33,6 @@ class PlexusJobOperator(BaseOperator): Submits a Plexus job. :param job_params: parameters required to launch a job. - :type job_params: dict Required job parameters are the following - "name": job name created by user. 
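The two PagerDuty hooks above expose the same create_event surface; a sketch of a trigger call follows (the conn id and field values are assumptions, and the routing key is assumed to live in the connection's extra field):

    from airflow.providers.pagerduty.hooks.pagerduty_events import PagerdutyEventsHook

    # Sketch: send a 'trigger' event through PagerDuty Events API v2.
    hook = PagerdutyEventsHook(pagerduty_events_conn_id="pagerduty_events_default")
    response = hook.create_event(
        summary="DAG my_dag failed",          # required event summary
        severity="error",                     # info, warning, error or critical
        source="airflow-scheduler-01",        # human-readable origin
        action="trigger",                     # trigger, acknowledge or resolve
        custom_details={"dag_id": "my_dag"},  # shown as a table in PagerDuty
    )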
@@ -143,7 +142,6 @@ def construct_job_params(self, hook: Any) -> Dict[Any, Optional[Any]]: user-provided value. :param hook: plexus hook object - :type hook: airflow hook """ missing_params = self.required_params - set(self.job_params) if len(missing_params) > 0: diff --git a/airflow/providers/postgres/hooks/postgres.py b/airflow/providers/postgres/hooks/postgres.py index fe6b53a9ecb31..4b495f4a8c07c 100644 --- a/airflow/providers/postgres/hooks/postgres.py +++ b/airflow/providers/postgres/hooks/postgres.py @@ -57,7 +57,6 @@ class PostgresHook(DbApiHook): :param postgres_conn_id: The :ref:`postgres conn id ` reference to a specific postgres database. - :type postgres_conn_id: str """ conn_name_attr = 'postgres_conn_id' @@ -164,9 +163,7 @@ def _serialize_cell(cell: object, conn: Optional[connection] = None) -> object: more information. :param cell: The cell to insert into the table - :type cell: object :param conn: The database connection - :type conn: connection object :return: The cell :rtype: object """ @@ -216,9 +213,7 @@ def get_table_primary_key(self, table: str, schema: Optional[str] = "public") -> Helper method that returns the table primary key :param table: Name of the target table - :type table: str :param schema: Name of the target schema, public by default - :type table: str :return: Primary key columns list :rtype: List[str] """ @@ -245,16 +240,11 @@ def _generate_insert_sql( The REPLACE variant is specific to PostgreSQL syntax. :param table: Name of the target table - :type table: str :param values: The row to insert into the table - :type values: tuple of cell values :param target_fields: The names of the columns to fill in the table - :type target_fields: iterable of strings :param replace: Whether to replace instead of insert - :type replace: bool :param replace_index: the column or list of column names to act as index for the ON CONFLICT clause - :type replace_index: str or list :return: The generated INSERT or REPLACE SQL statement :rtype: str """ diff --git a/airflow/providers/postgres/operators/postgres.py b/airflow/providers/postgres/operators/postgres.py index 6bd0902f03633..e8e6931483a64 100644 --- a/airflow/providers/postgres/operators/postgres.py +++ b/airflow/providers/postgres/operators/postgres.py @@ -28,20 +28,15 @@ class PostgresOperator(BaseOperator): """ Executes sql code in a specific Postgres database - :param sql: the sql code to be executed. (templated) - :type sql: Can receive a str representing a sql statement, - a list of str (sql statements), or reference to a template file. - Template reference are recognized by str ending in '.sql' + :param sql: the SQL code to be executed as a single string, or + a list of str (sql statements), or a reference to a template file. + Template references are recognized by str ending in '.sql' :param postgres_conn_id: The :ref:`postgres conn id ` reference to a specific postgres database. - :type postgres_conn_id: str :param autocommit: if True, each command is automatically committed. (default value: False) - :type autocommit: bool :param parameters: (optional) the parameters to render the SQL query with. 
- :type parameters: dict or iterable :param database: name of database which overwrites the one defined in connection - :type database: str """ template_fields: Sequence[str] = ('sql',) diff --git a/airflow/providers/presto/hooks/presto.py b/airflow/providers/presto/hooks/presto.py index 1962e1b91cd92..83f680c9bb376 100644 --- a/airflow/providers/presto/hooks/presto.py +++ b/airflow/providers/presto/hooks/presto.py @@ -166,16 +166,11 @@ def insert_rows( A generic way to insert a set of tuples into a table. :param table: Name of the target table - :type table: str :param rows: The rows to insert into the table - :type rows: iterable of tuples :param target_fields: The names of the columns to fill in the table - :type target_fields: iterable of strings :param commit_every: The maximum number of rows to insert in one transaction. Set to 0 to insert all rows in one transaction. - :type commit_every: int :param replace: Whether to replace instead of insert - :type replace: bool """ if self.get_isolation_level() == IsolationLevel.AUTOCOMMIT: self.log.info( diff --git a/airflow/providers/qubole/example_dags/example_qubole.py b/airflow/providers/qubole/example_dags/example_qubole.py index 4823a7f95bd1c..550c8e108e291 100644 --- a/airflow/providers/qubole/example_dags/example_qubole.py +++ b/airflow/providers/qubole/example_dags/example_qubole.py @@ -56,11 +56,8 @@ def compare_result(hive_show_table, hive_s3_location, ti=None): Compares the results of two QuboleOperator tasks. :param hive_show_table: The "hive_show_table" task. - :type hive_show_table: QuboleOperator :param hive_s3_location: The "hive_s3_location" task. - :type hive_s3_location: QuboleOperator :param ti: The TaskInstance object. - :type ti: airflow.models.TaskInstance :return: True if the files are the same, False otherwise. :rtype: bool """ diff --git a/airflow/providers/qubole/operators/qubole.py b/airflow/providers/qubole/operators/qubole.py index 86f34fa0cc2d2..0e7adf575059f 100644 --- a/airflow/providers/qubole/operators/qubole.py +++ b/airflow/providers/qubole/operators/qubole.py @@ -71,7 +71,6 @@ class QuboleOperator(BaseOperator): :ref:`howto/operator:QuboleOperator` :param qubole_conn_id: Connection id which consists of qds auth_token - :type qubole_conn_id: str kwargs: :command_type: type of command to be executed, e.g. hivecmd, shellcmd, hadoopcmd diff --git a/airflow/providers/qubole/operators/qubole_check.py b/airflow/providers/qubole/operators/qubole_check.py index a112f400d41e1..e63ff308b3d66 100644 --- a/airflow/providers/qubole/operators/qubole_check.py +++ b/airflow/providers/qubole/operators/qubole_check.py @@ -86,7 +86,6 @@ class QuboleCheckOperator(_QuboleCheckOperatorMixin, SQLCheckOperator, QuboleOpe :ref:`howto/operator:QuboleCheckOperator` :param qubole_conn_id: Connection id which consists of qds auth_token - :type qubole_conn_id: str kwargs: @@ -139,16 +138,13 @@ class QuboleValueCheckOperator(_QuboleCheckOperatorMixin, SQLValueCheckOperator, is not within the permissible limit of expected value. :param qubole_conn_id: Connection id which consists of qds auth_token - :type qubole_conn_id: str :param pass_value: Expected value of the query results. - :type pass_value: str or int or float :param tolerance: Defines the permissible pass_value range, for example if tolerance is 2, the Qubole command output can be anything between -2*pass_value and 2*pass_value, without the operator erring out.
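Looking back at the PostgresOperator hunk above, the reworded sql parameter accepts a plain string, a list of statements, or a .sql template; a minimal sketch (the table and values are assumed, not taken from this patch):

    from airflow.providers.postgres.operators.postgres import PostgresOperator

    # Sketch: let the driver render parameters instead of f-string interpolation.
    insert_pet = PostgresOperator(
        task_id="insert_pet",
        postgres_conn_id="postgres_default",
        sql="INSERT INTO pet (name, owner) VALUES (%(name)s, %(owner)s);",
        parameters={"name": "Rex", "owner": "alice"},
    )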
- :type tolerance: int or float kwargs: diff --git a/airflow/providers/qubole/sensors/qubole.py b/airflow/providers/qubole/sensors/qubole.py index 09ba32135474b..1d1fdb62eda41 100644 --- a/airflow/providers/qubole/sensors/qubole.py +++ b/airflow/providers/qubole/sensors/qubole.py @@ -76,12 +76,10 @@ class QuboleFileSensor(QuboleSensor): :ref:`howto/operator:QuboleFileSensor` :param qubole_conn_id: Connection id which consists of qds auth_token - :type qubole_conn_id: str :param data: a JSON object containing payload, whose presence needs to be checked Check this `example `_ for sample payload structure. - :type data: dict .. note:: Both ``data`` and ``qubole_conn_id`` fields support templating. You can also use ``.txt`` files for template-driven use cases. @@ -102,12 +100,10 @@ class QubolePartitionSensor(QuboleSensor): :ref:`howto/operator:QubolePartitionSensor` :param qubole_conn_id: Connection id which consists of qds auth_token - :type qubole_conn_id: str :param data: a JSON object containing payload, whose presence needs to be checked. Check this `example `_ for sample payload structure. - :type data: dict .. note:: Both ``data`` and ``qubole_conn_id`` fields support templating. You can also use ``.txt`` files for template-driven use cases. diff --git a/airflow/providers/redis/operators/redis_publish.py b/airflow/providers/redis/operators/redis_publish.py index 576ceeffdae4d..67315acfd8fb8 100644 --- a/airflow/providers/redis/operators/redis_publish.py +++ b/airflow/providers/redis/operators/redis_publish.py @@ -29,11 +29,8 @@ class RedisPublishOperator(BaseOperator): Publish a message to Redis. :param channel: redis channel to which the message is published (templated) - :type channel: str :param message: the message to publish (templated) - :type message: str :param redis_conn_id: redis connection to use - :type redis_conn_id: str """ template_fields: Sequence[str] = ('channel', 'message') @@ -50,7 +47,6 @@ def execute(self, context: 'Context') -> None: Publish the message to Redis channel :param context: the context object - :type context: dict """ redis_hook = RedisHook(redis_conn_id=self.redis_conn_id) diff --git a/airflow/providers/redis/sensors/redis_pub_sub.py b/airflow/providers/redis/sensors/redis_pub_sub.py index f193737f27606..dedfde72c8530 100644 --- a/airflow/providers/redis/sensors/redis_pub_sub.py +++ b/airflow/providers/redis/sensors/redis_pub_sub.py @@ -30,9 +30,7 @@ class RedisPubSubSensor(BaseSensorOperator): Redis sensor for reading a message from pub sub channels :param channels: The channels to be subscribed to (templated) - :type channels: str or list of str :param redis_conn_id: the redis connection id - :type redis_conn_id: str """ template_fields: Sequence[str] = ('channels',) @@ -52,7 +50,6 @@ def poke(self, context: 'Context') -> bool: An example of message ``{'type': 'message', 'pattern': None, 'channel': b'test', 'data': b'hello'}`` :param context: the context object - :type context: dict :return: ``True`` if message (with type 'message') is available or ``False`` if not """ self.log.info('RedisPubSubSensor checking for message on channels: %s', self.channels) diff --git a/airflow/providers/salesforce/hooks/salesforce.py b/airflow/providers/salesforce/hooks/salesforce.py index 59f42f6c7015b..da928421cbf0b 100644 --- a/airflow/providers/salesforce/hooks/salesforce.py +++ b/airflow/providers/salesforce/hooks/salesforce.py @@ -50,12 +50,9 @@ class SalesforceHook(BaseHook): :param conn_id: The name of the connection that has the parameters needed to connect 
to Salesforce. The connection should be of type `Salesforce`. - :type conn_id: str :param session_id: The access token for a given HTTP request session. - :type session_id: str :param session: A custom HTTP request session. This enables the use of requests Session features not otherwise exposed by `simple_salesforce`. - :type session: requests.Session .. note:: A connection to Salesforce can be created via several authentication options: @@ -171,11 +168,8 @@ def make_query( Make a query to Salesforce. :param query: The query to make to Salesforce. - :type query: str :param include_deleted: True if the query should include deleted records. - :type include_deleted: bool :param query_params: Additional optional arguments - :type query_params: dict :return: The query result. :rtype: dict """ @@ -198,7 +192,6 @@ def describe_object(self, obj: str) -> dict: some extra metadata that Salesforce stores for each object. :param obj: The name of the Salesforce object that we are getting a description of. - :type obj: str :return: the description of the Salesforce object. :rtype: dict """ @@ -211,7 +204,6 @@ def get_available_fields(self, obj: str) -> List[str]: Get a list of all available fields for an object. :param obj: The name of the Salesforce object that we are getting a description of. - :type obj: str :return: the names of the fields. :rtype: list(str) """ @@ -228,9 +220,7 @@ def get_object_from_salesforce(self, obj: str, fields: Iterable[str]) -> dict: SELECT <fields> FROM <obj>; :param obj: The object name to get from Salesforce. - :type obj: str :param fields: The fields to get from the object. - :type fields: iterable :return: all instances of the object from Salesforce. :rtype: dict """ @@ -249,7 +239,6 @@ def _to_timestamp(cls, column: pd.Series) -> pd.Series: Convert a column of a dataframe to UNIX timestamps if applicable :param column: A Series object representing a column of a dataframe. - :type column: pandas.Series :return: a new series that maintains the same index as the original :rtype: pandas.Series """ @@ -311,18 +300,13 @@ def write_object_to_file( and makes it easier to work with in other database environments :param query_results: the results from a SQL query - :type query_results: list of dict :param filename: the name of the file where the data should be dumped to - :type filename: str :param fmt: the format you want the output in. Default: 'csv' - :type fmt: str :param coerce_to_timestamp: True if you want all datetime fields to be converted into Unix timestamps. False if you want them to be left in the same format as they were in Salesforce. Leaving the value as False will result in datetimes being strings. Default: False - :type coerce_to_timestamp: bool :param record_time_added: True if you want to add a Unix timestamp field to the resulting data that marks when the data was fetched from Salesforce. Default: False - :type record_time_added: bool :return: the dataframe that gets written to the file. :rtype: pandas.Dataframe """ @@ -376,14 +360,11 @@ def object_to_df( and makes it easier to work with in other database environments :param query_results: the results from a SQL query - :type query_results: list of dict :param coerce_to_timestamp: True if you want all datetime fields to be converted into Unix timestamps. False if you want them to be left in the same format as they were in Salesforce.
Default: False - :type coerce_to_timestamp: bool :param record_time_added: True if you want to add a Unix timestamp field to the resulting data that marks when the data was fetched from Salesforce. Default: False - :type record_time_added: bool :return: the dataframe. :rtype: pandas.Dataframe """ diff --git a/airflow/providers/salesforce/operators/salesforce_apex_rest.py b/airflow/providers/salesforce/operators/salesforce_apex_rest.py index 732521d372999..703f5dfaeca3e 100644 --- a/airflow/providers/salesforce/operators/salesforce_apex_rest.py +++ b/airflow/providers/salesforce/operators/salesforce_apex_rest.py @@ -32,13 +32,9 @@ class SalesforceApexRestOperator(BaseOperator): :ref:`howto/operator:SalesforceApexRestOperator` :param endpoint: The REST endpoint for the request. - :type endpoint: str :param method: HTTP method for the request (default GET) - :type method: str :param payload: A dict of parameters to send in a POST / PUT request - :type payload: str :param salesforce_conn_id: The :ref:`Salesforce Connection id `. - :type salesforce_conn_id: str """ def __init__( @@ -60,7 +56,6 @@ def execute(self, context: 'Context') -> dict: """ Makes an HTTP request to an APEX REST endpoint and pushes results to xcom. :param context: The task context during execution. - :type context: dict :return: Apex response :rtype: dict """ diff --git a/airflow/providers/samba/hooks/samba.py b/airflow/providers/samba/hooks/samba.py index fbccfc7b83098..383dcf0c076d5 100644 --- a/airflow/providers/samba/hooks/samba.py +++ b/airflow/providers/samba/hooks/samba.py @@ -34,11 +34,9 @@ class SambaHook(BaseHook): set up a session and disconnect open connections upon exit. :param samba_conn_id: The connection id reference. - :type samba_conn_id: str :param share: An optional share name. If this is unset then the "schema" field of the connection is used in its place. - :type share: str """ conn_name_attr = 'samba_conn_id' diff --git a/airflow/providers/segment/hooks/segment.py b/airflow/providers/segment/hooks/segment.py index e834d1d009906..053c9e037d1ee 100644 --- a/airflow/providers/segment/hooks/segment.py +++ b/airflow/providers/segment/hooks/segment.py @@ -41,10 +41,8 @@ class SegmentHook(BaseHook): :param segment_conn_id: the name of the connection that has the parameters we need to connect to Segment. The connection should be type `json` and include a write_key security token in the `Extras` field. - :type segment_conn_id: str :param segment_debug_mode: Determines whether Segment should run in debug mode. Defaults to False - :type segment_debug_mode: bool .. note:: You must include a JSON structure in the `Extras` field. diff --git a/airflow/providers/segment/operators/segment_track_event.py b/airflow/providers/segment/operators/segment_track_event.py index 8810ef4559e5e..19f15df3f677a 100644 --- a/airflow/providers/segment/operators/segment_track_event.py +++ b/airflow/providers/segment/operators/segment_track_event.py @@ -29,16 +29,11 @@ class SegmentTrackEventOperator(BaseOperator): Send Track Event to Segment for a specified user_id and event :param user_id: The ID for this user in your database. (templated) - :type user_id: str :param event: The name of the event you're tracking. (templated) - :type event: str :param properties: A dictionary of properties for the event. (templated) - :type properties: dict :param segment_conn_id: The connection ID to use when connecting to Segment. - :type segment_conn_id: str :param segment_debug_mode: Determines whether Segment should run in debug mode. 
Defaults to False - :type segment_debug_mode: bool """ template_fields: Sequence[str] = ('user_id', 'event', 'properties') diff --git a/airflow/providers/sftp/hooks/sftp.py b/airflow/providers/sftp/hooks/sftp.py index f3d3ee01b31f0..23afa6c2d0dc5 100644 --- a/airflow/providers/sftp/hooks/sftp.py +++ b/airflow/providers/sftp/hooks/sftp.py @@ -52,9 +52,7 @@ class SFTPHook(SSHHook): to initialize the hook, but it will be removed in future Airflow versions. :param ssh_conn_id: The :ref:`sftp connection id` - :type ssh_conn_id: str :param ftp_conn_id (Outdated): The :ref:`sftp connection id` - :type ftp_conn_id: str """ conn_name_attr = 'ssh_conn_id' @@ -184,7 +182,6 @@ def describe_directory(self, path: str) -> Dict[str, Dict[str, str]]: on the remote system (where the MLSD command is supported). :param path: full path to the remote directory - :type path: str """ conn = self.get_conn() flist = conn.listdir_attr(path) @@ -203,7 +200,6 @@ def list_directory(self, path: str) -> List[str]: Returns a list of files on the remote system. :param path: full path to the remote directory to list - :type path: str """ conn = self.get_conn() files = conn.listdir(path) @@ -214,7 +210,6 @@ def create_directory(self, path: str, mode: int = 777) -> None: Creates a directory on the remote system. :param path: full path to the remote directory to create - :type path: str :param mode: int representation of octal mode for directory """ conn = self.get_conn() @@ -225,7 +220,6 @@ def delete_directory(self, path: str) -> None: Deletes a directory on the remote system. :param path: full path to the remote directory to delete - :type path: str """ conn = self.get_conn() conn.rmdir(path) @@ -237,9 +231,7 @@ def retrieve_file(self, remote_full_path: str, local_full_path: str) -> None: at that location :param remote_full_path: full path to the remote file - :type remote_full_path: str :param local_full_path: full path to the local file - :type local_full_path: str """ conn = self.get_conn() conn.get(remote_full_path, local_full_path) @@ -251,9 +243,7 @@ def store_file(self, remote_full_path: str, local_full_path: str) -> None: from that location :param remote_full_path: full path to the remote file - :type remote_full_path: str :param local_full_path: full path to the local file - :type local_full_path: str """ conn = self.get_conn() conn.put(local_full_path, remote_full_path) @@ -263,7 +253,6 @@ def delete_file(self, path: str) -> None: Removes a file on the FTP Server :param path: full path to the remote file - :type path: str """ conn = self.get_conn() conn.remove(path) @@ -273,7 +262,6 @@ def get_mod_time(self, path: str) -> str: Returns modification time. :param path: full path to the remote file - :type path: str """ conn = self.get_conn() ftp_mdtm = conn.stat(path).st_mtime @@ -284,7 +272,6 @@ def path_exists(self, path: str) -> bool: Returns True if a remote entity exists :param path: full path to the remote file or directory - :type path: str """ conn = self.get_conn() return conn.exists(path) @@ -295,11 +282,8 @@ def _is_path_match(path: str, prefix: Optional[str] = None, delimiter: Optional[ Return True if given path starts with prefix (if set) and ends with delimiter (if set). 
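The prefix/delimiter filtering described next is easiest to see end to end through get_tree_map; a sketch with placeholder paths and conn id:

    from airflow.providers.sftp.hooks.sftp import SFTPHook

    hook = SFTPHook(ssh_conn_id="sftp_default")  # placeholder conn id
    # Sketch: recursively list /incoming, keeping only dated CSV files.
    files, dirs, unknowns = hook.get_tree_map(
        path="/incoming",
        prefix="/incoming/2022-01-",  # entries must start with this prefix
        delimiter=".csv",             # entries must end with this suffix
    )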
:param path: path to be checked - :type path: str :param prefix: if set, the path is checked to start with the prefix - :type prefix: str :param delimiter: if set, the path is checked to end with the delimiter - :type delimiter: str :return: bool """ if prefix is not None and not path.startswith(prefix): @@ -316,11 +300,8 @@ def get_tree_map( It is possible to filter results by giving prefix and/or delimiter parameters. :param path: path from which tree will be built - :type path: str :param prefix: if set, paths are added only if they start with the prefix - :type prefix: str :param delimiter: if set, paths are added only if they end with the delimiter - :type delimiter: str :return: tuple with list of files, dirs and unknown items :rtype: Tuple[List[str], List[str], List[str]] """ diff --git a/airflow/providers/sftp/operators/sftp.py b/airflow/providers/sftp/operators/sftp.py index 076c0b8d7c227..c78c7f4c04d64 100644 --- a/airflow/providers/sftp/operators/sftp.py +++ b/airflow/providers/sftp/operators/sftp.py @@ -40,23 +40,16 @@ class SFTPOperator(BaseOperator): :param ssh_hook: predefined ssh_hook to use for remote execution. Either `ssh_hook` or `ssh_conn_id` needs to be provided. - :type ssh_hook: airflow.providers.ssh.hooks.ssh.SSHHook :param ssh_conn_id: :ref:`ssh connection id` from airflow Connections. `ssh_conn_id` will be ignored if `ssh_hook` is provided. - :type ssh_conn_id: str :param remote_host: remote host to connect (templated) Nullable. If provided, it will replace the `remote_host` which was defined in `ssh_hook` or predefined in the connection of `ssh_conn_id`. - :type remote_host: str :param local_filepath: local file path to get or put. (templated) - :type local_filepath: str :param remote_filepath: remote file path to get or put. (templated) - :type remote_filepath: str :param operation: specify operation 'get' or 'put', defaults to put - :type operation: str :param confirm: specify if the SFTP operation should be confirmed, defaults to True - :type confirm: bool :param create_intermediate_dirs: create missing intermediate directories when copying from remote to local and vice-versa. Default is False. @@ -75,7 +68,6 @@ class SFTPOperator(BaseOperator): dag=dag ) - :type create_intermediate_dirs: bool """ template_fields: Sequence[str] = ('local_filepath', 'remote_filepath', 'remote_host') diff --git a/airflow/providers/sftp/sensors/sftp.py b/airflow/providers/sftp/sensors/sftp.py index 14d196aa41973..d005141f6e5db 100644 --- a/airflow/providers/sftp/sensors/sftp.py +++ b/airflow/providers/sftp/sensors/sftp.py @@ -32,9 +32,7 @@ class SFTPSensor(BaseSensorOperator): Waits for a file or directory to be present on SFTP. :param path: Remote file or directory path - :type path: str :param sftp_conn_id: The connection to run the sensor against - :type sftp_conn_id: str """ template_fields: Sequence[str] = ('path',) diff --git a/airflow/providers/singularity/operators/singularity.py b/airflow/providers/singularity/operators/singularity.py index 2da8ec86763a7..9a5587f0540f6 100644 --- a/airflow/providers/singularity/operators/singularity.py +++ b/airflow/providers/singularity/operators/singularity.py @@ -40,28 +40,18 @@ class SingularityOperator(BaseOperator): be done with --volumes :param image: Singularity image or URI from which to create the container. - :type image: str :param auto_remove: Delete the container when the process exits. The default is False. - :type auto_remove: bool :param command: Command to be run in the container.
(templated) - :type command: str or list :param start_command: Start command to pass to the container instance. - :type start_command: str or list :param environment: Environment variables to set in the container. (templated) - :type environment: dict :param working_dir: Set a working directory for the instance. - :type working_dir: str :param force_pull: Pull the image on every run. Default is False. - :type force_pull: bool :param volumes: List of volumes to mount into the container, e.g. ``['/host/path:/container/path', '/host/path2:/container/path2']``. - :type volumes: Optional[List[str]] :param options: Other flags (list) to provide to the instance start. - :type options: list :param working_dir: Working directory to set on the container (equivalent to the -w switch of the docker client). - :type working_dir: str """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/slack/hooks/slack.py b/airflow/providers/slack/hooks/slack.py index 9173bc921eb79..499802cd6f98c 100644 --- a/airflow/providers/slack/hooks/slack.py +++ b/airflow/providers/slack/hooks/slack.py @@ -45,19 +45,14 @@ class SlackHook(BaseHook): slack_hook.client.chat_postMessage(channel="#random", text="Hello world!") :param token: Slack API token - :type token: str :param slack_conn_id: :ref:`Slack connection id ` that has Slack API token in the password field. - :type slack_conn_id: str :param use_session: A boolean specifying if the client should take advantage of connection pooling. Default is True. - :type use_session: bool :param base_url: A string representing the Slack API base URL. Default is ``https://www.slack.com/api/`` - :type base_url: str :param timeout: The maximum number of seconds the client will wait to connect and receive a response from Slack. Default is 30 seconds. - :type timeout: int """ def __init__( @@ -88,17 +83,11 @@ def call(self, api_method: str, **kwargs) -> None: Calls Slack WebClient `WebClient.api_call` with given arguments. :param api_method: The target Slack API method. e.g. 'chat.postMessage'. Required. - :type api_method: str :param http_verb: HTTP Verb. Optional (defaults to 'POST') - :type http_verb: str :param files: Files to multipart upload. e.g. {imageORfile: file_objectORfile_path} - :type files: dict :param data: The body to attach to the request. If a dictionary is provided, form-encoding will take place. Optional. - :type data: dict or aiohttp.FormData :param params: The URL parameters to append to the URL. Optional. - :type params: dict :param json: JSON for the body to attach to the request. Optional. - :type json: dict """ self.client.api_call(api_method, **kwargs) diff --git a/airflow/providers/slack/hooks/slack_webhook.py b/airflow/providers/slack/hooks/slack_webhook.py index 1c024fdbaa30f..27d45c0741090 100644 --- a/airflow/providers/slack/hooks/slack_webhook.py +++ b/airflow/providers/slack/hooks/slack_webhook.py @@ -35,30 +35,19 @@ class SlackWebhookHook(HttpHook): icon. You can override these defaults in this hook. :param http_conn_id: connection that has Slack webhook token in the password field - :type http_conn_id: str :param webhook_token: Slack webhook token - :type webhook_token: str :param message: The message you want to send on Slack - :type message: str :param attachments: The attachments to send on Slack. Should be a list of dictionaries representing Slack attachments. - :type attachments: list :param blocks: The blocks to send on Slack. Should be a list of dictionaries representing Slack blocks.
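Since the SlackHook hunk above trims the call() docstring, a short sketch of how those kwargs reach WebClient.api_call (the channel and conn id are assumptions):

    from airflow.providers.slack.hooks.slack import SlackHook

    # Sketch: kwargs are forwarded verbatim to slack_sdk's WebClient.api_call().
    slack = SlackHook(slack_conn_id="slack_default")  # token read from connection
    slack.call(
        "chat.postMessage",  # target Slack API method
        json={"channel": "#random", "text": "Hello world!"},
    )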
- :type blocks: list :param channel: The channel the message should be posted to - :type channel: str :param username: The username to post to slack with - :type username: str :param icon_emoji: The emoji to use as icon for the user posting to Slack - :type icon_emoji: str :param icon_url: The icon image URL string to use in place of the default icon. - :type icon_url: str :param link_names: Whether or not to find and link channel and usernames in your message - :type link_names: bool :param proxy: Proxy to use to make the Slack webhook call - :type proxy: str """ conn_name_attr = 'http_conn_id' @@ -99,9 +88,7 @@ def _get_token(self, token: str, http_conn_id: Optional[str]) -> str: Given either a manually set token or a conn_id, return the webhook_token to use. :param token: The manually provided token - :type token: str :param http_conn_id: The conn_id provided - :type http_conn_id: str :return: webhook_token to use :rtype: str """ diff --git a/airflow/providers/slack/operators/slack.py b/airflow/providers/slack/operators/slack.py index fa5c20c1fb3b8..1aa5edc22be4a 100644 --- a/airflow/providers/slack/operators/slack.py +++ b/airflow/providers/slack/operators/slack.py @@ -31,15 +31,10 @@ class SlackAPIOperator(BaseOperator): :param slack_conn_id: :ref:`Slack connection id ` whose password is the Slack API token. Optional - :type slack_conn_id: str :param token: Slack API token (https://api.slack.com/web). Optional - :type token: str :param method: The Slack API Method to Call (https://api.slack.com/methods). Optional - :type method: str :param api_params: API Method call parameters (https://api.slack.com/methods). Optional - :type api_params: dict :param client_args: Slack Hook parameters. Optional. Check airflow.providers.slack.hooks.SlackHook - :type client_args: dict """ def __init__( @@ -102,19 +97,13 @@ class SlackAPIPostOperator(SlackAPIOperator): :param channel: channel in which to post message on slack name (#general) or ID (C12318391). (templated) - :type channel: str :param username: Username that airflow will be posting to Slack as. (templated) - :type username: str :param text: message to send to slack. (templated) - :type text: str :param icon_url: url to icon used for this message - :type icon_url: str :param attachments: extra formatting details. (templated) - see https://api.slack.com/docs/attachments. - :type attachments: list of hashes :param blocks: extra block layouts. (templated) - see https://api.slack.com/reference/block-kit/blocks. - :type blocks: list of hashes """ template_fields: Sequence[str] = ('username', 'text', 'attachments', 'blocks', 'channel') @@ -182,16 +171,11 @@ class SlackAPIFileOperator(SlackAPIOperator): ) :param channel: channel in which to send file on slack name (templated) - :type channel: str :param initial_comment: message to send to slack. (templated) - :type initial_comment: str :param filename: name of the file (templated) - :type filename: str :param filetype: slack filetype. (templated) - see https://api.slack.com/types/file - :type filetype: str :param content: file content.
(templated) - :type content: str """ template_fields: Sequence[str] = ('channel', 'initial_comment', 'filename', 'filetype', 'content') diff --git a/airflow/providers/slack/operators/slack_webhook.py b/airflow/providers/slack/operators/slack_webhook.py index 94c25da0791ac..c9a4c78fb0b2b 100644 --- a/airflow/providers/slack/operators/slack_webhook.py +++ b/airflow/providers/slack/operators/slack_webhook.py @@ -36,30 +36,19 @@ class SlackWebhookOperator(SimpleHttpOperator): icon. You can override these defaults in this hook. :param http_conn_id: connection that has Slack webhook token in the extra field - :type http_conn_id: str :param webhook_token: Slack webhook token - :type webhook_token: str :param message: The message you want to send on Slack - :type message: str :param attachments: The attachments to send on Slack. Should be a list of dictionaries representing Slack attachments. - :type attachments: list :param blocks: The blocks to send on Slack. Should be a list of dictionaries representing Slack blocks. - :type blocks: list :param channel: The channel the message should be posted to - :type channel: str :param username: The username to post to slack with - :type username: str :param icon_emoji: The emoji to use as icon for the user posting to Slack - :type icon_emoji: str :param icon_url: The icon image URL string to use in place of the default icon. - :type icon_url: str :param link_names: Whether or not to find and link channel and usernames in your message - :type link_names: bool :param proxy: Proxy to use to make the Slack webhook call - :type proxy: str """ template_fields: Sequence[str] = ( diff --git a/airflow/providers/snowflake/hooks/snowflake.py b/airflow/providers/snowflake/hooks/snowflake.py index 7ef0853463b31..a61f6e0af13ce 100644 --- a/airflow/providers/snowflake/hooks/snowflake.py +++ b/airflow/providers/snowflake/hooks/snowflake.py @@ -43,9 +43,7 @@ class SnowflakeHook(DbApiHook): :param snowflake_conn_id: Reference to :ref:`Snowflake connection id` - :type snowflake_conn_id: str :param account: snowflake account name - :type account: Optional[str] :param authenticator: authenticator for Snowflake. 'snowflake' (default) to use the internal Snowflake authenticator 'externalbrowser' to authenticate using your web browser and @@ -53,24 +51,16 @@ class SnowflakeHook(DbApiHook): (IdP) that has been defined for your account 'https://.okta.com' to authenticate through native Okta. - :type authenticator: Optional[str] :param warehouse: name of snowflake warehouse - :type warehouse: Optional[str] :param database: name of snowflake database - :type database: Optional[str] :param region: name of snowflake region - :type region: Optional[str] :param role: name of snowflake role - :type role: Optional[str] :param schema: name of snowflake schema - :type schema: Optional[str] :param session_parameters: You can set session-level parameters at the time you connect to Snowflake - :type session_parameters: Optional[dict] :param insecure_mode: Turns off OCSP certificate checks. For details, see: `How To: Turn Off OCSP Checking in Snowflake Client Drivers - Snowflake Community `__ - :type insecure_mode: Optional[bool] .. note:: get_sqlalchemy_engine() depends on snowflake-sqlalchemy @@ -279,14 +269,10 @@ def run( :param sql: the sql string to be executed with possibly multiple statements, or a list of sql statements to execute - :type sql: str or list :param autocommit: What to set the connection's autocommit setting to before executing the query. 
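A sketch of the run() contract documented above, including the per-statement handler (the conn id, warehouse, and SQL are placeholders):

    from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook

    hook = SnowflakeHook(snowflake_conn_id="snowflake_default")
    # Sketch: execute two statements; the handler is invoked with each cursor.
    hook.run(
        sql=["USE WAREHOUSE compute_wh", "SELECT CURRENT_TIMESTAMP()"],
        autocommit=True,
        handler=lambda cur: print(cur.fetchall()),
    )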
- :type autocommit: bool :param parameters: The parameters to render the SQL query with. - :type parameters: dict or iterable :param handler: The result handler which is called with the result of each statement. - :type handler: callable """ self.query_ids = [] diff --git a/airflow/providers/snowflake/operators/snowflake.py b/airflow/providers/snowflake/operators/snowflake.py index 3e3412edf59c9..5cd341636f3c0 100644 --- a/airflow/providers/snowflake/operators/snowflake.py +++ b/airflow/providers/snowflake/operators/snowflake.py @@ -50,28 +50,20 @@ class SnowflakeOperator(BaseOperator): :param snowflake_conn_id: Reference to :ref:`Snowflake connection id` - :type snowflake_conn_id: str - :param sql: the sql code to be executed. (templated) - :type sql: Can receive a str representing a sql statement, - a list of str (sql statements), or reference to a template file. - Template reference are recognized by str ending in '.sql' + :param sql: the SQL code to be executed as a single string, or + a list of str (sql statements), or a reference to a template file. + Template references are recognized by str ending in '.sql' :param autocommit: if True, each command is automatically committed. (default value: True) - :type autocommit: bool :param parameters: (optional) the parameters to render the SQL query with. - :type parameters: dict or iterable :param warehouse: name of warehouse (will overwrite any warehouse defined in the connection's extra JSON) - :type warehouse: str :param database: name of database (will overwrite database defined in connection) - :type database: str :param schema: name of schema (will overwrite schema defined in connection) - :type schema: str :param role: name of role (will overwrite any role defined in connection's extra JSON) - :type role: str :param authenticator: authenticator for Snowflake. 'snowflake' (default) to use the internal Snowflake authenticator 'externalbrowser' to authenticate using your web browser and @@ -79,10 +71,8 @@ class SnowflakeOperator(BaseOperator): (IdP) that has been defined for your account 'https://.okta.com' to authenticate through native Okta. - :type authenticator: str :param session_parameters: You can set session-level parameters at the time you connect to Snowflake - :type session_parameters: dict """ template_fields: Sequence[str] = ('sql',) @@ -161,30 +151,22 @@ class SnowflakeCheckOperator(SQLCheckOperator): publishing dubious data, or on the side and receive email alerts without stopping the progress of the DAG. - :param sql: the sql code to be executed. (templated) - :type sql: Can receive a str representing a sql statement, - a list of str (sql statements), or reference to a template file. - Template reference are recognized by str ending in '.sql' + :param sql: the SQL code to be executed as a single string, or + a list of str (sql statements), or a reference to a template file. + Template references are recognized by str ending in '.sql' :param snowflake_conn_id: Reference to :ref:`Snowflake connection id` - :type snowflake_conn_id: str :param autocommit: if True, each command is automatically committed. (default value: True) - :type autocommit: bool :param parameters: (optional) the parameters to render the SQL query with. 
- :type parameters: dict or iterable :param warehouse: name of warehouse (will overwrite any warehouse defined in the connection's extra JSON) - :type warehouse: str :param database: name of database (will overwrite database defined in connection) - :type database: str :param schema: name of schema (will overwrite schema defined in connection) - :type schema: str :param role: name of role (will overwrite any role defined in connection's extra JSON) - :type role: str :param authenticator: authenticator for Snowflake. 'snowflake' (default) to use the internal Snowflake authenticator 'externalbrowser' to authenticate using your web browser and @@ -192,10 +174,8 @@ class SnowflakeCheckOperator(SQLCheckOperator): (IdP) that has been defined for your account 'https://.okta.com' to authenticate through native Okta. - :type authenticator: str :param session_parameters: You can set session-level parameters at the time you connect to Snowflake - :type session_parameters: dict """ template_fields: Sequence[str] = ('sql',) @@ -242,32 +222,22 @@ class SnowflakeValueCheckOperator(SQLValueCheckOperator): certain level of tolerance. :param sql: the sql to be executed - :type sql: str :param pass_value: the value to check against - :type pass_value: Any :param tolerance: (optional) the tolerance allowed to accept the query as passing - :type tolerance: Any :param snowflake_conn_id: Reference to :ref:`Snowflake connection id` - :type snowflake_conn_id: str :param autocommit: if True, each command is automatically committed. (default value: True) - :type autocommit: bool :param parameters: (optional) the parameters to render the SQL query with. - :type parameters: dict or iterable :param warehouse: name of warehouse (will overwrite any warehouse defined in the connection's extra JSON) - :type warehouse: str :param database: name of database (will overwrite database defined in connection) - :type database: str :param schema: name of schema (will overwrite schema defined in connection) - :type schema: str :param role: name of role (will overwrite any role defined in connection's extra JSON) - :type role: str :param authenticator: authenticator for Snowflake. 'snowflake' (default) to use the internal Snowflake authenticator 'externalbrowser' to authenticate using your web browser and @@ -275,10 +245,8 @@ class SnowflakeValueCheckOperator(SQLValueCheckOperator): (IdP) that has been defined for your account 'https://.okta.com' to authenticate through native Okta. - :type authenticator: str :param session_parameters: You can set session-level parameters at the time you connect to Snowflake - :type session_parameters: dict """ def __init__( @@ -328,34 +296,24 @@ class SnowflakeIntervalCheckOperator(SQLIntervalCheckOperator): WHERE {date_filter_column}= :param table: the table name - :type table: str :param days_back: number of days between ds and the ds we want to check against. Defaults to 7 days - :type days_back: int :param metrics_thresholds: a dictionary of ratios indexed by metrics, for example 'COUNT(*)': 1.5 would require a 50 percent or less difference between the current day, and the prior days_back. - :type metrics_thresholds: dict :param snowflake_conn_id: Reference to :ref:`Snowflake connection id` - :type snowflake_conn_id: str :param autocommit: if True, each command is automatically committed. (default value: True) - :type autocommit: bool :param parameters: (optional) the parameters to render the SQL query with. 
- :type parameters: dict or iterable :param warehouse: name of warehouse (will overwrite any warehouse defined in the connection's extra JSON) - :type warehouse: str :param database: name of database (will overwrite database defined in connection) - :type database: str :param schema: name of schema (will overwrite schema defined in connection) - :type schema: str :param role: name of role (will overwrite any role defined in connection's extra JSON) - :type role: str :param authenticator: authenticator for Snowflake. 'snowflake' (default) to use the internal Snowflake authenticator 'externalbrowser' to authenticate using your web browser and @@ -363,10 +321,8 @@ class SnowflakeIntervalCheckOperator(SQLIntervalCheckOperator): (IdP) that has been defined for your account 'https://.okta.com' to authenticate through native Okta. - :type authenticator: str :param session_parameters: You can set session-level parameters at the time you connect to Snowflake - :type session_parameters: dict """ def __init__( diff --git a/airflow/providers/snowflake/transfers/s3_to_snowflake.py b/airflow/providers/snowflake/transfers/s3_to_snowflake.py index b33d6d4e73e6c..9b8eecb4dd292 100644 --- a/airflow/providers/snowflake/transfers/s3_to_snowflake.py +++ b/airflow/providers/snowflake/transfers/s3_to_snowflake.py @@ -32,32 +32,21 @@ class S3ToSnowflakeOperator(BaseOperator): :ref:`howto/operator:S3ToSnowflakeOperator` :param s3_keys: reference to a list of S3 keys - :type s3_keys: list :param table: reference to a specific table in snowflake database - :type table: str :param schema: name of schema (will overwrite schema defined in connection) - :type schema: str :param stage: reference to a specific snowflake stage. If the stage's schema is not the same as the table one, it must be specified - :type stage: str :param prefix: cloud storage location specified to limit the set of files to load - :type prefix: str :param file_format: reference to a specific file format - :type file_format: str :param warehouse: name of warehouse (will overwrite any warehouse defined in the connection's extra JSON) - :type warehouse: str :param database: reference to a specific database in Snowflake connection - :type database: str :param columns_array: reference to a specific columns array in snowflake database - :type columns_array: list :param snowflake_conn_id: Reference to :ref:`Snowflake connection id` - :type snowflake_conn_id: str :param role: name of role (will overwrite any role defined in connection's extra JSON) - :type role: str :param authenticator: authenticator for Snowflake. 'snowflake' (default) to use the internal Snowflake authenticator 'externalbrowser' to authenticate using your web browser and @@ -65,10 +54,8 @@ class S3ToSnowflakeOperator(BaseOperator): (IdP) that has been defined for your account 'https://.okta.com' to authenticate through native Okta. 
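A sketch of the COPY-style transfer the S3ToSnowflakeOperator docstring above describes (the stage, table, keys, and inline file format are illustrative assumptions):

    from airflow.providers.snowflake.transfers.s3_to_snowflake import S3ToSnowflakeOperator

    # Sketch: COPY staged S3 keys into a Snowflake table.
    load_orders = S3ToSnowflakeOperator(
        task_id="load_orders",
        snowflake_conn_id="snowflake_default",
        s3_keys=["orders/2022-01-01.csv"],  # keys relative to the stage
        table="ORDERS",
        schema="PUBLIC",
        stage="MY_S3_STAGE",                # stage pointing at the bucket
        file_format="(type = 'CSV', field_delimiter = ',', skip_header = 1)",
    )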
- :type authenticator: str :param session_parameters: You can set session-level parameters at the time you connect to Snowflake - :type session_parameters: dict """ template_fields: Sequence[str] = ("s3_keys",) diff --git a/airflow/providers/snowflake/transfers/snowflake_to_slack.py b/airflow/providers/snowflake/transfers/snowflake_to_slack.py index 6fc181b8d8c3d..fadb3ccbd0e1f 100644 --- a/airflow/providers/snowflake/transfers/snowflake_to_slack.py +++ b/airflow/providers/snowflake/transfers/snowflake_to_slack.py @@ -43,31 +43,20 @@ class SnowflakeToSlackOperator(BaseOperator): :ref:`howto/operator:SnowflakeToSlackOperator` :param sql: The SQL statement to execute on Snowflake (templated) - :type sql: str :param slack_message: The templated Slack message to send with the data returned from Snowflake. You can use the default JINJA variable {{ results_df }} to access the pandas dataframe containing the SQL results - :type slack_message: str :param snowflake_conn_id: Reference to :ref:`Snowflake connection id` - :type snowflake_conn_id: str :param slack_conn_id: The connection id for Slack - :type slack_conn_id: str :param results_df_name: The name of the JINJA template's dataframe variable, default is 'results_df' - :type results_df_name: str :param parameters: The parameters to pass to the SQL query - :type parameters: Optional[Union[Iterable, Mapping]] :param warehouse: The Snowflake virtual warehouse to use to run the SQL query - :type warehouse: Optional[str] :param database: The Snowflake database to use for the SQL query - :type database: Optional[str] :param schema: The schema to run the SQL against in Snowflake - :type schema: Optional[str] :param role: The role to use when connecting to Snowflake - :type role: Optional[str] :param slack_token: The token to use to authenticate to Slack. If this is not provided, the 'webhook_token' attribute needs to be specified in the 'Extra' JSON field against the slack_conn_id - :type slack_token: Optional[str] """ template_fields: Sequence[str] = ('sql', 'slack_message') diff --git a/airflow/providers/sqlite/hooks/sqlite.py b/airflow/providers/sqlite/hooks/sqlite.py index 47a5457097737..f1885cf191107 100644 --- a/airflow/providers/sqlite/hooks/sqlite.py +++ b/airflow/providers/sqlite/hooks/sqlite.py @@ -43,13 +43,9 @@ def _generate_insert_sql(table, values, target_fields, replace, **kwargs): The REPLACE variant is specific to MySQL syntax. :param table: Name of the target table - :type table: str :param values: The row to insert into the table - :type values: tuple of cell values :param target_fields: The names of the columns to fill in the table - :type target_fields: iterable of strings :param replace: Whether to replace instead of insert - :type replace: bool :return: The generated INSERT or REPLACE SQL statement :rtype: str """ diff --git a/airflow/providers/sqlite/operators/sqlite.py b/airflow/providers/sqlite/operators/sqlite.py index 691ad7f87f690..86e687ca897d3 100644 --- a/airflow/providers/sqlite/operators/sqlite.py +++ b/airflow/providers/sqlite/operators/sqlite.py @@ -33,11 +33,8 @@ class SqliteOperator(BaseOperator): sql statement, a list of str (sql statements), or reference to a template file. Template references are recognized by str ending in '.sql' (templated) - :type sql: str or list[str] :param sqlite_conn_id: reference to a specific sqlite database - :type sqlite_conn_id: str :param parameters: (optional) the parameters to render the SQL query with.
- :type parameters: dict or iterable """ template_fields: Sequence[str] = ('sql',) diff --git a/airflow/providers/ssh/hooks/ssh.py b/airflow/providers/ssh/hooks/ssh.py index 2416387087d2f..419125cf7b709 100644 --- a/airflow/providers/ssh/hooks/ssh.py +++ b/airflow/providers/ssh/hooks/ssh.py @@ -53,27 +53,18 @@ class SSHHook(BaseHook): Connections from where all the required parameters can be fetched like username, password or key_file. Though the priority is given to the param passed during init - :type ssh_conn_id: str :param remote_host: remote host to connect - :type remote_host: str :param username: username to connect to the remote_host - :type username: str :param password: password of the username to connect to the remote_host - :type password: str :param key_file: path to key file to use to connect to the remote_host - :type key_file: str :param port: port of remote host to connect (Default is paramiko SSH_PORT) - :type port: int :param conn_timeout: timeout (in seconds) for the attempt to connect to the remote_host. The default is 10 seconds. If provided, it will replace the `conn_timeout` which was predefined in the connection of `ssh_conn_id`. - :type conn_timeout: int :param timeout: (Deprecated). timeout for the attempt to connect to the remote_host. Use conn_timeout instead. - :type timeout: int :param keepalive_interval: send a keepalive packet to remote host every keepalive_interval seconds - :type keepalive_interval: int """ # List of classes to try loading private keys as, ordered (roughly) by most common to least common @@ -344,11 +335,8 @@ def get_tunnel( Creates a tunnel between two hosts. Like ssh -L <local_port>:host:<remote_port>. :param remote_port: The remote port to create a tunnel to - :type remote_port: int :param remote_host: The remote host to create a tunnel to (default localhost) - :type remote_host: str :param local_port: The local port to attach the tunnel to - :type local_port: int :return: sshtunnel.SSHTunnelForwarder object """ diff --git a/airflow/providers/ssh/operators/ssh.py b/airflow/providers/ssh/operators/ssh.py index e30334bf138e8..9c2e6cc683eca 100644 --- a/airflow/providers/ssh/operators/ssh.py +++ b/airflow/providers/ssh/operators/ssh.py @@ -37,34 +37,25 @@ class SSHOperator(BaseOperator): :param ssh_hook: predefined ssh_hook to use for remote execution. Either `ssh_hook` or `ssh_conn_id` needs to be provided. - :type ssh_hook: airflow.providers.ssh.hooks.ssh.SSHHook :param ssh_conn_id: :ref:`ssh connection id` from airflow Connections. `ssh_conn_id` will be ignored if `ssh_hook` is provided. - :type ssh_conn_id: str :param remote_host: remote host to connect (templated) Nullable. If provided, it will replace the `remote_host` which was defined in `ssh_hook` or predefined in the connection of `ssh_conn_id`. - :type remote_host: str :param command: command to execute on remote host. (templated) - :type command: str :param conn_timeout: timeout (in seconds) for maintaining the connection. The default is 10 seconds. Nullable. If provided, it will replace the `conn_timeout` which was predefined in the connection of `ssh_conn_id`. - :type conn_timeout: int :param cmd_timeout: timeout (in seconds) for executing the command. The default is 10 seconds. - :type cmd_timeout: int :param timeout: (deprecated) timeout (in seconds) for executing the command. The default is 10 seconds. Use conn_timeout and cmd_timeout parameters instead. - :type timeout: int :param environment: a dict of shell environment variables.
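The conn_timeout/cmd_timeout split documented in the SSH hunks above looks like this in practice (the conn id, command, and env var are assumptions):

    from airflow.providers.ssh.operators.ssh import SSHOperator

    # Sketch: separate connect and execute timeouts, plus a pty for clean kills.
    collect_logs = SSHOperator(
        task_id="collect_logs",
        ssh_conn_id="ssh_default",
        command="tar czf /tmp/logs.tgz /var/log/myapp",
        conn_timeout=10,                  # seconds to establish the session
        cmd_timeout=300,                  # seconds for the command itself
        environment={"APP_ENV": "prod"},  # dropped silently without AcceptEnv
        get_pty=True,                     # remote process killed on task timeout
    )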
Note that the server will reject them silently if `AcceptEnv` is not set in SSH config. - :type environment: dict :param get_pty: request a pseudo-terminal from the server. Set to ``True`` to have the remote process killed upon task timeout. The default is ``False`` but note that `get_pty` is forced to ``True`` when the `command` starts with ``sudo``. - :type get_pty: bool """ template_fields: Sequence[str] = ('command', 'remote_host') diff --git a/airflow/providers/tableau/hooks/tableau.py b/airflow/providers/tableau/hooks/tableau.py index 1b9e41fc50c5f..e0d890b605bfd 100644 --- a/airflow/providers/tableau/hooks/tableau.py +++ b/airflow/providers/tableau/hooks/tableau.py @@ -66,10 +66,8 @@ class TableauHook(BaseHook): :param site_id: The id of the site where the workbook belongs to. It will connect to the default site if you don't provide an id. - :type site_id: Optional[str] :param tableau_conn_id: The :ref:`Tableau Connection id ` containing the credentials to authenticate to the Tableau Server. - :type tableau_conn_id: str """ conn_name_attr = 'tableau_conn_id' @@ -140,7 +138,6 @@ def get_all(self, resource_name: str) -> Pager: :param resource_name: The name of the resource to paginate. For example: jobs or workbooks. - :type resource_name: str :return: all items by returning a Pager. :rtype: tableauserverclient.Pager """ @@ -156,7 +153,6 @@ def get_job_status(self, job_id: str) -> TableauJobFinishCode: .. seealso:: https://tableau.github.io/server-client-python/docs/api-ref#jobs :param job_id: The id of the job to check. - :type job_id: str :return: An Enum that describes the Tableau job’s return code :rtype: TableauJobFinishCode """ @@ -168,12 +164,9 @@ def wait_for_state(self, job_id: str, target_state: TableauJobFinishCode, check_ to target_state or different from PENDING. :param job_id: The id of the job to check. - :type job_id: str :param target_state: Enum that describes the Tableau job’s target state - :type target_state: TableauJobFinishCode :param check_interval: time in seconds that the job should wait in between each instance state check until operation is completed - :type check_interval: float :return: return True if the job is equal to the target_status, False otherwise. :rtype: bool """ diff --git a/airflow/providers/tableau/operators/tableau.py b/airflow/providers/tableau/operators/tableau.py index d919331cff8e9..7f78d598ad47f 100644 --- a/airflow/providers/tableau/operators/tableau.py +++ b/airflow/providers/tableau/operators/tableau.py @@ -51,23 +51,15 @@ class TableauOperator(BaseOperator): :ref:`howto/operator:TableauOperator` :param resource: The name of the resource to use. - :type resource: str :param method: The name of the resource's method to execute. - :type method: str :param find: The reference of resource that will receive the action. - :type find: str :param match_with: The resource field name to be matched with find parameter. - :type match_with: Optional[str] :param site_id: The id of the site where the workbook belongs to. - :type site_id: Optional[str] :param blocking_refresh: By default the refresh will be blocking, meaning it will wait until it has finished. - :type blocking_refresh: bool :param check_interval: time in seconds that the job should wait in between each instance state check until operation is completed - :type check_interval: float :param tableau_conn_id: The :ref:`Tableau Connection id ` containing the credentials to authenticate to the Tableau Server.
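For reference, a minimal usage sketch of the SSHOperator parameters documented above; the connection id, command, and environment values are illustrative:

.. code-block:: python

    from airflow.providers.ssh.operators.ssh import SSHOperator

    run_backup = SSHOperator(
        task_id="run_backup",
        ssh_conn_id="ssh_default",  # ignored if an ssh_hook is passed instead
        command="sudo /opt/scripts/backup.sh",  # a leading ``sudo`` forces get_pty=True
        conn_timeout=10,
        cmd_timeout=600,
        environment={"STAGE": "prod"},  # silently dropped unless AcceptEnv allows it
    )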
- :type tableau_conn_id: str """ def __init__( @@ -97,7 +89,6 @@ def execute(self, context: 'Context') -> str: """ Executes the Tableau API resource and pushes the job id or downloaded file URI to xcom. :param context: The task context during execution. - :type context: dict :return: the id of the job that executes the extract refresh or downloaded file URI. :rtype: str """ diff --git a/airflow/providers/tableau/operators/tableau_refresh_workbook.py b/airflow/providers/tableau/operators/tableau_refresh_workbook.py index 7a1e29fa70a66..306c3ded629bf 100644 --- a/airflow/providers/tableau/operators/tableau_refresh_workbook.py +++ b/airflow/providers/tableau/operators/tableau_refresh_workbook.py @@ -40,19 +40,14 @@ class TableauRefreshWorkbookOperator(BaseOperator): .. seealso:: https://tableau.github.io/server-client-python/docs/api-ref#workbooks :param workbook_name: The name of the workbook to refresh. - :type workbook_name: str :param site_id: The id of the site where the workbook belongs to. - :type site_id: Optional[str] :param blocking: Defines if the job waits until the refresh has finished. Default: True. - :type blocking: bool :param tableau_conn_id: The :ref:`Tableau Connection id ` containing the credentials to authenticate to the Tableau Server. Default: 'tableau_default'. - :type tableau_conn_id: str :param check_interval: time in seconds that the job should wait in between each instance state check until operation is completed - :type check_interval: float """ def __init__( @@ -77,7 +72,6 @@ def execute(self, context: 'Context') -> str: Executes the Tableau Extract Refresh and pushes the job id to xcom. :param context: The task context during execution. - :type context: dict :return: the id of the job that executes the extract refresh :rtype: str """ diff --git a/airflow/providers/tableau/sensors/tableau_job_status.py b/airflow/providers/tableau/sensors/tableau_job_status.py index 145caa35fe32e..7602d28076051 100644 --- a/airflow/providers/tableau/sensors/tableau_job_status.py +++ b/airflow/providers/tableau/sensors/tableau_job_status.py @@ -34,12 +34,9 @@ class TableauJobStatusSensor(BaseSensorOperator): .. seealso:: https://tableau.github.io/server-client-python/docs/api-ref#jobs :param job_id: Id of the job to watch. - :type job_id: str :param site_id: The id of the site where the workbook belongs to. - :type site_id: Optional[str] :param tableau_conn_id: The :ref:`Tableau Connection id ` containing the credentials to authenticate to the Tableau Server. - :type tableau_conn_id: str """ template_fields: Sequence[str] = ('job_id',) @@ -62,7 +59,6 @@ def poke(self, context: 'Context') -> bool: Pokes until the job has successfully finished. :param context: The task context during execution. - :type context: dict :return: True if it succeeded and False if not.
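A minimal sketch of the generic TableauOperator documented above; the workbook name and connection id are illustrative:

.. code-block:: python

    from airflow.providers.tableau.operators.tableau import TableauOperator

    refresh = TableauOperator(
        task_id="refresh_workbook",
        resource="workbooks",
        method="refresh",
        find="sales-dashboard",  # hypothetical workbook to act on
        match_with="name",
        blocking_refresh=True,  # wait until the refresh job finishes
        check_interval=20,
        tableau_conn_id="tableau_default",
    )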
:rtype: bool """ diff --git a/airflow/providers/telegram/hooks/telegram.py b/airflow/providers/telegram/hooks/telegram.py index 5b143868726c0..3de4f90a96ba5 100644 --- a/airflow/providers/telegram/hooks/telegram.py +++ b/airflow/providers/telegram/hooks/telegram.py @@ -52,11 +52,8 @@ class TelegramHook(BaseHook): # or telegram_hook.send_message(None, {"text": "message"}) :param telegram_conn_id: connection that optionally has Telegram API token in the password field - :type telegram_conn_id: str :param token: optional telegram API token - :type token: str :param chat_id: optional chat_id of the telegram chat/channel/group - :type chat_id: str """ def __init__( @@ -84,9 +81,7 @@ def __get_token(self, token: Optional[str], telegram_conn_id: Optional[str]) -> Returns the telegram API token :param token: telegram API token - :type token: str :param telegram_conn_id: telegram connection name - :type telegram_conn_id: str :return: telegram API token :rtype: str """ @@ -108,9 +103,7 @@ def __get_chat_id(self, chat_id: Optional[str], telegram_conn_id: Optional[str]) Returns the telegram chat ID for a chat/channel/group :param chat_id: optional chat ID - :type chat_id: str :param telegram_conn_id: telegram connection name - :type telegram_conn_id: str :return: telegram chat ID :rtype: str """ @@ -133,7 +126,6 @@ def send_message(self, api_params: dict) -> None: Sends the message to a telegram channel or chat. :param api_params: params for telegram_instance.send_message. It can also be used to override chat_id - :type api_params: dict """ kwargs = { "chat_id": self.chat_id, diff --git a/airflow/providers/telegram/operators/telegram.py b/airflow/providers/telegram/operators/telegram.py index 006ab4ad47a48..59a92f6546746 100644 --- a/airflow/providers/telegram/operators/telegram.py +++ b/airflow/providers/telegram/operators/telegram.py @@ -37,15 +37,10 @@ class TelegramOperator(BaseOperator): :ref:`howto/operator:TelegramOperator` :param telegram_conn_id: Telegram connection ID whose password is the Telegram API token - :type telegram_conn_id: str :param token: Telegram API Token - :type token: str :param chat_id: Telegram chat ID for a chat/channel/group - :type chat_id: str :param text: Message to be sent on telegram - :type text: str :param telegram_kwargs: Extra args to be passed to telegram client - :type telegram_kwargs: dict """ template_fields: Sequence[str] = ('text', 'chat_id') diff --git a/airflow/providers/trino/hooks/trino.py b/airflow/providers/trino/hooks/trino.py index 501ef585ad95e..9b7a95bbd84cd 100644 --- a/airflow/providers/trino/hooks/trino.py +++ b/airflow/providers/trino/hooks/trino.py @@ -155,16 +155,11 @@ def insert_rows( A generic way to insert a set of tuples into a table. :param table: Name of the target table - :type table: str :param rows: The rows to insert into the table - :type rows: iterable of tuples :param target_fields: The names of the columns to fill in the table - :type target_fields: iterable of strings :param commit_every: The maximum number of rows to insert in one transaction. Set to 0 to insert all rows in one transaction.
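Following the TelegramHook docstring above, a minimal sketch; the token and chat_id values are placeholders, and both can also come from the connection:

.. code-block:: python

    from airflow.providers.telegram.hooks.telegram import TelegramHook

    hook = TelegramHook(token="xxx:xxx", chat_id="-1001234567890")
    hook.send_message({"text": "pipeline finished"})  # api_params may also override chat_id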
- :type commit_every: int :param replace: Whether to replace instead of insert - :type replace: bool """ if self.get_isolation_level() == IsolationLevel.AUTOCOMMIT: self.log.info( diff --git a/airflow/providers/vertica/operators/vertica.py b/airflow/providers/vertica/operators/vertica.py index a3ccf56614a7c..f772123bb348e 100644 --- a/airflow/providers/vertica/operators/vertica.py +++ b/airflow/providers/vertica/operators/vertica.py @@ -29,11 +29,9 @@ class VerticaOperator(BaseOperator): Executes sql code in a specific Vertica database. :param vertica_conn_id: reference to a specific Vertica database - :type vertica_conn_id: str - :param sql: the sql code to be executed. (templated) - :type sql: Can receive a str representing a sql statement, - a list of str (sql statements), or reference to a template file. - Template reference are recognized by str ending in '.sql' + :param sql: the SQL code to be executed as a single string, or + a list of str (sql statements), or a reference to a template file. + Template references are recognized by str ending in '.sql' """ template_fields: Sequence[str] = ('sql',) diff --git a/airflow/providers/yandex/hooks/yandex.py b/airflow/providers/yandex/hooks/yandex.py index 89d4f24235640..84cc3ee45641b 100644 --- a/airflow/providers/yandex/hooks/yandex.py +++ b/airflow/providers/yandex/hooks/yandex.py @@ -30,7 +30,6 @@ class YandexCloudBaseHook(BaseHook): A base hook for Yandex.Cloud related tasks. :param yandex_conn_id: The connection ID to use when fetching connection info. - :type yandex_conn_id: str """ conn_name_attr = 'yandex_conn_id' diff --git a/airflow/providers/yandex/hooks/yandexcloud_dataproc.py b/airflow/providers/yandex/hooks/yandexcloud_dataproc.py index 4cb52155f5ae6..6597dba5ee0ef 100644 --- a/airflow/providers/yandex/hooks/yandexcloud_dataproc.py +++ b/airflow/providers/yandex/hooks/yandexcloud_dataproc.py @@ -24,7 +24,6 @@ class DataprocHook(YandexCloudBaseHook): A base hook for Yandex.Cloud Data Proc. :param yandex_conn_id: The connection ID to use when fetching connection info. - :type yandex_conn_id: str """ def __init__(self, *args, **kwargs) -> None: diff --git a/airflow/providers/yandex/operators/yandexcloud_dataproc.py b/airflow/providers/yandex/operators/yandexcloud_dataproc.py index a4ec67c861cb2..5f7681dd14b38 100644 --- a/airflow/providers/yandex/operators/yandexcloud_dataproc.py +++ b/airflow/providers/yandex/operators/yandexcloud_dataproc.py @@ -28,77 +28,50 @@ class DataprocCreateClusterOperator(BaseOperator): """Creates Yandex.Cloud Data Proc cluster. :param folder_id: ID of the folder in which the cluster should be created. - :type folder_id: Optional[str] :param cluster_name: Cluster name. Must be unique inside the folder. - :type cluster_name: Optional[str] :param cluster_description: Cluster description. - :type cluster_description: str :param cluster_image_version: Cluster image version. Use default. - :type cluster_image_version: str :param ssh_public_keys: List of SSH public keys that will be deployed to created compute instances. - :type ssh_public_keys: Optional[Union[str, Iterable[str]]] :param subnet_id: ID of the subnetwork. All Data Proc cluster nodes will use one subnetwork. - :type subnet_id: str :param services: List of services that will be installed to the cluster. Possible options: HDFS, YARN, MAPREDUCE, HIVE, TEZ, ZOOKEEPER, HBASE, SQOOP, FLUME, SPARK, ZEPPELIN, OOZIE - :type services: Iterable[str] :param s3_bucket: Yandex.Cloud S3 bucket to store cluster logs.
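A minimal sketch of TrinoHook.insert_rows as documented above; the connection id, table, and rows are illustrative:

.. code-block:: python

    from airflow.providers.trino.hooks.trino import TrinoHook

    hook = TrinoHook(trino_conn_id="trino_default")  # assumed connection id
    hook.insert_rows(
        table="analytics.users",  # hypothetical target table
        rows=[("alice", 1), ("bob", 2)],
        target_fields=["name", "score"],
        commit_every=1000,  # 0 would insert everything in one transaction
    )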
Jobs will not work if the bucket is not specified. - :type s3_bucket: Optional[str] :param zone: Availability zone to create the cluster in. Currently there are ru-central1-a, ru-central1-b and ru-central1-c. - :type zone: str :param service_account_id: Service account id for the cluster. Service account can be created inside the folder. - :type service_account_id: Optional[str] :param masternode_resource_preset: Resources preset (CPU+RAM configuration) for the primary node of the cluster. - :type masternode_resource_preset: str :param masternode_disk_size: Masternode storage size in GiB. - :type masternode_disk_size: int :param masternode_disk_type: Masternode storage type. Possible options: network-ssd, network-hdd. - :type masternode_disk_type: str :param datanode_resource_preset: Resources preset (CPU+RAM configuration) for the data nodes of the cluster. - :type datanode_resource_preset: str :param datanode_disk_size: Datanodes storage size in GiB. - :type datanode_disk_size: int :param datanode_disk_type: Datanodes storage type. Possible options: network-ssd, network-hdd. - :type datanode_disk_type: str :param computenode_resource_preset: Resources preset (CPU+RAM configuration) for the compute nodes of the cluster. - :type computenode_resource_preset: str :param computenode_disk_size: Computenodes storage size in GiB. - :type computenode_disk_size: int :param computenode_disk_type: Computenodes storage type. Possible options: network-ssd, network-hdd. - :type computenode_disk_type: str :param connection_id: ID of the Yandex.Cloud Airflow connection. - :type connection_id: Optional[str] - :type computenode_max_count: int :param computenode_max_count: Maximum number of nodes of compute autoscaling subcluster. :param computenode_warmup_duration: The warmup time of the instance in seconds. During this time, traffic is sent to the instance, but instance metrics are not collected. - :type computenode_warmup_duration: int :param computenode_stabilization_duration: Minimum amount of time in seconds for monitoring before Instance Groups can reduce the number of instances in the group. During this time, the group size doesn't decrease, even if the new metric values indicate that it should. - :type computenode_stabilization_duration: int :param computenode_preemptible: Preemptible instances are stopped at least once every 24 hours, and can be stopped at any time if their resources are needed by Compute. - :type computenode_preemptible: bool :param computenode_cpu_utilization_target: Defines an autoscaling rule based on the average CPU utilization of the instance group, in percent (10-100). By default it is not set and the default autoscaling strategy is used. - :type computenode_cpu_utilization_target: int :param computenode_decommission_timeout: Timeout to gracefully decommission nodes during downscaling. In seconds. - :type computenode_decommission_timeout: int :param log_group_id: ID of the log group to write logs to. By default logs will be sent to the default log group. To disable cloud log sending set cluster property dataproc:disable_cloud_logging = true - :type log_group_id: str """ def __init__( @@ -213,9 +186,7 @@ class DataprocDeleteClusterOperator(BaseOperator): """Deletes Yandex.Cloud Data Proc cluster. :param connection_id: ID of the Yandex.Cloud Airflow connection. - :type connection_id: Optional[str] :param cluster_id: ID of the cluster to remove.
(templated) - :type cluster_id: Optional[str] """ template_fields: Sequence[str] = ('cluster_id',) @@ -243,22 +214,14 @@ class DataprocCreateHiveJobOperator(BaseOperator): """Runs Hive job in Data Proc cluster. :param query: Hive query. - :type query: Optional[str] :param query_file_uri: URI of the script that contains Hive queries. Can be placed in HDFS or S3. - :type query_file_uri: Optional[str] :param properties: A mapping of property names to values, used to configure Hive. - :type properties: Optional[Dist[str, str]] :param script_variables: Mapping of query variable names to values. - :type script_variables: Optional[Dist[str, str]] :param continue_on_failure: Whether to continue executing queries if a query fails. - :type continue_on_failure: bool :param name: Name of the job. Used for labeling. - :type name: str :param cluster_id: ID of the cluster to run job in. Will try to take the ID from Dataproc Hook object if not specified. (templated) - :type cluster_id: Optional[str] :param connection_id: ID of the Yandex.Cloud Airflow connection. - :type connection_id: Optional[str] """ template_fields: Sequence[str] = ('cluster_id',) @@ -311,26 +274,16 @@ class DataprocCreateMapReduceJobOperator(BaseOperator): :param main_jar_file_uri: URI of jar file with job. Can be placed in HDFS or S3. Can be specified instead of main_class. - :type main_jar_file_uri: Optional[str] :param main_class: Name of the main class of the job. Can be specified instead of main_jar_file_uri. - :type main_class: Optional[str] :param file_uris: URIs of files used in the job. Can be placed in HDFS or S3. - :type file_uris: Optional[Iterable[str]] :param archive_uris: URIs of archive files used in the job. Can be placed in HDFS or S3. - :type archive_uris: Optional[Iterable[str]] :param jar_file_uris: URIs of JAR files used in the job. Can be placed in HDFS or S3. - :type jar_file_uris: Optional[Iterable[str]] :param properties: Properties for the job. - :type properties: Optional[Dist[str, str]] :param args: Arguments to be passed to the job. - :type args: Optional[Iterable[str]] :param name: Name of the job. Used for labeling. - :type name: str :param cluster_id: ID of the cluster to run job in. Will try to take the ID from Dataproc Hook object if not specified. (templated) - :type cluster_id: Optional[str] :param connection_id: ID of the Yandex.Cloud Airflow connection. - :type connection_id: Optional[str] """ template_fields: Sequence[str] = ('cluster_id',) @@ -388,34 +341,21 @@ class DataprocCreateSparkJobOperator(BaseOperator): """Runs Spark job in Data Proc cluster. :param main_jar_file_uri: URI of jar file with job. Can be placed in HDFS or S3. - :type main_jar_file_uri: Optional[str] :param main_class: Name of the main class of the job. - :type main_class: Optional[str] :param file_uris: URIs of files used in the job. Can be placed in HDFS or S3. - :type file_uris: Optional[Iterable[str]] :param archive_uris: URIs of archive files used in the job. Can be placed in HDFS or S3. - :type archive_uris: Optional[Iterable[str]] :param jar_file_uris: URIs of JAR files used in the job. Can be placed in HDFS or S3. - :type jar_file_uris: Optional[Iterable[str]] :param properties: Properties for the job. - :type properties: Optional[Dist[str, str]] :param args: Arguments to be passed to the job. - :type args: Optional[Iterable[str]] :param name: Name of the job. Used for labeling. - :type name: str :param cluster_id: ID of the cluster to run job in. Will try to take the ID from Dataproc Hook object if not specified.
(templated) - :type cluster_id: Optional[str] :param connection_id: ID of the Yandex.Cloud Airflow connection. - :type connection_id: Optional[str] :param packages: List of maven coordinates of jars to include on the driver and executor classpaths. - :type packages: Optional[Iterable[str]] :param repositories: List of additional remote repositories to search for the maven coordinates given with --packages. - :type repositories: Optional[Iterable[str]] :param exclude_packages: List of groupId:artifactId, to exclude while resolving the dependencies provided in --packages to avoid dependency conflicts. - :type exclude_packages: Optional[Iterable[str]] """ template_fields: Sequence[str] = ('cluster_id',) @@ -482,34 +422,21 @@ class DataprocCreatePysparkJobOperator(BaseOperator): """Runs Pyspark job in Data Proc cluster. :param main_python_file_uri: URI of python file with job. Can be placed in HDFS or S3. - :type main_python_file_uri: Optional[str] :param python_file_uris: URIs of python files used in the job. Can be placed in HDFS or S3. - :type python_file_uris: Optional[Iterable[str]] :param file_uris: URIs of files used in the job. Can be placed in HDFS or S3. - :type file_uris: Optional[Iterable[str]] :param archive_uris: URIs of archive files used in the job. Can be placed in HDFS or S3. - :type archive_uris: Optional[Iterable[str]] :param jar_file_uris: URIs of JAR files used in the job. Can be placed in HDFS or S3. - :type jar_file_uris: Optional[Iterable[str]] :param properties: Properties for the job. - :type properties: Optional[Dist[str, str]] :param args: Arguments to be passed to the job. - :type args: Optional[Iterable[str]] :param name: Name of the job. Used for labeling. - :type name: str :param cluster_id: ID of the cluster to run job in. Will try to take the ID from Dataproc Hook object if not specified. (templated) - :type cluster_id: Optional[str] :param connection_id: ID of the Yandex.Cloud Airflow connection. - :type connection_id: Optional[str] :param packages: List of maven coordinates of jars to include on the driver and executor classpaths. - :type packages: Optional[Iterable[str]] :param repositories: List of additional remote repositories to search for the maven coordinates given with --packages. - :type repositories: Optional[Iterable[str]] :param exclude_packages: List of groupId:artifactId, to exclude while resolving the dependencies provided in --packages to avoid dependency conflicts. - :type exclude_packages: Optional[Iterable[str]] """ template_fields: Sequence[str] = ('cluster_id',) diff --git a/airflow/providers/zendesk/hooks/zendesk.py b/airflow/providers/zendesk/hooks/zendesk.py index 3573dc661995a..e139f50480625 100644 --- a/airflow/providers/zendesk/hooks/zendesk.py +++ b/airflow/providers/zendesk/hooks/zendesk.py @@ -29,7 +29,6 @@ class ZendeskHook(BaseHook): Interact with Zendesk. This hook uses the Zendesk conn_id. :param zendesk_conn_id: The Airflow connection used for Zendesk credentials.
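Putting the Data Proc operators above together, a minimal sketch of a create-run-delete chain; the zone, bucket, and file URIs are illustrative, and the job operator picks the cluster ID up from the hook when it is not passed:

.. code-block:: python

    from airflow.providers.yandex.operators.yandexcloud_dataproc import (
        DataprocCreateClusterOperator,
        DataprocCreatePysparkJobOperator,
        DataprocDeleteClusterOperator,
    )

    create = DataprocCreateClusterOperator(
        task_id="create_cluster",
        zone="ru-central1-b",
        s3_bucket="my-dataproc-logs",  # jobs will not work without a bucket
    )
    job = DataprocCreatePysparkJobOperator(
        task_id="run_job",
        main_python_file_uri="s3a://my-bucket/jobs/job.py",  # cluster_id taken from the hook
    )
    delete = DataprocDeleteClusterOperator(task_id="delete_cluster")

    create >> job >> delete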
- :type zendesk_conn_id: str """ def __init__(self, zendesk_conn_id: str) -> None: diff --git a/airflow/secrets/base_secrets.py b/airflow/secrets/base_secrets.py index 91f28be0bbcc9..f058e4a390dd7 100644 --- a/airflow/secrets/base_secrets.py +++ b/airflow/secrets/base_secrets.py @@ -35,11 +35,8 @@ def build_path(path_prefix: str, secret_id: str, sep: str = "/") -> str: Given conn_id, build path for Secrets Backend :param path_prefix: Prefix of the path to get secret - :type path_prefix: str :param secret_id: Secret id - :type secret_id: str :param sep: separator used to concatenate connections_prefix and conn_id. Default: "/" - :type sep: str """ return f"{path_prefix}{sep}{secret_id}" @@ -48,7 +45,6 @@ def get_conn_uri(self, conn_id: str) -> Optional[str]: Get conn_uri from Secrets Backend :param conn_id: connection id - :type conn_id: str """ raise NotImplementedError() @@ -57,7 +53,6 @@ def get_connection(self, conn_id: str) -> Optional['Connection']: Return connection object with a given ``conn_id``. :param conn_id: connection id - :type conn_id: str """ from airflow.models.connection import Connection @@ -72,7 +67,6 @@ def get_connections(self, conn_id: str) -> List['Connection']: Return connection object with a given ``conn_id``. :param conn_id: connection id - :type conn_id: str """ warnings.warn( "This method is deprecated. Please use " @@ -90,7 +84,6 @@ def get_variable(self, key: str) -> Optional[str]: Return value for Airflow Variable :param key: Variable Key - :type key: str :return: Variable Value """ raise NotImplementedError() diff --git a/airflow/secrets/environment_variables.py b/airflow/secrets/environment_variables.py index 92fa186ec3878..1397e782e3e33 100644 --- a/airflow/secrets/environment_variables.py +++ b/airflow/secrets/environment_variables.py @@ -38,7 +38,6 @@ def get_variable(self, key: str) -> Optional[str]: Get Airflow Variable from Environment Variable :param key: Variable Key - :type key: str :return: Variable Value """ return os.environ.get(VAR_ENV_PREFIX + key.upper()) diff --git a/airflow/secrets/local_filesystem.py b/airflow/secrets/local_filesystem.py index 1b1980b0dbfd5..7c30aa241cc2f 100644 --- a/airflow/secrets/local_filesystem.py +++ b/airflow/secrets/local_filesystem.py @@ -58,7 +58,6 @@ def _parse_env_file(file_path: str) -> Tuple[Dict[str, List[str]], List[FileSynt MY_CONN_ID=my-conn-type://my-login:my-pa%2Fssword@my-host:5432/my-schema?param1=val1&param2=val2 :param file_path: The location of the file that will be processed. - :type file_path: str :return: Tuple with mapping of key and list of values and list of syntax errors """ with open(file_path) as f: @@ -102,7 +101,6 @@ def _parse_yaml_file(file_path: str) -> Tuple[Dict[str, List[str]], List[FileSyn Parse a file in the YAML format. :param file_path: The location of the file that will be processed. - :type file_path: str :return: Tuple with mapping of key and list of values and list of syntax errors """ with open(file_path) as f: @@ -127,7 +125,6 @@ def _parse_json_file(file_path: str) -> Tuple[Dict[str, Any], List[FileSyntaxErr Parse a file in the JSON format. :param file_path: The location of the file that will be processed. - :type file_path: str :return: Tuple with mapping of key and list of values and list of syntax errors """ with open(file_path) as f: @@ -157,7 +154,6 @@ def _parse_secret_file(file_path: str) -> Dict[str, Any]: Based on the file extension format, selects a parser, and parses the file. :param file_path: The location of the file that will be processed.
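Since ``build_path`` simply joins the prefix and the secret id with the separator, its behavior can be seen directly; the prefix and connection id here are illustrative:

.. code-block:: python

    from airflow.secrets.base_secrets import BaseSecretsBackend

    path = BaseSecretsBackend.build_path("airflow/connections", "smtp_default")
    assert path == "airflow/connections/smtp_default"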
- :type file_path: str :return: Map of secret key (e.g. connection ID) and value. """ if not os.path.exists(file_path): @@ -231,7 +227,6 @@ def load_variables(file_path: str) -> Dict[str, str]: ``JSON``, ``YAML`` and ``.env`` files are supported. :param file_path: The location of the file that will be processed. - :type file_path: str :rtype: Dict[str, List[str]] """ log.debug("Loading variables from a text file") @@ -291,9 +286,7 @@ class LocalFilesystemBackend(BaseSecretsBackend, LoggingMixin): ``JSON``, ``YAML`` and ``.env`` files are supported. :param variables_file_path: File location with variables data. - :type variables_file_path: str :param connections_file_path: File location with connection data. - :type connections_file_path: str """ def __init__( diff --git a/airflow/secrets/metastore.py b/airflow/secrets/metastore.py index 268f281535441..100a35d8fd2c2 100644 --- a/airflow/secrets/metastore.py +++ b/airflow/secrets/metastore.py @@ -56,7 +56,6 @@ def get_variable(self, key: str, session=None): Get Airflow Variable from Metadata DB :param key: Variable Key - :type key: str :return: Variable Value """ from airflow.models.variable import Variable diff --git a/airflow/sensors/base.py b/airflow/sensors/base.py index 523caae4f3824..e063a2f9ce776 100644 --- a/airflow/sensors/base.py +++ b/airflow/sensors/base.py @@ -64,12 +64,9 @@ class BaseSensorOperator(BaseOperator, SkipMixin): a criterion is met and fail if and when they time out. :param soft_fail: Set to true to mark the task as SKIPPED on failure - :type soft_fail: bool :param poke_interval: Time in seconds that the job should wait in between each try - :type poke_interval: float :param timeout: Time, in seconds, before the task times out and fails. - :type timeout: float :param mode: How the sensor operates. Options are: ``{ poke | reschedule }``, default is ``poke``. When set to ``poke`` the sensor is taking up a worker slot for its @@ -82,10 +79,8 @@ this mode if the time before the criterion is met is expected to be quite long. The poke interval should be more than one minute to prevent too much load on the scheduler. - :type mode: str :param exponential_backoff: allow progressively longer waits between pokes by using the exponential backoff algorithm - :type exponential_backoff: bool """ ui_color = '#e6f1f2' # type: str @@ -335,7 +330,6 @@ def poke_mode_only(cls): the mode from 'poke'. :param cls: BaseSensor class to enforce methods only use 'poke' mode. - :type cls: type """ def decorate(cls_type): diff --git a/airflow/sensors/bash.py b/airflow/sensors/bash.py index 8059fc69f114d..c651727e8412d 100644 --- a/airflow/sensors/bash.py +++ b/airflow/sensors/bash.py @@ -32,15 +32,12 @@ class BashSensor(BaseSensorOperator): :param bash_command: The command, set of commands or reference to a bash script (must be '.sh') to be executed. - :type bash_command: str :param env: If env is not None, it must be a mapping that defines the environment variables for the new process; these are used instead of inheriting the current process environment, which is the default behavior. (templated) - :type env: dict :param output_encoding: output encoding of bash command.
- :type output_encoding: str """ template_fields: Sequence[str] = ('bash_command', 'env') diff --git a/airflow/sensors/date_time.py b/airflow/sensors/date_time.py index 34989d481b1a8..52c41753a8a5a 100644 --- a/airflow/sensors/date_time.py +++ b/airflow/sensors/date_time.py @@ -52,7 +52,6 @@ class DateTimeSensor(BaseSensorOperator): ) :param target_time: datetime after which the job succeeds. (templated) - :type target_time: str or datetime.datetime """ template_fields: Sequence[str] = ("target_time",) @@ -83,7 +82,6 @@ class DateTimeSensorAsync(DateTimeSensor): It is a drop-in replacement for DateTimeSensor. :param target_time: datetime after which the job succeeds. (templated) - :type target_time: str or datetime.datetime """ def execute(self, context: Context): diff --git a/airflow/sensors/external_task.py b/airflow/sensors/external_task.py index c9c025d326046..f1e35444e935f 100644 --- a/airflow/sensors/external_task.py +++ b/airflow/sensors/external_task.py @@ -51,36 +51,28 @@ class ExternalTaskSensor(BaseSensorOperator): :param external_dag_id: The dag_id that contains the task you want to wait for - :type external_dag_id: str :param external_task_id: The task_id that contains the task you want to wait for. If ``None`` (default value) the sensor waits for the DAG - :type external_task_id: str or None :param external_task_ids: The list of task_ids that you want to wait for. If ``None`` (default value) the sensor waits for the DAG. Either external_task_id or external_task_ids can be passed to ExternalTaskSensor, but not both. - :type external_task_ids: Iterable of task_ids or None, default is None :param allowed_states: Iterable of allowed states, default is ``['success']`` - :type allowed_states: Iterable :param failed_states: Iterable of failed or disallowed states, default is ``None`` - :type failed_states: Iterable :param execution_delta: time difference with the previous execution to look at; the default is the same logical date as the current task or DAG. For yesterday, use [positive!] datetime.timedelta(days=1). Either execution_delta or execution_date_fn can be passed to ExternalTaskSensor, but not both. - :type execution_delta: Optional[datetime.timedelta] :param execution_date_fn: function that receives the current execution's logical date as the first positional argument and optionally any number of keyword arguments available in the context dictionary, and returns the desired logical dates to query. Either execution_delta or execution_date_fn can be passed to ExternalTaskSensor, but not both. - :type execution_date_fn: Optional[Callable] :param check_existence: Set to `True` to check if the external task exists (when external_task_id is not None) or check if the DAG to wait for exists (when external_task_id is None), and immediately cease waiting if the external task or DAG does not exist (default value: False). - :type check_existence: bool """ template_fields = ['external_dag_id', 'external_task_id'] @@ -217,11 +209,8 @@ def get_count(self, dttm_filter, session, states) -> int: Get the count of records against dttm filter and states :param dttm_filter: date time filter for execution date - :type dttm_filter: list :param session: airflow session object - :type session: SASession :param states: task or dag states - :type states: list :return: count of record against the filters """ TI = TaskInstance @@ -279,11 +268,8 @@ class ExternalTaskMarker(DummyOperator): until the recursion_depth is reached.
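A minimal sketch of the ExternalTaskSensor parameters documented above; the upstream DAG and task ids are hypothetical:

.. code-block:: python

    from datetime import timedelta

    from airflow.sensors.external_task import ExternalTaskSensor

    wait_for_load = ExternalTaskSensor(
        task_id="wait_for_load",
        external_dag_id="nightly_load",  # hypothetical upstream DAG
        external_task_id="publish",  # None would wait on the whole DAG run
        execution_delta=timedelta(hours=1),  # upstream runs one hour earlier
        failed_states=["failed"],
        mode="reschedule",  # free the worker slot between pokes
    )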
:param external_dag_id: The dag_id that contains the dependent task that needs to be cleared. - :type external_dag_id: str :param external_task_id: The task_id of the dependent task that needs to be cleared. - :type external_task_id: str :param execution_date: The logical date of the dependent task execution that needs to be cleared. - :type execution_date: str or datetime.datetime :param recursion_depth: The maximum level of transitive dependencies allowed. Default is 10. This is mostly used for preventing cyclic dependencies. It is fine to increase this number if necessary. However, too many levels of transitive dependencies will make diff --git a/airflow/sensors/filesystem.py b/airflow/sensors/filesystem.py index f3bd32b469a76..0ccf6586951e1 100644 --- a/airflow/sensors/filesystem.py +++ b/airflow/sensors/filesystem.py @@ -36,13 +36,10 @@ class FileSensor(BaseSensorOperator): :param fs_conn_id: reference to the File (path) connection id - :type fs_conn_id: str :param filepath: File or folder name (relative to the base path set within the connection), can be a glob. - :type filepath: str :param recursive: when set to ``True``, enables recursive directory matching behavior of ``**`` in glob filepath parameter. Defaults to ``False``. - :type recursive: bool """ template_fields: Sequence[str] = ('filepath',) diff --git a/airflow/sensors/python.py b/airflow/sensors/python.py index 04e1ab25af08f..c139c39a6a80e 100644 --- a/airflow/sensors/python.py +++ b/airflow/sensors/python.py @@ -32,18 +32,14 @@ class PythonSensor(BaseSensorOperator): in the callable :param python_callable: A reference to an object that is callable - :type python_callable: python callable :param op_kwargs: a dictionary of keyword arguments that will get unpacked in your function - :type op_kwargs: dict :param op_args: a list of positional arguments that will get unpacked when calling your callable - :type op_args: list :param templates_dict: a dictionary where the values are templates that will get templated by the Airflow engine sometime between ``__init__`` and ``execute`` and are made available in your callable's context after the template has been applied. - :type templates_dict: dict of str """ template_fields: Sequence[str] = ('templates_dict', 'op_args', 'op_kwargs') diff --git a/airflow/sensors/smart_sensor.py b/airflow/sensors/smart_sensor.py index f26ce3a88f01c..63c394e71814a 100644 --- a/airflow/sensors/smart_sensor.py +++ b/airflow/sensors/smart_sensor.py @@ -292,19 +292,13 @@ class SmartSensorOperator(BaseOperator, SkipMixin): all sensor task state in task_instance table :param soft_fail: Set to true to mark the task as SKIPPED on failure - :type soft_fail: bool :param poke_interval: Time in seconds that the job should wait in between each try. - :type poke_interval: int :param smart_sensor_timeout: Time, in seconds, before the internal sensor job times out if poke_timeout is not defined. - :type smart_sensor_timeout: float :param shard_min: shard code lower bound (inclusive) - :type shard_min: int :param shard_max: shard code upper bound (exclusive) - :type shard_max: int :param poke_timeout: Time, in seconds, before the task times out and fails. - :type poke_timeout: float """ ui_color = '#e6f1f2' @@ -495,9 +489,7 @@ def _retry_or_fail_task(self, sensor_work, error, session=None): logically experienced all retries and the try_number should be set to max_tries. :param sensor_work: The sensor_work with exception. - :type sensor_work: SensorWork :param error: The error message for this sensor_work.
- :type error: str. :param session: The sqlalchemy session. """ diff --git a/airflow/sensors/sql.py b/airflow/sensors/sql.py index 5b5a8cf37fbce..a35d7566ceb41 100644 --- a/airflow/sensors/sql.py +++ b/airflow/sensors/sql.py @@ -36,23 +36,16 @@ class SqlSensor(BaseSensorOperator): be passed to the sensor in which case it will fail if no rows have been returned :param conn_id: The connection to run the sensor against - :type conn_id: str :param sql: The sql to run. To pass, it needs to return at least one cell that contains a non-zero / non-empty string value. - :type sql: str :param parameters: The parameters to render the SQL query with (optional). - :type parameters: dict or iterable :param success: Success criteria for the sensor is a Callable that takes first_cell as the only argument, and returns a boolean (optional). - :type success: Optional :param failure: Failure criteria for the sensor is a Callable that takes first_cell as the only argument and returns a boolean (optional). - :type failure: Optional :param fail_on_empty: Explicitly fail on no rows returned. - :type fail_on_empty: bool :param hook_params: Extra config params to be passed to the underlying hook. Should match the desired hook constructor params. - :type hook_params: dict """ template_fields: Sequence[str] = ('sql',) diff --git a/airflow/sensors/time_delta.py b/airflow/sensors/time_delta.py index f0b0dff318174..5b336c3f88ccd 100644 --- a/airflow/sensors/time_delta.py +++ b/airflow/sensors/time_delta.py @@ -27,7 +27,6 @@ class TimeDeltaSensor(BaseSensorOperator): Waits for a timedelta after the run's data interval. :param delta: time length to wait after the data interval before succeeding. - :type delta: datetime.timedelta """ def __init__(self, *, delta, **kwargs): @@ -47,7 +46,6 @@ class TimeDeltaSensorAsync(TimeDeltaSensor): taking up a worker slot while it is waiting. :param delta: time length to wait after the data interval before succeeding. - :type delta: datetime.timedelta """ def execute(self, context: Context): diff --git a/airflow/sensors/time_sensor.py b/airflow/sensors/time_sensor.py index 8f7027fea926a..117390925def6 100644 --- a/airflow/sensors/time_sensor.py +++ b/airflow/sensors/time_sensor.py @@ -28,7 +28,6 @@ class TimeSensor(BaseSensorOperator): Waits until the specified time of the day. :param target_time: time after which the job succeeds - :type target_time: datetime.time """ def __init__(self, *, target_time, **kwargs): @@ -46,7 +45,6 @@ class TimeSensorAsync(BaseSensorOperator): it is waiting. :param target_time: time after which the job succeeds - :type target_time: datetime.time """ def __init__(self, *, target_time, **kwargs): diff --git a/airflow/sensors/weekday.py b/airflow/sensors/weekday.py index 2194d011749bb..bdf9275e107b5 100644 --- a/airflow/sensors/weekday.py +++ b/airflow/sensors/weekday.py @@ -64,12 +64,10 @@ class DayOfWeekSensor(BaseSensorOperator): * ``{WeekDay.TUESDAY}`` * ``{WeekDay.SATURDAY, WeekDay.SUNDAY}`` - :type week_day: iterable or airflow.utils.weekday.WeekDay :param use_task_execution_day: If ``True``, uses task's execution day to compare with week_day. Execution date is useful for backfilling. If ``False``, uses the system's day of the week. Useful when you don't want to run anything on weekdays on the system.
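A minimal sketch of the SqlSensor parameters documented above; the connection id, query, and threshold are illustrative:

.. code-block:: python

    from airflow.sensors.sql import SqlSensor

    wait_for_rows = SqlSensor(
        task_id="wait_for_rows",
        conn_id="postgres_default",  # assumed connection id
        sql="SELECT COUNT(*) FROM staging.events WHERE ds = '{{ ds }}'",
        success=lambda first_cell: first_cell > 100,  # custom pass criterion on the first cell
        fail_on_empty=True,
        poke_interval=60,
    )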
- :type use_task_execution_day: bool """ def __init__(self, *, week_day, use_task_execution_day=False, **kwargs): diff --git a/airflow/settings.py b/airflow/settings.py index eea4d5b45fd2d..588eba94e8533 100644 --- a/airflow/settings.py +++ b/airflow/settings.py @@ -142,7 +142,6 @@ def task_policy(task) -> None: for more than 48 hours :param task: task to be mutated - :type task: airflow.models.baseoperator.BaseOperator """ @@ -162,7 +161,6 @@ def dag_policy(dag) -> None: * Check if every DAG has configured tags :param dag: dag to be mutated - :type dag: airflow.models.dag.DAG """ @@ -177,7 +175,6 @@ def task_instance_mutation_hook(task_instance): This could be used, for instance, to modify the task instance during retries. :param task_instance: task instance to be mutated - :type task_instance: airflow.models.taskinstance.TaskInstance """ @@ -208,7 +205,6 @@ def get_airflow_context_vars(context): to your PYTHONPATH that defines this ``get_airflow_context_vars`` function. :param context: The context for the task_instance of interest. - :type context: dict """ return {} diff --git a/airflow/task/task_runner/__init__.py b/airflow/task/task_runner/__init__.py index e00bceaecd51b..dc63883e5b4bd 100644 --- a/airflow/task/task_runner/__init__.py +++ b/airflow/task/task_runner/__init__.py @@ -43,7 +43,6 @@ def get_task_runner(local_task_job): :param local_task_job: The LocalTaskJob associated with the TaskInstance that needs to be executed. - :type local_task_job: airflow.jobs.local_task_job.LocalTaskJob :return: The task runner to use to run the task. :rtype: airflow.task.task_runner.base_task_runner.BaseTaskRunner """ diff --git a/airflow/task/task_runner/base_task_runner.py b/airflow/task/task_runner/base_task_runner.py index 64d528b4cce57..47be386b7489a 100644 --- a/airflow/task/task_runner/base_task_runner.py +++ b/airflow/task/task_runner/base_task_runner.py @@ -47,7 +47,6 @@ class BaseTaskRunner(LoggingMixin): :param local_task_job: The local task job associated with running the associated task instance. - :type local_task_job: airflow.jobs.local_task_job.LocalTaskJob """ def __init__(self, local_task_job): @@ -133,7 +132,6 @@ def run_command(self, run_with=None): Run the task command. :param run_with: list of tokens to run the task command with e.g. ``['bash', '-c']`` - :type run_with: list :return: the process that was run :rtype: subprocess.Popen """ diff --git a/airflow/ti_deps/dep_context.py b/airflow/ti_deps/dep_context.py index 6c747c7656e2d..264e4b55226db 100644 --- a/airflow/ti_deps/dep_context.py +++ b/airflow/ti_deps/dep_context.py @@ -43,29 +43,20 @@ class DepContext: :param deps: The context-specific dependencies that need to be evaluated for a task instance to run in this execution context. - :type deps: set(airflow.ti_deps.deps.base_ti_dep.BaseTIDep) :param flag_upstream_failed: This is a hack to generate the upstream_failed state creation while checking to see whether the task instance is runnable. It was the shortest path to add the feature. This is bad since this class should be pure (no side effects). - :type flag_upstream_failed: bool :param ignore_all_deps: Whether or not the context should ignore all ignorable dependencies. Overrides the other ignore_* parameters - :type ignore_all_deps: bool :param ignore_depends_on_past: Ignore depends_on_past parameter of DAGs (e.g. 
for Backfills) - :type ignore_depends_on_past: bool :param ignore_in_retry_period: Ignore the retry period for task instances - :type ignore_in_retry_period: bool :param ignore_in_reschedule_period: Ignore the reschedule period for task instances - :type ignore_in_reschedule_period: bool :param ignore_task_deps: Ignore task-specific dependencies such as depends_on_past and trigger rule - :type ignore_task_deps: bool :param ignore_ti_state: Ignore the task instance's previous failure/success - :type ignore_ti_state: bool :param finished_tasks: A list of all the finished tasks of this run - :type finished_tasks: list[airflow.models.TaskInstance] """ def __init__( @@ -96,7 +87,6 @@ def ensure_finished_tasks(self, dag_run: "DagRun", session: Session) -> "List[Ta This is for the strange feature of running tasks without dag_run. :param dag_run: The DagRun for which to find finished tasks - :type dag_run: airflow.models.DagRun :return: A list of all the finished tasks of this DAG and execution_date :rtype: list[airflow.models.TaskInstance] """ diff --git a/airflow/ti_deps/deps/base_ti_dep.py b/airflow/ti_deps/deps/base_ti_dep.py index 06c6870d1732f..109545b8c541d 100644 --- a/airflow/ti_deps/deps/base_ti_dep.py +++ b/airflow/ti_deps/deps/base_ti_dep.py @@ -66,11 +66,8 @@ def _get_dep_statuses(self, ti, session, dep_context): representing if each of the passed in task's upstream tasks succeeded or not. :param ti: the task instance to get the dependency status for - :type ti: airflow.models.TaskInstance :param session: database session - :type session: sqlalchemy.orm.session.Session :param dep_context: the context for which this dependency should be evaluated - :type dep_context: DepContext """ raise NotImplementedError @@ -81,11 +78,8 @@ def get_dep_statuses(self, ti, session, dep_context=None): checks for all dependencies. :param ti: the task instance to get the dependency status for - :type ti: airflow.models.TaskInstance :param session: database session - :type session: sqlalchemy.orm.session.Session :param dep_context: the context for which this dependency should be evaluated - :type dep_context: DepContext """ if dep_context is None: dep_context = DepContext() @@ -108,12 +102,9 @@ def is_met(self, ti, session, dep_context=None): passing. :param ti: the task instance to see if this dependency is met for - :type ti: airflow.models.TaskInstance :param session: database session - :type session: sqlalchemy.orm.session.Session :param dep_context: The context this dependency is being checked under that stores state that can be used by this dependency. - :type dep_context: BaseDepContext """ return all(status.passed for status in self.get_dep_statuses(ti, session, dep_context)) @@ -123,12 +114,9 @@ def get_failure_reasons(self, ti, session, dep_context=None): Returns an iterable of strings that explain why this dependency wasn't met. :param ti: the task instance to see if this dependency is met for - :type ti: airflow.models.TaskInstance :param session: database session - :type session: sqlalchemy.orm.session.Session :param dep_context: The context this dependency is being checked under that stores state that can be used by this dependency.
- :type dep_context: BaseDepContext """ for dep_status in self.get_dep_statuses(ti, session, dep_context): if not dep_status.passed: diff --git a/airflow/ti_deps/deps/dagrun_id_dep.py b/airflow/ti_deps/deps/dagrun_id_dep.py index a60951414da90..84be503300f4b 100644 --- a/airflow/ti_deps/deps/dagrun_id_dep.py +++ b/airflow/ti_deps/deps/dagrun_id_dep.py @@ -35,11 +35,8 @@ def _get_dep_statuses(self, ti, session, dep_context=None): Determines if the DagRun is valid for scheduling from the scheduler. :param ti: the task instance to get the dependency status for - :type ti: airflow.models.TaskInstance :param session: database session - :type session: sqlalchemy.orm.session.Session :param dep_context: the context for which this dependency should be evaluated - :type dep_context: DepContext :return: True if DagRun ID is valid for scheduling from the scheduler. """ dagrun = ti.get_dagrun(session) diff --git a/airflow/ti_deps/deps/pool_slots_available_dep.py b/airflow/ti_deps/deps/pool_slots_available_dep.py index a37e62aac0471..db3a592719e70 100644 --- a/airflow/ti_deps/deps/pool_slots_available_dep.py +++ b/airflow/ti_deps/deps/pool_slots_available_dep.py @@ -35,11 +35,8 @@ def _get_dep_statuses(self, ti, session, dep_context=None): Determines if the pool the task instance is in has available slots :param ti: the task instance to get the dependency status for - :type ti: airflow.models.TaskInstance :param session: database session - :type session: sqlalchemy.orm.session.Session :param dep_context: the context for which this dependency should be evaluated - :type dep_context: DepContext :return: True if there are available slots in the pool. """ from airflow.models.pool import Pool # To avoid a circular dependency diff --git a/airflow/ti_deps/deps/trigger_rule_dep.py b/airflow/ti_deps/deps/trigger_rule_dep.py index 2864b7bfa7c33..ca66a170fd304 100644 --- a/airflow/ti_deps/deps/trigger_rule_dep.py +++ b/airflow/ti_deps/deps/trigger_rule_dep.py @@ -45,9 +45,7 @@ def _get_states_count_upstream_ti(ti, finished_tasks): whether this ti can run in this iteration :param ti: the ti that we want to calculate deps for - :type ti: airflow.models.TaskInstance :param finished_tasks: all the finished tasks of the dag_run - :type finished_tasks: list[airflow.models.TaskInstance] """ counter = Counter(task.state for task in finished_tasks if task.task_id in ti.task.upstream_task_ids) return ( @@ -101,24 +99,16 @@ def _evaluate_trigger_rule( rule was met. :param ti: the task instance to evaluate the trigger rule of - :type ti: airflow.models.TaskInstance :param successes: Number of successful upstream tasks - :type successes: int :param skipped: Number of skipped upstream tasks - :type skipped: int :param failed: Number of failed upstream tasks - :type failed: int :param upstream_failed: Number of upstream_failed upstream tasks - :type upstream_failed: int :param done: Number of completed upstream tasks - :type done: int :param flag_upstream_failed: This is a hack to generate the upstream_failed state creation while checking to see whether the task instance is runnable.
It was the shortest path to add the feature - :type flag_upstream_failed: bool :param session: database session - :type session: sqlalchemy.orm.session.Session """ task = ti.task upstream = len(task.upstream_task_ids) diff --git a/airflow/ti_deps/deps/valid_state_dep.py b/airflow/ti_deps/deps/valid_state_dep.py index 0e89e6852906f..4216ed3c417da 100644 --- a/airflow/ti_deps/deps/valid_state_dep.py +++ b/airflow/ti_deps/deps/valid_state_dep.py @@ -27,7 +27,6 @@ class ValidStateDep(BaseTIDep): :param valid_states: A list of valid states that a task instance can have to meet this dependency. - :type valid_states: set(str) :return: whether or not the task instance's state is valid """ diff --git a/airflow/utils/configuration.py b/airflow/utils/configuration.py index ec810a9d55d50..053f7b06ec3e0 100644 --- a/airflow/utils/configuration.py +++ b/airflow/utils/configuration.py @@ -31,11 +31,9 @@ def tmp_configuration_copy(chmod=0o600, include_env=True, include_cmds=True): :param include_env: Should the value of configuration from ``AIRFLOW__`` environment variables be included or not - :type include_env: bool :param include_cmds: Should the result of calling any *_cmd config be set (True, default), or should the _cmd options be left as the command to run (False) - :type include_cmds: bool :return: a path to a temporary file """ cfg_dict = conf.as_dict( diff --git a/airflow/utils/dates.py b/airflow/utils/dates.py index 30771b1b2c360..ca8aaa84c0924 100644 --- a/airflow/utils/dates.py +++ b/airflow/utils/dates.py @@ -63,15 +63,11 @@ def date_range( datetime.datetime(2016, 3, 1, 0, 0, tzinfo=Timezone('UTC'))] :param start_date: anchor date to start the series from - :type start_date: datetime.datetime :param end_date: right boundary for the date range - :type end_date: datetime.datetime :param num: alternatively to end_date, you can specify the number of entries you want in the range. This number can be negative, output will always be sorted regardless - :type num: int :param delta: step length. It can be datetime.timedelta or cron expression as string - :type delta: datetime.timedelta or str or dateutil.relativedelta """ warnings.warn( "`airflow.utils.dates.date_range()` is deprecated. Please use `airflow.timetables`.", diff --git a/airflow/utils/dot_renderer.py b/airflow/utils/dot_renderer.py index 81f6a5e165911..bd65300e409bd 100644 --- a/airflow/utils/dot_renderer.py +++ b/airflow/utils/dot_renderer.py @@ -141,7 +141,6 @@ def render_dag_dependencies(deps: Dict[str, List['DagDependency']]) -> graphviz. Renders the DAG dependency to the DOT object. :param deps: List of DAG dependencies - :type deps: List[DagDependency] :return: Graphviz object :rtype: graphviz.Digraph """ @@ -170,9 +169,7 @@ def render_dag(dag: DAG, tis: Optional[List[TaskInstance]] = None) -> graphviz.D If a task instance list is passed, the nodes will be painted according to task statuses. :param dag: DAG that will be rendered. - :type dag: airflow.models.dag.DAG :param tis: List of task instances - :type tis: Optional[List[TaskInstance]] :return: Graphviz object :rtype: graphviz.Digraph """ diff --git a/airflow/utils/file.py b/airflow/utils/file.py index 0c5ebca0ddf54..2ca49f6f96e56 100644 --- a/airflow/utils/file.py +++ b/airflow/utils/file.py @@ -51,9 +51,7 @@ def mkdirs(path, mode): as necessary. If directory already exists, this is a no-op. :param path: The directory to create - :type path: str :param mode: The mode to give to the directory e.g.
0o755, ignores umask - :type mode: int """ import warnings @@ -160,16 +158,12 @@ def list_py_file_paths( Traverse a directory and look for Python files. :param directory: the directory to traverse - :type directory: unicode :param safe_mode: whether to use a heuristic to determine whether a file contains Airflow DAG definitions. If not provided, use the core.DAG_DISCOVERY_SAFE_MODE configuration setting. If not set, default to safe. - :type safe_mode: bool :param include_examples: include example DAGs - :type include_examples: bool :param include_smart_sensor: include smart sensor native control DAGs - :type include_examples: bool :return: a list of paths to Python files in the specified directory :rtype: list[unicode] """ diff --git a/airflow/utils/log/log_reader.py b/airflow/utils/log/log_reader.py index 623af90fbe5dd..36811d1572c1d 100644 --- a/airflow/utils/log/log_reader.py +++ b/airflow/utils/log/log_reader.py @@ -38,12 +38,9 @@ def read_log_chunks( Reads chunks of Task Instance logs. :param ti: The task instance - :type ti: TaskInstance :param try_number: If provided, logs for the given try will be returned. Otherwise, logs from all attempts are returned. - :type try_number: Optional[int] :param metadata: A dictionary containing information about how to read the task log - :type metadata: dict :rtype: Tuple[List[Tuple[Tuple[str, str]]], Dict[str, str]] The following is an example of how to use this method to read log: @@ -66,11 +63,8 @@ def read_log_stream(self, ti: TaskInstance, try_number: Optional[int], metadata: Used to continuously read the log to the end :param ti: The Task Instance - :type ti: TaskInstance :param try_number: the task try number - :type try_number: Optional[int] :param metadata: A dictionary containing information about how to read the task log - :type metadata: dict :rtype: Iterator[str] """ if try_number is None: @@ -120,9 +114,7 @@ def render_log_filename( Renders the log attachment filename :param ti: The task instance - :type ti: TaskInstance :param try_number: The task try number - :type try_number: Optional[int] :rtype: str """ dagrun = ti.get_dagrun(session=session) diff --git a/airflow/utils/operator_helpers.py b/airflow/utils/operator_helpers.py index 03d870f1ea003..e8513fa5d6f59 100644 --- a/airflow/utils/operator_helpers.py +++ b/airflow/utils/operator_helpers.py @@ -63,9 +63,7 @@ def context_to_airflow_vars(context: Mapping[str, Any], in_env_var_format: bool in_env_var_format is set to True. :param context: The context for the task_instance of interest. - :type context: dict :param in_env_var_format: If returned vars should be in ABC_DEF_GHI format. - :type in_env_var_format: bool :return: task_instance context as dict. """ params = {} diff --git a/airflow/utils/operator_resources.py b/airflow/utils/operator_resources.py index 0f855e4831da6..8c3263247faa9 100644 --- a/airflow/utils/operator_resources.py +++ b/airflow/utils/operator_resources.py @@ -32,13 +32,10 @@ class Resource: Represents a resource requirement in an execution environment for an operator. :param name: Name of the resource - :type name: str :param units_str: The string representing the units of a resource (e.g. MB for a CPU resource) to be used for display purposes - :type units_str: str :param qty: The number of units of the specified resource that are required for execution of the operator. - :type qty: long """ def __init__(self, name, units_str, qty): @@ -111,13 +108,9 @@ class Resources: default values from the airflow config.
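A minimal sketch of calling list_py_file_paths as documented above; the DAG folder path is illustrative:

.. code-block:: python

    from airflow.utils.file import list_py_file_paths

    dag_files = list_py_file_paths("/opt/airflow/dags", include_examples=False)
    print(dag_files)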
:param cpus: The number of cpu cores that are required - :type cpus: long :param ram: The amount of RAM required - :type ram: long :param disk: The amount of disk space required - :type disk: long :param gpus: The number of gpu units that are required - :type gpus: long """ def __init__( diff --git a/airflow/utils/process_utils.py b/airflow/utils/process_utils.py index a369bd641cb27..74d4b83b98fc3 100644 --- a/airflow/utils/process_utils.py +++ b/airflow/utils/process_utils.py @@ -160,7 +160,6 @@ def execute_in_subprocess(cmd: List[str]): Execute a process and stream output to logger :param cmd: command and arguments to run - :type cmd: List[str] """ log.info("Executing cmd: %s", " ".join(shlex.quote(c) for c in cmd)) with subprocess.Popen( @@ -222,9 +221,7 @@ def kill_child_processes_by_pids(pids_to_kill: List[int], timeout: int = 5) -> N the SIGKILL signal, if the process is still alive. :param pids_to_kill: List of PID to be killed. - :type pids_to_kill: List[int] :param timeout: The time to wait before sending the SIGKILL signal. - :type timeout: Optional[int] """ this_process = psutil.Process(os.getpid()) # Only check child processes to ensure that we don't have a case diff --git a/airflow/utils/python_virtualenv.py b/airflow/utils/python_virtualenv.py index 6b7265456a321..412260f072e7f 100644 --- a/airflow/utils/python_virtualenv.py +++ b/airflow/utils/python_virtualenv.py @@ -86,16 +86,11 @@ def prepare_virtualenv( """Creates a virtual environment and installs the additional python packages. :param venv_directory: The path to the directory where the environment will be created. - :type venv_directory: str :param python_bin: Path to the Python executable. - :type python_bin: str :param system_site_packages: Whether to include system_site_packages in your virtualenv. See virtualenv documentation for more information. - :type system_site_packages: bool :param requirements: List of additional python packages. - :type requirements: List[str] :param requirements_file_path: Path to the ``requirements.txt`` file. - :type requirements_file_path: str :return: Path to a binary file with Python in a virtual environment. :rtype: str """ @@ -127,9 +122,7 @@ def write_python_script( :param jinja_context: The jinja context variables to unpack and replace with their placeholders in the template file. - :type jinja_context: dict :param filename: The name of the file to dump the rendered script to. - :type filename: str :param render_template_as_native_obj: If ``True``, rendered Jinja template would be converted to a native Python object """ diff --git a/airflow/utils/task_group.py b/airflow/utils/task_group.py index 3e47e09dafc73..8f193f4134169 100644 --- a/airflow/utils/task_group.py +++ b/airflow/utils/task_group.py @@ -44,16 +44,12 @@ class TaskGroup(DAGNode): :param group_id: a unique, meaningful id for the TaskGroup. group_id must not conflict with group_id of TaskGroup or task_id of tasks in the DAG. Root TaskGroup has group_id set to None. - :type group_id: str :param prefix_group_id: If set to True, child task_id and group_id will be prefixed with this TaskGroup's group_id. If set to False, child task_id and group_id are not prefixed. Default is True. - :type prefix_group_id: bool :param parent_group: The parent TaskGroup of this TaskGroup. parent_group is set to None for the root TaskGroup. - :type parent_group: TaskGroup :param dag: The DAG that this TaskGroup belongs to.
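A minimal sketch of prepare_virtualenv as documented above; the target directory and the pinned package are illustrative:

.. code-block:: python

    from airflow.utils.python_virtualenv import prepare_virtualenv

    python_bin = prepare_virtualenv(
        venv_directory="/tmp/example_venv",  # illustrative target directory
        python_bin="python3",
        system_site_packages=False,
        requirements=["requests==2.26.0"],  # illustrative pinned package
    )
    print(python_bin)  # e.g. /tmp/example_venv/bin/python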
-    :type dag: airflow.models.DAG
     :param default_args: A dictionary of default parameters to be used
         as constructor keyword parameters when initialising operators,
         will override default_args defined in the DAG level.
@@ -61,16 +57,11 @@ class TaskGroup(DAGNode):
         here, meaning that if your dict contains `'depends_on_past': True`
         here and `'depends_on_past': False` in the operator's call
         `default_args`, the actual value will be `False`.
-    :type default_args: dict
     :param tooltip: The tooltip of the TaskGroup node when displayed in the UI
-    :type tooltip: str
     :param ui_color: The fill color of the TaskGroup node when displayed in the UI
-    :type ui_color: str
     :param ui_fgcolor: The label color of the TaskGroup node when displayed in the UI
-    :type ui_fgcolor: str
     :param add_suffix_on_collision: If this task group name already exists,
         automatically add `__1` etc suffixes
-    :type from_decorator: add_suffix_on_collision
     """
     used_group_ids: Set[Optional[str]]
diff --git a/airflow/utils/weekday.py b/airflow/utils/weekday.py
index 68413076bb26e..21ee8a4c94c93 100644
--- a/airflow/utils/weekday.py
+++ b/airflow/utils/weekday.py
@@ -37,7 +37,6 @@ def get_weekday_number(cls, week_day_str: str):
         Return the ISO Week Day Number for a Week Day
         :param week_day_str: Full Name of the Week Day. Example: "Sunday"
-        :type week_day_str: str
         :return: ISO Week Day Number corresponding to the provided Weekday
         """
         sanitized_week_day_str = week_day_str.upper()
diff --git a/airflow/www/fab_security/manager.py b/airflow/www/fab_security/manager.py
index aae3c04a733a2..ce3f68af377d2 100644
--- a/airflow/www/fab_security/manager.py
+++ b/airflow/www/fab_security/manager.py
@@ -1140,7 +1140,6 @@ def auth_user_oid(self, email):
         Openid user Authentication
         :param email: user's email to authenticate
-        :type self: User model
         """
         user = self.find_user(email=email)
         if user is None or (not user.is_active):
@@ -1155,7 +1154,6 @@ def auth_user_remote_user(self, username):
         REMOTE_USER user Authentication
         :param username: user's username for remote auth
-        :type self: User model
         """
         user = self.find_user(username=username)
@@ -1537,7 +1535,6 @@ def get_action(self, name: str):
         Gets an existing action record.
         :param name: name
-        :type name: str
         :return: Action record, if it exists
         :rtype: Action
         """
@@ -1566,7 +1563,6 @@ def delete_action(self, name: str) -> bool:
         Deletes a permission action.
         :param name: Name of action to delete (e.g. can_read).
-        :type name: str
         :return: Whether or not delete was successful.
         :rtype: bool
         """
@@ -1583,7 +1579,6 @@ def get_resource(self, name: str):
         Returns a resource record by name, if it exists.
         :param name: Name of resource
-        :type name: str
         """
         raise NotImplementedError
@@ -1601,7 +1596,6 @@ def create_resource(self, name):
         Create a resource with the given name.
         :param name: The name of the resource to create created.
-        :type name: str
         """
         raise NotImplementedError
@@ -1625,9 +1619,7 @@ def get_permission(self, action_name: str, resource_name: str):
         Gets a permission made with the given action->resource pair, if the permission already exists.
         :param action_name: Name of action
-        :type action_name: str
         :param resource_name: Name of resource
-        :type resource_name: str
         :return: The existing permission
         :rtype: Permission
         """
@@ -1638,7 +1630,6 @@ def get_resource_permissions(self, resource):
         Retrieve permission pairs associated with a specific resource object.
         :param resource: Object representing a single resource.
-        :type resource: Resource
         :return: Action objects representing resource->action pair
         :rtype: Permission
         """
@@ -1649,9 +1640,7 @@ def create_permission(self, action_name: str, resource_name: str):
         Creates a permission linking an action and resource.
         :param action_name: Name of existing action
-        :type action_name: str
         :param resource_name: Name of existing resource
-        :type resource_name: str
         :return: Resource created
         :rtype: Permission
         """
@@ -1663,9 +1652,7 @@ def delete_permission(self, action_name: str, resource_name: str) -> None:
         underlying action or resource.
         :param action_name: Name of existing action
-        :type action_name: str
         :param resource_name: Name of existing resource
-        :type resource_name: str
         :return: None
         :rtype: None
         """
@@ -1679,9 +1666,7 @@ def add_permission_to_role(self, role, permission) -> None:
         Add an existing permission pair to a role.
         :param role: The role about to get a new permission.
-        :type role
         :param permission: The permission pair to add to a role.
-        :type permission: Permission
         :return: None
         :rtype: None
         """
@@ -1692,9 +1677,7 @@ def remove_permission_from_role(self, role, permission) -> None:
         Remove a permission pair from a role.
         :param role: User role containing permissions.
-        :type role
         :param permission: Object representing resource-> action pair
-        :type permission: Permission
         """
         raise NotImplementedError
diff --git a/airflow/www/fab_security/sqla/manager.py b/airflow/www/fab_security/sqla/manager.py
index d86b077314259..2d0c9500be91a 100644
--- a/airflow/www/fab_security/sqla/manager.py
+++ b/airflow/www/fab_security/sqla/manager.py
@@ -284,7 +284,6 @@ def get_action(self, name: str) -> Action:
         Gets an existing action record.
         :param name: name
-        :type name: str
         :return: Action record, if it exists
         :rtype: Action
         """
@@ -378,7 +377,6 @@ def delete_action(self, name: str) -> bool:
         Deletes a permission action.
         :param name: Name of action to delete (e.g. can_read).
-        :type name: str
         :return: Whether or not delete was successful.
         :rtype: bool
         """
@@ -408,7 +406,6 @@ def get_resource(self, name: str) -> Resource:
         Returns a resource record by name, if it exists.
         :param name: Name of resource
-        :type name: str
         :return: Resource record
         :rtype: Resource
         """
@@ -428,7 +425,6 @@ def create_resource(self, name) -> Resource:
         Create a resource with the given name.
         :param name: The name of the resource to create created.
-        :type name: str
         :return: The FAB resource created.
         :rtype: Resource
         """
@@ -484,9 +480,7 @@ def get_permission(self, action_name: str, resource_name: str) -> Optional[Permi
         Gets a permission made with the given action->resource pair, if the permission already exists.
         :param action_name: Name of action
-        :type action_name: str
         :param resource_name: Name of resource
-        :type resource_name: str
         :return: The existing permission
         :rtype: Permission
         """
@@ -505,7 +499,6 @@ def get_resource_permissions(self, resource: Resource) -> Permission:
         Retrieve permission pairs associated with a specific resource object.
         :param resource: Object representing a single resource.
-        :type resource: Resource
         :return: Action objects representing resource->action pair
         :rtype: Permission
         """
@@ -545,9 +538,7 @@ def delete_permission(self, action_name: str, resource_name: str) -> None:
         underlying action or resource.
         :param action_name: Name of existing action
-        :type action_name: str
         :param resource_name: Name of existing resource
-        :type resource_name: str
         :return: None
         :rtype: None
         """
@@ -585,9 +576,7 @@ def add_permission_to_role(self, role: Role, permission: Permission) -> None:
         Add an existing permission pair to a role.
         :param role: The role about to get a new permission.
-        :type role: Role
         :param permission: The permission pair to add to a role.
-        :type permission: Permission
         :return: None
         :rtype: None
         """
@@ -606,9 +595,7 @@ def remove_permission_from_role(self, role: Role, permission: Permission) -> Non
         Remove a permission pair from a role.
         :param role: User role containing permissions.
-        :type role: Role
         :param permission: Object representing resource-> action pair
-        :type permission: Permission
         """
         if permission in role.permissions:
             try:
diff --git a/airflow/www/security.py b/airflow/www/security.py
index 5f331f3780ed9..7cc8801cd3dd5 100644
--- a/airflow/www/security.py
+++ b/airflow/www/security.py
@@ -390,11 +390,8 @@ def has_access(self, action_name, resource_name, user=None) -> bool:
         (e.g can_read, can_write) on the given resource.
         :param action_name: action_name on resource (e.g can_read, can_edit).
-        :type action_name: str
         :param resource_name: name of view-menu or resource.
-        :type resource_name: str
         :param user: user name
-        :type user: str
         :return: Whether user could perform certain action on the resource.
         :rtype bool
         """
@@ -419,11 +416,8 @@ def _has_access(self, user: User, action_name: str, resource_name: str) -> bool:
         Wraps the FAB built-in view access method. Won't work for AllDag access.
         :param user: user object
-        :type user: User
         :param action_name: action_name on resource (e.g can_read, can_edit).
-        :type action_name: str
         :param resource_name: name of resource.
-        :type resource_name: str
         :return: a bool whether user could perform certain action on the resource.
         :rtype bool
         """
@@ -489,9 +483,7 @@ def _merge_perm(self, action_name, resource_name):
         It will add the related entry to ab_permission and ab_resource two meta tables as well.
         :param action_name: Name of the action
-        :type action_name: str
         :param resource_name: Name of the resource
-        :type resource_name: str
         :return:
         """
         action = self.get_action(action_name)
@@ -636,11 +628,9 @@ def sync_perm_for_dag(self, dag_id, access_control=None):
         as only / refresh button or DagBag will call this function
         :param dag_id: the ID of the DAG whose permissions should be updated
-        :type dag_id: str
         :param access_control: a dict where each key is a rolename and
             each value is a set() of action names (e.g., {'can_read'}
-        :type access_control: dict
         :return:
         """
         dag_resource_name = permissions.resource_name_for_dag(dag_id)
@@ -655,10 +645,8 @@ def _sync_dag_view_permissions(self, dag_id, access_control):
         Set the access policy on the given DAG's ViewModel.
         :param dag_id: the ID of the DAG whose permissions should be updated
-        :type dag_id: str
         :param access_control: a dict where each key is a rolename and
             each value is a set() of action names (e.g.
             {'can_read'})
-        :type access_control: dict
         """
         dag_resource_name = permissions.resource_name_for_dag(dag_id)
diff --git a/airflow/www/utils.py b/airflow/www/utils.py
index b3945af756613..cea50114e12b1 100644
--- a/airflow/www/utils.py
+++ b/airflow/www/utils.py
@@ -612,14 +612,10 @@ class UIAlert:
     Helper for alerts messages shown on the UI
     :param message: The message to display, either a string or Markup
-    :type message: Union[str,Markup]
     :param category: The category of the message, one of "info", "warning", "error", or
         any custom category. Defaults to "info".
-    :type category: str
     :param roles: List of roles that should be shown the message. If ``None``, show to all users.
-    :type roles: Optional[List[str]]
     :param html: Whether the message has safe html markup in it. Defaults to False.
-    :type html: bool
     For example, show a message to all users:
diff --git a/docs/apache-airflow/howto/export-more-env-vars.rst b/docs/apache-airflow/howto/export-more-env-vars.rst
index 218e0d0547dcf..336e83ae68386 100644
--- a/docs/apache-airflow/howto/export-more-env-vars.rst
+++ b/docs/apache-airflow/howto/export-more-env-vars.rst
@@ -47,7 +47,6 @@ In your ``airflow_local_settings.py`` file.
     def get_airflow_context_vars(context) -> Dict[str, str]:
         """
         :param context: The context for the task_instance of interest.
-        :type context: dict
         """
         # more env vars
         return {"airflow_cluster": "main"}
diff --git a/docs/conf.py b/docs/conf.py
index f21a1f40e02b0..149f01600e6b3 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -589,6 +589,11 @@ def _get_params(root_schema: dict, prefix: str = "", default_section: str = "")
 # The default options for autodoc directives. They are applied to all autodoc directives automatically.
 autodoc_default_options = {'show-inheritance': True, 'members': True}
+autodoc_typehints = 'description'
+autodoc_typehints_description_target = 'documented'
+autodoc_typehints_format = 'short'
+
+
 # -- Options for sphinx.ext.intersphinx ----------------------------------------
 # See: https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index 6dad135762a89..f989bcb3aa60e 100644
--- a/docs/spelling_wordlist.txt
+++ b/docs/spelling_wordlist.txt
@@ -80,6 +80,7 @@ Ctrl
 DAGs
 DBs
 Daemonize
+DagCallbackRequest
 DagFileProcessorManager
 DagRun
 DagRunState
@@ -136,6 +137,7 @@ EmrCreateJobFlow
 Enum
 Env
 EnvVar
+EventBufferValueType
 ExaConnection
 Exasol
 Failover
@@ -329,6 +331,7 @@ Sasl
 SecretManagerClient
 Seedlist
 Sendgrid
+SerializedDAG
 SlackHook
 SnowflakeHook
 Spark
diff --git a/setup.py b/setup.py
index c9d4d0ba87d8d..78f1b275e020c 100644
--- a/setup.py
+++ b/setup.py
@@ -252,7 +252,10 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
 ]
 doc = [
     'click>=7.1,<9',
-    'sphinx>=4.0.0, <5.0.0',
+    'sphinx>=4.4.0, <5.0.0',
+    # Without this, Sphinx goes in to a _very_ large backtrack on Python 3.7,
+    # even though Sphinx 4.4.0 has this but with python_version<3.10.
+    'importlib-metadata>=4.4; python_version < "3.8"',
     'sphinx-airflow-theme',
     'sphinx-argparse>=0.1.13',
     'sphinx-autoapi~=1.8.0',
diff --git a/tests/jobs/test_scheduler_job.py b/tests/jobs/test_scheduler_job.py
index 0b03e4a894688..bfca2e88939e2 100644
--- a/tests/jobs/test_scheduler_job.py
+++ b/tests/jobs/test_scheduler_job.py
@@ -169,7 +169,6 @@ def run_single_scheduler_loop_with_no_dags(self, dags_folder):
         dags_folder.
         :param dags_folder: the directory to traverse
-        :type dags_folder: str
         """
         self.scheduler_job = SchedulerJob(
             executor=self.null_exec, num_times_parse_dags=1, subdir=os.path.join(dags_folder)
         )
diff --git a/tests/providers/amazon/aws/utils/eks_test_utils.py b/tests/providers/amazon/aws/utils/eks_test_utils.py
index f059eb7c4f042..eac923e879c30 100644
--- a/tests/providers/amazon/aws/utils/eks_test_utils.py
+++ b/tests/providers/amazon/aws/utils/eks_test_utils.py
@@ -49,13 +49,9 @@ def attributes_to_test(
     :param inputs: A class containing lists of tuples to use for verifying the output
         of cluster or nodegroup creation tests.
-    :type inputs: InputTypes
     :param cluster_name: The name of the cluster under test.
-    :type cluster_name: str
     :param fargate_profile_name: The name of the Fargate profile under test if applicable.
-    :type fargate_profile_name: str
     :param nodegroup_name: The name of the nodegroup under test if applicable.
-    :type nodegroup_name: str
     :return: Returns a list of tuples containing the keys and values to be validated in testing.
     :rtype: List[Tuple]
     """
@@ -90,11 +86,8 @@ def generate_clusters(eks_hook: EksHook, num_clusters: int, minimal: bool) -> Li
     Generates a number of EKS Clusters with data and adds them to the mocked backend.
     :param eks_hook: An EksHook object used to call the EKS API.
-    :type eks_hook: EksHook
     :param num_clusters: Number of clusters to generate.
-    :type num_clusters: int
     :param minimal: If True, only the required values are generated; if False all values are generated.
-    :type minimal: bool
     :return: Returns a list of the names of the generated clusters.
     :rtype: List[str]
     """
@@ -114,13 +107,9 @@ def generate_fargate_profiles(
     Generates a number of EKS Fargate profiles with data and adds them to the mocked backend.
     :param eks_hook: An EksHook object used to call the EKS API.
-    :type eks_hook: EksHook
     :param cluster_name: The name of the EKS Cluster to attach the nodegroups to.
-    :type cluster_name: str
     :param num_profiles: Number of Fargate profiles to generate.
-    :type num_profiles: int
     :param minimal: If True, only the required values are generated; if False all values are generated.
-    :type minimal: bool
     :return: Returns a list of the names of the generated nodegroups.
     :rtype: List[str]
     """
@@ -142,13 +131,9 @@ def generate_nodegroups(
     Generates a number of EKS Managed Nodegroups with data and adds them to the mocked backend.
     :param eks_hook: An EksHook object used to call the EKS API.
-    :type eks_hook: EksHook
     :param cluster_name: The name of the EKS Cluster to attach the nodegroups to.
-    :type cluster_name: str
     :param num_nodegroups: Number of clusters to generate.
-    :type num_nodegroups: int
     :param minimal: If True, only the required values are generated; if False all values are generated.
-    :type minimal: bool
     :return: Returns a list of the names of the generated nodegroups.
     :rtype: List[str]
     """
@@ -170,7 +155,6 @@ def region_matches_partition(region: str, partition: str) -> bool:
     :param region: AWS region code to test.
     :type: region: str
     :param partition: AWS partition code to test.
-    :type partition: str
     :return: Returns True if the provided region and partition are a valid pair.
     :rtype: bool
     """
@@ -193,9 +177,7 @@ def _input_builder(options: InputTypes, minimal: bool) -> Dict:
     :param options: A class containing lists of tuples to use for to create
         the cluster or nodegroup used in testing.
-    :type options: InputTypes
     :param minimal: If True, only the required values are generated; if False all values are generated.
-    :type minimal: bool
     :return: Returns a dict containing the keys and values to be validated in testing.
     :rtype: Dict
     """
@@ -210,7 +192,6 @@ def string_to_regex(value: str) -> Pattern[str]:
     Converts a string template into a regex template for pattern matching.
     :param value: The template string to convert.
-    :type value: str
     :returns: Returns a regex pattern
     :rtype: Pattern[str]
     """
diff --git a/tests/providers/google/cloud/transfers/test_facebook_ads_to_gcs_system.py b/tests/providers/google/cloud/transfers/test_facebook_ads_to_gcs_system.py
index 304a30a0afe3c..1aac8e2e945c0 100644
--- a/tests/providers/google/cloud/transfers/test_facebook_ads_to_gcs_system.py
+++ b/tests/providers/google/cloud/transfers/test_facebook_ads_to_gcs_system.py
@@ -43,7 +43,6 @@ def provide_facebook_connection(key_file_path: str):
     required scopes and project id.
     :param key_file_path: Path to file with FACEBOOK credentials .json file.
-    :type key_file_path: str
     """
     if not key_file_path.endswith(".json"):
         raise AirflowException("Use a JSON key file.")
diff --git a/tests/sensors/test_timeout_sensor.py b/tests/sensors/test_timeout_sensor.py
index 9ee42694da0f4..ad778e14c8204 100644
--- a/tests/sensors/test_timeout_sensor.py
+++ b/tests/sensors/test_timeout_sensor.py
@@ -37,7 +37,6 @@ class TimeoutTestSensor(BaseSensorOperator):
     Sensor that always returns the return_value provided
     :param return_value: Set to true to mark the task as SKIPPED on failure
-    :type return_value: any
     """
     def __init__(self, return_value=False, **kwargs):
diff --git a/tests/test_utils/amazon_system_helpers.py b/tests/test_utils/amazon_system_helpers.py
index bc7e9165ca8c4..067c780897075 100644
--- a/tests/test_utils/amazon_system_helpers.py
+++ b/tests/test_utils/amazon_system_helpers.py
@@ -88,9 +88,7 @@ def create_connection(aws_conn_id: str, region: str) -> None:
         Create aws connection with region
         :param aws_conn_id: id of the aws connection to create
-        :type aws_conn_id: str
         :param region: aws region name to use in extra field of the aws connection
-        :type region: str
         """
         db.merge_conn(
             Connection(
@@ -141,9 +139,7 @@ def create_ecs_cluster(aws_conn_id: str, cluster_name: str) -> None:
         If specified cluster exists, it doesn't change and new cluster will not be created.
         :param aws_conn_id: id of the aws connection to use when creating boto3 client/resource
-        :type aws_conn_id: str
         :param cluster_name: name of the cluster to create in aws ecs
-        :type cluster_name: str
         """
         hook = AwsBaseHook(
             aws_conn_id=aws_conn_id,
@@ -175,9 +171,7 @@ def delete_ecs_cluster(aws_conn_id: str, cluster_name: str) -> None:
         Delete ecs cluster with given short name or full Amazon Resource Name (ARN)
         :param aws_conn_id: id of the aws connection to use when creating boto3 client/resource
-        :type aws_conn_id: str
         :param cluster_name: name of the cluster to delete in aws ecs
-        :type cluster_name: str
         """
         hook = AwsBaseHook(
             aws_conn_id=aws_conn_id,
@@ -202,23 +196,15 @@ def create_ecs_task_definition(
         Create ecs task definition with given name
         :param aws_conn_id: id of the aws connection to use when creating boto3 client/resource
-        :type aws_conn_id: str
         :param task_definition: family name for task definition to create in aws ecs
-        :type task_definition: str
         :param container: name of the container
-        :type container: str
         :param image: image used to start a container,
             format: `registry_id`.dkr.ecr.`region`.amazonaws.com/`repository_name`:`tag`
-        :type image: str
         :param execution_role_arn: task execution role that the Amazon ECS container agent can assume,
             format: arn:aws:iam::`registry_id`:role/`role_name`
-        :type execution_role_arn: str
         :param awslogs_group: awslogs group option in log configuration
-        :type awslogs_group: str
         :param awslogs_region: awslogs region option in log configuration
-        :type awslogs_region: str
         :param awslogs_stream_prefix: awslogs stream prefix option in log configuration
-        :type awslogs_stream_prefix: str
         """
         hook = AwsBaseHook(
             aws_conn_id=aws_conn_id,
@@ -258,9 +244,7 @@ def delete_ecs_task_definition(aws_conn_id: str, task_definition: str) -> None:
         Delete all revisions of given ecs task definition
         :param aws_conn_id: id of the aws connection to use when creating boto3 client/resource
-        :type aws_conn_id: str
         :param task_definition: family prefix for task definition to delete in aws ecs
-        :type task_definition: str
         """
         hook = AwsBaseHook(
             aws_conn_id=aws_conn_id,
@@ -284,9 +268,7 @@ def is_ecs_task_definition_exists(aws_conn_id: str, task_definition: str) -> boo
         Check whether given task definition exits in ecs
         :param aws_conn_id: id of the aws connection to use when creating boto3 client/resource
-        :type aws_conn_id: str
         :param task_definition: family prefix for task definition to check in aws ecs
-        :type task_definition: str
         """
         hook = AwsBaseHook(
             aws_conn_id=aws_conn_id,
diff --git a/tests/test_utils/azure_system_helpers.py b/tests/test_utils/azure_system_helpers.py
index e6599b5b39639..0c16964c58bd9 100644
--- a/tests/test_utils/azure_system_helpers.py
+++ b/tests/test_utils/azure_system_helpers.py
@@ -45,7 +45,6 @@ def provide_wasb_default_connection(key_file_path: str):
     Context manager to provide a temporary value for wasb_default connection
     :param key_file_path: Path to file with wasb_default credentials .json file.
-    :type key_file_path: str
     """
     if not key_file_path.endswith(".json"):
         raise AirflowException("Use a JSON key file.")
@@ -68,7 +67,6 @@ def provide_azure_data_lake_default_connection(key_file_path: str):
     """
     Context manager to provide a temporary value for azure_data_lake_default connection
     :param key_file_path: Path to file with azure_data_lake_default credentials .json file.
-    :type key_file_path: str
     """
     required_fields = {'login', 'password', 'extra'}
diff --git a/tests/test_utils/gcp_system_helpers.py b/tests/test_utils/gcp_system_helpers.py
index 9964df712b37e..9a452e28d8640 100644
--- a/tests/test_utils/gcp_system_helpers.py
+++ b/tests/test_utils/gcp_system_helpers.py
@@ -56,7 +56,6 @@ def resolve_full_gcp_key_path(key: str) -> str:
     Returns path full path to provided GCP key.
     :param key: Name of the GCP key, for example ``my_service.json``
-    :type key: str
     :returns: Full path to the key
     """
     path = os.environ.get("CREDENTIALS_DIR", "/files/airflow-breeze-config/keys")
@@ -82,12 +81,9 @@ def provide_gcp_context(
     as ``key_file_path``.
     :param key_file_path: Path to file with GCP credentials .json file.
-    :type key_file_path: str
     :param scopes: OAuth scopes for the connection
-    :type scopes: Sequence
     :param project_id: The id of GCP project for the connection.
         Default: ``os.environ["GCP_PROJECT_ID"]`` or None
-    :type project_id: str
     """
     key_file_path = resolve_full_gcp_key_path(key_file_path)  # type: ignore
     if project_id is None:
diff --git a/tests/test_utils/salesforce_system_helpers.py b/tests/test_utils/salesforce_system_helpers.py
index 811219ba79738..e9982170cc6be 100644
--- a/tests/test_utils/salesforce_system_helpers.py
+++ b/tests/test_utils/salesforce_system_helpers.py
@@ -34,7 +34,6 @@ def provide_salesforce_connection(key_file_path: str):
     Context manager that provides a temporary value of SALESFORCE_DEFAULT connection.
     :param key_file_path: Path to file with SALESFORCE credentials .json file.
-    :type key_file_path: str
     """
    if not key_file_path.endswith(".json"):
         raise AirflowException("Use a JSON key file.")
diff --git a/tests/test_utils/sftp_system_helpers.py b/tests/test_utils/sftp_system_helpers.py
index accf3c9504591..0b026b99a787b 100644
--- a/tests/test_utils/sftp_system_helpers.py
+++ b/tests/test_utils/sftp_system_helpers.py
@@ -32,7 +32,6 @@ def provide_sftp_default_connection(key_file_path: str):
     Context manager to provide a temporary value for sftp_default connection
     :param key_file_path: Path to file with sftp_default credentials .json file.
-    :type key_file_path: str
     """
     if not key_file_path.endswith(".json"):
         raise AirflowException("Use a JSON key file.")
diff --git a/tests/test_utils/system_tests_class.py b/tests/test_utils/system_tests_class.py
index 000cfc9bcf98f..51e27b6c26d92 100644
--- a/tests/test_utils/system_tests_class.py
+++ b/tests/test_utils/system_tests_class.py
@@ -123,9 +123,7 @@ def run_dag(self, dag_id: str, dag_folder: str = DEFAULT_DAG_FOLDER) -> None:
         Runs example dag by it's ID.
         :param dag_id: id of a DAG to be run
-        :type dag_id: str
         :param dag_folder: directory where to look for the specific DAG. Relative to AIRFLOW_HOME.
-        :type dag_folder: str
         """
         if os.environ.get("RUN_AIRFLOW_1_10") == "true":
             # For system tests purpose we are changing airflow/providers