Skip to content

Commit

Permalink
Add check for duplicates in provider.yaml files (apache#12578)
Browse files Browse the repository at this point in the history
  • Loading branch information
Tobiasz Kędzierski authored Nov 24, 2020
1 parent 6d0dcd2 commit 3fa51f9
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 6 deletions.
3 changes: 0 additions & 3 deletions airflow/providers/amazon/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,6 @@ operators:
- airflow.providers.amazon.aws.operators.step_function_start_execution

sensors:
- integration-name: Amazon Athena
python-modules:
- airflow.providers.amazon.aws.sensors.athena
- integration-name: Amazon Athena
python-modules:
- airflow.providers.amazon.aws.sensors.athena
Expand Down
3 changes: 0 additions & 3 deletions airflow/providers/google/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -398,9 +398,6 @@ sensors:
- integration-name: Google Campaign Manager
python-modules:
- airflow.providers.google.marketing_platform.sensors.campaign_manager
- integration-name: Google Dataflow
python-modules:
- airflow.providers.google.cloud.sensors.dataflow
- integration-name: Google Display&Video 360
python-modules:
- airflow.providers.google.marketing_platform.sensors.display_video
Expand Down
56 changes: 56 additions & 0 deletions scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,30 @@ def check_completeness_of_list_of_hooks_sensors_hooks(yaml_files: Dict[str, Dict
sys.exit(1)


def check_duplicates_in_integrations_names_of_hooks_sensors_operators(yaml_files: Dict[str, Dict]):
    """Fail when an integration name is listed more than once in a resource section.

    For every provider file and each of the ``sensors``, ``operators`` and ``hooks``
    sections, the ``integration-name`` values are tallied. Each name that occurs more
    than once is reported exactly once per file and section.

    :param yaml_files: mapping of provider.yaml file path to its parsed content
    :raises SystemExit: with exit code 1 when at least one duplicate is found
    """
    # Function-scope import keeps this block independent of the module import list.
    from collections import Counter

    print("Checking for duplicates in list of {sensors, hooks, operators}")
    errors = []
    for (yaml_file_path, provider_data), resource_type in product(
        yaml_files.items(), ["sensors", "operators", "hooks"]
    ):
        resource_data = provider_data.get(resource_type, [])
        # Entries without an "integration-name" key are tallied under the empty string,
        # so two or more such entries are flagged as duplicates of "".
        occurrences = Counter(r.get("integration-name", "") for r in resource_data)
        # Counter keeps first-seen order, so errors follow the order of the file.
        errors.extend(
            f"Duplicated content of '{resource_type}/integration-name/{integration}' "
            f"in file: {yaml_file_path}"
            for integration, count in occurrences.items()
            if count > 1
        )

    if errors:
        print(f"Found {len(errors)} errors")
        for error in errors:
            print(error)
            print()
        sys.exit(1)


def check_completeness_of_list_of_transfers(yaml_files: Dict[str, Dict]):
print("Checking completeness of list of transfers")
errors = []
Expand Down Expand Up @@ -185,6 +209,35 @@ def check_completeness_of_list_of_transfers(yaml_files: Dict[str, Dict]):
sys.exit(1)


def check_duplicates_in_list_of_transfers(yaml_files: Dict[str, Dict]):
    """Fail when the same source/target integration pair is listed twice in ``transfers``.

    For every provider file, the ``(source-integration-name, target-integration-name)``
    pairs of the ``transfers`` section are tallied. Each pair that occurs more than once
    is reported exactly once per file.

    :param yaml_files: mapping of provider.yaml file path to its parsed content
    :raises SystemExit: with exit code 1 when at least one duplicate is found
    """
    # Function-scope import keeps this block independent of the module import list.
    from collections import Counter

    print("Checking for duplicates in list of transfers")
    errors = []
    resource_type = "transfers"
    for yaml_file_path, provider_data in yaml_files.items():
        resource_data = provider_data.get(resource_type, [])

        # A missing source or target name is tallied as the empty string.
        occurrences = Counter(
            (r.get("source-integration-name", ""), r.get("target-integration-name", ""))
            for r in resource_data
        )
        # Counter keeps first-seen order, so errors follow the order of the file.
        errors.extend(
            f"Duplicated content of \n"
            f" '{resource_type}/source-integration-name/{source}' "
            f" '{resource_type}/target-integration-name/{target}' "
            f"in file: {yaml_file_path}"
            for (source, target), count in occurrences.items()
            if count > 1
        )

    if errors:
        print(f"Found {len(errors)} errors")
        for error in errors:
            print(error)
            print()
        sys.exit(1)


def check_invalid_integration(yaml_files: Dict[str, Dict]):
print("Detect unregistered integrations")
errors = []
Expand Down Expand Up @@ -278,7 +331,10 @@ def check_doc_files(yaml_files: Dict[str, Dict]):
check_integration_duplicates(all_parsed_yaml_files)

check_completeness_of_list_of_hooks_sensors_hooks(all_parsed_yaml_files)
check_duplicates_in_integrations_names_of_hooks_sensors_operators(all_parsed_yaml_files)

check_completeness_of_list_of_transfers(all_parsed_yaml_files)
check_duplicates_in_list_of_transfers(all_parsed_yaml_files)

if all_files_loaded:
# Only check those if all provider files are loaded
Expand Down

0 comments on commit 3fa51f9

Please sign in to comment.