Skip to content

Commit

Permalink
feat: Add config.yml and load_config
Browse the repository at this point in the history
  • Loading branch information
yeha98552 committed May 23, 2024
1 parent 9ee10d1 commit a5fea85
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 0 deletions.
62 changes: 62 additions & 0 deletions airflow/utils/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import hashlib
import os

import yaml
from fuzzywuzzy import process


Expand All @@ -24,3 +26,63 @@ def mapping_place_id(place_name: str, df) -> str:
return df[df["attraction_name"] == best_match]["attraction_id"].values[0]

return None


def resolve_env_variables(config: dict) -> dict:
    """
    Recursively resolve environment variables in the configuration.

    A string value of the form ``ENV_<NAME>`` is replaced by the value of the
    environment variable ``<NAME>``. When that variable is not set, the
    original ``ENV_<NAME>`` string is kept unchanged. Dictionaries and lists
    are walked recursively and updated in place; every other value is
    returned as-is.

    Args:
        config (dict): The configuration dictionary. Nested values may be
            dicts, lists, strings, or other scalars.
    Returns:
        dict: The same configuration object with environment variables resolved.
    """
    prefix = "ENV_"
    if isinstance(config, dict):
        # Only existing keys are reassigned, so mutating during iteration is safe.
        for key, value in config.items():
            config[key] = resolve_env_variables(value)
    elif isinstance(config, list):
        for index, item in enumerate(config):
            config[index] = resolve_env_variables(item)
    elif isinstance(config, str) and config.startswith(prefix):
        # Strip only the leading marker. Splitting on "ENV_" would truncate
        # names that contain "ENV_" again (ENV_DEV_ENV_NAME -> "DEV_").
        env_var = config[len(prefix):]
        return os.getenv(env_var, config)  # Default to original if not found
    return config


def add_env_suffix(config: dict) -> dict:
    """
    Add the environment name to table names in the configuration.

    The environment name is read from the ``ENV_SUBFIX`` environment variable,
    falling back to the top-level ``env`` key of the configuration. Despite
    the function name, the value is prepended: each string entry under
    ``config["gcp"]["table_name"]`` becomes ``"<env>-<table>"``. Entries that
    still start with ``ENV_`` and non-string entries are left untouched. The
    dictionary is modified in place.

    When no environment name is available (unset, empty, or null), or when
    ``gcp`` / ``gcp.table_name`` is missing or not a mapping, the
    configuration is returned unchanged rather than producing names with a
    dangling leading hyphen.

    Args:
        config (dict): The configuration dictionary.
    Returns:
        dict: The same configuration dictionary with the environment name
            added to table names.
    """
    # NOTE(review): "ENV_SUBFIX" looks like a misspelling of "SUFFIX"; the
    # spelling is kept because deployments may already set this variable.
    env_suffix = os.getenv("ENV_SUBFIX", config.get("env", ""))
    if not env_suffix:
        return config

    gcp_section = config.get("gcp")
    table_names = gcp_section.get("table_name") if isinstance(gcp_section, dict) else None
    if not isinstance(table_names, dict):
        return config

    # Only existing keys are reassigned, so mutating during iteration is safe.
    for key, value in table_names.items():
        if isinstance(value, str) and not value.startswith("ENV_"):
            table_names[key] = f"{env_suffix}-{value}"
    return config


def load_config() -> dict:
    """
    Load and process the configuration from a YAML file.

    Reads ``config.yml`` from the directory containing this module, then
    passes the parsed data through ``resolve_env_variables`` and
    ``add_env_suffix``.

    Args:
        None
    Returns:
        dict: The configuration dictionary. An empty ``config.yml`` yields an
            empty dictionary.
    Raises:
        FileNotFoundError: If ``config.yml`` is not present next to this module.
        yaml.YAMLError: If the file is not valid YAML.
    """
    config_path = os.path.join(os.path.dirname(__file__), "config.yml")
    # Explicit encoding keeps parsing independent of the host locale.
    with open(config_path, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; fall back to an empty
        # mapping so the processing steps below do not fail on None.
        config = yaml.safe_load(f) or {}

    # Resolve environment variables
    config = resolve_env_variables(config)

    # Add environment suffix to table names
    config = add_env_suffix(config)

    return config
18 changes: 18 additions & 0 deletions airflow/utils/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Configuration read by load_config() in airflow/utils/common.py.
#
# Values starting with "ENV_" are placeholders: at load time
# resolve_env_variables() replaces "ENV_<NAME>" with the value of the
# environment variable <NAME>, and keeps the placeholder text if it is unset.
#
# NOTE(review): nesting indentation appears to be missing in this copy; the
# loader reads table names from gcp -> table_name — confirm against the
# committed file.

# Fallback environment name used by add_env_suffix() when the ENV_SUBFIX
# environment variable is not set.
env: dev
gcp:
bucket:
raw: ENV_GCP_GCS_RAW_BUCKET
processed: ENV_GCP_GCS_PROCESSED_BUCKET
archive: ENV_GCP_GCS_ARCHIVE_BUCKET
blob_name:
gmaps:
reviews: "gmaps/detailed-reviews/*.parquet"
places: "gmaps/attractions/*.parquet"
bigquery:
ods_dataset: ENV_BIGQUERY_ODS_DATASET
dim_dataset: ENV_BIGQUERY_DIM_DATASET
fact_dataset: ENV_BIGQUERY_FACT_DATASET
# NOTE(review): mart_dataset uses the same variable as fact_dataset; confirm
# whether a separate mart dataset variable was intended.
mart_dataset: ENV_BIGQUERY_FACT_DATASET
# add_env_suffix() rewrites each string entry below to "<env>-<name>".
table_name:
gmaps_places: "gmaps_places"
gmaps_reviews: "gmaps_reviews"

0 comments on commit a5fea85

Please sign in to comment.