forked from josephmachado/de_101
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
2024-10-30-08-33-16 - airflow-docker
- Loading branch information
1 parent
cdbec18
commit f261935
Showing
50 changed files
with
87,921 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
16 changes: 16 additions & 0 deletions
16
6-Scheduling-&-Orchestration/containers/airflow/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
FROM apache/airflow:2.9.2

# Install Python requirements before switching to root, so pip runs as the
# base image's default user.
COPY requirements.txt /
RUN pip install --no-cache-dir -r /requirements.txt

# System packages have to be installed as root.
USER root

# RUN python $AIRFLOW_HOME/setup_conn.py

# Install a JDK. The apt package lists are removed in the same layer so they
# do not end up in the final image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    default-jdk && \
    rm -rf /var/lib/apt/lists/*

# NOTE(review): the image is left with root as its final user. If nothing
# downstream relies on that, consider ending with `USER airflow` - confirm
# against the compose file before changing it.
4 changes: 4 additions & 0 deletions
4
6-Scheduling-&-Orchestration/containers/airflow/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
apache-airflow-client==2.9.0 | ||
duckdb==1.0.0 | ||
dbt-core==1.8.0 | ||
dbt-duckdb==1.8.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import csv | ||
import os | ||
from datetime import datetime, timedelta | ||
|
||
import requests | ||
|
||
from airflow import DAG | ||
from airflow.decorators import task | ||
from airflow.operators.bash import BashOperator | ||
from airflow.operators.dummy import DummyOperator | ||
|
||
with DAG( | ||
'coincap_elt', | ||
description='A simple DAG to fetch data \ | ||
from CoinCap Exchanges API and write to a file', | ||
schedule_interval=timedelta(days=1), | ||
start_date=datetime(2023, 1, 1), | ||
catchup=False, | ||
) as dag: | ||
|
||
url = "https://api.coincap.io/v2/exchanges" | ||
file_path = f'{os.getenv("AIRFLOW_HOME")}/data/coincap_exchanges.csv' | ||
|
||
@task
def fetch_coincap_exchanges(url, file_path):
    """Fetch the exchange list from ``url`` and write it to ``file_path`` as CSV.

    The JSON response is expected to hold a list of records under the
    ``'data'`` key. The CSV header is taken from the keys of the first
    record. If the list is empty, no file is written.

    Args:
        url: Endpoint to request.
        file_path: Destination path of the CSV file (overwritten if present).

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not respond in time.
        KeyError: If the response JSON has no ``'data'`` key.
    """
    # requests has no default timeout; bound the call so a stalled API
    # cannot block the task forever.
    response = requests.get(url, timeout=30)
    # Fail the task on an HTTP error instead of parsing an error payload.
    response.raise_for_status()

    exchanges = response.json()['data']
    if not exchanges:
        return

    fieldnames = exchanges[0].keys()
    # newline='' lets the csv module control line endings itself.
    with open(file_path, 'w', newline='') as f:
        dict_writer = csv.DictWriter(f, fieldnames=fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(exchanges)
|
||
stop_pipeline = DummyOperator(task_id='stop_pipeline') | ||
|
||
fetch_coincap_exchanges(url, file_path) >> stop_pipeline |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
from datetime import datetime, timedelta | ||
|
||
from airflow import DAG | ||
from airflow.decorators import task | ||
from airflow.operators.bash import BashOperator | ||
from airflow.operators.dummy import DummyOperator | ||
|
||
with DAG( | ||
'customer_outreach_etl', | ||
description='A simple DAG to generate customer outreach metrics', | ||
schedule_interval=timedelta(days=1), | ||
start_date=datetime(2023, 1, 1), | ||
catchup=False, | ||
) as dag: | ||
|
||
run_dbt = BashOperator( | ||
task_id='run_dbt', | ||
bash_command='cd /opt/airflow/dags/tpch_warehouse && dbt run', | ||
) | ||
|
||
# Run the dbt tests for the project. This task is wired after run_dbt, so
# the models are built before they are tested.
test_dbt = BashOperator(
    task_id='test_dbt',
    bash_command='cd /opt/airflow/dags/tpch_warehouse && dbt test',
)
|
||
stop_pipeline = DummyOperator(task_id='stop_pipeline') | ||
|
||
run_dbt >> test_dbt >> stop_pipeline |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
|
||
target/ | ||
dbt_packages/ | ||
logs/ |
15 changes: 15 additions & 0 deletions
15
6-Scheduling-&-Orchestration/dags/tpch_warehouse/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
Welcome to your new dbt project! | ||
|
||
### Using the starter project | ||
|
||
Try running the following commands: | ||
- dbt run | ||
- dbt test | ||
|
||
|
||
### Resources: | ||
- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) | ||
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers | ||
- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support | ||
- Find [dbt events](https://events.getdbt.com) near you | ||
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices |
Empty file.
36 changes: 36 additions & 0 deletions
36
6-Scheduling-&-Orchestration/dags/tpch_warehouse/dbt_project.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
|
||
# Name your project! Project names should contain only lowercase characters | ||
# and underscores. A good package name should reflect your organization's | ||
# name or the intended use of these models | ||
name: 'tpch_warehouse' | ||
version: '1.0.0' | ||
|
||
# This setting configures which "profile" dbt uses for this project. | ||
profile: 'tpch_warehouse' | ||
|
||
# These configurations specify where dbt should look for different types of files. | ||
# The `model-paths` config, for example, states that models in this project can be | ||
# found in the "models/" directory. You probably won't need to change these! | ||
model-paths: ["models"] | ||
analysis-paths: ["analyses"] | ||
test-paths: ["tests"] | ||
seed-paths: ["seeds"] | ||
macro-paths: ["macros"] | ||
snapshot-paths: ["snapshots"] | ||
|
||
clean-targets: # directories to be removed by `dbt clean` | ||
- "target" | ||
- "dbt_packages" | ||
|
||
|
||
# Configuring models | ||
# Full documentation: https://docs.getdbt.com/docs/configuring-models | ||
|
||
# In this example config, we tell dbt to build all models in the example/ | ||
# directory as views. These settings can be overridden in the individual model | ||
# files using the `{{ config(...) }}` macro. | ||
models: | ||
tpch_warehouse: | ||
# Config indicated by + and applies to all files under models/example/ | ||
example: | ||
+materialized: view |
Empty file.
44 changes: 44 additions & 0 deletions
44
6-Scheduling-&-Orchestration/dags/tpch_warehouse/models/intermediate/dim_customers.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
with | ||
|
||
customers as ( | ||
|
||
select * from {{ ref('stg_customers') }} | ||
), | ||
|
||
nation as ( | ||
|
||
select * from {{ ref('stg_nation') }} | ||
), | ||
|
||
region as ( | ||
|
||
select * from {{ ref('stg_region') }} | ||
), | ||
|
||
renamed as ( | ||
|
||
select | ||
|
||
---------- customer info | ||
c.customer_key, | ||
c.name as customer_name, | ||
c.address, | ||
c.phone, | ||
c.acctbal, | ||
c.mktsegment, | ||
|
||
---------- nation info | ||
n.name as nation_name, | ||
n.comment as nation_comment, | ||
|
||
---------- region info | ||
r.name as region_name, | ||
r.comment as region_comment | ||
|
||
from customers c | ||
left join nation n on c.nationkey = n.nationkey | ||
left join region r on n.regionkey = r.regionkey | ||
|
||
) | ||
|
||
select * from renamed |
27 changes: 27 additions & 0 deletions
27
6-Scheduling-&-Orchestration/dags/tpch_warehouse/models/intermediate/dim_customers.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
models: | ||
- name: dim_customers | ||
description: Dimension table for customer data with enriched information from nation and region. | ||
columns: | ||
- name: customer_key | ||
description: Unique identifier for each customer. | ||
data_tests: | ||
- not_null | ||
- unique | ||
- name: customer_name | ||
description: The name of the customer. | ||
- name: address | ||
description: The address of the customer. | ||
- name: phone | ||
description: Contact phone number of the customer. | ||
- name: acctbal | ||
description: Account balance of the customer. | ||
- name: mktsegment | ||
description: The market segment associated with the customer. | ||
- name: nation_name | ||
description: The name of the nation associated with the customer. | ||
- name: nation_comment | ||
description: Comments or notes related to the nation. | ||
- name: region_name | ||
description: The name of the region associated with the nation. | ||
- name: region_comment | ||
description: Comments or notes related to the region. |
35 changes: 35 additions & 0 deletions
35
6-Scheduling-&-Orchestration/dags/tpch_warehouse/models/intermediate/fct_lineitems.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
with | ||
|
||
source as ( | ||
|
||
select * from {{ ref('stg_lineitems') }} | ||
|
||
), | ||
|
||
renamed as ( | ||
|
||
select | ||
|
||
---------- line item details | ||
orderkey, | ||
partkey, | ||
suppkey, | ||
linenumber, | ||
quantity, | ||
extendedprice, | ||
discount, | ||
tax, | ||
returnflag, | ||
linestatus, | ||
shipdate, | ||
commitdate, | ||
receiptdate, | ||
shipinstruct, | ||
shipmode, | ||
comment | ||
|
||
from source | ||
|
||
) | ||
|
||
select * from renamed |
Oops, something went wrong.