Skip to content

Commit

Permalink
python etl
Browse files Browse the repository at this point in the history
  • Loading branch information
IamMiracleAlex committed Mar 17, 2022
0 parents commit 6317a73
Showing 37 changed files with 2,035 additions and 0 deletions.
Binary file added .DS_Store
Binary file not shown.
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"python.pythonPath": "env/bin/python"
}
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Overview

- Extracts, transforms and loads data into MongoDB
- Mongo express (viewer app) available (Docker required)
- Additional field, `original_report`
- Generated Data Reports
- Test cases

# Running

1. To spin up MongoDB and Mongo Express (for viewing the database), run
`docker-compose up`. Note: Docker must be installed.

2. Go to `http://127.0.0.1:8081` to view the database in Mongo Express

3. To run the etl app, run `python3 main.py`


# Testing
To run tests, use: `python3 -m unittest discover`

242 changes: 242 additions & 0 deletions cake_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
entry|cake_diameter|diam_unit|flavor|is_cake_vegan
1|21.8830537inches||RED|FALSE
2|13.75| in|Strawberry|Strawberry
3|480|||FALSE
4|9.8690561|inches|BUTTER|NO
5|17.98|in |Avocado Cake|0
6|251.95554|mm|butter|
7|186.9 mm|mm|Babka|
8|283.5 mm||Chiffon Cake|FALSE
9|293.04925 mm|MM|chocolate|N/A
10|451.4 MM|mm|chokolade|N/A
11|418.4|mm|avocado|yes
12|15.96|inches|Chiffon Cake|
13|24.04in|inches|Strawberry|no
14|24.777824|inches|Caramel Cake|FALSE
15|231.3mm||BISCUIT|NO
16|15.21|in|BLACKFOREST|Y
17|492.28463 mm|mm|avocado|N/A
18|7.9989575|inches|VANILLA|FALSE
19|"2.43"""|""""|Chiffon Cake|
20|16.7502938|inches|rainbow|t
21|514|mm|biscuit|N/A
22|12.64|inches||0
23|360|millimeters|Strawbery|
24|"5.81"""|""""|Chiffon|FALSE
25|6.3082326|in|BLACKFOREST|6
26|435.99012||APFEL|
27|175 mm|mm|N/A|
28|398.01697 millimeters|mm|butter|
29|211.37809|mm|Avocado|Y
30|23.300681||Chiffon|blue
31|588.7|mm|Strawbery|
32|518mm|mm|CARAMEL|
33|4.8|inches|chokolade|
34|301||baunilha|N/A
35|12.58||N/A|NO
36|13.77 in|inches|vanilla|NO
37|171.53843mm|mm|butter|FALSE
38|342.8|mm|VANILLA|
39|513.8 millimeters|mm|N/A|no
40|14.3|in|BUTTER|
41|382.0|mm|vanilla cake|no
42|196 mm|inches|apple pie|
43|534.4mm||APFEL|YES
44|432.3|mm|Avocado Cake|
45|537.21803|mm|sponge|
46|16.75|inches|butter cake|NO
47|21.8325634|inches|apple pie|partially
48|196.82931 millimeters|mm||FALSE
49|9.72|inches|chokolade|
50|10.66 inches||BUTTER|TRUE
51|223||baunilha|FALSE
52|15.73|inches||
53|6.21|inches|apple|FALSE
54|558.53092 mm|mm|CREAM|
55|473.5mm|inches|vanilla|
56|163mm|millimeters|Strawberry|
57|6.89|inches|biscuit|N/A
58|534.9|millimeters|RED|0
59|581|mm|Avocado|FALSE
60|556.2mm||Chiffon Cake|1
61|0.65||vanilla cake|
62|14.83|inches|Caramel Cake|
63|314.04175|mm|Caramel|0
64|394 MM|inches|sponge|6
65|551||choc|YES
66|562.22131 mm||apple pie|N/A
67|248|mm|Caramel|f
68|9.9470194 inches|inches|cream|0
69|456|mm|b. forest|
70|8.68881|inches|biscuit |FALSE
71|392|mm|CREAM|yes
72|280||Chiffon|
73|437.0071 mm||RED|N
74|230|mm|carrot|yes
75|1.26in|inches||FALSE
76|18.7621759|inches|BLACKFOREST|YES
77|"23.25"""|in|caramel|
78|16.59|inches|vanilla|-1
79|10.31 inches|mm|butter cake|YES
80|12.81||rainbow|
81|12.8||sponge|no
82|466.9|mm|biscuit |no
83|"19.8874942"""||APPLE|NO
84|13.33in||carrot|
85|11.74||vanilla cake|N/A
86|1.2686032|inches|Strawbery|
87|11.75 inch||strawbery|TRUE
88|18.57|inches||N
89|7.45 inch||b. forest|6
90|369.5mm|||N/A
91|"6.5824703"""|in|Caramel Cake|
92|9.31||b. forest|N/A
93|test|test|Strawbery|Not sure
94|429.11287||apple pie|no
95|553|mm|chocolate|N/A
96|298.01697 millimeters|mm|butter|not in the slightest!!
97|7.97inches|inches|biscuit|
98|268||CREAM|no
99|575|MM|APPLE|
100|6.19inch|inches||6
101|187.49519||carrot|
102|364.54837|mm|Caramel Cake|
103|202.1mm||caramel|
104|166.2| |BISCUIT|NO
105|5.3301284|inches|CREAM|
106|16.36|inches|chocolate|FALSE
107|211.9 millimeters|mm|APPLE|
108|10.2inch|inches|apple|
109|329|mm|rainbow|N/A
110|22.19|inches |Chiffon|
111|5.1604952|inches|rainbow|
112|18.92762in|18.92762in|18.92762in|18.92762in
113|167.33247|mm|14|
114|"5.8846421 """|in|strawbery|
115|432||SWBERRY|
116|432||apple|0
117|17.63|in||
118|3.8251793|inches|caramel|f
119|24.6356199|inches|Avocado|Y
120|356.20528||biscuit|NO
121|328.03988mm||butter|N/A
122|17.64 inch|inches|chocolate|
123|"12.78 """|inches||FALSE
124|22.21 inch|inches|CREAM|NO
125|295.56098 millimeters||BLACKFOREST|6
126|10.8 inches|mm||no
127|429 mm|mm|CREAM|0
128|13.43|in|black forest|
129|fill this info later|inches||FALSE
130|6.9251056|inches|strawbery|
131|266.2|mm|black forest|0.1
132|12.3453057|inches|SWBERRY|NO
133|16.34 in|||NO
134|"23.28"""|inches||
135|456.1|mm|BUTTER_CAKE|yes
136|22.590211|inches|APPLE|
137|251|mm|Avocado Cake|Y
138|4.71|inches|Caramel Cake|FALSE
139|"24.09"""|inches||f
140|218.24368|mm|vanilla|N
141|0.2m|m|carrot|yes
142|22.93m|inches|avocado|
143|19.19|inches|black forest|no
144|"18.7984662"""|in|SWBERRY|
145|266|millimeters|Caramel|FALSE
146|599.78417|MM|BISCUIT|
147|42.11287|cake|apple pie|no
148|166.941|mm|apple pie|FALSE
149|19.79|inches|RED|
150|22.69|in|BISCUIT|1
151|273.1|mm|BUTTER_CAKE|f
152|"21.9 """|inches|Avocado Cake|6
153|421.1|mm|Strawbery|
154|3.49|inches|butter|
155|very large||butter|
156|227|millimeters|SWBERRY|N
157|534.6mm|mm|biscuit |Y
158|"24.3589237"""|inches|choc|FALSE
159|2.85 in|inches|BLACKFOREST|TRUE
160|5.16 inch|inches||NO
161|35.328749||vanilla cake|
162|496.53185|millimeters|sponge|
163|528.96386 MM||Chiffon Cake|FALSE
164|"12.8"""||Caramel Cake|t
165|174.55053|mm|apple|Y
166|3.1982887 inches|inches|avocado|
167|189.6|millimeters|BISCUIT|
168|266|MM|chokolade|no
169|8.2284843 inches|inches|black forest|NO
170|23.75|inches|Chiffon Cake|0
171|591.00988 millimeters|MM|BLACKFOREST|0
172|17.5759116inch|inches|vanilla|f
173|428.31116 millimeters|inches|BISCUIT|
174|24.4502541|inches||
175|351|millimeters|carrot|N/A
176|14.31|inches|APPLE|N/A
177|14.153215|in|butter|t
178|214.61343mm|MM|Avocado Cake|yes
179|265||BUTTER_CAKE|N/A
180|11.23inches|mm |biscuit |0
181|371|mm|BUTTER|FALSE
182|214|millimeters|Strawbery|
183|21.33||baunilha|
184|260 millimeters|mm|rainbow|
185|12.48|inches|cream|N/A
186|230.33024|millimeters|N/A|
187|196.3||BUTTER_CAKE|0
188|13.96inches|inches||no
189|2|average human head||no
190|15.0069184|inches |Avocado Cake|6
191|275.54087||chokolade|14
192|571|mm|Chiffon|no
193|21.4084627|inches|Chiffon Cake|
194|488.9 MM|mm|BLACKFOREST|NO
195|425.8|mm|apple pie|0
196|319.5|mm|BUTTER|0
197|0.1719|meters|RED|
198|56.640123||baunilha|
199|335.68962 MM||SWBERRY|FALSE
200|514.2||chocolate|NO
201|402|mm|biscuit|
202|313|millimeters| biscuit |0
203|4.7152478||VANILLA |
204|504||biscuit |
205|2.1631699|in|Strawbery|
206|13.9in|inches|biscuit |
207|292|mm|APFEL |FALSE
208|9.2043963|in|strawbery|
209|22.24 in|""""|carrot|0
210|250.04218||carrot|0
211|18.0199549|inches|Chiffon|NO
212|16.62|inches|avocado|FALSE
213|14.07|""""|rainbow|Y
214|301.7|mm|SWBERRY|TRUE
215|2.6792519inches|inches|strawbery|
216|11.8875088|inches|b. forest|no
217|2.83|inches|chokolade|no
218|10.54|in|black forest|
219|505||choc|6
220|519385039|millimeters|carrot|NO
221|518.40178|mm|Avocado|f
222|23.8636638inches|inches|apple|FALSE
223|21.9175111|inches|CREAM|TRUE
224|14.73|inches||
225|3.42|inches|RED|f
226|2.57inches|mm||inches
227|219mm|millimeters|sponge|
228|22.9384568|inches|BUTTER|NO
229|21.48in|inches|Avocado Cake|NO
230|9.05 inch| inches||no
231|198 millimeters|inches|biscuit |1
232|0.5542|m|Babka|
233|588.6 millimeters|mm|carrot|f
234|440.2|mm|APFEL|
235|13.11|inches |caramel|yes
236|555.18474|millimeters|avocado|
237|308.1 MM|mm|APFEL|NO
238|249.6|mm|Chiffon Cake|N/A
239|162.3 mm|MM|Avocado Cake|NO
240|351.79509|mm|butter|FALSE
241|23.5|inches|BLACKFOREST|
18 changes: 18 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
version: "3.8"

services:

  mongo:
    image: mongo
    restart: always
    ports:
      - 27017:27017

  mongo-express:
    image: mongo-express
    restart: always
    ports:
      - 8081:8081
    environment:
      ME_CONFIG_MONGODB_URL: mongodb://mongo:27017/

Empty file added etl/__init__.py
Empty file.
Binary file added etl/__pycache__/__init__.cpython-38.pyc
Binary file not shown.
Binary file added etl/__pycache__/extractor.cpython-38.pyc
Binary file not shown.
Binary file added etl/__pycache__/loader.cpython-38.pyc
Binary file not shown.
Binary file added etl/__pycache__/master.cpython-38.pyc
Binary file not shown.
Binary file added etl/__pycache__/models.cpython-38.pyc
Binary file not shown.
Binary file added etl/__pycache__/reports.cpython-38.pyc
Binary file not shown.
Binary file added etl/__pycache__/transformer.cpython-38.pyc
Binary file not shown.
Binary file added etl/__pycache__/utils.cpython-38.pyc
Binary file not shown.
27 changes: 27 additions & 0 deletions etl/extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import csv

from typing import List


class Extractor:
    """Reads pipe-delimited CSV records from a source file."""

    def __init__(self, in_file_path: str):
        """
        This class extracts data from source file
        Args:
            in_file_path: path to the source file
        """
        self.in_file_path = in_file_path

    def extract_data(self) -> List[dict]:
        """
        Extracts data from CSV file

        The first line of the file is treated as the header and supplies the
        dictionary keys; every following line becomes one dictionary.

        Returns:
            data as a list of dictionaries (empty when the file holds only
            a header, or nothing at all)
        """
        # newline="" is what the csv module asks for so that it can handle
        # line endings (including those inside quoted fields) by itself.
        with open(self.in_file_path, "r", newline="") as csvfile:
            # DictReader consumes the header row on its own to build the
            # field names, so no manual next(reader) is needed -- calling it
            # would silently discard the first data record.
            reader = csv.DictReader(csvfile, delimiter='|')
            return list(reader)
43 changes: 43 additions & 0 deletions etl/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from typing import List

import mongoengine as me

from .models import CakeModel, CakeMongoOrm


def connect():
    """
    Opens the mongoengine connection used by the loader
    """
    database_name = "cakes"
    me.connect(database_name)


class Loader:
    def __init__(self, cake_data: List[CakeModel], test_mode: bool = False):
        """
        Holds the transformed records and, outside of test mode, opens the
        database connection.
        Args:
            cake_data: transformed data
            test_mode: when True, skip connecting to the database
        """

        self.cake_data = cake_data

        if test_mode:
            return
        connect()

    def load_data(self):
        """
        Replaces the stored cake documents with the records held by this loader
        """

        print("Preparing data...")
        documents = []
        for record in self.cake_data:
            # Each record exposes .dict(); its items become the ORM fields.
            documents.append(CakeMongoOrm(**record.dict()))

        # Clear what is already stored before writing the new batch.
        CakeMongoOrm.objects.delete()

        print("Inserting data into the database... please wait")
        CakeMongoOrm.objects.insert(documents)

        print("Data loaded into the database successfully!")
22 changes: 22 additions & 0 deletions etl/master.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from .extractor import Extractor
from .loader import Loader
from .transformer import Transformer
from .reports import Report


def run_etl(input_file: str):
    """
    Executes the extract, transform and load steps in order, then builds
    the report
    Args:
        input_file: path to the source file
    """
    # extract
    raw_rows = Extractor(input_file).extract_data()

    # transform
    transformed = Transformer(raw_rows).transform_data()

    # load
    Loader(transformed).load_data()

    # create reports
    Report().create_report()
Loading
Oops, something went wrong.

0 comments on commit 6317a73

Please sign in to comment.