diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..74fc874
Binary files /dev/null and b/.DS_Store differ
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..6bd93e6
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+ "python.pythonPath": "env/bin/python"
+}
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..ea6f3a2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,21 @@
+# Overview
+
+- Extracts, transforms and loads data into MongoDB
+- Mongo Express (viewer app) available (Docker required)
+- Additional field, `original_unit`
+- Generated Data Reports
+- Test cases
+
+# Running
+
+1. To spin up MongoDB and Mongo Express (for viewing the database), run
+`docker-compose up`. Note: Docker must be installed.
+
+2. Go to `http://127.0.0.1:8081` to browse the database in Mongo Express
+
+3. To run the ETL app, run `python3 main.py`
+
+
+# Testing
+To run the tests, use `python3 -m unittest discover`
+
diff --git a/cake_data.csv b/cake_data.csv
new file mode 100644
index 0000000..61a6446
--- /dev/null
+++ b/cake_data.csv
@@ -0,0 +1,242 @@
+entry|cake_diameter|diam_unit|flavor|is_cake_vegan
+1|21.8830537inches||RED|FALSE
+2|13.75| in|Strawberry|Strawberry
+3|480|||FALSE
+4|9.8690561|inches|BUTTER|NO
+5|17.98|in |Avocado Cake|0
+6|251.95554|mm|butter|
+7|186.9 mm|mm|Babka|
+8|283.5 mm||Chiffon Cake|FALSE
+9|293.04925 mm|MM|chocolate|N/A
+10|451.4 MM|mm|chokolade|N/A
+11|418.4|mm|avocado|yes
+12|15.96|inches|Chiffon Cake|
+13|24.04in|inches|Strawberry|no
+14|24.777824|inches|Caramel Cake|FALSE
+15|231.3mm||BISCUIT|NO
+16|15.21|in|BLACKFOREST|Y
+17|492.28463 mm|mm|avocado|N/A
+18|7.9989575|inches|VANILLA|FALSE
+19|"2.43"""|""""|Chiffon Cake|
+20|16.7502938|inches|rainbow|t
+21|514|mm|biscuit|N/A
+22|12.64|inches||0
+23|360|millimeters|Strawbery|
+24|"5.81"""|""""|Chiffon|FALSE
+25|6.3082326|in|BLACKFOREST|6
+26|435.99012||APFEL|
+27|175 mm|mm|N/A|
+28|398.01697 millimeters|mm|butter|
+29|211.37809|mm|Avocado|Y
+30|23.300681||Chiffon|blue
+31|588.7|mm|Strawbery|
+32|518mm|mm|CARAMEL|
+33|4.8|inches|chokolade|
+34|301||baunilha|N/A
+35|12.58||N/A|NO
+36|13.77 in|inches|vanilla|NO
+37|171.53843mm|mm|butter|FALSE
+38|342.8|mm|VANILLA|
+39|513.8 millimeters|mm|N/A|no
+40|14.3|in|BUTTER|
+41|382.0|mm|vanilla cake|no
+42|196 mm|inches|apple pie|
+43|534.4mm||APFEL|YES
+44|432.3|mm|Avocado Cake|
+45|537.21803|mm|sponge|
+46|16.75|inches|butter cake|NO
+47|21.8325634|inches|apple pie|partially
+48|196.82931 millimeters|mm||FALSE
+49|9.72|inches|chokolade|
+50|10.66 inches||BUTTER|TRUE
+51|223||baunilha|FALSE
+52|15.73|inches||
+53|6.21|inches|apple|FALSE
+54|558.53092 mm|mm|CREAM|
+55|473.5mm|inches|vanilla|
+56|163mm|millimeters|Strawberry|
+57|6.89|inches|biscuit|N/A
+58|534.9|millimeters|RED|0
+59|581|mm|Avocado|FALSE
+60|556.2mm||Chiffon Cake|1
+61|0.65||vanilla cake|
+62|14.83|inches|Caramel Cake|
+63|314.04175|mm|Caramel|0
+64|394 MM|inches|sponge|6
+65|551||choc|YES
+66|562.22131 mm||apple pie|N/A
+67|248|mm|Caramel|f
+68|9.9470194 inches|inches|cream|0
+69|456|mm|b. forest|
+70|8.68881|inches|biscuit |FALSE
+71|392|mm|CREAM|yes
+72|280||Chiffon|
+73|437.0071 mm||RED|N
+74|230|mm|carrot|yes
+75|1.26in|inches||FALSE
+76|18.7621759|inches|BLACKFOREST|YES
+77|"23.25"""|in|caramel|
+78|16.59|inches|vanilla|-1
+79|10.31 inches|mm|butter cake|YES
+80|12.81||rainbow|
+81|12.8||sponge|no
+82|466.9|mm|biscuit |no
+83|"19.8874942"""||APPLE|NO
+84|13.33in||carrot|
+85|11.74||vanilla cake|N/A
+86|1.2686032|inches|Strawbery|
+87|11.75 inch||strawbery|TRUE
+88|18.57|inches||N
+89|7.45 inch||b. forest|6
+90|369.5mm|||N/A
+91|"6.5824703"""|in|Caramel Cake|
+92|9.31||b. forest|N/A
+93|test|test|Strawbery|Not sure
+94|429.11287||apple pie|no
+95|553|mm|chocolate|N/A
+96|298.01697 millimeters|mm|butter|not in the slightest!!
+97|7.97inches|inches|biscuit|
+98|268||CREAM|no
+99|575|MM|APPLE|
+100|6.19inch|inches||6
+101|187.49519||carrot|
+102|364.54837|mm|Caramel Cake|
+103|202.1mm||caramel|
+104|166.2| |BISCUIT|NO
+105|5.3301284|inches|CREAM|
+106|16.36|inches|chocolate|FALSE
+107|211.9 millimeters|mm|APPLE|
+108|10.2inch|inches|apple|
+109|329|mm|rainbow|N/A
+110|22.19|inches |Chiffon|
+111|5.1604952|inches|rainbow|
+112|18.92762in|18.92762in|18.92762in|18.92762in
+113|167.33247|mm|14|
+114|"5.8846421 """|in|strawbery|
+115|432||SWBERRY|
+116|432||apple|0
+117|17.63|in||
+118|3.8251793|inches|caramel|f
+119|24.6356199|inches|Avocado|Y
+120|356.20528||biscuit|NO
+121|328.03988mm||butter|N/A
+122|17.64 inch|inches|chocolate|
+123|"12.78 """|inches||FALSE
+124|22.21 inch|inches|CREAM|NO
+125|295.56098 millimeters||BLACKFOREST|6
+126|10.8 inches|mm||no
+127|429 mm|mm|CREAM|0
+128|13.43|in|black forest|
+129|fill this info later|inches||FALSE
+130|6.9251056|inches|strawbery|
+131|266.2|mm|black forest|0.1
+132|12.3453057|inches|SWBERRY|NO
+133|16.34 in|||NO
+134|"23.28"""|inches||
+135|456.1|mm|BUTTER_CAKE|yes
+136|22.590211|inches|APPLE|
+137|251|mm|Avocado Cake|Y
+138|4.71|inches|Caramel Cake|FALSE
+139|"24.09"""|inches||f
+140|218.24368|mm|vanilla|N
+141|0.2m|m|carrot|yes
+142|22.93m|inches|avocado|
+143|19.19|inches|black forest|no
+144|"18.7984662"""|in|SWBERRY|
+145|266|millimeters|Caramel|FALSE
+146|599.78417|MM|BISCUIT|
+147|42.11287|cake|apple pie|no
+148|166.941|mm|apple pie|FALSE
+149|19.79|inches|RED|
+150|22.69|in|BISCUIT|1
+151|273.1|mm|BUTTER_CAKE|f
+152|"21.9 """|inches|Avocado Cake|6
+153|421.1|mm|Strawbery|
+154|3.49|inches|butter|
+155|very large||butter|
+156|227|millimeters|SWBERRY|N
+157|534.6mm|mm|biscuit |Y
+158|"24.3589237"""|inches|choc|FALSE
+159|2.85 in|inches|BLACKFOREST|TRUE
+160|5.16 inch|inches||NO
+161|35.328749||vanilla cake|
+162|496.53185|millimeters|sponge|
+163|528.96386 MM||Chiffon Cake|FALSE
+164|"12.8"""||Caramel Cake|t
+165|174.55053|mm|apple|Y
+166|3.1982887 inches|inches|avocado|
+167|189.6|millimeters|BISCUIT|
+168|266|MM|chokolade|no
+169|8.2284843 inches|inches|black forest|NO
+170|23.75|inches|Chiffon Cake|0
+171|591.00988 millimeters|MM|BLACKFOREST|0
+172|17.5759116inch|inches|vanilla|f
+173|428.31116 millimeters|inches|BISCUIT|
+174|24.4502541|inches||
+175|351|millimeters|carrot|N/A
+176|14.31|inches|APPLE|N/A
+177|14.153215|in|butter|t
+178|214.61343mm|MM|Avocado Cake|yes
+179|265||BUTTER_CAKE|N/A
+180|11.23inches|mm |biscuit |0
+181|371|mm|BUTTER|FALSE
+182|214|millimeters|Strawbery|
+183|21.33||baunilha|
+184|260 millimeters|mm|rainbow|
+185|12.48|inches|cream|N/A
+186|230.33024|millimeters|N/A|
+187|196.3||BUTTER_CAKE|0
+188|13.96inches|inches||no
+189|2|average human head||no
+190|15.0069184|inches |Avocado Cake|6
+191|275.54087||chokolade|14
+192|571|mm|Chiffon|no
+193|21.4084627|inches|Chiffon Cake|
+194|488.9 MM|mm|BLACKFOREST|NO
+195|425.8|mm|apple pie|0
+196|319.5|mm|BUTTER|0
+197|0.1719|meters|RED|
+198|56.640123||baunilha|
+199|335.68962 MM||SWBERRY|FALSE
+200|514.2||chocolate|NO
+201|402|mm|biscuit|
+202|313|millimeters| biscuit |0
+203|4.7152478||VANILLA |
+204|504||biscuit |
+205|2.1631699|in|Strawbery|
+206|13.9in|inches|biscuit |
+207|292|mm|APFEL |FALSE
+208|9.2043963|in|strawbery|
+209|22.24 in|""""|carrot|0
+210|250.04218||carrot|0
+211|18.0199549|inches|Chiffon|NO
+212|16.62|inches|avocado|FALSE
+213|14.07|""""|rainbow|Y
+214|301.7|mm|SWBERRY|TRUE
+215|2.6792519inches|inches|strawbery|
+216|11.8875088|inches|b. forest|no
+217|2.83|inches|chokolade|no
+218|10.54|in|black forest|
+219|505||choc|6
+220|519385039|millimeters|carrot|NO
+221|518.40178|mm|Avocado|f
+222|23.8636638inches|inches|apple|FALSE
+223|21.9175111|inches|CREAM|TRUE
+224|14.73|inches||
+225|3.42|inches|RED|f
+226|2.57inches|mm||inches
+227|219mm|millimeters|sponge|
+228|22.9384568|inches|BUTTER|NO
+229|21.48in|inches|Avocado Cake|NO
+230|9.05 inch| inches||no
+231|198 millimeters|inches|biscuit |1
+232|0.5542|m|Babka|
+233|588.6 millimeters|mm|carrot|f
+234|440.2|mm|APFEL|
+235|13.11|inches |caramel|yes
+236|555.18474|millimeters|avocado|
+237|308.1 MM|mm|APFEL|NO
+238|249.6|mm|Chiffon Cake|N/A
+239|162.3 mm|MM|Avocado Cake|NO
+240|351.79509|mm|butter|FALSE
+241|23.5|inches|BLACKFOREST|
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..96716b1
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,18 @@
+version: "3.8"
+
+services:
+
+ mongo:
+ image: mongo
+ restart: always
+ ports:
+ - 27017:27017
+
+ mongo-express:
+ image: mongo-express
+ restart: always
+ ports:
+ - 8081:8081
+ environment:
+ ME_CONFIG_MONGODB_URL: mongodb://mongo:27017/
+
diff --git a/etl/__init__.py b/etl/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/etl/__pycache__/__init__.cpython-38.pyc b/etl/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..f7da27c
Binary files /dev/null and b/etl/__pycache__/__init__.cpython-38.pyc differ
diff --git a/etl/__pycache__/extractor.cpython-38.pyc b/etl/__pycache__/extractor.cpython-38.pyc
new file mode 100644
index 0000000..5255c8d
Binary files /dev/null and b/etl/__pycache__/extractor.cpython-38.pyc differ
diff --git a/etl/__pycache__/loader.cpython-38.pyc b/etl/__pycache__/loader.cpython-38.pyc
new file mode 100644
index 0000000..8ee50c1
Binary files /dev/null and b/etl/__pycache__/loader.cpython-38.pyc differ
diff --git a/etl/__pycache__/master.cpython-38.pyc b/etl/__pycache__/master.cpython-38.pyc
new file mode 100644
index 0000000..a2d9a24
Binary files /dev/null and b/etl/__pycache__/master.cpython-38.pyc differ
diff --git a/etl/__pycache__/models.cpython-38.pyc b/etl/__pycache__/models.cpython-38.pyc
new file mode 100644
index 0000000..e546c80
Binary files /dev/null and b/etl/__pycache__/models.cpython-38.pyc differ
diff --git a/etl/__pycache__/reports.cpython-38.pyc b/etl/__pycache__/reports.cpython-38.pyc
new file mode 100644
index 0000000..a63f447
Binary files /dev/null and b/etl/__pycache__/reports.cpython-38.pyc differ
diff --git a/etl/__pycache__/transformer.cpython-38.pyc b/etl/__pycache__/transformer.cpython-38.pyc
new file mode 100644
index 0000000..9726f6b
Binary files /dev/null and b/etl/__pycache__/transformer.cpython-38.pyc differ
diff --git a/etl/__pycache__/utils.cpython-38.pyc b/etl/__pycache__/utils.cpython-38.pyc
new file mode 100644
index 0000000..20f1a83
Binary files /dev/null and b/etl/__pycache__/utils.cpython-38.pyc differ
diff --git a/etl/extractor.py b/etl/extractor.py
new file mode 100644
index 0000000..ede35e0
--- /dev/null
+++ b/etl/extractor.py
@@ -0,0 +1,27 @@
+import csv
+
+from typing import List
+
+
+class Extractor:
+    """Reads pipe-delimited cake records from a CSV source file."""
+
+    def __init__(self, in_file_path: str):
+        """
+        Args:
+            in_file_path: path to the source file
+        """
+        self.in_file_path = in_file_path
+
+    def extract_data(self) -> List[dict]:
+        """
+        Extracts every data row of the CSV file
+
+        Returns:
+            one dictionary per data row, keyed by the header names
+        """
+        # newline="" lets the csv module handle quoted line breaks itself.
+        with open(self.in_file_path, "r", newline="") as csvfile:
+            # DictReader consumes the header row on its own; an extra
+            # next(reader) here would silently drop the first record.
+            return list(csv.DictReader(csvfile, delimiter='|'))
\ No newline at end of file
diff --git a/etl/loader.py b/etl/loader.py
new file mode 100644
index 0000000..6ac19cd
--- /dev/null
+++ b/etl/loader.py
@@ -0,0 +1,43 @@
+from typing import List
+
+import mongoengine as me
+
+from .models import CakeModel, CakeMongoOrm
+
+
+def connect():
+ """
+ Opens mongoengine's default connection to the "cakes" database
+ """
+ me.connect("cakes")
+
+
+class Loader:
+    def __init__(self, cake_data: List[CakeModel], test_mode: bool = False):
+        """
+        This class loads transformed data into the database
+
+        Args:
+            cake_data: transformed data
+            test_mode: when True, skip connect() so that the caller (e.g. a
+                unit test) can set up its own mongoengine connection
+        """
+        if not test_mode:
+            connect()
+
+        self.cake_data = cake_data
+
+    def load_data(self):
+        """
+        Replaces the contents of the cake collection with ``self.cake_data``
+        """
+        print("Preparing data...")
+        cakes = [CakeMongoOrm(**data.dict()) for data in self.cake_data]
+
+        CakeMongoOrm.objects.delete()
+
+        print("Inserting data into the database... please wait")
+        if cakes:  # a bulk insert of an empty list is rejected by pymongo
+            CakeMongoOrm.objects.insert(cakes)
+
+        print("Data loaded into the database successfully!")
diff --git a/etl/master.py b/etl/master.py
new file mode 100644
index 0000000..fe9c269
--- /dev/null
+++ b/etl/master.py
@@ -0,0 +1,22 @@
+from .extractor import Extractor
+from .loader import Loader
+from .transformer import Transformer
+from .reports import Report
+
+
+def run_etl(input_file: str):
+    """
+    Executes the pipeline end to end: extract, transform, load, then report
+
+    Args:
+        input_file: path to the source file
+    """
+    # Extract the raw rows and turn them into validated cake models
+    raw_rows = Extractor(input_file).extract_data()
+    cake_models = Transformer(raw_rows).transform_data()
+
+    # Persist the models
+    Loader(cake_models).load_data()
+
+    # create reports
+    Report().create_report()
diff --git a/etl/models.py b/etl/models.py
new file mode 100644
index 0000000..351addb
--- /dev/null
+++ b/etl/models.py
@@ -0,0 +1,48 @@
+from typing import Optional
+
+import mongoengine as me
+from pydantic import BaseModel, Field
+
+VALID_CAKE_FLAVORS = [
+ "butter",
+ "carrot",
+ "black forest",
+ "avocado",
+ "vanilla",
+ "caramel",
+ "rainbow",
+ "chiffon",
+ "cream",
+ "babka",
+ "sponge",
+ "apple",
+ "strawberry",
+ "biscuit",
+ "chocolate",
+]
+
+VALID_UNITS = ["mm", "in", "m"]
+
+
+class CakeMongoOrm(me.Document):
+ """
+ Mongoengine document describing one cake as it is stored in MongoDB
+ """
+
+ entry_id = me.IntField(required=True, unique=True)  # mandatory and unique
+ name = me.StringField(null=True, choices=VALID_CAKE_FLAVORS)  # may be null, otherwise a known flavor
+ diameter_in_mm = me.FloatField(required=True)  # mandatory
+ vegan = me.BooleanField(null=True)  # may be null
+ original_unit = me.StringField(choices=VALID_UNITS, required=True)  # mandatory, one of VALID_UNITS
+
+
+class CakeModel(BaseModel):
+ """
+ Pydantic model of a cake, used to validate transformed rows before loading
+ """
+
+ entry_id: int = Field(description="The entry id of the cake")
+ name: Optional[str] = Field(description="Name (or type) of the cake", default=None)
+ diameter_in_mm: float = Field(description="Diameter of the cake in millimeters")
+ vegan: Optional[bool] = Field(description="Specifies if cake is vegan or not", default=None)
+ original_unit: str = Field(description="The original unit of cake's diameter")
\ No newline at end of file
diff --git a/etl/reports.py b/etl/reports.py
new file mode 100644
index 0000000..37cb4b4
--- /dev/null
+++ b/etl/reports.py
@@ -0,0 +1,122 @@
+from datetime import datetime
+from typing import List
+
+from mongoengine.queryset.visitor import Q
+
+from .loader import connect
+from .models import CakeMongoOrm
+
+
+class Report:
+ def __init__(
+         self, data: List[dict] = [],
+         caption: str = 'Cake Reports with Invalid Name or Vegan',
+         bg_color: str = '#FADBD8',
+         path: str = None):
+     '''
+     Prepares a report over a list of cake rows
+
+     Args:
+         data: rows to report on; when empty they are fetched from mongo
+         caption: caption text inserted into the report
+         bg_color: color string stored on the instance for the report
+         path: output file, './reports/reports.html' when omitted
+     '''
+     self.caption, self.bg_color = caption, bg_color
+     self.path = path or './reports/reports.html'
+     # An empty or missing data argument falls back to the database query.
+     self.data = data or self.get_data_from_mongo()
+
+ def write_to_file(self, content: str):
+     '''
+     Writes the html string to the file at ``self.path``
+
+     Args:
+         content: the complete html document to save
+     '''
+     with open(self.path, 'w') as file_obj:  # closed even if write fails
+         file_obj.write(content)
+
+
+ def create_html_table(self) -> str:
+ '''
+ Builds the report table from self.data: the caption, one header entry per
+ key of the first row (capitalized), then one entry per value of every row.
+ NOTE(review): the markup inside the string literals below looks stripped
+ in this copy and self.data[0] assumes at least one row - confirm both.
+ '''
+
+ table: str = "
\n"
+ table += "" + self.caption + "\n"
+ table += '\n'
+ for k in self.data[0].keys():
+ table += '' + k.capitalize() + ' | '
+ table += '
\n'
+
+ table += " \n"
+ for row in self.data:
+ for k in row.keys():
+ table += '' + str(row[k]) + ' | \n'
+ table += '
\n'
+
+ table += '\t
\n'
+ return table
+
+
+ def create_report(self):
+ '''
+ Assembles the page (caption, table, summary stamped with datetime.now()) and saves it via
+ write_to_file. NOTE(review): the markup in the literals below looks stripped - confirm.
+ '''
+ # Start the page
+ content = '''
+
+
+ ''' + self.caption + '''
+
+
+
+ \n
+ '''
+
+ # Add content to the body
+ content += self.create_html_table()
+ content += '
'
+
+ content += "\t\n"
+ content += "\t\tSummary | Timestamp | Status |
\n"
+ content += '\t\tCake reports | ' + datetime.now().strftime("%d-%m-%Y, %H:%M") + ' | Success |
\n'
+ content += '\t
\n'
+
+ # Close the body and end the file
+ content += '''
+
+
+
+ '''
+
+ self.write_to_file(content)
+
+ print(f"Reports created successfully, please open '{self.path}' to view")
+
+ def get_data_from_mongo(self):
+     '''
+     Fetches the cakes that look incomplete, i.e. the stored cakes whose
+     name or vegan value is missing
+
+     Returns:
+         a list of dictionaries containing cake data
+     '''
+
+     connect()
+
+     # The key order below becomes the column order of the report table.
+     columns = ('entry_id', 'name', 'diameter_in_mm', 'vegan', 'original_unit')
+     missing_name_or_vegan = Q(name=None) | Q(vegan=None)
+
+     rows = []
+     for cake in CakeMongoOrm.objects(missing_name_or_vegan):
+         rows.append({column: getattr(cake, column) for column in columns})
+
+     return rows
+
\ No newline at end of file
diff --git a/etl/transformer.py b/etl/transformer.py
new file mode 100644
index 0000000..24ee192
--- /dev/null
+++ b/etl/transformer.py
@@ -0,0 +1,151 @@
+from string import punctuation
+from typing import List, Optional
+
+from .models import CakeModel
+from .utils import split_text, get_base_unit, is_number, value_to_mm
+
+
+class Transformer:
+    def __init__(self, raw_data: List[dict]):
+        """
+        This class transforms extracted data according to the desired model
+
+        Args:
+            raw_data: extracted data
+        """
+        self.raw_data = raw_data
+
+    def transform_data(self) -> List[CakeModel]:
+        """
+        Transforms data
+
+        Returns:
+            transformed data as a list of models; rows that cannot be
+            transformed are left out
+        """
+        candidates = (self.transform_single_item(row) for row in self.raw_data)
+        return [cake for cake in candidates if cake]
+
+    def transform_single_item(self, input_item: dict) -> Optional[CakeModel]:
+        """
+        Transforms single item of extracted data
+
+        Args:
+            input_item: part of extracted data
+
+        Returns:
+            model if transformation was successful, None otherwise
+        """
+        original_unit, diameter = self.process_diameter(
+            unit=input_item.get('diam_unit'),
+            diameter=input_item.get('cake_diameter')
+        )
+        # A row without a usable diameter is discarded as a whole.
+        if not diameter or not original_unit:
+            return None
+
+        try:
+            return CakeModel(
+                original_unit=original_unit,
+                diameter_in_mm=diameter,
+                entry_id=input_item.get('entry'),
+                name=self.process_name(input_item.get('flavor')),
+                vegan=self.process_vegan(input_item.get('is_cake_vegan')),
+            )
+        except ValueError:
+            # pydantic's ValidationError derives from ValueError; a row that
+            # fails validation (e.g. a non-integer entry) is skipped like any
+            # other irrecoverable row instead of aborting the whole run.
+            return None
+
+    def process_diameter(self, unit, diameter):
+        '''
+        Process the unit and diameter
+
+        The unit may be given in the unit column, appended to the diameter
+        (e.g. 13.77 in or 2.43") or both. A record is kept when the number
+        can be parsed and the two units, if both present, agree.
+
+        Args:
+            unit: the diameter unit
+            diameter: the diameter
+
+        Returns:
+            original (base) unit and the diameter in millimeters as a float,
+            or (None, None) when the record has to be discarded
+        '''
+        # The csv reader yields None for columns missing from a short row.
+        unit = (unit or '').strip().lower()
+        text = (diameter or '').strip().lower()
+
+        # Separate a trailing alphabetic unit from the number, as in 24.04in
+        cut = len(text)
+        while cut and text[cut - 1].isalpha():
+            cut -= 1
+        number, suffix = text[:cut].strip(), text[cut:]
+
+        # A double quote is the inch mark, both after the number and as unit.
+        if number.endswith('"'):
+            number, suffix = number[:-1].strip(), suffix or 'in'
+        if unit == '"':
+            unit = 'in'
+
+        try:
+            value = float(number)
+        except ValueError:
+            # irrecoverable diameter such as 'test', 'very large' or ''
+            return None, None
+        # a diameter has to be a positive length
+        if value <= 0:
+            return None, None
+
+        column_unit = get_base_unit(unit) if unit else None
+        suffix_unit = get_base_unit(suffix) if suffix else None
+        # An unknown unit, or two units that contradict each other, cannot be
+        # trusted: discard the record.
+        if (unit and column_unit is None) or (suffix and suffix_unit is None):
+            return None, None
+        if column_unit and suffix_unit and column_unit != suffix_unit:
+            return None, None
+
+        # when no units are mentioned at all, assume millimeters
+        base_unit = column_unit or suffix_unit or 'mm'
+        if base_unit != 'mm':
+            value = value_to_mm(value=value, unit=base_unit)
+        return base_unit, value
+
+    def process_name(self, value):
+        '''
+        Process and return desired cake flavor
+
+        Args:
+            value: the flavour of cake
+
+        Returns:
+            the accepted flavour or name if it exists, None otherwise
+        '''
+        from .models import VALID_CAKE_FLAVORS
+
+        flavor = (value or '').strip().lower()
+        return flavor if flavor in VALID_CAKE_FLAVORS else None
+
+    def process_vegan(self, value):
+        '''
+        Process and return desired vegan value
+
+        Args:
+            value: the vegan value
+
+        Returns:
+            True or False if vegan value exists, None when it is missing or
+            not understood
+        '''
+        answer = (value or '').strip().lower()
+        if answer in ('t', 'true', 'y', 'yes'):
+            return True
+        if answer in ('f', 'false', 'n', 'no'):
+            return False
+        # numeric flags: zero is False, any other number is True
+        if is_number(answer):
+            return bool(float(answer))
+        return None
\ No newline at end of file
diff --git a/etl/utils.py b/etl/utils.py
new file mode 100644
index 0000000..35617f3
--- /dev/null
+++ b/etl/utils.py
@@ -0,0 +1,34 @@
+from itertools import groupby
+
+
+def split_text(s):
+    '''Yield the consecutive alphabetic / non-alphabetic runs of s'''
+    # e.g. '13.77 in' gives '13.77 ' and then 'in'
+    for _, run in groupby(s, key=str.isalpha):
+        yield ''.join(run)
+
+
+def get_base_unit(unit):
+    '''Resolves a unit spelling to 'm', 'mm' or 'in'; None if it is unknown'''
+    # Singular, plural and British spellings share one base unit.
+    return {
+        'm': 'm', 'meter': 'm', 'meters': 'm', 'metre': 'm', 'metres': 'm',
+        'mm': 'mm', 'millimeter': 'mm', 'millimeters': 'mm', 'millimetres': 'mm',
+        'in': 'in', 'inch': 'in', 'inches': 'in',
+    }.get(unit)
+
+
+def is_number(n):
+    '''Tells whether float() can parse the given string'''
+    try:
+        float(n)
+    except ValueError:
+        return False
+    else:
+        return True
+
+
+def value_to_mm(value, unit):
+    '''Convert a value given in base unit 'in', 'm' or 'mm' to millimeters'''
+    # 'mm' is the identity, so callers need no special case for it.
+    return {'in': 25.4, 'm': 1000, 'mm': 1}[unit] * value
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..6f561d6
--- /dev/null
+++ b/main.py
@@ -0,0 +1,6 @@
+"""Run this script to launch the pipeline"""
+
+from etl.master import run_etl
+
+if __name__ == "__main__":
+ run_etl("cake_data.csv")
\ No newline at end of file
diff --git a/reports/reports.html b/reports/reports.html
new file mode 100644
index 0000000..db8fdb9
--- /dev/null
+++ b/reports/reports.html
@@ -0,0 +1,882 @@
+
+
+
+ Cake Reports with Invalid Name or Vegan
+
+
+
+
+
+
+Cake Reports with Invalid Name or Vegan
+
+Entry_id | Name | Diameter_in_mm | Vegan | Original_unit |
+
+2 |
+strawberry |
+349.25 |
+None |
+in |
+
+3 |
+None |
+480.0 |
+False |
+mm |
+
+5 |
+None |
+456.692 |
+False |
+in |
+
+6 |
+butter |
+251.95554 |
+None |
+mm |
+
+7 |
+babka |
+186.9 |
+None |
+mm |
+
+8 |
+None |
+283.5 |
+False |
+mm |
+
+9 |
+chocolate |
+293.04925 |
+None |
+mm |
+
+10 |
+None |
+451.4 |
+None |
+mm |
+
+12 |
+None |
+405.384 |
+None |
+in |
+
+14 |
+None |
+629.3567296 |
+False |
+in |
+
+16 |
+None |
+386.334 |
+True |
+in |
+
+17 |
+avocado |
+492.28463 |
+None |
+mm |
+
+19 |
+None |
+2.43 |
+None |
+mm |
+
+21 |
+biscuit |
+514.0 |
+None |
+mm |
+
+22 |
+None |
+321.056 |
+False |
+in |
+
+23 |
+None |
+360000.0 |
+None |
+m |
+
+25 |
+None |
+160.22910804 |
+True |
+in |
+
+26 |
+None |
+435.99012 |
+None |
+mm |
+
+27 |
+None |
+175.0 |
+None |
+mm |
+
+30 |
+chiffon |
+23.300681 |
+None |
+mm |
+
+31 |
+None |
+588.7 |
+None |
+mm |
+
+32 |
+caramel |
+518.0 |
+None |
+mm |
+
+33 |
+None |
+121.91999999999999 |
+None |
+in |
+
+34 |
+None |
+301.0 |
+None |
+mm |
+
+35 |
+None |
+12.58 |
+False |
+mm |
+
+38 |
+vanilla |
+342.8 |
+None |
+mm |
+
+40 |
+butter |
+363.21999999999997 |
+None |
+in |
+
+41 |
+None |
+382.0 |
+False |
+mm |
+
+43 |
+None |
+534.4 |
+True |
+mm |
+
+44 |
+None |
+432.3 |
+None |
+mm |
+
+45 |
+sponge |
+537.21803 |
+None |
+mm |
+
+46 |
+None |
+425.45 |
+False |
+in |
+
+47 |
+None |
+554.54711036 |
+None |
+in |
+
+49 |
+None |
+246.888 |
+None |
+in |
+
+51 |
+None |
+223.0 |
+False |
+mm |
+
+52 |
+None |
+399.542 |
+None |
+in |
+
+54 |
+cream |
+558.53092 |
+None |
+mm |
+
+57 |
+biscuit |
+175.00599999999997 |
+None |
+in |
+
+58 |
+None |
+534900.0 |
+False |
+m |
+
+60 |
+None |
+556.2 |
+True |
+mm |
+
+61 |
+None |
+0.65 |
+None |
+mm |
+
+62 |
+None |
+376.68199999999996 |
+None |
+in |
+
+65 |
+None |
+551.0 |
+True |
+mm |
+
+66 |
+None |
+562.22131 |
+None |
+mm |
+
+69 |
+None |
+456.0 |
+None |
+mm |
+
+72 |
+chiffon |
+280.0 |
+None |
+mm |
+
+73 |
+None |
+437.0071 |
+False |
+mm |
+
+75 |
+None |
+32.004 |
+False |
+in |
+
+76 |
+None |
+476.5592678599999 |
+True |
+in |
+
+77 |
+caramel |
+23.25 |
+None |
+in |
+
+80 |
+rainbow |
+12.81 |
+None |
+mm |
+
+85 |
+None |
+11.74 |
+None |
+mm |
+
+86 |
+None |
+32.22252128 |
+None |
+in |
+
+88 |
+None |
+471.678 |
+False |
+in |
+
+90 |
+None |
+369.5 |
+None |
+mm |
+
+91 |
+None |
+6.5824703 |
+None |
+in |
+
+92 |
+None |
+9.31 |
+None |
+mm |
+
+94 |
+None |
+429.11287 |
+False |
+mm |
+
+95 |
+chocolate |
+553.0 |
+None |
+mm |
+
+97 |
+biscuit |
+202.438 |
+None |
+in |
+
+99 |
+apple |
+575.0 |
+None |
+mm |
+
+101 |
+carrot |
+187.49519 |
+None |
+mm |
+
+102 |
+None |
+364.54837 |
+None |
+mm |
+
+103 |
+caramel |
+202.1 |
+None |
+mm |
+
+105 |
+cream |
+135.38526136000002 |
+None |
+in |
+
+109 |
+rainbow |
+329.0 |
+None |
+mm |
+
+110 |
+chiffon |
+563.626 |
+None |
+in |
+
+111 |
+rainbow |
+131.07657808 |
+None |
+in |
+
+113 |
+None |
+167.33247 |
+None |
+mm |
+
+114 |
+None |
+5.8846421 |
+None |
+in |
+
+115 |
+None |
+432.0 |
+None |
+mm |
+
+117 |
+None |
+447.80199999999996 |
+None |
+in |
+
+121 |
+butter |
+328.03988 |
+None |
+mm |
+
+123 |
+None |
+12.78 |
+False |
+in |
+
+128 |
+black forest |
+341.12199999999996 |
+None |
+in |
+
+130 |
+None |
+175.89768224 |
+None |
+in |
+
+132 |
+None |
+313.57076478 |
+False |
+in |
+
+134 |
+None |
+23.28 |
+None |
+in |
+
+135 |
+None |
+456.1 |
+True |
+mm |
+
+136 |
+apple |
+573.7913593999999 |
+None |
+in |
+
+137 |
+None |
+251.0 |
+True |
+mm |
+
+138 |
+None |
+119.63399999999999 |
+False |
+in |
+
+139 |
+None |
+24.09 |
+False |
+in |
+
+144 |
+None |
+18.7984662 |
+None |
+in |
+
+146 |
+biscuit |
+599.78417 |
+None |
+mm |
+
+148 |
+None |
+166.941 |
+False |
+mm |
+
+149 |
+None |
+502.66599999999994 |
+None |
+in |
+
+151 |
+None |
+273.1 |
+False |
+mm |
+
+152 |
+None |
+21.9 |
+True |
+in |
+
+153 |
+None |
+421.1 |
+None |
+mm |
+
+154 |
+butter |
+88.646 |
+None |
+in |
+
+156 |
+None |
+227000.0 |
+False |
+m |
+
+158 |
+None |
+24.3589237 |
+False |
+in |
+
+159 |
+None |
+72.39 |
+True |
+in |
+
+161 |
+None |
+35.328749 |
+None |
+mm |
+
+162 |
+sponge |
+496531.85000000003 |
+None |
+m |
+
+163 |
+None |
+528.96386 |
+False |
+mm |
+
+164 |
+None |
+12.8 |
+True |
+mm |
+
+166 |
+avocado |
+81.23653297999999 |
+None |
+in |
+
+167 |
+biscuit |
+189600.0 |
+None |
+m |
+
+168 |
+None |
+266.0 |
+False |
+mm |
+
+170 |
+None |
+603.25 |
+False |
+in |
+
+174 |
+None |
+621.0364541399999 |
+None |
+in |
+
+175 |
+carrot |
+351000.0 |
+None |
+m |
+
+176 |
+apple |
+363.474 |
+None |
+in |
+
+178 |
+None |
+214.61343 |
+True |
+mm |
+
+179 |
+None |
+265.0 |
+None |
+mm |
+
+182 |
+None |
+214000.0 |
+None |
+m |
+
+183 |
+None |
+21.33 |
+None |
+mm |
+
+185 |
+cream |
+316.992 |
+None |
+in |
+
+186 |
+None |
+230330.24 |
+None |
+m |
+
+187 |
+None |
+196.3 |
+False |
+mm |
+
+188 |
+None |
+354.584 |
+False |
+in |
+
+190 |
+None |
+381.17572736 |
+True |
+in |
+
+191 |
+None |
+275.54087 |
+True |
+mm |
+
+193 |
+None |
+543.77495258 |
+None |
+in |
+
+194 |
+None |
+488.9 |
+False |
+mm |
+
+195 |
+None |
+425.8 |
+False |
+mm |
+
+198 |
+None |
+56.640123 |
+None |
+mm |
+
+199 |
+None |
+335.68962 |
+False |
+mm |
+
+201 |
+biscuit |
+402.0 |
+None |
+mm |
+
+203 |
+vanilla |
+4.7152478 |
+None |
+mm |
+
+204 |
+biscuit |
+504.0 |
+None |
+mm |
+
+205 |
+None |
+54.944515460000005 |
+None |
+in |
+
+206 |
+biscuit |
+353.06 |
+None |
+in |
+
+207 |
+None |
+292.0 |
+False |
+mm |
+
+208 |
+None |
+233.79166602 |
+None |
+in |
+
+214 |
+None |
+301.7 |
+True |
+mm |
+
+215 |
+None |
+68.05299826 |
+None |
+in |
+
+216 |
+None |
+301.94272352 |
+False |
+in |
+
+217 |
+None |
+71.88199999999999 |
+False |
+in |
+
+218 |
+black forest |
+267.71599999999995 |
+None |
+in |
+
+219 |
+None |
+505.0 |
+True |
+mm |
+
+224 |
+None |
+374.142 |
+None |
+in |
+
+225 |
+None |
+86.868 |
+False |
+in |
+
+229 |
+None |
+545.592 |
+False |
+in |
+
+232 |
+babka |
+554.2 |
+None |
+m |
+
+234 |
+None |
+440.2 |
+None |
+mm |
+
+236 |
+avocado |
+555184.74 |
+None |
+m |
+
+237 |
+None |
+308.1 |
+False |
+mm |
+
+238 |
+None |
+249.6 |
+None |
+mm |
+
+239 |
+None |
+162.3 |
+False |
+mm |
+
+241 |
+None |
+596.9 |
+None |
+in |
+
+
+
+ Summary | Timestamp | Status |
+ Cake reports | 07-03-2022, 12:20 | Success |
+
+
+
+
+
+
\ No newline at end of file
diff --git a/reports/test_reports.html b/reports/test_reports.html
new file mode 100644
index 0000000..5985f23
--- /dev/null
+++ b/reports/test_reports.html
@@ -0,0 +1,36 @@
+
+
+
+ Test Reports (created from unit test)
+
+
+
+
+
+
+Test Reports (created from unit test)
+
+Entry_id | Name | Diameter_in_mm | Vegan | Original_unit |
+
+58 |
+None |
+534900.0 |
+False |
+m |
+
+60 |
+None |
+556.2 |
+True |
+mm |
+
+
+
+ Summary | Timestamp | Status |
+ Cake reports | 07-03-2022, 12:19 | Success |
+
+
+
+
+
+
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..23a012f
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+mongoengine==0.24.0
+mongomock==4.0.0
+pydantic==1.9.0
+pymongo==4.0
+typing_extensions==4.1.1
\ No newline at end of file
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/__pycache__/__init__.cpython-38.pyc b/tests/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..f9c5e2c
Binary files /dev/null and b/tests/__pycache__/__init__.cpython-38.pyc differ
diff --git a/tests/__pycache__/test_extractor.cpython-38.pyc b/tests/__pycache__/test_extractor.cpython-38.pyc
new file mode 100644
index 0000000..cb6930c
Binary files /dev/null and b/tests/__pycache__/test_extractor.cpython-38.pyc differ
diff --git a/tests/__pycache__/test_loader.cpython-38.pyc b/tests/__pycache__/test_loader.cpython-38.pyc
new file mode 100644
index 0000000..b7a951a
Binary files /dev/null and b/tests/__pycache__/test_loader.cpython-38.pyc differ
diff --git a/tests/__pycache__/test_models.cpython-38.pyc b/tests/__pycache__/test_models.cpython-38.pyc
new file mode 100644
index 0000000..1451f66
Binary files /dev/null and b/tests/__pycache__/test_models.cpython-38.pyc differ
diff --git a/tests/__pycache__/test_reports.cpython-38.pyc b/tests/__pycache__/test_reports.cpython-38.pyc
new file mode 100644
index 0000000..813c5b7
Binary files /dev/null and b/tests/__pycache__/test_reports.cpython-38.pyc differ
diff --git a/tests/__pycache__/test_transformer.cpython-38.pyc b/tests/__pycache__/test_transformer.cpython-38.pyc
new file mode 100644
index 0000000..c5a272a
Binary files /dev/null and b/tests/__pycache__/test_transformer.cpython-38.pyc differ
diff --git a/tests/test_extractor.py b/tests/test_extractor.py
new file mode 100644
index 0000000..5cc65b2
--- /dev/null
+++ b/tests/test_extractor.py
@@ -0,0 +1,21 @@
+from unittest.case import TestCase
+
+from etl.extractor import Extractor
+
+
+class TestExtractor(TestCase):
+    """
+    Checks the CSV extraction step
+    """
+
+    def test_extractor(self):
+        '''Extracted data is a list of five-column dictionaries'''
+
+        data = Extractor(in_file_path='./cake_data.csv').extract_data()
+        self.assertIsInstance(data, list)
+
+        # shape of a single record
+        first_row = data[0]
+        self.assertIsInstance(first_row, dict)
+        self.assertEqual(len(first_row), 5)
+        self.assertIsNotNone(first_row.get('entry'))
diff --git a/tests/test_loader.py b/tests/test_loader.py
new file mode 100644
index 0000000..a49bd75
--- /dev/null
+++ b/tests/test_loader.py
@@ -0,0 +1,55 @@
+from unittest import TestCase
+
+import mongoengine as me
+
+from etl.loader import Loader
+from etl.models import CakeMongoOrm, CakeModel
+
+
+
+class TestLoader(TestCase):
+    """
+    Checks the loading step against a mongomock database
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        me.connect('caketest', host='mongomock://localhost')
+
+    @classmethod
+    def tearDownClass(cls):
+        me.disconnect()
+
+    def test_load_data(self):
+        '''Loaded cakes can be read back unchanged'''
+
+        first_cake = CakeModel(
+            entry_id=180,
+            diameter_in_mm=522,
+            name='cream',
+            original_unit='mm',
+            vegan=False
+        )
+        second_cake = CakeModel(
+            entry_id=201,
+            diameter_in_mm=400,
+            name='strawberry',
+            original_unit='mm',
+            vegan=True
+        )
+
+        cake_data = [first_cake, second_cake]
+
+        # test_mode=True: reuse the connection opened in setUpClass
+        Loader(cake_data, test_mode=True).load_data()
+
+        stored_total = CakeMongoOrm.objects().count()
+        stored = CakeMongoOrm.objects(entry_id=first_cake.entry_id).first()
+
+        self.assertEqual(len(cake_data), stored_total)
+
+        # every field of the first cake must survive the round trip
+        field_names = ('original_unit', 'diameter_in_mm', 'entry_id', 'name', 'vegan')
+        for field_name in field_names:
+            expected = getattr(first_cake, field_name)
+            self.assertEqual(expected, getattr(stored, field_name))
\ No newline at end of file
diff --git a/tests/test_models.py b/tests/test_models.py
new file mode 100644
index 0000000..eaa8d4b
--- /dev/null
+++ b/tests/test_models.py
@@ -0,0 +1,89 @@
+from unittest import TestCase
+
+import mongoengine as me
+
+from etl.models import CakeMongoOrm, CakeModel
+
+
+class TestCakeModel(TestCase):
+    """
+    Test Pydantic model of a cake used for data validation
+    """
+
+    def test_data_validation(self):
+        '''Assert that well-formed data is accepted, with numeric strings coerced to numbers'''
+
+        transformed_data = {
+            'original_unit': 'mm',
+            'diameter_in_mm': '440.2',
+            'entry_id': '234',
+            'name': None,
+            'vegan': True
+        }
+        cake_model = CakeModel(**transformed_data)
+
+        self.assertEqual(transformed_data['original_unit'], cake_model.original_unit)
+        self.assertEqual(float(transformed_data['diameter_in_mm']), cake_model.diameter_in_mm)
+        self.assertEqual(int(transformed_data['entry_id']), cake_model.entry_id)
+        self.assertIsNone(cake_model.name)  # check the model's value, not the input dict
+        self.assertTrue(cake_model.vegan)
+
+
+class TestCakeMongoOrm(TestCase):
+    """Test Mongoengine model of Cake document."""
+
+    @classmethod
+    def setUpClass(cls):
+        me.connect('caketest', host='mongomock://localhost')
+
+    @classmethod
+    def tearDownClass(cls):
+        me.disconnect()
+
+    def setUp(self):
+        # Each test starts from an empty collection, so counts do not depend on test order.
+        CakeMongoOrm.drop_collection()
+
+    def test_object_creation(self):
+        '''Assert a validated record is saved and reads back with the same field values'''
+        data = {
+            'original_unit': 'mm',
+            'diameter_in_mm': '440.2',
+            'entry_id': '234',
+            'name': 'strawberry',
+            'vegan': True
+        }
+        validated_data = CakeModel(**data)
+        CakeMongoOrm(**validated_data.dict()).save()
+        cake = CakeMongoOrm.objects(entry_id=234).first()
+
+        self.assertEqual(validated_data.original_unit, cake.original_unit)
+        self.assertEqual(validated_data.diameter_in_mm, cake.diameter_in_mm)
+        self.assertEqual(validated_data.entry_id, cake.entry_id)
+        self.assertEqual(validated_data.name, cake.name)
+        self.assertEqual(validated_data.vegan, cake.vegan)
+
+    def test_bulk_object_creation(self):
+        '''Assert a bulk insert stores exactly one document per validated record'''
+        bulk_data = [
+            {
+                'diameter_in_mm': '514.2',
+                'entry_id': '200',
+                'name': 'cream',
+                'original_unit': 'mm',
+                'vegan': False
+            },
+            {
+                'diameter_in_mm': '402',
+                'entry_id': '201',
+                'name': 'strawberry',
+                'original_unit': 'mm',
+                'vegan': True
+            },
+        ]
+        bulk_validated_data = [CakeModel(**data).dict() for data in bulk_data]
+        cakes = [CakeMongoOrm(**data) for data in bulk_validated_data]
+        CakeMongoOrm.objects.insert(cakes)
+        cake_count = CakeMongoOrm.objects().count()
+
+        self.assertEqual(len(bulk_validated_data), cake_count)
\ No newline at end of file
diff --git a/tests/test_reports.py b/tests/test_reports.py
new file mode 100644
index 0000000..9d3ca89
--- /dev/null
+++ b/tests/test_reports.py
@@ -0,0 +1,43 @@
+from pathlib import Path
+from unittest.case import TestCase
+
+from etl.reports import Report
+
+
+class TestReport(TestCase):
+    """
+    Test that Report writes its output file for a small set of cake records
+    """
+
+    def setUp(self):
+
+        self.data = [
+            {
+                'entry_id': 58,
+                'name': None,
+                'diameter_in_mm': 534900.0,
+                'vegan': False,
+                'original_unit': 'm'
+            },
+            {
+                'entry_id': 60,
+                'name': None,
+                'diameter_in_mm': 556.2,
+                'vegan': True,
+                'original_unit': 'mm'
+            }
+        ]
+
+    def test_report_is_generated(self):
+        '''Assert that create_report() produces the report file at the given path'''
+
+        path = './reports/test_reports.html'
+        new_file = Path(path).resolve()
+        if new_file.is_file():
+            new_file.unlink()  # a stale report from an earlier run would make the check vacuous
+        report = Report(
+            data=self.data, caption='Test Reports (created from unit test)', path=path
+        )
+        report.create_report()
+
+        self.assertTrue(new_file.is_file())
\ No newline at end of file
diff --git a/tests/test_transformer.py b/tests/test_transformer.py
new file mode 100644
index 0000000..92a40ea
--- /dev/null
+++ b/tests/test_transformer.py
@@ -0,0 +1,166 @@
+from unittest.case import TestCase
+
+from etl.transformer import Transformer
+
+
+class TestTransformer(TestCase):
+ """
+ Tests for Transformer.transform_data() applied to raw cake rows
+ """
+
+ def test_transformer_valid_unit_mm(self):
+ '''Assert that a clean row already given in mm maps to the expected fields'''
+
+ transformer = Transformer(
+ raw_data=[
+ {
+ "entry": "1",
+ "cake_diameter": "56.78",
+ "diam_unit": "mm",
+ "flavor": "caramel",
+ "is_cake_vegan": "No",
+ }
+ ]
+ )
+ res = transformer.transform_data()[0]
+
+ self.assertEqual(res.entry_id, 1)
+ self.assertEqual(res.name, "caramel")
+ self.assertEqual(res.diameter_in_mm, 56.78)
+ self.assertFalse(res.vegan)
+ self.assertEqual(res.original_unit, "mm")
+
+ def test_diameter_conversion(self):
+ '''Assert a diameter given in another unit is converted to mm (5 "m" becomes 5000)'''
+
+ transformer = Transformer(
+ raw_data=[
+ {
+ "entry": "2",
+ "cake_diameter": "5",
+ "diam_unit": "m",
+ "flavor": "strawberry",
+ "is_cake_vegan": "yes",
+ }
+ ]
+ )
+ res = transformer.transform_data()[0]
+
+ self.assertEqual(res.entry_id, 2)
+ self.assertEqual(res.name, "strawberry")
+ self.assertEqual(res.diameter_in_mm, 5000)
+ self.assertTrue(res.vegan)
+ self.assertEqual(res.original_unit, "m")
+
+ def test_irrecoverable_data_quality(self):
+ '''Assert that records are discarded when the diameter conflicts with diam_unit or has no number'''
+
+ transformer = Transformer(
+ raw_data=[
+ {
+ "entry": "3",
+ "cake_diameter": "56.78mm",
+ "diam_unit": "in",
+ "flavor": "caramel",
+ "is_cake_vegan": "false",
+ },
+ {
+ "entry": "4",
+ "cake_diameter": "fill this info later",
+ "diam_unit": "in",
+ "flavor": "caramel",
+ "is_cake_vegan": "true",
+ }
+ ]
+ )
+ res = transformer.transform_data()
+
+ self.assertListEqual(res, [])
+
+ def test_mixed_diameter_value(self):
+ '''Assert the diameter is resolved even when the value embeds its unit, e.g. `56.78mm`'''
+
+ transformer = Transformer(
+ raw_data=[
+ {
+ "entry": "5",
+ "cake_diameter": "56.78mm",
+ "diam_unit": "mm",
+ "flavor": "caramel",
+ "is_cake_vegan": "No",
+ }
+ ]
+ )
+ res = transformer.transform_data()[0]
+
+ self.assertEqual(res.entry_id, 5)
+ self.assertEqual(res.name, "caramel")
+ self.assertEqual(res.diameter_in_mm, 56.78)
+ self.assertFalse(res.vegan)
+ self.assertEqual(res.original_unit, "mm")
+
+ def test_valid_flavor(self):
+ '''Assert that an unrecognised flavor resolves to a None name while the record is kept'''
+
+ transformer = Transformer(
+ raw_data=[
+ {
+ "entry": "6",
+ "cake_diameter": "60",
+ "diam_unit": "mm",
+ "flavor": "Invalid flavour",
+ "is_cake_vegan": "No",
+ }
+ ]
+ )
+ res = transformer.transform_data()[0]
+
+ self.assertEqual(res.entry_id, 6)
+ self.assertIsNone(res.name)
+ self.assertEqual(res.diameter_in_mm, 60)
+ self.assertFalse(res.vegan)
+ self.assertEqual(res.original_unit, "mm")
+
+ def test_valid_vegan(self):
+ '''Assert that an unrecognised vegan value resolves to None while the record is kept'''
+
+ transformer = Transformer(
+ raw_data=[
+ {
+ "entry": "7",
+ "cake_diameter": "78",
+ "diam_unit": "mm",
+ "flavor": "caramel",
+ "is_cake_vegan": "Invalid Vegan",
+ }
+ ]
+ )
+ res = transformer.transform_data()[0]
+
+ self.assertEqual(res.entry_id, 7)
+ self.assertEqual(res.name, 'caramel')
+ self.assertEqual(res.diameter_in_mm, 78)
+ self.assertIsNone(res.vegan)
+ self.assertEqual(res.original_unit, "mm")
+
+ def test_valid_diameter_unit(self):
+ '''Assert that an empty diameter unit defaults to mm'''
+
+ transformer = Transformer(
+ raw_data=[
+ {
+ "entry": "8",
+ "cake_diameter": "80",
+ "diam_unit": "",
+ "flavor": "caramel",
+ "is_cake_vegan": "y",
+ }
+ ]
+ )
+ res = transformer.transform_data()[0]
+
+ self.assertEqual(res.entry_id, 8)
+ self.assertEqual(res.name, 'caramel')
+ self.assertEqual(res.diameter_in_mm, 80)
+ self.assertTrue(res.vegan)
+ self.assertEqual(res.original_unit, "mm")
\ No newline at end of file