Skip to content

Commit

Permalink
2024-10-29-13-19-50 - wip-data-pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
josephmachado committed Oct 29, 2024
1 parent 74e757b commit 242fdbd
Show file tree
Hide file tree
Showing 22 changed files with 1,748 additions and 79 deletions.
39 changes: 25 additions & 14 deletions 2-Python/Python.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"id": "34b5f60b-2da0-41d9-be58-f410b365c194",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cleaning up (if any existing) tpch db file tpch.db\n",
"Creating TPCH input data at tpch.db\n",
"Cleaning up (if any existing) sqlite3 db file example.db\n",
"Creating sqlite database file at example.db\n"
]
}
],
"source": [
"! python ../setup.py"
]
Expand Down Expand Up @@ -367,7 +378,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 9,
"id": "f193c1a7-7597-42be-9497-87926ef79a27",
"metadata": {},
"outputs": [],
Expand All @@ -379,21 +390,21 @@
"cursor = conn.cursor()\n",
"\n",
"# Connect to DuckDB and load TPC-H tables into Pandas DataFrames\n",
"customer_df = con.sql(\"SELECT * FROM customer\").df()\n",
"orders_df = con.sql(\"SELECT * FROM orders\").df()\n",
"lineitem_df = con.sql(\"SELECT * FROM lineitem\").df()\n",
"nation_df = con.sql(\"SELECT * FROM nation\").df()\n",
"region_df = con.sql(\"SELECT * FROM region\").df()\n",
"supplier_df = con.sql(\"SELECT * FROM supplier\").df()\n",
"part_df = con.sql(\"SELECT * FROM part\").df()\n",
"partsupp_df = con.sql(\"SELECT * FROM partsupp\").df()\n",
"customer_df = cursor.sql(\"SELECT * FROM customer\").df()\n",
"orders_df = cursor.sql(\"SELECT * FROM orders\").df()\n",
"lineitem_df = cursor.sql(\"SELECT * FROM lineitem\").df()\n",
"nation_df = cursor.sql(\"SELECT * FROM nation\").df()\n",
"region_df = cursor.sql(\"SELECT * FROM region\").df()\n",
"supplier_df = cursor.sql(\"SELECT * FROM supplier\").df()\n",
"part_df = cursor.sql(\"SELECT * FROM part\").df()\n",
"partsupp_df = cursor.sql(\"SELECT * FROM partsupp\").df()\n",
"\n",
"conn.close()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 10,
"id": "a9080b3a-d11b-4cca-9754-a2c0b35dbe9d",
"metadata": {},
"outputs": [],
Expand All @@ -403,7 +414,7 @@
},
{
"cell_type": "code",
"execution_count": 61,
"execution_count": 11,
"id": "6a85008e-166e-4136-9611-ee9dc0fc7d61",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -591,7 +602,7 @@
"320 ng the final, bold requests. furiously regular... "
]
},
"execution_count": 61,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
Binary file modified 2-Python/tpch.db
Binary file not shown.
1,097 changes: 1,035 additions & 62 deletions 3-Data-Warehousing/Data-Warehousing.ipynb

Large diffs are not rendered by default.

Binary file modified 3-Data-Warehousing/tpch.db
Binary file not shown.
Loading

0 comments on commit 242fdbd

Please sign in to comment.