Skip to content

Commit

Permalink
Update notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
Heizenburg committed Nov 16, 2023
1 parent 42c049d commit 6e23bed
Show file tree
Hide file tree
Showing 6 changed files with 1,232 additions and 340 deletions.
22 changes: 7 additions & 15 deletions notebooks/EST/devland.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -32,7 +32,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -41,7 +41,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -118,7 +118,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -137,7 +137,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -158,17 +158,9 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Error: The file is already open. Please close it and try again.\n"
]
}
],
"outputs": [],
"source": [
"output_file = \"C:\\\\Users\\\\tsello01\\\\Documents\\\\Data\\\\Devland Latest.xlsx\"\n",
"\n",
Expand Down
137 changes: 25 additions & 112 deletions notebooks/EST/react.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from dotenv import load_dotenv\n",
"import pdb\n",
"\n",
"from collections import defaultdict\n",
"from pathlib import Path\n",
Expand All @@ -28,7 +29,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -37,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -47,6 +48,7 @@
" with FTP() as ftp:\n",
" ftp.connect(ftp_host, ftp_port)\n",
" ftp.login(user=ftp_user, passwd=ftp_password)\n",
" pdb.set_trace()\n",
"\n",
" # Ensure download path exists\n",
" if not os.path.exists(download_path):\n",
Expand All @@ -58,68 +60,48 @@
" file_list = ftp.nlst()\n",
" for file_name in file_list:\n",
" local_file_path = os.path.join(download_path, file_name)\n",
" if ftp.nlst(file_name) == []:\n",
" os.makedirs(local_file_path, exist_ok=True)\n",
" else:\n",
" with open(local_file_path, 'wb') as file:\n",
" ftp.retrbinary('RETR ' + file_name, file.write)\n",
" with open(local_file_path, 'wb') as file:\n",
" ftp.retrbinary('RETR ' + file_name, file.write)\n",
" \n",
" except Exception as e:\n",
" error_message = str(e) if str(e) else \"Unknown FTP Error\"\n",
" print(f\"FTP Error: {error_message}\")\n"
" import traceback\n",
" print(\"An error occurred:\")\n",
" traceback.print_exc()\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# Load environment variables.\n",
"load_dotenv()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"FTP Error: Unknown FTP Error\n"
]
}
],
"outputs": [],
"source": [
"ftp_host = os.getenv('FTP_HOST')\n",
"ftp_port = int(os.getenv('FTP_PORT'))\n",
"ftp_user = os.getenv('FTP_USERNAME')\n",
"ftp_password = os.getenv('FTP_PASSWORD')\n",
"\n",
"folders_to_download = [\n",
" \"/euetrac1ftp/Up/A1 SUPERMARKET/\",\n",
" \"/euetrac1ftp/Up/Platinum/\",\n",
" \"/euetrac1ftp/Up/Premjee/\"\n",
" \"euetrac1ftp/Up/A1 SUPERMARKET\",\n",
" \"euetrac1ftp/Up/Platinum\",\n",
" \"euetrac1ftp/Up/Premjee\"\n",
"]\n",
"\n",
"fetch_files_from_ftp(ftp_host, 22, ftp_user, ftp_password, folders_to_download, directory)"
"fetch_files_from_ftp(ftp_host, ftp_port, ftp_user, ftp_password, folders_to_download, directory)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -147,8 +129,8 @@
" kept_files.append(os.path.basename(file))\n",
" else:\n",
" if os.path.getmtime(file) < os.path.getmtime(most_recent_files[directory]):\n",
" os.remove(file)\n",
" deleted_files.append(os.path.basename(file))\n",
" os.remove(file)\n",
" else:\n",
" os.remove(most_recent_files[directory])\n",
" deleted_files.append(os.path.basename(most_recent_files[directory]))\n",
Expand Down Expand Up @@ -180,87 +162,18 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"( Deleted Files\n",
" 28 SaleAudit-2023-09-11.csv\n",
" 29 SaleAudit-2023-09-18.csv\n",
" 27 SaleAudit-2023-09-18.csv\n",
" 18 SaleAudit-2023-09-25.csv\n",
" 9 SaleAudit-2023-10-02.csv\n",
" 0 SaleAudit-2023-10-09.csv\n",
" 33 SaleAudit_A1 SUPERMARKET_2023-09-18.csv\n",
" 22 SaleAudit_A1 SUPERMARKET_2023-09-25.csv\n",
" 13 SaleAudit_A1 SUPERMARKET_2023-10-02.csv\n",
" 4 SaleAudit_A1 SUPERMARKET_2023-10-09.csv\n",
" 36 SaleAudit_Mogwase DC_2023-09-18.csv\n",
" 26 SaleAudit_Mogwase DC_2023-09-25.csv\n",
" 17 SaleAudit_Mogwase DC_2023-10-02.csv\n",
" 8 SaleAudit_Mogwase DC_2023-10-09.csv\n",
" 34 SaleAudit_Mogwase Grocery_2023-09-18.csv\n",
" 23 SaleAudit_Mogwase Grocery_2023-09-25.csv\n",
" 15 SaleAudit_Mogwase Grocery_2023-10-02.csv\n",
" 5 SaleAudit_Mogwase Grocery_2023-10-09.csv\n",
" 31 SaleAudit_Northam_2023-09-18.csv\n",
" 20 SaleAudit_Northam_2023-09-25.csv\n",
" 10 SaleAudit_Northam_2023-10-02.csv\n",
" 2 SaleAudit_Northam_2023-10-09.csv\n",
" 35 SaleAudit_Potchefstroom_2023-09-18.csv\n",
" 24 SaleAudit_Potchefstroom_2023-09-25.csv\n",
" 16 SaleAudit_Potchefstroom_2023-10-02.csv\n",
" 7 SaleAudit_Potchefstroom_2023-10-09.csv\n",
" 32 SaleAudit_Premjee LT_2023-09-18.csv\n",
" 21 SaleAudit_Premjee LT_2023-09-25.csv\n",
" 12 SaleAudit_Premjee LT_2023-10-02.csv\n",
" 3 SaleAudit_Premjee LT_2023-10-09.csv\n",
" 30 SaleAudit_Premjee TZN_2023-09-18.csv\n",
" 19 SaleAudit_Premjee TZN_2023-09-25.csv\n",
" 11 SaleAudit_Premjee TZN_2023-10-02.csv\n",
" 1 SaleAudit_Premjee TZN_2023-10-09.csv\n",
" 25 SaleAudit_Zeerust_2023-09-25.csv\n",
" 14 SaleAudit_Zeerust_2023-10-02.csv\n",
" 6 SaleAudit_Zeerust_2023-10-09.csv,\n",
" Kept Files\n",
" 0 SaleAudit-2023-10-16.csv\n",
" 1 SaleAudit_Premjee TZN_2023-10-16.csv\n",
" 2 SaleAudit_Northam_2023-10-16.csv\n",
" 3 SaleAudit_Premjee LT_2023-10-16.csv\n",
" 4 SaleAudit_A1 SUPERMARKET_2023-10-16.csv\n",
" 5 SaleAudit_Mogwase Grocery_2023-10-16.csv\n",
" 6 SaleAudit_Zeerust_2023-10-16.csv\n",
" 7 SaleAudit_Potchefstroom_2023-10-16.csv\n",
" 8 SaleAudit_Mogwase DC_2023-10-16.csv)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"delete_all_except_recent(directory)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"9"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"_, dataframe_remainders = delete_all_except_recent(directory)\n",
"len(dataframe_remainders)"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/EST/woermann brock.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"source": [
"## Woermann Brock \n",
"\n",
"Servers as a normalization script before the execution of the preload."
"Serves as a normalization script before the execution of the preload."
]
},
{
Expand Down
28 changes: 2 additions & 26 deletions notebooks/Massmart/rename_files.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,9 @@
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Renamed AfriqueSA_Makro_Delta_Delta_WE_20231029.txt to AfriqueSA_Makro_Delta_Delta_Makro_Delta_WE_20231029.txt\n",
"Renamed AfriqueSA_Makro_Delta_WE_20231105.txt to AfriqueSA_Makro_Delta_Makro_WE_20231105.txt\n",
"Renamed AlpenFoodsSA_Makro_Delta_Delta_WE_20231029.txt to AlpenFoodsSA_Makro_Delta_Delta_Makro_Delta_WE_20231029.txt\n",
"Renamed AlpenFoodsSA_Makro_Delta_WE_20231105.txt to AlpenFoodsSA_Makro_Delta_Makro_WE_20231105.txt\n",
"Renamed AspenSA_Makro_Delta_WE_20231029.txt to AspenSA_Makro_Delta_Makro_Delta_WE_20231029.txt\n",
"Renamed AspenSA_Makro_Delta_WE_20231105.txt to AspenSA_Makro_Delta_Makro_WE_20231105.txt\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[1;32mc:\\Users\\tsello01\\Documents\\Scripts\\notebooks\\Massmart\\rename_files.ipynb Cell 1\u001b[0m line \u001b[0;36m3\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/tsello01/Documents/Scripts/notebooks/Massmart/rename_files.ipynb#W0sZmlsZQ%3D%3D?line=33'>34</a>\u001b[0m new_filename \u001b[39m=\u001b[39m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mcompany_name\u001b[39m}\u001b[39;00m\u001b[39m_Makro_\u001b[39m\u001b[39m{\u001b[39;00mnew_keyword\u001b[39m}\u001b[39;00m\u001b[39m_\u001b[39m\u001b[39m{\u001b[39;00mweekending_date\u001b[39m}\u001b[39;00m\u001b[39m_\u001b[39m\u001b[39m{\u001b[39;00mcounter\u001b[39m}\u001b[39;00m\u001b[39m.txt\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/tsello01/Documents/Scripts/notebooks/Massmart/rename_files.ipynb#W0sZmlsZQ%3D%3D?line=35'>36</a>\u001b[0m \u001b[39m# Rename the file\u001b[39;00m\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/tsello01/Documents/Scripts/notebooks/Massmart/rename_files.ipynb#W0sZmlsZQ%3D%3D?line=36'>37</a>\u001b[0m os\u001b[39m.\u001b[39;49mrename(file_path, os\u001b[39m.\u001b[39;49mpath\u001b[39m.\u001b[39;49mjoin(folder_path, new_filename))\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/tsello01/Documents/Scripts/notebooks/Massmart/rename_files.ipynb#W0sZmlsZQ%3D%3D?line=37'>38</a>\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mRenamed \u001b[39m\u001b[39m{\u001b[39;00mfilename\u001b[39m}\u001b[39;00m\u001b[39m to \u001b[39m\u001b[39m{\u001b[39;00mnew_filename\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"outputs": [],
"source": [
"import os\n",
"\n",
Expand Down
18 changes: 18 additions & 0 deletions notebooks/PnP/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
- path: R:\RawData\Elite Star\Devland\Done
age_threshold_days: 30
- path: R:\RawData\Elite Star\PriceRite Cash & Carry\done
age_threshold_days: 30
- path: R:\RawData\Elite Star\Phoenix\Done
age_threshold_days: 30
- path: R:\RawData\Elite Star\Yarona\Done
age_threshold_days: 30
- path: R:\RawData\Elite Star\One Up Cash & Carry\Done
age_threshold_days: 30
- path: R:\RawData\PNP B2B\Daily Client File Downloads
age_threshold_days: 2
- path: R:\RawData\PNP SAP\Client File Downloads
age_threshold_days: 30
- path: R:\RawData\OKFOODS\Done
age_threshold_days: 7
- path: R:\RawData\Engen\EngenWeekly
age_threshold_days: 7
Loading

0 comments on commit 6e23bed

Please sign in to comment.