Skip to content

Commit

Permalink
Added test notebooks.
Browse files Browse the repository at this point in the history
iangow committed Jan 8, 2024
1 parent f905803 commit 5eaee48
Showing 3 changed files with 107 additions and 254 deletions.
190 changes: 14 additions & 176 deletions wrds_update_csv_test.ipynb
Original file line number Diff line number Diff line change
@@ -8,7 +8,7 @@
"outputs": [],
"source": [
"import os\n",
"os.environ[\"CSV_DIR\"]='/Users/iangow/Dropbox/csv_data'"
"os.environ[\"CSV_DIR\"]='data'"
]
},
{
@@ -18,7 +18,7 @@
"metadata": {},
"outputs": [],
"source": [
"from wrds2pg import wrds_update_csv, wrds_update, wrds_update_pq"
"from wrds2pg import wrds_update_csv"
]
},
{
@@ -32,8 +32,9 @@
"output_type": "stream",
"text": [
"Forcing update based on user request.\n",
"Beginning file download at 15:34:36.\n",
"Completed file download at 15:34:44.\n"
"Beginning file download at 13:59:57.\n",
"Completed file download at 14:00:05.\n",
"\n"
]
},
{
@@ -55,134 +56,16 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "4e44b05f-4a10-4806-aad9-7b6b784a50d0",
"id": "9808e36c-577c-4d8f-8a4f-ced768e77d70",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Forcing update based on user request.\n",
"Beginning file import at 15:34:51.\n",
"Importing data into audit.feed21_bankruptcy_notification\n",
"Completed file import at 15:35:00.\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"wrds_update(\"feed21_bankruptcy_notification\", \"audit\", \n",
" force=True, drop=\"match: closest: prior:\")"
"# !bbedit data/audit/feed21_bankruptcy_notification.csv.gz"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "725dc2a1-b529-48b0-bc2d-71302f7c78b6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Forcing update based on user request.\n",
"Beginning file download at 15:35:04.\n",
"Saving data to temporary CSV.\n",
"Converting temporary CSV to parquet.\n",
"Parquet file: /Users/iangow/Library/CloudStorage/Dropbox/pq_data/audit/feed21_bankruptcy_notification.parquet\n",
"Completed creation of parquet file at 15:35:16.\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wrds_update_pq(\"feed21_bankruptcy_notification\", \"audit\", \n",
" force=True, drop=\"match: closest: prior:\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "832b1e30-0da3-4ea1-80bd-6df4d4f25443",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Forcing update based on user request.\n",
"Beginning file import at 15:35:23.\n",
"Importing data into crsp.dsi\n",
"Completed file import at 15:35:32.\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wrds_update(\"dsi\", \"crsp\", drop=\"usdval usdcnt\", force=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d0ba8991-c882-45e0-bd6c-d86a15f9442d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Forcing update based on user request.\n",
"Beginning file import at 15:35:40.\n",
"Importing data into crsp.dsi\n",
"Completed file import at 15:35:49.\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wrds_update(\"dsi\", \"crsp\", force=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "65dd21d5-2c28-494a-b45c-5dc0a97f1b92",
"metadata": {},
"outputs": [
@@ -191,11 +74,9 @@
"output_type": "stream",
"text": [
"Forcing update based on user request.\n",
"Beginning file download at 15:35:53.\n",
"Saving data to temporary CSV.\n",
"Converting temporary CSV to parquet.\n",
"Parquet file: /Users/iangow/Library/CloudStorage/Dropbox/pq_data/crsp/dsi.parquet\n",
"Completed creation of parquet file at 15:36:06.\n"
"Beginning file download at 14:00:10.\n",
"Completed file download at 14:00:18.\n",
"\n"
]
},
{
@@ -204,56 +85,13 @@
"True"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wrds_update_pq(\"dsi\", \"crsp\", drop=\"usdval usdcnt\", force=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "25f1e98f-65ee-42ec-9baa-948efd128951",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Forcing update based on user request.\n",
"Beginning file download at 15:36:10.\n",
"Saving data to temporary CSV.\n",
"Converting temporary CSV to parquet.\n",
"Parquet file: /Users/iangow/Library/CloudStorage/Dropbox/pq_data/crsp/dsi.parquet\n",
"Completed creation of parquet file at 15:36:22.\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 9,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wrds_update_pq(\"dsi\", \"crsp\", force=True)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "9808e36c-577c-4d8f-8a4f-ced768e77d70",
"metadata": {},
"outputs": [],
"source": [
"# !bbedit /Users/iangow/Dropbox/csv_data/audit/feed21_bankruptcy_notification.csv.gz"
"wrds_update_csv(\"dsi\", \"crsp\", drop=\"usdval usdcnt\", force=True)"
]
}
],
@@ -273,7 +111,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.11.7"
}
},
"nbformat": 4,
86 changes: 8 additions & 78 deletions wrds_update_pq_test.ipynb
Original file line number Diff line number Diff line change
@@ -7,77 +7,12 @@
"metadata": {},
"outputs": [],
"source": [
"from wrds2pg import wrds_update, wrds_update_csv, wrds_update_pq"
"from wrds2pg import wrds_update_pq"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "046a53fd-ab7e-43ed-9778-3aedb3ccb1a1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Forcing update based on user request.\n",
"Beginning file download at 15:36:45.\n",
"Completed file download at 15:36:57.\n"
]
}
],
"source": [
"updated = wrds_update_csv(\"auditsox302\", \"audit\",\n",
" obs=100, force=True,\n",
" data_dir=\"/Users/iangow/Downloads\",\n",
" drop=\"prior: match: closest: ic_dc_text:\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "44cc4f9d-2efe-475a-b375-605d40878546",
"metadata": {},
"outputs": [],
"source": [
"# !bbedit /Users/iangow/Downloads/audit/auditsox302.csv.gz"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "860050e1-a996-4f4d-8707-6e5cc19237a9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Forcing update based on user request.\n",
"Beginning file import at 15:37:08.\n",
"Importing data into audit.auditsox302\n",
"Completed file import at 15:39:31.\n"
]
}
],
"source": [
"updated = wrds_update(\"auditsox302\", \"audit\",\n",
" force=True,\n",
" drop=\"prior: match: closest: ic_dc_text:\", \n",
" col_types = {\"ic_dc_key\": \"integer\", \n",
" \"is_effective\": \"integer\",\n",
" \"material_weakness\": \"boolean\",\n",
" \"sig_deficiency\": \"boolean\",\n",
" \"noteff_acc_rule\": \"integer\",\n",
" \"noteff_fin_fraud\": \"integer\",\n",
" \"notefferrors\": \"integer\",\n",
" \"noteff_other\": \"integer\",\n",
" \"eventdate_aud_fkey\": \"integer\"})"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f17c0f23-b71a-4e62-b54c-17e8dad8363c",
"metadata": {},
"outputs": [
@@ -86,17 +21,20 @@
"output_type": "stream",
"text": [
"Forcing update based on user request.\n",
"Beginning file download at 15:39:35.\n",
"Beginning file download at 13:58:14.\n",
"Saving data to temporary CSV.\n",
"Converting temporary CSV to parquet.\n",
"Parquet file: /Users/iangow/Library/CloudStorage/Dropbox/pq_data/audit/auditsox302.parquet\n",
"Completed creation of parquet file at 15:41:59.\n"
"Parquet file: data/audit/auditsox302.parquet\n",
"Completed creation of parquet file at 13:58:27.\n",
"\n"
]
}
],
"source": [
"updated = wrds_update_pq(\"auditsox302\", \"audit\",\n",
" force=True,\n",
" obs=1000,\n",
" data_dir=\"data\",\n",
" drop=\"prior: match: closest: ic_dc_text:\", \n",
" col_types = {\"ic_dc_key\": \"integer\", \n",
" \"is_effective\": \"integer\",\n",
@@ -108,14 +46,6 @@
" \"noteff_other\": \"integer\",\n",
" \"eventdate_aud_fkey\": \"integer\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e9c56f9-fdd6-44b6-92d5-21f41654209b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -134,7 +64,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.11.7"
}
},
"nbformat": 4,
85 changes: 85 additions & 0 deletions wrds_update_test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "7c1e5a90-2b6d-4f3e-9a41-0d5b8e6f2c17",
   "metadata": {},
   "source": [
    "# `wrds_update` test\n",
    "\n",
    "Runs `wrds_update` for `auditsox302` (SAS schema `audit`) into a scratch `audit_temp` schema, then drops the scratch objects again."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "3f833736-3f6c-45eb-ab64-39f8ee69d433",
   "metadata": {},
   "outputs": [],
   "source": [
    "from wrds2pg import wrds_update, make_engine, process_sql"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "860050e1-a996-4f4d-8707-6e5cc19237a9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Forcing update based on user request.\n",
      "Beginning file import at 13:57:10 UTC.\n",
      "Importing data into audit_temp.auditsox302.\n",
      "Completed file import at 13:57:23 UTC.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Import into the scratch schema audit_temp (source SAS schema: audit);\n",
    "# the final cell drops the scratch objects again.\n",
    "# NOTE(review): obs=1000 presumably caps the number of rows pulled so the\n",
    "# test stays quick -- confirm against the wrds2pg documentation.\n",
    "updated = wrds_update(\"auditsox302\", \"audit_temp\",\n",
    "                      sas_schema=\"audit\",\n",
    "                      obs=1000,\n",
    "                      force=True,\n",
    "                      drop=\"prior: match: closest: ic_dc_text:\",\n",
    "                      col_types={\"ic_dc_key\": \"integer\",\n",
    "                                 \"is_effective\": \"integer\",\n",
    "                                 \"material_weakness\": \"boolean\",\n",
    "                                 \"sig_deficiency\": \"boolean\",\n",
    "                                 \"noteff_acc_rule\": \"integer\",\n",
    "                                 \"noteff_fin_fraud\": \"integer\",\n",
    "                                 \"notefferrors\": \"integer\",\n",
    "                                 \"noteff_other\": \"integer\",\n",
    "                                 \"eventdate_aud_fkey\": \"integer\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "424d62e0-b716-4462-adac-f04f5a3a3e59",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tear down what the test import left behind. IF EXISTS keeps this cell safe\n",
    "# to re-run (or to run after a failed import) when some objects are missing.\n",
    "# NOTE(review): the two roles are presumably created by wrds_update alongside\n",
    "# the schema -- confirm against the wrds2pg source.\n",
    "cleanup_statements = [\n",
    "    \"DROP TABLE IF EXISTS audit_temp.auditsox302\",\n",
    "    \"DROP SCHEMA IF EXISTS audit_temp\",\n",
    "    \"DROP ROLE IF EXISTS audit_temp\",\n",
    "    \"DROP ROLE IF EXISTS audit_temp_access\",\n",
    "]\n",
    "\n",
    "engine = make_engine()\n",
    "try:\n",
    "    for statement in cleanup_statements:\n",
    "        process_sql(statement, engine)\n",
    "finally:\n",
    "    # Always dispose of the engine, even if one of the DROPs raises.\n",
    "    engine.dispose()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}

0 comments on commit 5eaee48

Please sign in to comment.