forked from databricks/devrel
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixing file system issue caused by Community Edition disablement of FU…
…SE mount
- Loading branch information
1 parent
74c389c
commit e1e072b
Showing
2 changed files
with
2 additions
and
2 deletions.
There are no files selected for viewing
2 changes: 1 addition & 1 deletion
2
2020-04-15 | Data Analysis with Pandas /Data-Analysis-with-Pandas.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
2 changes: 1 addition & 1 deletion
2
2020-04-15 | Data Analysis with Pandas /Plotting My Area Lab.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
{"cells":[{"cell_type":"markdown","source":["## Plotting My Area\n\nUse the starter code below to explore the data for your area"],"metadata":{}},{"cell_type":"code","source":["import datetime\nimport glob\nimport pandas as pd\n\npath = \"/dbfs/databricks-datasets/COVID/CSSEGISandData/csse_covid_19_data/csse_covid_19_daily_reports\"\nall_files = glob.glob(path + \"/*.csv\")\n\ndfs = []\n\nfor filename in all_files:\n temp_df = pd.read_csv(filename)\n temp_df.columns = [c.replace(\"/\", \"_\") for c in temp_df.columns]\n temp_df.columns = [c.replace(\" \", \"_\") for c in temp_df.columns]\n \n month, day, year = filename.split(\"/\")[-1].replace(\".csv\", \"\").split(\"-\")\n d = datetime.date(int(year), int(month), int(day))\n temp_df[\"Date\"] = d\n\n dfs.append(temp_df)\n \nall_days_df = pd.concat(dfs, axis=0, ignore_index=True, sort=False)\nall_days_df = all_days_df.drop([\"Latitude\", \"Longitude\", \"Lat\", \"Long_\", \"FIPS\", \"Combined_Key\", \"Last_Update\"], axis=1)\n\nall_days_df.head(10)"],"metadata":{},"outputs":[{"metadata":{},"output_type":"display_data","data":{"text/html":["<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Province_State</th>\n <th>Country_Region</th>\n <th>Confirmed</th>\n <th>Deaths</th>\n <th>Recovered</th>\n <th>Date</th>\n <th>Admin2</th>\n <th>Active</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Anhui</td>\n <td>Mainland China</td>\n <td>1.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2020-01-22</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Beijing</td>\n <td>Mainland China</td>\n <td>14.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2020-01-22</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Chongqing</td>\n 
<td>Mainland China</td>\n <td>6.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2020-01-22</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Fujian</td>\n <td>Mainland China</td>\n <td>1.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2020-01-22</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Gansu</td>\n <td>Mainland China</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2020-01-22</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Guangdong</td>\n <td>Mainland China</td>\n <td>26.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2020-01-22</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Guangxi</td>\n <td>Mainland China</td>\n <td>2.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2020-01-22</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Guizhou</td>\n <td>Mainland China</td>\n <td>1.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2020-01-22</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Hainan</td>\n <td>Mainland China</td>\n <td>4.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2020-01-22</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Hebei</td>\n <td>Mainland China</td>\n <td>1.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2020-01-22</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>"]}}],"execution_count":2},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":3}],"metadata":{"name":"Plotting My Area Lab","notebookId":17365945},"nbformat":4,"nbformat_minor":0} | ||
{"cells":[{"cell_type":"markdown","source":["## Plotting My Area\n\nUse the starter code below to explore the data for your area"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"6ac80b32-96fe-419b-8de0-78435d8152e4"}}},{"cell_type":"code","source":["src_path_base = \"dbfs:/databricks-datasets/COVID/CSSEGISandData/csse_covid_19_data/csse_covid_19_daily_reports/\"\ndest_path_base = \"file:////tmp/covid_daily_reports/\"\n\nfiles = [\n '11-21-2020.csv',\n '11-22-2020.csv',\n '11-23-2020.csv',\n '11-24-2020.csv',\n '11-25-2020.csv',\n '11-26-2020.csv',\n '11-27-2020.csv',\n '11-28-2020.csv',\n '11-29-2020.csv',\n '11-30-2020.csv'\n]\n\nall_files = []\n\nfor file in files:\n filename = dest_path_base+file\n dbutils.fs.cp(src_path_base+file, filename)\n all_files.append(filename)\n\nall_files"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"7698baa0-81eb-476b-811c-8367680430eb"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"<div class=\"ansiout\">Out[3]: ['file:////tmp/covid_daily_reports/11-21-2020.csv',\n 'file:////tmp/covid_daily_reports/11-22-2020.csv',\n 'file:////tmp/covid_daily_reports/11-23-2020.csv',\n 'file:////tmp/covid_daily_reports/11-24-2020.csv',\n 'file:////tmp/covid_daily_reports/11-25-2020.csv',\n 'file:////tmp/covid_daily_reports/11-26-2020.csv',\n 'file:////tmp/covid_daily_reports/11-27-2020.csv',\n 'file:////tmp/covid_daily_reports/11-28-2020.csv',\n 'file:////tmp/covid_daily_reports/11-29-2020.csv',\n 'file:////tmp/covid_daily_reports/11-30-2020.csv']</div>","removedWidgets":[],"addedWidgets":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .ansiout {\n display: block;\n unicode-bidi: embed;\n white-space: pre-wrap;\n word-wrap: break-word;\n word-break: break-all;\n font-family: \"Source Code Pro\", \"Menlo\", 
monospace;;\n font-size: 13px;\n color: #555;\n margin-left: 4px;\n line-height: 19px;\n }\n</style>\n<div class=\"ansiout\">Out[3]: ['file:////tmp/covid_daily_reports/11-21-2020.csv',\n 'file:////tmp/covid_daily_reports/11-22-2020.csv',\n 'file:////tmp/covid_daily_reports/11-23-2020.csv',\n 'file:////tmp/covid_daily_reports/11-24-2020.csv',\n 'file:////tmp/covid_daily_reports/11-25-2020.csv',\n 'file:////tmp/covid_daily_reports/11-26-2020.csv',\n 'file:////tmp/covid_daily_reports/11-27-2020.csv',\n 'file:////tmp/covid_daily_reports/11-28-2020.csv',\n 'file:////tmp/covid_daily_reports/11-29-2020.csv',\n 'file:////tmp/covid_daily_reports/11-30-2020.csv']</div>"]}}],"execution_count":0},{"cell_type":"code","source":["import datetime\nimport pandas as pd\n\ndfs = []\n\nfor filename in all_files:\n temp_df = pd.read_csv(filename)\n temp_df.columns = [c.replace(\"/\", \"_\") for c in temp_df.columns]\n temp_df.columns = [c.replace(\" \", \"_\") for c in temp_df.columns]\n \n month, day, year = filename.split(\"/\")[-1].replace(\".csv\", \"\").split(\"-\")\n d = datetime.date(int(year), int(month), int(day))\n temp_df[\"Date\"] = d\n\n dfs.append(temp_df)\n \nall_days_df = pd.concat(dfs, axis=0, ignore_index=True, sort=False)\nall_days_df = all_days_df.drop([\"Lat\", \"Long_\", \"FIPS\", \"Combined_Key\", \"Last_Update\"], axis=1)\n\nall_days_df.head(10)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"0c71a076-4e9e-434c-97b4-75bc5fceff57"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Admin2</th>\n <th>Province_State</th>\n 
<th>Country_Region</th>\n <th>Confirmed</th>\n <th>Deaths</th>\n <th>Recovered</th>\n <th>Active</th>\n <th>Incident_Rate</th>\n <th>Case_Fatality_Ratio</th>\n <th>Date</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Afghanistan</td>\n <td>44503</td>\n <td>1675</td>\n <td>35422</td>\n <td>7406.0</td>\n <td>114.320310</td>\n <td>3.763791</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>1</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Albania</td>\n <td>32196</td>\n <td>685</td>\n <td>15469</td>\n <td>16042.0</td>\n <td>1118.771284</td>\n <td>2.127593</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>2</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Algeria</td>\n <td>73774</td>\n <td>2255</td>\n <td>48183</td>\n <td>23336.0</td>\n <td>168.237732</td>\n <td>3.056632</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>3</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Andorra</td>\n <td>6207</td>\n <td>76</td>\n <td>5290</td>\n <td>841.0</td>\n <td>8033.391574</td>\n <td>1.224424</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>4</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Angola</td>\n <td>14413</td>\n <td>336</td>\n <td>7273</td>\n <td>6804.0</td>\n <td>43.853473</td>\n <td>2.331229</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>5</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Antigua and Barbuda</td>\n <td>139</td>\n <td>4</td>\n <td>128</td>\n <td>7.0</td>\n <td>141.941018</td>\n <td>2.877698</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>6</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Argentina</td>\n <td>1366182</td>\n <td>36902</td>\n <td>1187053</td>\n <td>142227.0</td>\n <td>3022.808967</td>\n <td>2.701104</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>7</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Armenia</td>\n <td>124839</td>\n <td>1931</td>\n <td>92829</td>\n <td>30079.0</td>\n <td>4212.930872</td>\n <td>1.546792</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>8</th>\n <td>NaN</td>\n <td>Australian Capital Territory</td>\n <td>Australia</td>\n 
<td>115</td>\n <td>3</td>\n <td>111</td>\n <td>1.0</td>\n <td>26.862883</td>\n <td>2.608696</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>9</th>\n <td>NaN</td>\n <td>New South Wales</td>\n <td>Australia</td>\n <td>4538</td>\n <td>53</td>\n <td>3173</td>\n <td>1312.0</td>\n <td>55.900468</td>\n <td>1.167915</td>\n <td>2020-11-21</td>\n </tr>\n </tbody>\n</table>\n</div>","textData":"<div class=\"ansiout\">Out[5]: </div>","removedWidgets":[],"addedWidgets":{},"type":"htmlSandbox","arguments":{}}},"output_type":"display_data","data":{"text/html":["<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Admin2</th>\n <th>Province_State</th>\n <th>Country_Region</th>\n <th>Confirmed</th>\n <th>Deaths</th>\n <th>Recovered</th>\n <th>Active</th>\n <th>Incident_Rate</th>\n <th>Case_Fatality_Ratio</th>\n <th>Date</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Afghanistan</td>\n <td>44503</td>\n <td>1675</td>\n <td>35422</td>\n <td>7406.0</td>\n <td>114.320310</td>\n <td>3.763791</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>1</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Albania</td>\n <td>32196</td>\n <td>685</td>\n <td>15469</td>\n <td>16042.0</td>\n <td>1118.771284</td>\n <td>2.127593</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>2</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Algeria</td>\n <td>73774</td>\n <td>2255</td>\n <td>48183</td>\n <td>23336.0</td>\n <td>168.237732</td>\n <td>3.056632</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>3</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Andorra</td>\n <td>6207</td>\n <td>76</td>\n <td>5290</td>\n <td>841.0</td>\n <td>8033.391574</td>\n <td>1.224424</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>4</th>\n <td>NaN</td>\n 
<td>NaN</td>\n <td>Angola</td>\n <td>14413</td>\n <td>336</td>\n <td>7273</td>\n <td>6804.0</td>\n <td>43.853473</td>\n <td>2.331229</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>5</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Antigua and Barbuda</td>\n <td>139</td>\n <td>4</td>\n <td>128</td>\n <td>7.0</td>\n <td>141.941018</td>\n <td>2.877698</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>6</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Argentina</td>\n <td>1366182</td>\n <td>36902</td>\n <td>1187053</td>\n <td>142227.0</td>\n <td>3022.808967</td>\n <td>2.701104</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>7</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>Armenia</td>\n <td>124839</td>\n <td>1931</td>\n <td>92829</td>\n <td>30079.0</td>\n <td>4212.930872</td>\n <td>1.546792</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>8</th>\n <td>NaN</td>\n <td>Australian Capital Territory</td>\n <td>Australia</td>\n <td>115</td>\n <td>3</td>\n <td>111</td>\n <td>1.0</td>\n <td>26.862883</td>\n <td>2.608696</td>\n <td>2020-11-21</td>\n </tr>\n <tr>\n <th>9</th>\n <td>NaN</td>\n <td>New South Wales</td>\n <td>Australia</td>\n <td>4538</td>\n <td>53</td>\n <td>3173</td>\n <td>1312.0</td>\n <td>55.900468</td>\n <td>1.167915</td>\n <td>2020-11-21</td>\n </tr>\n </tbody>\n</table>\n</div>"]}}],"execution_count":0},{"cell_type":"code","source":[""],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"b238a91a-46a8-4189-a0db-229e482b48f7"}},"outputs":[],"execution_count":0}],"metadata":{"application/vnd.databricks.v1+notebook":{"notebookName":"Plotting My Area Lab","dashboards":[],"language":"python","widgets":{},"notebookOrigID":4179985937936567}},"nbformat":4,"nbformat_minor":0} |