forked from ShiChJ/DAND-QA
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
36 changed files
with
70,868 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
2,405 changes: 2,405 additions & 0 deletions
2,405
Intro-DataAnalysis/L3-CaseStudy2/all_alpha_08.csv
Large diffs are not rendered by default.
Oops, something went wrong.
1,612 changes: 1,612 additions & 0 deletions
1,612
Intro-DataAnalysis/L3-CaseStudy2/all_alpha_18.csv
Large diffs are not rendered by default.
Oops, something went wrong.
12,146 changes: 12,146 additions & 0 deletions
12,146
Intro-DataAnalysis/L3-CaseStudy2/assessing.html
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,277 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"source": [ | ||
"# Assessing\n", | ||
"Use the space below to explore `all_alpha_08.csv` and `all_alpha_18.csv` and answer the quiz questions that follow." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"df_08 = pd.read_csv('all_alpha_08.csv')\n", | ||
"df_18 = pd.read_csv('all_alpha_18.csv')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"(2404, 18)\n", | ||
"25\n", | ||
"199\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(df_08.shape)  # (rows, columns)\n", | ||
"print(df_08.duplicated().sum())  # rows that exactly repeat an earlier row\n", | ||
"print(df_08.isnull().any(axis=1).sum())  # rows with at least one missing value" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 12, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"<class 'pandas.core.frame.DataFrame'>\n", | ||
"RangeIndex: 2404 entries, 0 to 2403\n", | ||
"Data columns (total 18 columns):\n", | ||
"Model 2404 non-null object\n", | ||
"Displ 2404 non-null float64\n", | ||
"Cyl 2205 non-null object\n", | ||
"Trans 2205 non-null object\n", | ||
"Drive 2311 non-null object\n", | ||
"Fuel 2404 non-null object\n", | ||
"Sales Area 2404 non-null object\n", | ||
"Stnd 2404 non-null object\n", | ||
"Underhood ID 2404 non-null object\n", | ||
"Veh Class 2404 non-null object\n", | ||
"Air Pollution Score 2404 non-null object\n", | ||
"FE Calc Appr 2205 non-null object\n", | ||
"City MPG 2205 non-null object\n", | ||
"Hwy MPG 2205 non-null object\n", | ||
"Cmb MPG 2205 non-null object\n", | ||
"Unadj Cmb MPG 2205 non-null float64\n", | ||
"Greenhouse Gas Score 2205 non-null object\n", | ||
"SmartWay 2404 non-null object\n", | ||
"dtypes: float64(2), object(16)\n", | ||
"memory usage: 338.1+ KB\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"df_08.info()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"metadata": { | ||
"scrolled": true | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"(1611, 18)\n", | ||
"0\n", | ||
"2\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(df_18.shape)  # (rows, columns)\n", | ||
"print(df_18.duplicated().sum())  # rows that exactly repeat an earlier row\n", | ||
"print(df_18.isnull().any(axis=1).sum())  # rows with at least one missing value" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 13, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"<class 'pandas.core.frame.DataFrame'>\n", | ||
"RangeIndex: 1611 entries, 0 to 1610\n", | ||
"Data columns (total 18 columns):\n", | ||
"Model 1611 non-null object\n", | ||
"Displ 1609 non-null float64\n", | ||
"Cyl 1609 non-null float64\n", | ||
"Trans 1611 non-null object\n", | ||
"Drive 1611 non-null object\n", | ||
"Fuel 1611 non-null object\n", | ||
"Cert Region 1611 non-null object\n", | ||
"Stnd 1611 non-null object\n", | ||
"Stnd Description 1611 non-null object\n", | ||
"Underhood ID 1611 non-null object\n", | ||
"Veh Class 1611 non-null object\n", | ||
"Air Pollution Score 1611 non-null int64\n", | ||
"City MPG 1611 non-null object\n", | ||
"Hwy MPG 1611 non-null object\n", | ||
"Cmb MPG 1611 non-null object\n", | ||
"Greenhouse Gas Score 1611 non-null int64\n", | ||
"SmartWay 1611 non-null object\n", | ||
"Comb CO2 1611 non-null object\n", | ||
"dtypes: float64(2), int64(2), object(14)\n", | ||
"memory usage: 226.6+ KB\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"df_18.info()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 23, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Model : 436\n", | ||
"Displ : 47\n", | ||
"Cyl : 8\n", | ||
"Trans : 14\n", | ||
"Drive : 2\n", | ||
"Fuel : 5\n", | ||
"Sales Area : 3\n", | ||
"Stnd : 12\n", | ||
"Underhood ID : 343\n", | ||
"Veh Class : 9\n", | ||
"Air Pollution Score : 13\n", | ||
"FE Calc Appr : 2\n", | ||
"City MPG : 39\n", | ||
"Hwy MPG : 43\n", | ||
"Cmb MPG : 38\n", | ||
"Unadj Cmb MPG : 721\n", | ||
"Greenhouse Gas Score : 20\n", | ||
"SmartWay : 2\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"for col in df_08.columns:  # distinct non-null values per column (nunique skips NaN)\n", | ||
"    print('{} : {}'.format(col, df_08[col].nunique()))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 24, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Model : 367\n", | ||
"Displ : 36\n", | ||
"Cyl : 7\n", | ||
"Trans : 26\n", | ||
"Drive : 2\n", | ||
"Fuel : 5\n", | ||
"Cert Region : 2\n", | ||
"Stnd : 19\n", | ||
"Stnd Description : 19\n", | ||
"Underhood ID : 230\n", | ||
"Veh Class : 9\n", | ||
"Air Pollution Score : 6\n", | ||
"City MPG : 58\n", | ||
"Hwy MPG : 62\n", | ||
"Cmb MPG : 57\n", | ||
"Greenhouse Gas Score : 10\n", | ||
"SmartWay : 3\n", | ||
"Comb CO2 : 299\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"for col in df_18.columns:  # distinct non-null values per column (nunique skips NaN)\n", | ||
"    print('{} : {}'.format(col, df_18[col].nunique()))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 26, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"['(6 cyl)' '(4 cyl)' '(12 cyl)' '(8 cyl)' '(10 cyl)' '(16 cyl)' '(5 cyl)'\n", | ||
" nan '(2 cyl)']\n", | ||
"[ 6. 4. 5. 12. 16. 8. nan 3.]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"for frame in (df_08, df_18):  # 2008 values first, then 2018\n", | ||
"    print(frame['Cyl'].unique())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 27, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"['Gasoline' 'ethanol/gas' 'ethanol' 'diesel' 'CNG']\n", | ||
"['Gasoline' 'Gasoline/Electricity' 'Diesel' 'Ethanol/Gas' 'Electricity']\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"for frame in (df_08, df_18):  # 2008 values first, then 2018\n", | ||
"    print(frame['Fuel'].unique())" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.