forked from ShiChJ/DAND-QA
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
38 changed files
with
344,460 additions
and
0 deletions.
There are no files selected for viewing
14,045 changes: 14,045 additions & 0 deletions
14,045
Intro-DataAnalysis/L1-DataAnalysisProcess/assessing-zh.html
Large diffs are not rendered by default.
Oops, something went wrong.
2,455 changes: 2,455 additions & 0 deletions
2,455
Intro-DataAnalysis/L1-DataAnalysisProcess/assessing-zh.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
12,046 changes: 12,046 additions & 0 deletions
12,046
Intro-DataAnalysis/L1-DataAnalysisProcess/assessing_quiz.html
Large diffs are not rendered by default.
Oops, something went wrong.
208 changes: 208 additions & 0 deletions
208
Intro-DataAnalysis/L1-DataAnalysisProcess/assessing_quiz.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"source": [ | ||
"# Assessing and Building Intuition Quiz\n", | ||
"Use the space below to explore `census_income_data.csv` and answer the quiz questions that follow." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Import pandas\n", | ||
"import pandas as pd\n", | ||
"# Load census income data\n", | ||
"df = pd.read_csv('census_income_data.csv')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"(32561, 15)" | ||
] | ||
}, | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# Work to answer the quiz questions\n", | ||
"df.shape" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"<class 'pandas.core.frame.DataFrame'>\n", | ||
"RangeIndex: 32561 entries, 0 to 32560\n", | ||
"Data columns (total 15 columns):\n", | ||
"age 32561 non-null int64\n", | ||
"workclass 30725 non-null object\n", | ||
"fnlwgt 32561 non-null int64\n", | ||
"education 32561 non-null object\n", | ||
"education-num 32561 non-null int64\n", | ||
"marital-status 32561 non-null object\n", | ||
"occupation 30718 non-null object\n", | ||
"relationship 32561 non-null object\n", | ||
"race 32561 non-null object\n", | ||
"sex 32561 non-null object\n", | ||
"capital-gain 32561 non-null int64\n", | ||
"capital-loss 32561 non-null int64\n", | ||
"hours-per-week 32561 non-null int64\n", | ||
"native-country 31978 non-null object\n", | ||
"income 32561 non-null object\n", | ||
"dtypes: int64(6), object(9)\n", | ||
"memory usage: 3.7+ MB\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"df.info()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"age 0\n", | ||
"workclass 1836\n", | ||
"fnlwgt 0\n", | ||
"education 0\n", | ||
"education-num 0\n", | ||
"marital-status 0\n", | ||
"occupation 1843\n", | ||
"relationship 0\n", | ||
"race 0\n", | ||
"sex 0\n", | ||
"capital-gain 0\n", | ||
"capital-loss 0\n", | ||
"hours-per-week 0\n", | ||
"native-country 583\n", | ||
"income 0\n", | ||
"dtype: int64" | ||
] | ||
}, | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"df.isnull().sum()\n", | ||
"# Count the number of missing values in each column" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"16" | ||
] | ||
}, | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"len(df['education'].unique())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"39.0" | ||
] | ||
}, | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"round(df['age'].mean())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"count 32561.000000\n", | ||
"mean 40.437456\n", | ||
"std 12.347429\n", | ||
"min 1.000000\n", | ||
"25% 40.000000\n", | ||
"50% 40.000000\n", | ||
"75% 45.000000\n", | ||
"max 99.000000\n", | ||
"Name: hours-per-week, dtype: float64" | ||
] | ||
}, | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"df['hours-per-week'].describe()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.