diff --git a/Titanic.ipynb b/Titanic.ipynb index 3a296ca..2cb472a 100644 --- a/Titanic.ipynb +++ b/Titanic.ipynb @@ -3,7 +3,7 @@ { "cell_type": "markdown", "source": [ - "# Titanic Data Analysis\n", + "# Titanic Data Analysis\r\n", "## Goal of Analysis: Use machine learning algorithms to get best accuracy of predictions for who survived the sinking of the Titanic given the attributes in the dataset. " ], "metadata": { @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 36, "source": [ "#Imports \r\n", "import pandas as pd\r\n", @@ -34,7 +34,12 @@ "from sklearn import preprocessing\r\n", "from io import StringIO\r\n", "from sklearn.ensemble import RandomForestClassifier\r\n", - "from sklearn.preprocessing import StandardScaler" + "from sklearn.preprocessing import StandardScaler\r\n", + "from sklearn.model_selection import GridSearchCV\r\n", + "from keras.wrappers.scikit_learn import KerasClassifier\r\n", + "from keras.models import Sequential\r\n", + "from keras.layers import Dense, Activation, Dropout\r\n", + "from numpy.random import seed" ], "outputs": [], "metadata": { @@ -50,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 37, "source": [ "titanic_df = pd.read_csv(\"titanic_data.csv\")\r\n", "titanic_df.info()" @@ -94,17 +99,149 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 38, "source": [ - "titanic_df.describe()\r\n", - "temp = titanic_df['Pclass']" + "titanic_df.describe()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Pclass Age SibSp Parch Fare \\\n", + "count 1309.000000 1046.000000 1309.000000 1309.000000 1308.000000 \n", + "mean 2.294882 29.897706 0.498854 0.385027 33.296261 \n", + "std 0.837836 14.414973 1.041658 0.865560 51.758691 \n", + "min 1.000000 0.000000 0.000000 0.000000 0.000000 \n", + "25% 2.000000 21.000000 0.000000 0.000000 7.900000 \n", + "50% 3.000000 28.000000 0.000000 0.000000 14.450000 \n", + "75% 3.000000 39.000000 1.000000 0.000000 31.280000 \n", + "max 3.000000 80.000000 8.000000 9.000000 512.330000 \n", + "\n", + " Survived \n", + "count 1309.000000 \n", + "mean 0.381971 \n", + "std 0.486055 \n", + "min 0.000000 \n", + "25% 0.000000 \n", + "50% 0.000000 \n", + "75% 1.000000 \n", + "max 1.000000 " + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassAgeSibSpParchFareSurvived
count1309.0000001046.0000001309.0000001309.0000001308.0000001309.000000
mean2.29488229.8977060.4988540.38502733.2962610.381971
std0.83783614.4149731.0416580.86556051.7586910.486055
min1.0000000.0000000.0000000.0000000.0000000.000000
25%2.00000021.0000000.0000000.0000007.9000000.000000
50%3.00000028.0000000.0000000.00000014.4500000.000000
75%3.00000039.0000001.0000000.00000031.2800001.000000
max3.00000080.0000008.0000009.000000512.3300001.000000
\n", + "
" + ] + }, + "metadata": {}, + "execution_count": 38 + } ], - "outputs": [], "metadata": {} }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 39, "source": [ "# Search for overall trends in the dataset\r\n", "pandas_profiling.ProfileReport(titanic_df)" @@ -114,9 +251,9 @@ "output_type": "stream", "name": "stderr", "text": [ - "Summarize dataset: 100%|██████████| 24/24 [00:07<00:00, 3.12it/s, Completed]\n", - "Generate report structure: 100%|██████████| 1/1 [00:03<00:00, 3.52s/it]\n", - "Render HTML: 100%|██████████| 1/1 [00:00<00:00, 1.09it/s]\n" + "Summarize dataset: 100%|██████████| 24/24 [00:08<00:00, 2.70it/s, Completed]\n", + "Generate report structure: 100%|██████████| 1/1 [00:04<00:00, 4.04s/it]\n", + "Render HTML: 100%|██████████| 1/1 [00:00<00:00, 1.14it/s]\n" ] }, { @@ -126,7 +263,7 @@ "" ], "text/html": [ - "