diff --git a/Titanic.ipynb b/Titanic.ipynb index 3a296ca..2cb472a 100644 --- a/Titanic.ipynb +++ b/Titanic.ipynb @@ -3,7 +3,7 @@ { "cell_type": "markdown", "source": [ - "# Titanic Data Analysis\n", + "# Titanic Data Analysis\r\n", "## Goal of Analysis: Use machine learning algorithms to get best accuracy of predictions for who survived the sinking of the Titanic given the attributes in the dataset. " ], "metadata": { @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 36, "source": [ "#Imports \r\n", "import pandas as pd\r\n", @@ -34,7 +34,12 @@ "from sklearn import preprocessing\r\n", "from io import StringIO\r\n", "from sklearn.ensemble import RandomForestClassifier\r\n", - "from sklearn.preprocessing import StandardScaler" + "from sklearn.preprocessing import StandardScaler\r\n", + "from sklearn.model_selection import GridSearchCV\r\n", + "from keras.wrappers.scikit_learn import KerasClassifier\r\n", + "from keras.models import Sequential\r\n", + "from keras.layers import Dense, Activation, Dropout\r\n", + "from numpy.random import seed" ], "outputs": [], "metadata": { @@ -50,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 37, "source": [ "titanic_df = pd.read_csv(\"titanic_data.csv\")\r\n", "titanic_df.info()" @@ -94,17 +99,149 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 38, "source": [ - "titanic_df.describe()\r\n", - "temp = titanic_df['Pclass']" + "titanic_df.describe()" + ], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Pclass Age SibSp Parch Fare \\\n", + "count 1309.000000 1046.000000 1309.000000 1309.000000 1308.000000 \n", + "mean 2.294882 29.897706 0.498854 0.385027 33.296261 \n", + "std 0.837836 14.414973 1.041658 0.865560 51.758691 \n", + "min 1.000000 0.000000 0.000000 0.000000 0.000000 \n", + "25% 2.000000 21.000000 0.000000 0.000000 7.900000 \n", + "50% 3.000000 28.000000 0.000000 0.000000 14.450000 \n", + "75% 3.000000 39.000000 1.000000 0.000000 31.280000 \n", + "max 3.000000 80.000000 8.000000 9.000000 512.330000 \n", + "\n", + " Survived \n", + "count 1309.000000 \n", + "mean 0.381971 \n", + "std 0.486055 \n", + "min 0.000000 \n", + "25% 0.000000 \n", + "50% 0.000000 \n", + "75% 1.000000 \n", + "max 1.000000 " + ], + "text/html": [ + "
\n", + " | Pclass | \n", + "Age | \n", + "SibSp | \n", + "Parch | \n", + "Fare | \n", + "Survived | \n", + "
---|---|---|---|---|---|---|
count | \n", + "1309.000000 | \n", + "1046.000000 | \n", + "1309.000000 | \n", + "1309.000000 | \n", + "1308.000000 | \n", + "1309.000000 | \n", + "
mean | \n", + "2.294882 | \n", + "29.897706 | \n", + "0.498854 | \n", + "0.385027 | \n", + "33.296261 | \n", + "0.381971 | \n", + "
std | \n", + "0.837836 | \n", + "14.414973 | \n", + "1.041658 | \n", + "0.865560 | \n", + "51.758691 | \n", + "0.486055 | \n", + "
min | \n", + "1.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
25% | \n", + "2.000000 | \n", + "21.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "7.900000 | \n", + "0.000000 | \n", + "
50% | \n", + "3.000000 | \n", + "28.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "14.450000 | \n", + "0.000000 | \n", + "
75% | \n", + "3.000000 | \n", + "39.000000 | \n", + "1.000000 | \n", + "0.000000 | \n", + "31.280000 | \n", + "1.000000 | \n", + "
max | \n", + "3.000000 | \n", + "80.000000 | \n", + "8.000000 | \n", + "9.000000 | \n", + "512.330000 | \n", + "1.000000 | \n", + "