diff --git a/02_end_to_end_machine_learning_project.ipynb b/02_end_to_end_machine_learning_project.ipynb index 2fffc6eaa..981dbad82 100644 --- a/02_end_to_end_machine_learning_project.ipynb +++ b/02_end_to_end_machine_learning_project.ipynb @@ -92,13 +92,14 @@ "source": [ "import os\n", "import tarfile\n", - "import urllib.request\n", + "from six.moves import urllib\n", "\n", "HOUSING_PATH = \"datasets/housing\"\n", "HOUSING_URL = DATASETS_URL + \"/housing/housing.tgz\"\n", "\n", "def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):\n", - " os.makedirs(housing_path, exist_ok=True)\n", + " if not os.path.exists(housing_path):\n", + " os.makedirs(housing_path)\n", " tgz_path = os.path.join(housing_path, \"housing.tgz\")\n", " urllib.request.urlretrieve(housing_url, tgz_path)\n", " housing_tgz = tarfile.open(tgz_path)\n", @@ -164,65 +165,65 @@ "
\n", + " | mean_fit_time | \n", + "mean_score_time | \n", + "mean_test_score | \n", + "mean_train_score | \n", + "param_bootstrap | \n", + "param_max_features | \n", + "param_n_estimators | \n", + "params | \n", + "rank_test_score | \n", + "split0_test_score | \n", + "... | \n", + "split2_test_score | \n", + "split2_train_score | \n", + "split3_test_score | \n", + "split3_train_score | \n", + "split4_test_score | \n", + "split4_train_score | \n", + "std_fit_time | \n", + "std_score_time | \n", + "std_test_score | \n", + "std_train_score | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0.109786 | \n", + "0.005870 | \n", + "-4.213572e+09 | \n", + "-1.122089e+09 | \n", + "NaN | \n", + "2 | \n", + "3 | \n", + "{'max_features': 2, 'n_estimators': 3} | \n", + "18 | \n", + "-4.322392e+09 | \n", + "... | \n", + "-4.091199e+09 | \n", + "-1.132659e+09 | \n", + "-4.048299e+09 | \n", + "-1.084169e+09 | \n", + "-4.278616e+09 | \n", + "-1.181979e+09 | \n", + "0.008278 | \n", + "0.000273 | \n", + "1.194097e+08 | \n", + "4.503304e+07 | \n", + "
1 | \n", + "0.343632 | \n", + "0.016028 | \n", + "-3.084167e+09 | \n", + "-5.686194e+08 | \n", + "NaN | \n", + "2 | \n", + "10 | \n", + "{'max_features': 2, 'n_estimators': 10} | \n", + "11 | \n", + "-2.920668e+09 | \n", + "... | \n", + "-3.189759e+09 | \n", + "-5.684440e+08 | \n", + "-2.977423e+09 | \n", + "-5.753131e+08 | \n", + "-3.140389e+09 | \n", + "-5.569981e+08 | \n", + "0.008589 | \n", + "0.001322 | \n", + "1.133146e+08 | \n", + "1.555889e+07 | \n", + "
2 | \n", + "1.173791 | \n", + "0.057376 | \n", + "-2.802672e+09 | \n", + "-4.390709e+08 | \n", + "NaN | \n", + "2 | \n", + "30 | \n", + "{'max_features': 2, 'n_estimators': 30} | \n", + "9 | \n", + "-2.635798e+09 | \n", + "... | \n", + "-2.899767e+09 | \n", + "-4.299952e+08 | \n", + "-2.628577e+09 | \n", + "-4.459977e+08 | \n", + "-2.910563e+09 | \n", + "-4.319555e+08 | \n", + "0.157860 | \n", + "0.019980 | \n", + "1.398004e+08 | \n", + "6.703308e+06 | \n", + "
3 | \n", + "0.163757 | \n", + "0.005378 | \n", + "-3.646238e+09 | \n", + "-9.779480e+08 | \n", + "NaN | \n", + "4 | \n", + "3 | \n", + "{'max_features': 4, 'n_estimators': 3} | \n", + "16 | \n", + "-3.583831e+09 | \n", + "... | \n", + "-3.950913e+09 | \n", + "-9.887841e+08 | \n", + "-3.308822e+09 | \n", + "-1.011182e+09 | \n", + "-3.662211e+09 | \n", + "-9.190933e+08 | \n", + "0.003264 | \n", + "0.000207 | \n", + "2.083609e+08 | \n", + "3.282495e+07 | \n", + "
4 | \n", + "0.681459 | \n", + "0.019561 | \n", + "-2.778336e+09 | \n", + "-5.111719e+08 | \n", + "NaN | \n", + "4 | \n", + "10 | \n", + "{'max_features': 4, 'n_estimators': 10} | \n", + "8 | \n", + "-2.703532e+09 | \n", + "... | \n", + "-2.884782e+09 | \n", + "-4.948073e+08 | \n", + "-2.650746e+09 | \n", + "-5.355259e+08 | \n", + "-2.882622e+09 | \n", + "-5.245530e+08 | \n", + "0.174568 | \n", + "0.005263 | \n", + "9.396457e+07 | \n", + "2.504651e+07 | \n", + "
5 | \n", + "1.781424 | \n", + "0.047608 | \n", + "-2.550613e+09 | \n", + "-3.959620e+08 | \n", + "NaN | \n", + "4 | \n", + "30 | \n", + "{'max_features': 4, 'n_estimators': 30} | \n", + "3 | \n", + "-2.302148e+09 | \n", + "... | \n", + "-2.682066e+09 | \n", + "-3.923106e+08 | \n", + "-2.492072e+09 | \n", + "-4.120956e+08 | \n", + "-2.653622e+09 | \n", + "-3.940042e+08 | \n", + "0.177853 | \n", + "0.003331 | \n", + "1.402365e+08 | \n", + "8.599915e+06 | \n", + "
6 | \n", + "0.238068 | \n", + "0.005383 | \n", + "-3.487861e+09 | \n", + "-9.302686e+08 | \n", + "NaN | \n", + "6 | \n", + "3 | \n", + "{'max_features': 6, 'n_estimators': 3} | \n", + "13 | \n", + "-3.323532e+09 | \n", + "... | \n", + "-3.477330e+09 | \n", + "-8.673024e+08 | \n", + "-3.255834e+09 | \n", + "-9.719544e+08 | \n", + "-3.719818e+09 | \n", + "-9.505729e+08 | \n", + "0.022758 | \n", + "0.000401 | \n", + "1.818530e+08 | \n", + "3.502555e+07 | \n", + "
7 | \n", + "0.707470 | \n", + "0.014505 | \n", + "-2.721921e+09 | \n", + "-5.009736e+08 | \n", + "NaN | \n", + "6 | \n", + "10 | \n", + "{'max_features': 6, 'n_estimators': 10} | \n", + "5 | \n", + "-2.605933e+09 | \n", + "... | \n", + "-2.871511e+09 | \n", + "-4.944972e+08 | \n", + "-2.601547e+09 | \n", + "-5.127494e+08 | \n", + "-2.799685e+09 | \n", + "-4.853162e+08 | \n", + "0.006761 | \n", + "0.000688 | \n", + "1.062510e+08 | \n", + "1.527767e+07 | \n", + "
8 | \n", + "2.123511 | \n", + "0.041107 | \n", + "-2.495897e+09 | \n", + "-3.848766e+08 | \n", + "NaN | \n", + "6 | \n", + "30 | \n", + "{'max_features': 6, 'n_estimators': 30} | \n", + "1 | \n", + "-2.410445e+09 | \n", + "... | \n", + "-2.600516e+09 | \n", + "-3.791315e+08 | \n", + "-2.304437e+09 | \n", + "-3.834466e+08 | \n", + "-2.627380e+09 | \n", + "-3.763532e+08 | \n", + "0.014926 | \n", + "0.002775 | \n", + "1.215337e+08 | \n", + "7.051109e+06 | \n", + "
9 | \n", + "0.284902 | \n", + "0.006087 | \n", + "-3.495442e+09 | \n", + "-8.965714e+08 | \n", + "NaN | \n", + "8 | \n", + "3 | \n", + "{'max_features': 8, 'n_estimators': 3} | \n", + "14 | \n", + "-3.274179e+09 | \n", + "... | \n", + "-3.517974e+09 | \n", + "-9.317195e+08 | \n", + "-3.512932e+09 | \n", + "-9.331547e+08 | \n", + "-3.562802e+09 | \n", + "-8.541539e+08 | \n", + "0.009573 | \n", + "0.000559 | \n", + "1.160130e+08 | \n", + "3.128209e+07 | \n", + "
10 | \n", + "0.914881 | \n", + "0.014913 | \n", + "-2.750120e+09 | \n", + "-5.032131e+08 | \n", + "NaN | \n", + "8 | \n", + "10 | \n", + "{'max_features': 8, 'n_estimators': 10} | \n", + "6 | \n", + "-2.694581e+09 | \n", + "... | \n", + "-2.883188e+09 | \n", + "-4.955736e+08 | \n", + "-2.540331e+09 | \n", + "-5.046915e+08 | \n", + "-2.845775e+09 | \n", + "-4.838147e+08 | \n", + "0.009442 | \n", + "0.000396 | \n", + "1.227074e+08 | \n", + "1.561460e+07 | \n", + "
11 | \n", + "2.741693 | \n", + "0.040874 | \n", + "-2.504151e+09 | \n", + "-3.825022e+08 | \n", + "NaN | \n", + "8 | \n", + "30 | \n", + "{'max_features': 8, 'n_estimators': 30} | \n", + "2 | \n", + "-2.371638e+09 | \n", + "... | \n", + "-2.565840e+09 | \n", + "-3.751654e+08 | \n", + "-2.377880e+09 | \n", + "-3.897076e+08 | \n", + "-2.653704e+09 | \n", + "-3.785671e+08 | \n", + "0.037343 | \n", + "0.002114 | \n", + "1.112988e+08 | \n", + "5.231629e+06 | \n", + "
12 | \n", + "0.165229 | \n", + "0.006583 | \n", + "-3.890157e+09 | \n", + "0.000000e+00 | \n", + "False | \n", + "2 | \n", + "3 | \n", + "{'bootstrap': False, 'max_features': 2, 'n_est... | \n", + "17 | \n", + "-3.617603e+09 | \n", + "... | \n", + "-4.217359e+09 | \n", + "-0.000000e+00 | \n", + "-3.780422e+09 | \n", + "-0.000000e+00 | \n", + "-3.677274e+09 | \n", + "-0.000000e+00 | \n", + "0.005741 | \n", + "0.000405 | \n", + "2.492080e+08 | \n", + "0.000000e+00 | \n", + "
13 | \n", + "0.508539 | \n", + "0.018890 | \n", + "-2.978131e+09 | \n", + "0.000000e+00 | \n", + "False | \n", + "2 | \n", + "10 | \n", + "{'bootstrap': False, 'max_features': 2, 'n_est... | \n", + "10 | \n", + "-2.815093e+09 | \n", + "... | \n", + "-3.044746e+09 | \n", + "-0.000000e+00 | \n", + "-2.827508e+09 | \n", + "-0.000000e+00 | \n", + "-3.097349e+09 | \n", + "-0.000000e+00 | \n", + "0.015843 | \n", + "0.000527 | \n", + "1.298188e+08 | \n", + "0.000000e+00 | \n", + "
14 | \n", + "0.205045 | \n", + "0.006127 | \n", + "-3.556220e+09 | \n", + "0.000000e+00 | \n", + "False | \n", + "3 | \n", + "3 | \n", + "{'bootstrap': False, 'max_features': 3, 'n_est... | \n", + "15 | \n", + "-3.546021e+09 | \n", + "... | \n", + "-3.625256e+09 | \n", + "-0.000000e+00 | \n", + "-3.465998e+09 | \n", + "-0.000000e+00 | \n", + "-3.596042e+09 | \n", + "-0.000000e+00 | \n", + "0.008831 | \n", + "0.000067 | \n", + "5.415723e+07 | \n", + "0.000000e+00 | \n", + "
15 | \n", + "0.728687 | \n", + "0.020655 | \n", + "-2.751641e+09 | \n", + "0.000000e+00 | \n", + "False | \n", + "3 | \n", + "10 | \n", + "{'bootstrap': False, 'max_features': 3, 'n_est... | \n", + "7 | \n", + "-2.604595e+09 | \n", + "... | \n", + "-2.789225e+09 | \n", + "-0.000000e+00 | \n", + "-2.644243e+09 | \n", + "-0.000000e+00 | \n", + "-2.895713e+09 | \n", + "-0.000000e+00 | \n", + "0.091750 | \n", + "0.002131 | \n", + "1.101169e+08 | \n", + "0.000000e+00 | \n", + "
16 | \n", + "0.313193 | \n", + "0.011736 | \n", + "-3.460459e+09 | \n", + "0.000000e+00 | \n", + "False | \n", + "4 | \n", + "3 | \n", + "{'bootstrap': False, 'max_features': 4, 'n_est... | \n", + "12 | \n", + "-3.060089e+09 | \n", + "... | \n", + "-3.597422e+09 | \n", + "-0.000000e+00 | \n", + "-3.416000e+09 | \n", + "-0.000000e+00 | \n", + "-3.699168e+09 | \n", + "-0.000000e+00 | \n", + "0.042945 | \n", + "0.004941 | \n", + "2.203775e+08 | \n", + "0.000000e+00 | \n", + "
17 | \n", + "0.947249 | \n", + "0.019262 | \n", + "-2.705352e+09 | \n", + "0.000000e+00 | \n", + "False | \n", + "4 | \n", + "10 | \n", + "{'bootstrap': False, 'max_features': 4, 'n_est... | \n", + "4 | \n", + "-2.534795e+09 | \n", + "... | \n", + "-2.748411e+09 | \n", + "-0.000000e+00 | \n", + "-2.497470e+09 | \n", + "-0.000000e+00 | \n", + "-2.897782e+09 | \n", + "-0.000000e+00 | \n", + "0.113486 | \n", + "0.001746 | \n", + "1.622491e+08 | \n", + "0.000000e+00 | \n", + "
18 rows × 23 columns
\n", + "