From ed913833ea0d9da63b2d36ea4d38943652aa2d48 Mon Sep 17 00:00:00 2001
From: mkumakech <59573912+mkumakech@users.noreply.github.com>
Date: Fri, 2 Jul 2021 20:43:18 +0300
Subject: [PATCH] Add files via upload

---
 themath_car.ipynb | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 themath_car.ipynb
diff --git a/themath_car.ipynb b/themath_car.ipynb
new file mode 100644
index 0000000..63b2a91
--- /dev/null
+++ b/themath_car.ipynb
@@ -0,0 +1 @@
+{"cells": [{"metadata": {"collapsed": true}, "cell_type": "markdown", "source": "<h2> ANOVA</h2>"}, {"metadata": {}, "cell_type": "markdown", "source": "<b> By Michael Kumakech </b>"}, {"metadata": {}, "cell_type": "markdown", "source": "<b> ANOVA: Analysis of Variance</b>"}, {"metadata": {}, "cell_type": "markdown", "source": "The Analysis of Variance (ANOVA) is a statistical method used to test whether there are significant differences between the means of two or more groups. ANOVA returns two parameters:"}, {"metadata": {}, "cell_type": "markdown", "source": "<b> F-test score:</b> ANOVA assumes the means of all groups are the same, calculates how much the actual means deviate from the assumption, and reports it as the F-test score. A larger score means there is a larger difference between the means."}, {"metadata": {}, "cell_type": "markdown", "source": "<b> P-value: </b> P-value tells how statistically significant is our calculated score value."}, {"metadata": {}, "cell_type": "markdown", "source": "If our price variable is strongly correlated with the variable we are analyzing, expect ANOVA to return a sizeable F-test score and a small p-value."}, {"metadata": {}, "cell_type": "markdown", "source": "<b> \nImport libaries</b>"}, {"metadata": {}, "cell_type": "code", "source": "import itertools\nimport pandas as pd\nimport numpy as np\nimport matplotlib.ticker as ticker\nfrom sklearn import preprocessing\n%matplotlib inline", "execution_count": 20, "outputs": []}, {"metadata": {}, "cell_type": "markdown", "source": "<b> Load data and store in dataframe df:</b>"}, {"metadata": {}, "cell_type": "markdown", "source": "This dataset was hosted on IBM Cloud object click HERE for free storage"}, {"metadata": {}, "cell_type": "code", "source": "\npath='https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DA0101EN/automobileEDA.csv'\ndf = pd.read_csv(path)\ndf.head()", "execution_count": 21, "outputs": [{"output_type": "execute_result", "execution_count": 21, "data": {"text/plain": "   symboling  normalized-losses         make aspiration num-of-doors  \\\n0          3                122  alfa-romero        std          two   \n1          3                122  alfa-romero        std          two   \n2          1                122  alfa-romero        std          two   \n3          2                164         audi        std         four   \n4          2                164         audi        std         four   \n\n    body-style drive-wheels engine-location  wheel-base    length  ...  \\\n0  convertible          rwd           front        88.6  0.811148  ...   \n1  convertible          rwd           front        88.6  0.811148  ...   \n2    hatchback          rwd           front        94.5  0.822681  ...   \n3        sedan          fwd           front        99.8  0.848630  ...   \n4        sedan          4wd           front        99.4  0.848630  ...   \n\n   compression-ratio  horsepower  peak-rpm city-mpg highway-mpg    price  \\\n0                9.0       111.0    5000.0       21          27  13495.0   \n1                9.0       111.0    5000.0       21          27  16500.0   \n2                9.0       154.0    5000.0       19          26  16500.0   \n3               10.0       102.0    5500.0       24          30  13950.0   \n4                8.0       115.0    5500.0       18          22  17450.0   \n\n  city-L/100km  horsepower-binned  diesel  gas  \n0    11.190476             Medium       0    1  \n1    11.190476             Medium       0    1  \n2    12.368421             Medium       0    1  \n3     9.791667             Medium       0    1  \n4    13.055556             Medium       0    1  \n\n[5 rows x 29 columns]", "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>symboling</th>\n      <th>normalized-losses</th>\n      <th>make</th>\n      <th>aspiration</th>\n      <th>num-of-doors</th>\n      <th>body-style</th>\n      <th>drive-wheels</th>\n      <th>engine-location</th>\n      <th>wheel-base</th>\n      <th>length</th>\n      <th>...</th>\n      <th>compression-ratio</th>\n      <th>horsepower</th>\n      <th>peak-rpm</th>\n      <th>city-mpg</th>\n      <th>highway-mpg</th>\n      <th>price</th>\n      <th>city-L/100km</th>\n      <th>horsepower-binned</th>\n      <th>diesel</th>\n      <th>gas</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>3</td>\n      <td>122</td>\n      <td>alfa-romero</td>\n      <td>std</td>\n      <td>two</td>\n      <td>convertible</td>\n      <td>rwd</td>\n      <td>front</td>\n      <td>88.6</td>\n      <td>0.811148</td>\n      <td>...</td>\n      <td>9.0</td>\n      <td>111.0</td>\n      <td>5000.0</td>\n      <td>21</td>\n      <td>27</td>\n      <td>13495.0</td>\n      <td>11.190476</td>\n      <td>Medium</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>3</td>\n      <td>122</td>\n      <td>alfa-romero</td>\n      <td>std</td>\n      <td>two</td>\n      <td>convertible</td>\n      <td>rwd</td>\n      <td>front</td>\n      <td>88.6</td>\n      <td>0.811148</td>\n      <td>...</td>\n      <td>9.0</td>\n      <td>111.0</td>\n      <td>5000.0</td>\n      <td>21</td>\n      <td>27</td>\n      <td>16500.0</td>\n      <td>11.190476</td>\n      <td>Medium</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>122</td>\n      <td>alfa-romero</td>\n      <td>std</td>\n      <td>two</td>\n      <td>hatchback</td>\n      <td>rwd</td>\n      <td>front</td>\n      <td>94.5</td>\n      <td>0.822681</td>\n      <td>...</td>\n      <td>9.0</td>\n      <td>154.0</td>\n      <td>5000.0</td>\n      <td>19</td>\n      <td>26</td>\n      <td>16500.0</td>\n      <td>12.368421</td>\n      <td>Medium</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>2</td>\n      <td>164</td>\n      <td>audi</td>\n      <td>std</td>\n      <td>four</td>\n      <td>sedan</td>\n      <td>fwd</td>\n      <td>front</td>\n      <td>99.8</td>\n      <td>0.848630</td>\n      <td>...</td>\n      <td>10.0</td>\n      <td>102.0</td>\n      <td>5500.0</td>\n      <td>24</td>\n      <td>30</td>\n      <td>13950.0</td>\n      <td>9.791667</td>\n      <td>Medium</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>2</td>\n      <td>164</td>\n      <td>audi</td>\n      <td>std</td>\n      <td>four</td>\n      <td>sedan</td>\n      <td>4wd</td>\n      <td>front</td>\n      <td>99.4</td>\n      <td>0.848630</td>\n      <td>...</td>\n      <td>8.0</td>\n      <td>115.0</td>\n      <td>5500.0</td>\n      <td>18</td>\n      <td>22</td>\n      <td>17450.0</td>\n      <td>13.055556</td>\n      <td>Medium</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows \u00d7 29 columns</p>\n</div>"}, "metadata": {}}]}, {"metadata": {}, "cell_type": "markdown", "source": "<h2> Drive Wheels</h2>"}, {"metadata": {}, "cell_type": "markdown", "source": "Since ANOVA analyzes the difference between different groups of the same variable, the groupby function will come in handy. Because the ANOVA algorithm averages the data automatically, we do not need to take the average before hand."}, {"metadata": {}, "cell_type": "markdown", "source": "Let's see if different types 'drive-wheels' impact 'price', we group the data"}, {"metadata": {}, "cell_type": "markdown", "source": "Let's see if different types 'drive-wheels' impact 'price', we group the data."}, {"metadata": {}, "cell_type": "code", "source": "\n%%capture\n! pip install seaborn", "execution_count": 3, "outputs": []}, {"metadata": {}, "cell_type": "markdown", "source": "<b> Visulisation</b>"}, {"metadata": {}, "cell_type": "code", "source": "import matplotlib.pyplot as plt\nimport seaborn as sns\n%matplotlib inline", "execution_count": 4, "outputs": []}, {"metadata": {}, "cell_type": "markdown", "source": "<h2> Grouping Methods</h2>"}, {"metadata": {}, "cell_type": "code", "source": "# grouping results\ndf_gptest = df[['drive-wheels','body-style','price']]\ngrouped_test1 = df_gptest.groupby(['drive-wheels','body-style'],as_index=False).mean()\ngrouped_test1", "execution_count": 9, "outputs": [{"output_type": "execute_result", "execution_count": 9, "data": {"text/plain": "   drive-wheels   body-style         price\n0           4wd    hatchback   7603.000000\n1           4wd        sedan  12647.333333\n2           4wd        wagon   9095.750000\n3           fwd  convertible  11595.000000\n4           fwd      hardtop   8249.000000\n5           fwd    hatchback   8396.387755\n6           fwd        sedan   9811.800000\n7           fwd        wagon   9997.333333\n8           rwd  convertible  23949.600000\n9           rwd      hardtop  24202.714286\n10          rwd    hatchback  14337.777778\n11          rwd        sedan  21711.833333\n12          rwd        wagon  16994.222222", "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>drive-wheels</th>\n      <th>body-style</th>\n      <th>price</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>4wd</td>\n      <td>hatchback</td>\n      <td>7603.000000</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>4wd</td>\n      <td>sedan</td>\n      <td>12647.333333</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>4wd</td>\n      <td>wagon</td>\n      <td>9095.750000</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>fwd</td>\n      <td>convertible</td>\n      <td>11595.000000</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>fwd</td>\n      <td>hardtop</td>\n      <td>8249.000000</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>fwd</td>\n      <td>hatchback</td>\n      <td>8396.387755</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>fwd</td>\n      <td>sedan</td>\n      <td>9811.800000</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>fwd</td>\n      <td>wagon</td>\n      <td>9997.333333</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>rwd</td>\n      <td>convertible</td>\n      <td>23949.600000</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>rwd</td>\n      <td>hardtop</td>\n      <td>24202.714286</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>rwd</td>\n      <td>hatchback</td>\n      <td>14337.777778</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>rwd</td>\n      <td>sedan</td>\n      <td>21711.833333</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <td>rwd</td>\n      <td>wagon</td>\n      <td>16994.222222</td>\n    </tr>\n  </tbody>\n</table>\n</div>"}, "metadata": {}}]}, {"metadata": {}, "cell_type": "code", "source": "grouped_test2=df_gptest[['drive-wheels', 'price']].groupby(['drive-wheels'])\ngrouped_test2.head(2)", "execution_count": 10, "outputs": [{"output_type": "execute_result", "execution_count": 10, "data": {"text/plain": "    drive-wheels    price\n0            rwd  13495.0\n1            rwd  16500.0\n3            fwd  13950.0\n4            4wd  17450.0\n5            fwd  15250.0\n136          4wd   7603.0", "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>drive-wheels</th>\n      <th>price</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>rwd</td>\n      <td>13495.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>rwd</td>\n      <td>16500.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>fwd</td>\n      <td>13950.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>4wd</td>\n      <td>17450.0</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>fwd</td>\n      <td>15250.0</td>\n    </tr>\n    <tr>\n      <th>136</th>\n      <td>4wd</td>\n      <td>7603.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"}, "metadata": {}}]}, {"metadata": {}, "cell_type": "code", "source": "# Write your code below and press Shift+Enter to execute \ndf_gptest2 = df_gptest[['body-style','price']]\ngrouped_test_bodystyle = df_gptest2.groupby(['body-style'],as_index= False).mean()\ngrouped_test_bodystyle", "execution_count": 11, "outputs": [{"output_type": "execute_result", "execution_count": 11, "data": {"text/plain": "    body-style         price\n0  convertible  21890.500000\n1      hardtop  22208.500000\n2    hatchback   9957.441176\n3        sedan  14459.755319\n4        wagon  12371.960000", "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>body-style</th>\n      <th>price</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>convertible</td>\n      <td>21890.500000</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>hardtop</td>\n      <td>22208.500000</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>hatchback</td>\n      <td>9957.441176</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>sedan</td>\n      <td>14459.755319</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>wagon</td>\n      <td>12371.960000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"}, "metadata": {}}]}, {"metadata": {}, "cell_type": "code", "source": "grouped_test2 = df_gptest[['drive-wheels', 'price']].groupby(['drive-wheels'])\ngrouped_test2.head(2)", "execution_count": 12, "outputs": [{"output_type": "execute_result", "execution_count": 12, "data": {"text/plain": "    drive-wheels    price\n0            rwd  13495.0\n1            rwd  16500.0\n3            fwd  13950.0\n4            4wd  17450.0\n5            fwd  15250.0\n136          4wd   7603.0", "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>drive-wheels</th>\n      <th>price</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>rwd</td>\n      <td>13495.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>rwd</td>\n      <td>16500.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>fwd</td>\n      <td>13950.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>4wd</td>\n      <td>17450.0</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>fwd</td>\n      <td>15250.0</td>\n    </tr>\n    <tr>\n      <th>136</th>\n      <td>4wd</td>\n      <td>7603.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"}, "metadata": {}}]}, {"metadata": {}, "cell_type": "code", "source": "df_gptest", "execution_count": 13, "outputs": [{"output_type": "execute_result", "execution_count": 13, "data": {"text/plain": "    drive-wheels   body-style    price\n0            rwd  convertible  13495.0\n1            rwd  convertible  16500.0\n2            rwd    hatchback  16500.0\n3            fwd        sedan  13950.0\n4            4wd        sedan  17450.0\n..           ...          ...      ...\n196          rwd        sedan  16845.0\n197          rwd        sedan  19045.0\n198          rwd        sedan  21485.0\n199          rwd        sedan  22470.0\n200          rwd        sedan  22625.0\n\n[201 rows x 3 columns]", "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>drive-wheels</th>\n      <th>body-style</th>\n      <th>price</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>rwd</td>\n      <td>convertible</td>\n      <td>13495.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>rwd</td>\n      <td>convertible</td>\n      <td>16500.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>rwd</td>\n      <td>hatchback</td>\n      <td>16500.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>fwd</td>\n      <td>sedan</td>\n      <td>13950.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>4wd</td>\n      <td>sedan</td>\n      <td>17450.0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>196</th>\n      <td>rwd</td>\n      <td>sedan</td>\n      <td>16845.0</td>\n    </tr>\n    <tr>\n      <th>197</th>\n      <td>rwd</td>\n      <td>sedan</td>\n      <td>19045.0</td>\n    </tr>\n    <tr>\n      <th>198</th>\n      <td>rwd</td>\n      <td>sedan</td>\n      <td>21485.0</td>\n    </tr>\n    <tr>\n      <th>199</th>\n      <td>rwd</td>\n      <td>sedan</td>\n      <td>22470.0</td>\n    </tr>\n    <tr>\n      <th>200</th>\n      <td>rwd</td>\n      <td>sedan</td>\n      <td>22625.0</td>\n    </tr>\n  </tbody>\n</table>\n<p>201 rows \u00d7 3 columns</p>\n</div>"}, "metadata": {}}]}, {"metadata": {}, "cell_type": "markdown", "source": "We can obtain the values of the method group using the method \"get_group\"."}, {"metadata": {}, "cell_type": "code", "source": "\ngrouped_test2.get_group('4wd')['price']", "execution_count": 14, "outputs": [{"output_type": "execute_result", "execution_count": 14, "data": {"text/plain": "4      17450.0\n136     7603.0\n140     9233.0\n141    11259.0\n144     8013.0\n145    11694.0\n150     7898.0\n151     8778.0\nName: price, dtype: float64"}, "metadata": {}}]}, {"metadata": {}, "cell_type": "markdown", "source": "\nwe can use the function 'f_oneway' in the module 'stats' to obtain the <b>  F-test score and P-value</b>"}, {"metadata": {}, "cell_type": "code", "source": "# ANOVA\nfrom scipy import stats\nf_val, p_val = stats.f_oneway(grouped_test2.get_group('fwd')['price'], grouped_test2.get_group('rwd')['price'], grouped_test2.get_group('4wd')['price'])  \n \nprint( \"ANOVA results: F=\", f_val, \", P =\", p_val)", "execution_count": 19, "outputs": [{"output_type": "stream", "text": "ANOVA results: F= 67.95406500780399 , P = 3.3945443577151245e-23\n", "name": "stdout"}]}, {"metadata": {}, "cell_type": "markdown", "source": "<b> Comments</b>"}, {"metadata": {}, "cell_type": "markdown", "source": "This is a great result, with a large F test score showing a strong correlation and a P value of almost 0 implying almost certain statistical significance. But does this mean all three tested groups are all this highly correlated?"}, {"metadata": {}, "cell_type": "markdown", "source": "<h2> ACTIVITY</h2>"}, {"metadata": {}, "cell_type": "markdown", "source": "<b> Separately: fwd and rwd</b>"}, {"metadata": {}, "cell_type": "code", "source": "# Write your codes for ANOVA of fwd and rwd", "execution_count": null, "outputs": []}, {"metadata": {}, "cell_type": "code", "source": "# Write your Comment here!", "execution_count": null, "outputs": []}, {"metadata": {}, "cell_type": "markdown", "source": "<b> 4wd and rwd</b>"}, {"metadata": {}, "cell_type": "code", "source": "# Write your codes for ANOVA of 4wd and rwd", "execution_count": null, "outputs": []}, {"metadata": {}, "cell_type": "code", "source": "# Write your Comment here!", "execution_count": null, "outputs": []}, {"metadata": {}, "cell_type": "markdown", "source": "<b> 4wd and fwd</b>"}, {"metadata": {}, "cell_type": "code", "source": "# Write your codes for ANOVA of 4wd and fwd", "execution_count": null, "outputs": []}, {"metadata": {}, "cell_type": "code", "source": "# Write your Comment here!", "execution_count": null, "outputs": []}, {"metadata": {}, "cell_type": "markdown", "source": "<h2> Conclusion: Important Variables</h2>"}, {"metadata": {}, "cell_type": "markdown", "source": "\nWe now have a better idea of what our data looks like and which variables are important to take into account when predicting the car price. We have narrowed it down to the following variables:"}, {"metadata": {}, "cell_type": "markdown", "source": "<b>Continuous numerical variables: Using Pearson Correlation</b>"}, {"metadata": {}, "cell_type": "markdown", "source": "Length,\nWidth,\nCurb-weight,\nEngine-size,\nHorsepower,\nCity-mpg,\nHighway-mpg,\nWheel-base,\nBore,"}, {"metadata": {}, "cell_type": "markdown", "source": "<b> Categorical variables: ANOVA</b>"}, {"metadata": {}, "cell_type": "code", "source": "# write the Name of the feature(s) here", "execution_count": null, "outputs": []}, {"metadata": {}, "cell_type": "markdown", "source": "As we now move into building machine learning models to automate our analysis, feeding the model with variables that meaningfully affect our target variable will improve our model's prediction performance."}, {"metadata": {}, "cell_type": "markdown", "source": "<h2> References</h2>"}, {"metadata": {}, "cell_type": "markdown", "source": "<b> 1. IBM Developer Skills Network</b>"}, {"metadata": {}, "cell_type": "markdown", "source": "<b>2.  MIT </b>"}, {"metadata": {}, "cell_type": "code", "source": "", "execution_count": null, "outputs": []}], "metadata": {"kernelspec": {"name": "python3", "display_name": "Python 3.7", "language": "python"}, "language_info": {"name": "python", "version": "3.7.10", "mimetype": "text/x-python", "codemirror_mode": {"name": "ipython", "version": 3}, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py"}}, "nbformat": 4, "nbformat_minor": 1}
\ No newline at end of file

	symboling	normalized-losses	make	aspiration	num-of-doors	body-style	drive-wheels	engine-location	wheel-base	length	...	compression-ratio	horsepower	peak-rpm	city-mpg	highway-mpg	price	city-L/100km	horsepower-binned	gas
0	3	122	alfa-romero	std	two	convertible	rwd	front	88.6	0.811148	...	9.0	111.0	5000.0	21	27	13495.0	11.190476	Medium	1
1	3	122	alfa-romero	std	two	convertible	rwd	front	88.6	0.811148	...	9.0	111.0	5000.0	21	27	16500.0	11.190476	Medium	1
2	1	122	alfa-romero	std	two	hatchback	rwd	front	94.5	0.822681	...	9.0	154.0	5000.0	19	26	16500.0	12.368421	Medium	1
3	2	164	audi	std	four	sedan	fwd	front	99.8	0.848630	...	10.0	102.0	5500.0	24	30	13950.0	9.791667	Medium	1
4	2	164	audi	std	four	sedan	4wd	front	99.4	0.848630	...	8.0	115.0	5500.0	18	22	17450.0	13.055556	Medium	1
	drive-wheels	body-style	price
0	4wd	hatchback	7603.000000
1	4wd	sedan	12647.333333
2	4wd	wagon	9095.750000
3	fwd	convertible	11595.000000
4	fwd	hardtop	8249.000000
5	fwd	hatchback	8396.387755
6	fwd	sedan	9811.800000
7	fwd	wagon	9997.333333
8	rwd	convertible	23949.600000
9	rwd	hardtop	24202.714286
10	rwd	hatchback	14337.777778
11	rwd	sedan	21711.833333
12	rwd	wagon	16994.222222
	body-style	price
0	convertible	21890.500000
1	hardtop	22208.500000
2	hatchback	9957.441176
3	sedan	14459.755319
4	wagon	12371.960000