-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
1 addition
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "title",
   "metadata": {},
   "source": [
    "# Multiple Linear Regression Machine Learning Model Example\n",
    "\n",
    "Fits two multiple linear regression models that predict `CO2EMISSIONS` from engine and fuel-consumption columns of `FuelConsumptionCo2.csv`, then scores each model on a held-out test split.\n",
    "\n",
    "**Note:** the train/test split is seeded, so the stored outputs of the earlier unseeded run no longer apply and have been cleared. Use *Restart Kernel → Run All* to regenerate them. Figures reported by the original run are quoted in the markdown cells below for reference."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "author",
   "metadata": {},
   "source": [
    "**by Michael Kumakech**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "imports",
   "metadata": {
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# All imports live in this one cell so a fresh kernel fails fast on a missing dependency.\n",
    "from pathlib import Path\n",
    "from urllib.request import urlretrieve\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import pylab as pl\n",
    "from sklearn import linear_model\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config",
   "metadata": {
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune.\n",
    "DATA_URL = (\n",
    "    'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/'\n",
    "    'CognitiveClass/ML0101ENv3/labs/FuelConsumptionCo2.csv'\n",
    ")\n",
    "DATA_PATH = Path('FuelConsumption.csv')\n",
    "\n",
    "TARGET = 'CO2EMISSIONS'\n",
    "FEATURES_BASE = ['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB']\n",
    "FEATURES_ALL = FEATURES_BASE + ['FUELCONSUMPTION_CITY', 'FUELCONSUMPTION_HWY']\n",
    "\n",
    "TRAIN_FRACTION = 0.8  # probability that a row is assigned to the training split\n",
    "SEED = 42  # fixes the random split so a re-run reproduces the same numbers"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "download-header",
   "metadata": {},
   "source": [
    "## Download the data set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "download",
   "metadata": {
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# Download only when the file is missing: re-runs reuse the local copy and\n",
    "# the notebook no longer depends on a `wget` binary being installed.\n",
    "if not DATA_PATH.exists():\n",
    "    urlretrieve(DATA_URL, str(DATA_PATH))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "load",
   "metadata": {
    "trusted": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv(DATA_PATH)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "select-columns",
   "metadata": {
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# Keep only the numeric columns used as features plus the prediction target.\n",
    "cdf = df[['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_CITY', 'FUELCONSUMPTION_HWY', 'FUELCONSUMPTION_COMB', 'CO2EMISSIONS']]\n",
    "\n",
    "# LinearRegression rejects NaN inputs; fail here with a clearer message instead.\n",
    "assert cdf.notna().all().all(), 'selected columns contain missing values'\n",
    "cdf.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "split-header",
   "metadata": {},
   "source": [
    "## Train/test split\n",
    "\n",
    "Each row is assigned to the training set with probability `TRAIN_FRACTION`; the remaining rows form the test set. The generator is seeded so the split is identical on every run."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "split",
   "metadata": {
    "trusted": true
   },
   "outputs": [],
   "source": [
    "rng = np.random.RandomState(SEED)\n",
    "msk = rng.rand(len(cdf)) < TRAIN_FRACTION\n",
    "train = cdf[msk]\n",
    "test = cdf[~msk]\n",
    "\n",
    "assert len(train) + len(test) == len(cdf)\n",
    "len(train), len(test)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "helpers-header",
   "metadata": {},
   "source": [
    "## Helpers\n",
    "\n",
    "Both models are fitted and scored the same way, so the shared steps are defined once here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "helpers",
   "metadata": {
    "trusted": true
   },
   "outputs": [],
   "source": [
    "def fit_linear_model(train_df, feature_names, target_name=TARGET):\n",
    "    \"\"\"Fit an ordinary least squares model on the given feature columns.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    train_df : pandas.DataFrame holding the feature and target columns.\n",
    "    feature_names : list of column names used as predictors.\n",
    "    target_name : name of the column to predict.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    A new fitted sklearn LinearRegression, so fitting a second model never\n",
    "    overwrites an earlier one.\n",
    "    \"\"\"\n",
    "    x_train = np.asanyarray(train_df[feature_names])\n",
    "    # Double brackets keep the target 2-D, so coef_ has shape (1, n_features).\n",
    "    y_train = np.asanyarray(train_df[[target_name]])\n",
    "    model = linear_model.LinearRegression()\n",
    "    model.fit(x_train, y_train)\n",
    "    return model\n",
    "\n",
    "\n",
    "def evaluate_linear_model(model, test_df, feature_names, target_name=TARGET):\n",
    "    \"\"\"Score a fitted model on held-out rows.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (mse, r2) : mean squared error of the predictions and the R-squared\n",
    "    value reported by model.score.\n",
    "    \"\"\"\n",
    "    x_test = np.asanyarray(test_df[feature_names])\n",
    "    y_test = np.asanyarray(test_df[[target_name]])\n",
    "    # Predict on the same array type the model was fitted on.\n",
    "    y_hat = model.predict(x_test)\n",
    "    mse = float(np.mean((y_hat - y_test) ** 2))\n",
    "    r2 = model.score(x_test, y_test)\n",
    "    return mse, r2\n",
    "\n",
    "\n",
    "def format_equation(model, feature_names, target_name=TARGET):\n",
    "    \"\"\"Render a fitted single-target linear model as a readable equation string.\"\"\"\n",
    "    coefficients = np.ravel(model.coef_)\n",
    "    intercept = float(np.ravel(model.intercept_)[0])\n",
    "    terms = ' + '.join(\n",
    "        '%.8f*%s' % (coef, name) for coef, name in zip(coefficients, feature_names)\n",
    "    )\n",
    "    return '%s = %.8f + %s' % (target_name, intercept, terms)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "model-base-header",
   "metadata": {},
   "source": [
    "## Model 1: engine size, cylinders and combined fuel consumption"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "model-base-fit",
   "metadata": {
    "trusted": true
   },
   "outputs": [],
   "source": [
    "regr = fit_linear_model(train, FEATURES_BASE)\n",
    "\n",
    "# The coefficients\n",
    "print('Coefficients: ', regr.coef_)\n",
    "print('Intercept: ', regr.intercept_)\n",
    "print(format_equation(regr, FEATURES_BASE))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "model-base-equation",
   "metadata": {},
   "source": [
    "In the author's original (unseeded) run the fitted model was:\n",
    "\n",
    "`CO2EMISSIONS = 63.76087285 + 9.72636039*ENGINESIZE + 7.40132059*CYLINDERS + 10.11607914*FUELCONSUMPTION_COMB`\n",
    "\n",
    "The exact coefficients depend on the train/test split; the code cell above prints the equation for the current run."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "model-base-eval-header",
   "metadata": {},
   "source": [
    "## Evaluation of the model\n",
    "\n",
    "The error metric is the **mean squared error** on the test split (the original notebook printed this same quantity under the label *Residual sum of squares*). The original run reported 511.69 and an R-squared of 0.86."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "model-base-eval",
   "metadata": {
    "trusted": true
   },
   "outputs": [],
   "source": [
    "mse_base, r2_base = evaluate_linear_model(regr, test, FEATURES_BASE)\n",
    "print('Mean squared error: %.2f' % mse_base)\n",
    "\n",
    "# R-squared: 1 is perfect prediction\n",
    "print('R-squared: %.2f' % r2_base)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "model-all-header",
   "metadata": {},
   "source": [
    "## Model 2: adding city and highway fuel consumption\n",
    "\n",
    "The original run produced coefficients `[9.85937206, 6.94821804, 20.09961581, -4.35506941, -6.00944907]` with intercept `65.19404913`, a test mean squared error of 513.62 and an R-squared of 0.86, i.e. no improvement over Model 1.\n",
    "\n",
    "**TODO (confirm against the data set documentation):** `FUELCONSUMPTION_COMB` appears to be derived from the city and highway figures. If so, the three fuel columns are strongly collinear, which would explain the large, sign-flipping fuel coefficients above and why the extra columns add no predictive value."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "model-all-fit",
   "metadata": {
    "trusted": true
   },
   "outputs": [],
   "source": [
    "regr_all = fit_linear_model(train, FEATURES_ALL)\n",
    "\n",
    "# The coefficients\n",
    "print('Coefficients: ', regr_all.coef_)\n",
    "print('Intercept: ', regr_all.intercept_)\n",
    "print(format_equation(regr_all, FEATURES_ALL))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "model-all-eval",
   "metadata": {
    "trusted": true
   },
   "outputs": [],
   "source": [
    "mse_all, r2_all = evaluate_linear_model(regr_all, test, FEATURES_ALL)\n",
    "print('Mean squared error: %.2f' % mse_all)\n",
    "\n",
    "# R-squared: 1 is perfect prediction\n",
    "print('R-squared: %.2f' % r2_all)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}