-
-
Notifications
You must be signed in to change notification settings - Fork 46.5k
/
Copy pathregression_autos
52 lines (39 loc) · 1.52 KB
/
regression_autos
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# -*- coding: utf-8 -*-
"""Regression Autos
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1beRq-XbKLbs_4AOP_0nIX12uKUdTQW6U
"""
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Load the dataset
file_path = "/content/_autos - regression - autos.csv" # Replace with your actual file path
data = pd.read_csv(file_path)

# Column the model learns to predict.
target_column = 'price'

# Preprocessing.
# Rows without a target value are dropped instead of imputed: a median-filled
# price is a made-up label that would distort both the fit and the reported
# test metrics.
data = data.dropna(subset=[target_column])

# Encode categorical variables using one-hot encoding. This is done before the
# split so the training and test sets share exactly the same dummy columns.
data = pd.get_dummies(data, drop_first=True)

# Define the features (X) and target variable (y)
X = data.drop(columns=[target_column])
y = data[target_column]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fill missing numerical feature values with the median. The medians are
# learned from the training rows only and then applied to both splits, so no
# statistic derived from the test set leaks into the model's inputs.
train_medians = X_train.median(numeric_only=True)
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Display results
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

# Optional: Display coefficients for interpretation (one row per feature
# column, in the same order the model was fitted on)
coefficients = pd.DataFrame(model.coef_, index=X.columns, columns=['Coefficient'])
print(coefficients)