|
| 1 | + |
| 2 | +# SLR: y = b0+ b1*x1 |
| 3 | +# y is the Dependent Variable, b0 is the Constant, b1 is the Coefficient, x1 is the Independent Variable. |
| 4 | + |
| 5 | +# b0 (the constant) is the point where the line crosses the vertical axis. |
| 6 | +# b1 is the slope of the line. |
| 7 | + |
| 8 | +# ------------------------------------------------ Importing Data -------------------------------------------- # |
| 9 | + |
| 10 | +Salary_Data <- read.csv(file = "Salary_Data.csv") |
| 11 | + |
| 12 | +# ------------------------- Splitting the Dataset into the Training set and Testing Set ---------------------- # |
| 13 | + |
| 14 | +# install.packages("caTools") <----- Remove comment if not installed |
| 15 | + |
| 16 | +library(caTools) |
| 17 | +set.seed(123) |
| 18 | + |
| 19 | +# In Python we specify the fraction for the Test Set; in R, SplitRatio is the fraction for the Training Set. |
| 20 | +split <- sample.split(Salary_Data$Salary, SplitRatio = 0.8) |
| 21 | +print(split) |
| 22 | +# TRUE means the observation goes to the Training Set; FALSE means it goes to the Test Set. |
| 23 | + |
| 24 | +train_set <- subset(Salary_Data, subset = split) |
| 25 | +print(train_set) |
| 26 | +test_set <- subset(Salary_Data, subset = !split) |
| 27 | +print(test_set) |
| 28 | + |
| 29 | +# ----------------------------- Fitting Simple Linear Regression to the Training Set ------------------------- # |
| 30 | + |
| 31 | +reg <- lm(formula = Salary ~ YearsExperience, data = train_set) |
| 32 | +print(summary(reg)) |
| 33 | + |
| 34 | +# The most important things are the p-value and the significance level, because they tell us about the statistical |
| 35 | +# significance of the independent variable's effect on the dependent variable. |
| 36 | + |
| 37 | +# The lower the p-value, the more statistically significant the independent variable is. |
| 38 | +# If the p-value is below 5%, the independent variable is considered highly statistically significant; |
| 39 | +# the further above 5% it is, the less statistically significant it is. |
| 40 | + |
| 41 | +# ------------------------------------------ Predicting the Test Set results ---------------------------------- # |
| 42 | + |
| 43 | +y_pred <- predict(object = reg, newdata = test_set) |
| 44 | +print(y_pred) |
| 45 | + |
| 46 | +# ------------------------------------------- Visualising the Training Set ------------------------------------ # |
| 47 | + |
| 48 | +# install.packages("ggplot2") <----- Remove comment if not installed |
| 49 | +library(ggplot2) |
| 50 | + |
| 51 | +# Red points: observed training salaries. Blue line: salaries predicted by `reg` for the same rows. |
| 52 | +ggplot(data = train_set, mapping = aes(x = YearsExperience, y = Salary)) + |
| 53 | +  geom_point(colour = "red") + |
| 54 | +  # Columns are referenced by name through `data =` rather than with `$` inside aes(). |
| 55 | +  geom_line(aes(y = predict(reg, newdata = train_set)), colour = "blue") + |
| 56 | +  ggtitle("Salary vs Experience (Training Set)") + |
| 57 | +  xlab("Years of Experience") + |
| 58 | +  ylab("Salary") |
| 59 | + |
| 60 | +# --------------------------------------------- Visualising the Test Set -------------------------------------- # |
| 61 | + |
| 62 | +ggplot() + |
| 63 | +  geom_point(data = test_set, mapping = aes(x = YearsExperience, y = Salary), colour = "red") + |
| 64 | +  # The line is still drawn from the training-set predictions: it is the same fitted model. |
| 65 | +  geom_line(data = train_set, mapping = aes(x = YearsExperience, y = predict(reg, newdata = train_set)), |
| 66 | +            colour = "blue") + |
| 67 | +  labs(title = "Salary vs Experience (Test Set)", |
| 68 | +       x = "Years of Experience", |
| 69 | +       y = "Salary") |
0 commit comments