Skip to content

Commit bfb041a

Browse files
committed
Use Salary.csv File.
1 parent 444a279 commit bfb041a

File tree

1 file changed

+69
-0
lines changed

1 file changed

+69
-0
lines changed

Simple Linear Regression.R

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
2+
# SLR: y = b0 + b1 * x1
3+
# y is the dependent variable, b0 is the constant (intercept), b1 is the coefficient, x1 is the independent variable.
4+
5+
# Constant is the point where the line crosses the vertical axis.
6+
# b1 is the slope of the line.
7+
8+
# ------------------------------------------------ Importing Data -------------------------------------------- #
9+
10+
# Load the salary dataset (path is relative to the working directory).
Salary_Data <- read.csv("Salary_Data.csv")
11+
12+
# ------------------------- Splitting the Dataset into the Training set and Testing Set ---------------------- #
13+
14+
# install.packages("caTools") <----- Remove comment if not installed
15+
16+
library(caTools)
17+
# Fix the RNG seed so the random split below is reproducible.
set.seed(123)

# SplitRatio is the fraction of observations flagged TRUE, used here as the
# Training Set (in Python the convention is to give the Test Set fraction).
split <- sample.split(Salary_Data$Salary, SplitRatio = 0.8)
split
# TRUE means the observation goes to the Training Set and FALSE means it goes
# to the Test Set.

train_set <- subset(Salary_Data, split)
train_set
test_set <- subset(Salary_Data, !split)
test_set
28+
29+
# ----------------------------- Fitting Simple Linear Regression to the Training Set ------------------------- #
30+
31+
# Fit a linear model with Salary as the response and YearsExperience as the
# single predictor, using only the training rows.
reg <- lm(Salary ~ YearsExperience, data = train_set)
# Print the model summary (coefficients, p-values, R-squared).
summary(reg)
33+
34+
# The most important outputs are the p-values and significance levels, because they tell us about the statistical
35+
# significance of the independent variable with respect to the dependent variable.
36+
37+
# The lower the p-value, the more statistically significant the independent variable is.
38+
# If the p-value is lower than 5%, the independent variable is considered highly statistically
39+
# significant; the further the p-value is above 5%, the less statistically significant it is.
40+
41+
# ------------------------------------------ Predicting the Test Set results ---------------------------------- #
42+
43+
# Predicted salaries for the held-out test rows, using the fitted model.
y_pred <- predict(reg, newdata = test_set)
y_pred
45+
46+
# ------------------------------------------- Visualising the Training Set ------------------------------------ #
47+
48+
# install.packages("ggplot2") <----- Remove comment if not installed
49+
library(ggplot2)
50+
51+
# Scatter of the training observations (red) with the fitted regression line
# (blue). Data frames are passed via `data =` and columns are referenced by
# bare name inside aes(), which ggplot2 recommends over `df$col`.
# The plot title typo ("Trainging") is corrected to "Training".
ggplot() +
  geom_point(
    data = train_set,
    aes(x = YearsExperience, y = Salary),
    colour = "red"
  ) +
  geom_line(
    data = train_set,
    # y values are the model's predictions for the same training rows.
    aes(x = YearsExperience, y = predict(reg, newdata = train_set)),
    colour = "blue"
  ) +
  ggtitle("Salary vs Experience (Training Set)") +
  xlab("Years of Experience") +
  ylab("Salary")
59+
60+
# --------------------------------------------- Visualising the Test Set -------------------------------------- #
61+
62+
# Scatter of the test observations (red) over the regression line (blue).
# NOTE(review): the line is drawn from train_set predictions rather than
# test_set; since `reg` is one fitted straight line this traces the same line,
# so it is presumably intentional -- confirm.
ggplot() +
  geom_point(
    data = test_set,
    aes(x = YearsExperience, y = Salary),
    colour = "red"
  ) +
  geom_line(
    data = train_set,
    aes(x = YearsExperience, y = predict(reg, newdata = train_set)),
    colour = "blue"
  ) +
  ggtitle("Salary vs Experience (Test Set)") +
  xlab("Years of Experience") +
  ylab("Salary")

0 commit comments

Comments
 (0)