Use Salary.csv File.

dshah98 · dshah98 · commit bfb041a64198 · 2020-10-23T08:08:31.000+05:30
diff --git a/Simple Linear Regression.R b/Simple Linear Regression.R
@@ -0,0 +1,69 @@
+
+# SLR: y = b0+ b1*x1
+# y is Dependent Variable, b0 is Constant, b1 is Coefficient, x1 us Independent Variable.
+
+# Constant is the point where the line crosses the vertical axis.
+# B1 is the Slope of the line.
+
+# ------------------------------------------------ Importing Data -------------------------------------------- #
+
+Salary_Data = read.csv("Salary_Data.csv")
+
+# ------------------------- Splitting the Dataset into the Training set and Testing Set ---------------------- #
+
+# install.packages("caTools") <----- Remove comment if not installed
+
+library(caTools)
+set.seed(123)
+
+# In Python we put the percentage for Test Set, in R we put for Training Set.
+split = sample.split(Salary_Data$Salary, SplitRatio = 0.8)
+split
+# True mean observation goes to Training Set and False means observation goes to Test Set.
+
+train_set = subset(Salary_Data, split == TRUE)
+train_set
+test_set = subset(Salary_Data, split == FALSE)
+test_set
+
+# ----------------------------- Fitting Simple Linear Regression to the Training Set ------------------------- #
+
+reg = lm(Salary ~ YearsExperience, data = train_set)
+summary(reg)
+
+# Most important things are p-value and significance level, because these help us about the statistical 
+# sifnificance of the independent variable onto the dependent variable.
+
+# The lower the p-value is and the most statistic significant independent variable is going to be.
+# If the p-valu eis lower than the 5% then that means that dependent variable would be highly statistically 
+# significant and more than 5% then the less it will be statistically signifiant.
+
+# ------------------------------------------ Predicting the Test Set results ---------------------------------- #
+
+y_pred = predict(reg, newdata = test_set)
+y_pred
+
+# ------------------------------------------- Visualising the Training Set ------------------------------------ #
+
+# install.packages("ggplot2") <----- Remove comment if not installed
+library(ggplot2)
+
+ggplot() + 
+  geom_point(aes(x = train_set$YearsExperience, y = train_set$Salary),
+             colour = "red") +
+  geom_line(aes(x = train_set$YearsExperience, y = predict(reg, newdata = train_set)),
+            color = "blue") + 
+  ggtitle("Salary vs Experience (Trainging Set)") + 
+  xlab("Years of Experience") + 
+  ylab("Salary")
+
+# --------------------------------------------- Visualising the Test Set -------------------------------------- #
+
+ggplot() + 
+  geom_point(aes(x = test_set$YearsExperience, y = test_set$Salary),
+             colour = "red") +
+  geom_line(aes(x = train_set$YearsExperience, y = predict(reg, newdata = train_set)),
+            color = "blue") + 
+  ggtitle("Salary vs Experience (Test Set)") + 
+  xlab("Years of Experience") + 
+  ylab("Salary")