This report includes
Dataset
Splitting the dataset into the Training set and the Test set
Fitting Simple linear regression to the Training set
Predicting the test set results
Visualising the Training set results
Visualising the Test set results
Reference
# Attach the plotting package used by the charts in this report.
library(ggplot2)

# Read the salary data from CSV into a data frame.
# NOTE(review): the path is absolute and machine-specific; adjust it locally.
dataset <- read.csv(
  file = "~/Desktop/2018Spring/Modern Data Structure/Salary-vs-Experience-using-Simple-Linear-Regression-master/Salary_Data.csv"
)
# dataset <- dataset[, 2:3]

# Show the first six rows as a quick sanity check.
head(dataset, n = 6L)
## YearsExperience Salary
## 1 1.1 39343
## 2 1.3 46205
## 3 1.5 37731
## 4 2.0 43525
## 5 2.2 39891
## 6 2.9 56642
# Exploratory scatter plot: salary against years of experience,
# drawn as hollow blue circles (shape 21).
ggplot(
  data = dataset,
  mapping = aes(x = YearsExperience, y = Salary)
) +
  geom_point(shape = 21, size = 3, color = "blue")
# install.packages("caTools")  # run once if caTools is not installed
library(caTools)

# Fix the RNG seed so the random split is reproducible.
set.seed(123)

# Logical mask over the rows: TRUE marks a training row (ratio 2/3),
# FALSE marks a test row.
split <- sample.split(dataset$Salary, SplitRatio = 2 / 3)

# Partition the data frame with the mask and its negation.
training_set <- subset(dataset, subset = split)
test_set <- subset(dataset, subset = !split)
#Feature Scaling
#training_set[, 2:3] = scale(training_set[, 2:3])
#test_set[, 2:3] = scale(test_set[, 2:3])
# Fit a simple linear regression of Salary on YearsExperience
# using only the training rows.
regressor <- lm(Salary ~ YearsExperience, data = training_set)

# Predict salaries for the held-out test rows with the fitted model.
y_pred <- predict(object = regressor, newdata = test_set)
# install.packages("ggplot2")  # run once if ggplot2 is not installed
library(ggplot2)

# Training-set plot: observed salaries as red points and the fitted
# regression line in blue.
# Each layer receives its data frame through `data =` and refers to columns
# by bare name inside aes(), instead of the discouraged `data$column` form.
ggplot() +
  geom_point(
    data = training_set,
    mapping = aes(x = YearsExperience, y = Salary),
    colour = "red"
  ) +
  geom_line(
    data = training_set,
    # y values are the model's predictions for the training rows.
    mapping = aes(
      x = YearsExperience,
      y = predict(regressor, newdata = training_set)
    ),
    colour = "blue"
  ) +
  ggtitle("Salary vs Experience (Training set)") +
  xlab("Years of experience") +
  ylab("Salary")
library(ggplot2)

# Test-set plot: observed test salaries as red points, overlaid with the
# regression line in blue.
# Each layer receives its data frame through `data =` and refers to columns
# by bare name inside aes(), instead of the discouraged `data$column` form.
ggplot() +
  geom_point(
    data = test_set,
    mapping = aes(x = YearsExperience, y = Salary),
    colour = "red"
  ) +
  geom_line(
    # The line is drawn from the training-set predictions, exactly as in
    # the training plot, so both charts show the same fitted line.
    data = training_set,
    mapping = aes(
      x = YearsExperience,
      y = predict(regressor, newdata = training_set)
    ),
    colour = "blue"
  ) +
  ggtitle("Salary vs Experience (Test set)") +
  xlab("Years of experience") +
  ylab("Salary")
This report is based on the original project, Salary-vs-Experience-using-Simple-Linear-Regression, which I found on GitHub: https://github.com/Amolghogale/Salary-vs-Experience-using-Simple-Linear-Regression/blob/master/simple_linear_regression.R