1 Dataset of experience and salary

This report includes the following sections:

Dataset
Splitting the dataset into the Training set and the Test set
Fitting Simple linear regression to the Training set
Predicting the test set results
Visualising the Training set results
Visualising the Test set results
Reference

1.1 Importing the Dataset

# Read the salary data from a local CSV file into `dataset`.
# NOTE(review): the path is absolute and specific to one machine; the report
# will only knit where this exact file exists.
dataset <- read.csv(
  file = "~/Desktop/2018Spring/Modern Data Structure/Salary-vs-Experience-using-Simple-Linear-Regression-master/Salary_Data.csv"
)
# Column sub-selection kept disabled (not executed).
#dataset <- dataset[, 2:3]

1.2 Using original data to draw a scatter plot

library(ggplot2)
# Preview the first six rows of the raw data.
head(dataset, n = 6L)

##   YearsExperience Salary
## 1             1.1  39343
## 2             1.3  46205
## 3             1.5  37731
## 4             2.0  43525
## 5             2.2  39891
## 6             2.9  56642

# Scatter plot of the raw data: salary against years of experience,
# drawn as blue open circles (shape 21).
ggplot(
  data = dataset,
  mapping = aes(x = YearsExperience, y = Salary)
) +
  geom_point(shape = 21, size = 3, color = "blue")

2 Splitting the dataset into the Training set and the Test set

2.1 Split

#install.packages("caTools")
library(caTools)

# Fix the RNG seed so the random split is reproducible between runs.
set.seed(123)

# Logical vector with one entry per row of `dataset`:
# TRUE marks a training row, FALSE a test row (about 2/3 vs 1/3).
# NOTE(review): the name `split` masks base::split() in this session; it is
# kept because the later chunks refer to it.
split <- sample.split(Y = dataset$Salary, SplitRatio = 2 / 3)

2.2 Training set

# Keep the rows flagged TRUE by `split` as the training set.
training_set <- subset(dataset, subset = split)

2.3 Test set

# Keep the rows flagged FALSE by `split` as the test set.
test_set <- subset(dataset, subset = !split)

# Feature scaling is kept disabled (not executed).
# NOTE(review): the preview of `dataset` shows only two columns, so the
# 2:3 column indices below would need adjusting before enabling this.
#training_set[, 2:3] = scale(training_set[, 2:3])
#test_set[, 2:3] = scale(test_set[, 2:3])

3 Fitting Simple linear regression to the Training set

# Fit a simple linear regression of Salary on YearsExperience,
# using only the training rows.
regressor <- lm(
  formula = Salary ~ YearsExperience,
  data = training_set
)

4 Predicting the test set results

# Predicted salaries for the test rows, from the model fitted on the
# training set.
y_pred <- predict(object = regressor, newdata = test_set)

5 Visualising the Training set results

#install.packages('ggplot2')
library(ggplot2)

# Training-set plot: observed salaries as red points, and the predictions of
# `regressor` for the same rows joined into a blue line.
# The data frame is passed through `data =` and columns are referenced by
# bare name, instead of extracting them with `training_set$...` inside aes().
ggplot(data = training_set, mapping = aes(x = YearsExperience)) +
  geom_point(mapping = aes(y = Salary), colour = "red") +
  geom_line(
    mapping = aes(y = predict(regressor, newdata = training_set)),
    colour = "blue"
  ) +
  ggtitle("Salary vs Experience (Training set)") +
  xlab("Years of experience") +
  ylab("Salary")

6 Visualising the Test set results

library(ggplot2)

# Test-set plot: observed test salaries as red points, over the fitted
# regression line in blue.
# Each layer gets its own data frame through `data =`, with columns
# referenced by bare name instead of `df$col` inside aes().
# NOTE(review): the line is drawn from the training rows (same call as in
# the training-set plot), so both figures show the same fitted line.
ggplot() +
  geom_point(
    data = test_set,
    mapping = aes(x = YearsExperience, y = Salary),
    colour = "red"
  ) +
  geom_line(
    data = training_set,
    mapping = aes(
      x = YearsExperience,
      y = predict(regressor, newdata = training_set)
    ),
    colour = "blue"
  ) +
  ggtitle("Salary vs Experience (Test set)") +
  xlab("Years of experience") +
  ylab("Salary")

7 Reference

This report is based on the original project Salary-vs-Experience-using-Simple-Linear-Regression, which I found on GitHub: https://github.com/Amolghogale/Salary-vs-Experience-using-Simple-Linear-Regression/blob/master/simple_linear_regression.R