Simple linear regression

# Read the salary data (columns YearsExperience and Salary) from a CSV file
# located relative to the current working directory, then echo it.
dataset <- read.csv(file = 'Salary_Data.csv')
dataset
##    YearsExperience Salary
## 1              1.1  39343
## 2              1.3  46205
## 3              1.5  37731
## 4              2.0  43525
## 5              2.2  39891
## 6              2.9  56642
## 7              3.0  60150
## 8              3.2  54445
## 9              3.2  64445
## 10             3.7  57189
## 11             3.9  63218
## 12             4.0  55794
## 13             4.0  56957
## 14             4.1  57081
## 15             4.5  61111
## 16             4.9  67938
## 17             5.1  66029
## 18             5.3  83088
## 19             5.9  81363
## 20             6.0  93940
## 21             6.8  91738
## 22             7.1  98273
## 23             7.9 101302
## 24             8.2 113812
## 25             8.7 109431
## 26             9.0 105582
## 27             9.5 116969
## 28             9.6 112635
## 29            10.3 122391
## 30            10.5 121872

Splitting the dataset into training and test sets

library(caTools)
## Warning: package 'caTools' was built under R version 3.4.2
# Fix the RNG seed so the random partition below is reproducible.
set.seed(123)

# Logical mask with one entry per row; TRUE rows go to the training set
# (SplitRatio asks for two thirds of the rows), FALSE rows to the test set.
split <- sample.split(dataset$Salary, SplitRatio = 2 / 3)
training_set <- subset(dataset, split)
test_set <- subset(dataset, !split)

Fitting the linear regression model to the training set

# Fit Salary as a linear function of YearsExperience, using only the
# training rows.
regressor <- lm(
  formula = Salary ~ YearsExperience,
  data = training_set
)

# Show the coefficient table, residual quantiles and overall fit statistics.
summary(regressor)
## 
## Call:
## lm(formula = Salary ~ YearsExperience, data = training_set)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7325.1 -3814.4   427.7  3559.7  8884.6 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        25592       2646   9.672 1.49e-08 ***
## YearsExperience     9365        421  22.245 1.52e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5391 on 18 degrees of freedom
## Multiple R-squared:  0.9649, Adjusted R-squared:  0.963 
## F-statistic: 494.8 on 1 and 18 DF,  p-value: 1.524e-14

Predicting the test set results

# Apply the fitted model to the test rows; the result is a numeric vector
# named by the original row numbers of those rows.
y_pred <- predict(regressor, newdata = test_set)
y_pred
##         2         4         5         8        11        16        20 
##  37766.77  44322.33  46195.35  55560.43  62115.99  71481.07  81782.66 
##        21        24        26 
##  89274.72 102385.84 109877.90

Visualization of the training set results

library(ggplot2)
# Scatter plot of the observed training salaries with the fitted regression
# line drawn on top.
# Each layer receives its data frame through `data =` and refers to columns by
# bare name inside aes(), instead of extracting vectors with `$` inside aes().
# Axis labels are set explicitly, so the rendered plot keeps the same title
# and labels.
ggplot() +
  geom_point(
    data = training_set,
    aes(x = YearsExperience, y = Salary),
    colour = 'pink'
  ) +
  geom_line(
    data = training_set,
    # `regressor` is not a column; it is looked up in the calling environment.
    aes(x = YearsExperience, y = predict(regressor, newdata = training_set)),
    colour = 'green'
  ) +
  ggtitle('Salary vs Experience(Training set)') +
  xlab('Years of Experience') +
  ylab('Salary')

Visualization of the test set results

# Scatter plot of the observed test salaries against the regression line.
# Each layer receives its data frame through `data =` and refers to columns by
# bare name inside aes(), instead of extracting vectors with `$` inside aes().
ggplot() +
  geom_point(
    data = test_set,
    aes(x = YearsExperience, y = Salary),
    colour = 'red'
  ) +
  geom_line(
    # The line is still computed from the training rows, as before: it is the
    # same fitted model, so the test points are compared against that line.
    data = training_set,
    aes(x = YearsExperience, y = predict(regressor, newdata = training_set)),
    colour = 'blue'
  ) +
  ggtitle('Salary vs Experience(Test set)') +
  xlab('Years of Experience') +
  ylab('Salary')