Berikut disajikan data perusahaan untuk diteliti lebih lanjut, yaitu apakah terdapat hubungan antara gaji (Salary) dan lama pengalaman kerja (YearsExperience).

# Load the salary data (columns YearsExperience and Salary) from CSV and
# display it.
# NOTE(review): the path is absolute and machine-specific; a project-relative
# path would make the report reproducible elsewhere.
dataset <- read.csv(file = "D:/Data/Analisis Regresi/Salary_Data (2 Variables).csv")
dataset
##    YearsExperience Salary
## 1              1.1  39343
## 2              1.3  46205
## 3              1.5  37731
## 4              2.0  43525
## 5              2.2  39891
## 6              2.9  56642
## 7              3.0  60150
## 8              3.2  54445
## 9              3.2  64445
## 10             3.7  57189
## 11             3.9  63218
## 12             4.0  55794
## 13             4.0  56957
## 14             4.1  57081
## 15             4.5  61111
## 16             4.9  67938
## 17             5.1  66029
## 18             5.3  83088
## 19             5.9  81363
## 20             6.0  93940
## 21             6.8  91738
## 22             7.1  98273
## 23             7.9 101302
## 24             8.2 113812
## 25             8.7 109431
## 26             9.0 105582
## 27             9.5 116969
## 28             9.6 112635
## 29            10.3 122391
## 30            10.5 121872
library(caTools)
## Warning: package 'caTools' was built under R version 4.1.2
# Reproducibly split the data: SplitRatio = 0.60 sends about 60% of the rows
# to the training set and the remainder to the test set.
set.seed(123)
# NOTE: `split` shadows base::split(); the name is kept because other code in
# this file may refer to it.
split <- sample.split(dataset$Salary, SplitRatio = 0.60)
# Compare against TRUE/FALSE rather than T/F: T and F are ordinary variables
# that can be reassigned, which would silently corrupt the split.
train_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)
train_set
##    YearsExperience Salary
## 1              1.1  39343
## 3              1.5  37731
## 6              2.9  56642
## 7              3.0  60150
## 9              3.2  64445
## 10             3.7  57189
## 12             4.0  55794
## 14             4.1  57081
## 15             4.5  61111
## 17             5.1  66029
## 18             5.3  83088
## 19             5.9  81363
## 23             7.9 101302
## 25             8.7 109431
## 27             9.5 116969
## 28             9.6 112635
## 29            10.3 122391
## 30            10.5 121872
# Display the held-out rows (those where `split` is FALSE).
test_set
##    YearsExperience Salary
## 2              1.3  46205
## 4              2.0  43525
## 5              2.2  39891
## 8              3.2  54445
## 11             3.9  63218
## 13             4.0  56957
## 16             4.9  67938
## 20             6.0  93940
## 21             6.8  91738
## 22             7.1  98273
## 24             8.2 113812
## 26             9.0 105582

MODEL REGRESI

# Fit an ordinary least-squares regression of Salary on every other column of
# the training set, then show the coefficient table and fit statistics.
linearmodel <- lm(Salary ~ ., data = train_set)
summary(linearmodel)
## 
## Call:
## lm(formula = Salary ~ ., data = train_set)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7444.7 -3055.6   731.9  2907.2  8602.7 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      26256.8     2651.6   9.902 3.15e-08 ***
## YearsExperience   9245.5      418.2  22.108 2.03e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5276 on 16 degrees of freedom
## Multiple R-squared:  0.9683, Adjusted R-squared:  0.9663 
## F-statistic: 488.8 on 1 and 16 DF,  p-value: 2.031e-13

Interpretasi

# Show the analysis-of-variance table for the fitted regression model.
anova(linearmodel)
## Analysis of Variance Table
## 
## Response: Salary
##                 Df     Sum Sq    Mean Sq F value    Pr(>F)    
## YearsExperience  1 1.3603e+10 1.3603e+10  488.75 2.031e-13 ***
## Residuals       16 4.4532e+08 2.7832e+07                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Interpretasi:

# Predict Salary for the test-set rows using the model fitted on the
# training set, then display the predictions (named by original row index).
y_predict <- predict(object = linearmodel, newdata = test_set)
y_predict
##         2         4         5         8        11        13        16        20 
##  38275.95  44747.77  46596.87  55842.33  62314.16  63238.70  71559.62  81729.63 
##        21        22        24        26 
##  89126.00  91899.64 102069.65 109466.02

PLOT

library(ggplot2)
# Training-set plot: observed salaries as red points and the fitted regression
# line in blue.
# The data frame is passed via `data` and columns are referenced by bare name
# inside aes(); using `train_set$...` inside aes() is discouraged by ggplot2.
# The misspelled "Exprience" in the title and x-axis label is corrected.
ggplot(data = train_set, mapping = aes(x = YearsExperience)) +
  geom_point(aes(y = Salary), colour = "red") +
  geom_line(
    aes(y = predict(linearmodel, newdata = train_set)),
    colour = "blue"
  ) +
  ggtitle("Salary vs Experience (Train_set)") +
  xlab("Years of Experience") +
  ylab("Salary")

Penjelasan: