Berikut disajikan data perusahaan untuk diteliti lebih lanjut, yaitu apakah terdapat hubungan antara gaji (Salary) dan lama pengalaman kerja (YearsExperience).
# Load the two-column salary data (YearsExperience, Salary) from CSV.
dataset <- read.csv(
  file = "D:/Data/Analisis Regresi/Salary_Data (2 Variables).csv"
)
# Show the full data frame for inspection.
dataset
## YearsExperience Salary
## 1 1.1 39343
## 2 1.3 46205
## 3 1.5 37731
## 4 2.0 43525
## 5 2.2 39891
## 6 2.9 56642
## 7 3.0 60150
## 8 3.2 54445
## 9 3.2 64445
## 10 3.7 57189
## 11 3.9 63218
## 12 4.0 55794
## 13 4.0 56957
## 14 4.1 57081
## 15 4.5 61111
## 16 4.9 67938
## 17 5.1 66029
## 18 5.3 83088
## 19 5.9 81363
## 20 6.0 93940
## 21 6.8 91738
## 22 7.1 98273
## 23 7.9 101302
## 24 8.2 113812
## 25 8.7 109431
## 26 9.0 105582
## 27 9.5 116969
## 28 9.6 112635
## 29 10.3 122391
## 30 10.5 121872
library(caTools)
## Warning: package 'caTools' was built under R version 4.1.2
# Reproducible train/test split of `dataset` (SplitRatio = 0.60).
# Fixing the RNG seed makes sample.split() pick the same rows on every run.
set.seed(123)
# sample.split() (caTools) returns a logical vector aligned with Salary;
# TRUE marks a row assigned to the training portion.
split <- sample.split(dataset$Salary, SplitRatio = 0.60)
# Compare against TRUE/FALSE rather than T/F: T and F are ordinary variables
# that can be reassigned, which would silently corrupt the split.
train_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)
# Show the training rows (original row names are preserved by subset()).
train_set
## YearsExperience Salary
## 1 1.1 39343
## 3 1.5 37731
## 6 2.9 56642
## 7 3.0 60150
## 9 3.2 64445
## 10 3.7 57189
## 12 4.0 55794
## 14 4.1 57081
## 15 4.5 61111
## 17 5.1 66029
## 18 5.3 83088
## 19 5.9 81363
## 23 7.9 101302
## 25 8.7 109431
## 27 9.5 116969
## 28 9.6 112635
## 29 10.3 122391
## 30 10.5 121872
# Show the held-out test rows (the rows of `dataset` where `split` is FALSE).
test_set
## YearsExperience Salary
## 2 1.3 46205
## 4 2.0 43525
## 5 2.2 39891
## 8 3.2 54445
## 11 3.9 63218
## 13 4.0 56957
## 16 4.9 67938
## 20 6.0 93940
## 21 6.8 91738
## 22 7.1 98273
## 24 8.2 113812
## 26 9.0 105582
MODEL REGRESI
# Fit an ordinary least-squares regression of Salary on every other column
# of the training set (here only YearsExperience).
linearmodel <- lm(Salary ~ ., data = train_set)
# Coefficient estimates, standard errors, R-squared and the overall F-test.
summary(linearmodel)
##
## Call:
## lm(formula = Salary ~ ., data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7444.7 -3055.6 731.9 2907.2 8602.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26256.8 2651.6 9.902 3.15e-08 ***
## YearsExperience 9245.5 418.2 22.108 2.03e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5276 on 16 degrees of freedom
## Multiple R-squared: 0.9683, Adjusted R-squared: 0.9663
## F-statistic: 488.8 on 1 and 16 DF, p-value: 2.031e-13
Interpretasi
# Analysis-of-variance table for the fitted model: splits the variation in
# Salary into the part attributed to YearsExperience and the residual part.
anova(linearmodel)
## Analysis of Variance Table
##
## Response: Salary
## Df Sum Sq Mean Sq F value Pr(>F)
## YearsExperience 1 1.3603e+10 1.3603e+10 488.75 2.031e-13 ***
## Residuals 16 4.4532e+08 2.7832e+07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Interpretasi:
# Predicted Salary for each held-out test row, using the model fitted on the
# training set; the result is named by the original row numbers.
y_predict <- predict(object = linearmodel, newdata = test_set)
# Show the predictions.
y_predict
## 2 4 5 8 11 13 16 20
## 38275.95 44747.77 46596.87 55842.33 62314.16 63238.70 71559.62 81729.63
## 21 22 24 26
## 89126.00 91899.64 102069.65 109466.02
PLOT
library(ggplot2)
# Training-set scatter plot (red points) with the fitted regression line
# (blue). Columns are referenced through `data =` and bare names inside aes()
# instead of `train_set$...`, as recommended by ggplot2.
ggplot(data = train_set, mapping = aes(x = YearsExperience)) +
  geom_point(aes(y = Salary), colour = "red") +
  # Fitted values of the model evaluated on the training rows themselves.
  geom_line(
    aes(y = predict(linearmodel, newdata = train_set)),
    colour = "blue"
  ) +
  # Spelling of "Experience" corrected in the title and the x-axis label.
  ggtitle("Salary vs Experience (Train_set)") +
  xlab("Years of Experience") +
  ylab("Salary")
Penjelasan: