Diagnostik Sisaan Part 2
Load Package
library(readxl)
library(olsrr)
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
##
## rivers
Preparing Data dan Plotting Data
# Import worksheet "Sheet1" of the practicum workbook.
antar <- read_excel(
  path = "E:\\Praktikum-5.xlsx",
  sheet = "Sheet1"
)

# Scatter plots of each predictor against the response, used to look for
# outliers and leverage points.
plot(
  x = antar$x1, y = antar$y,
  xlab = "Jumlah Minuman", ylab = "Waktu",
  pch = 19, main = "Jumlah Minuman vs Waktu"
)

plot(
  x = antar$x2, y = antar$y,
  xlab = "jarak tempuh", ylab = "Waktu",
  pch = 19, main = "Jarak Tempuh vs Waktu"
)

Model dan sisaan
# Fit the multiple linear regression of y on x1 and x2.
reg_ganda <- lm(y ~ x1 + x2, data = antar)

# Fitted values plus raw, standardized and studentized residuals.
yduga <- predict(reg_ganda)
sisa <- resid(reg_ganda)
std_sisa <- rstandard(reg_ganda) # standardized residuals
stud_sisa <- rstudent(reg_ganda) # studentized residuals

# Residuals against fitted values, first raw and then standardized.
plot(
  x = yduga, y = sisa,
  xlab = "Y duga", ylab = "Sisaan",
  pch = 19, main = "Y duga vs Sisaan"
)

plot(
  x = yduga, y = std_sisa,
  xlab = "Y duga", ylab = "Sisaan Baku",
  pch = 19, main = "Y duga vs Sisaan Baku"
)

Deteksi pencilan
# ---- Outlier detection ----
# Place the standardized residuals next to the raw data and print the table
# so that observations with large |standardized residual| stand out.
tabel <- data.frame(antar, std_sisa)
tabel
## NO y x1 x2 std_sisa
## 1 1 16.68 7 560 -1.62767993
## 2 2 11.50 3 220 0.36484267
## 3 3 12.03 3 340 -0.01609165
## 4 4 14.88 4 80 1.57972040
## 5 5 13.75 6 150 -0.14176094
## 6 6 18.11 7 330 -0.09080847
## 7 7 8.00 2 110 0.27042496
## 8 8 17.83 7 210 0.36672118
## 9 9 79.24 30 1460 3.21376278
## 10 10 21.50 5 605 0.81325432
## 11 11 40.33 16 688 0.71807970
## 12 12 21.00 10 215 -0.19325733
## 13 13 13.50 4 255 0.32517935
## 14 14 19.75 6 462 0.34113547
## 15 15 24.00 9 448 0.21029137
## 16 16 29.00 10 776 -0.22270023
## 17 17 15.35 6 200 0.13803929
## 18 18 19.00 7 132 1.11295196
## 19 19 9.50 3 36 0.57876634
## 20 20 35.10 17 770 -1.87354643
## 21 21 17.90 10 140 -0.87784258
## 22 22 52.32 26 810 -1.44999541
## 23 23 18.75 9 450 -1.44368977
## 24 24 19.83 8 635 -1.49605875
## 25 25 10.75 4 150 -0.06750861
# Refit the same model with row 9 dropped (the observation with the largest
# standardized residual in the table above), then summarize the full-data
# fit for comparison.
reg_minus_penc <- lm(y ~ x1 + x2, data = antar[-9, ])
summary(object = reg_ganda)
##
## Call:
## lm(formula = y ~ x1 + x2, data = antar)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7880 -0.6629 0.4364 1.1566 7.4197
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.341231 1.096730 2.135 0.044170 *
## x1 1.615907 0.170735 9.464 3.25e-09 ***
## x2 0.014385 0.003613 3.981 0.000631 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.259 on 22 degrees of freedom
## Multiple R-squared: 0.9596, Adjusted R-squared: 0.9559
## F-statistic: 261.2 on 2 and 22 DF, p-value: 4.687e-16
# Summary of the fit that excludes row 9, to compare with the full-data fit.
summary(object = reg_minus_penc)
##
## Call:
## lm(formula = y ~ x1 + x2, data = antar[-9, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0325 -1.2331 0.0199 1.4730 4.8167
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.447238 0.952469 4.669 0.000131 ***
## x1 1.497691 0.130207 11.502 1.58e-10 ***
## x2 0.010324 0.002854 3.618 0.001614 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.43 on 21 degrees of freedom
## Multiple R-squared: 0.9487, Adjusted R-squared: 0.9438
## F-statistic: 194.2 on 2 and 21 DF, p-value: 2.859e-14
# Standardized residuals against fitted values: full data first, then the
# model refitted without row 9.
plot(
  x = yduga, y = std_sisa,
  xlab = "Y duga", ylab = "Sisaan Baku",
  pch = 19, main = "Y duga vs Sisaan Baku Data Lengkap"
)

plot(
  x = predict(reg_minus_penc),
  y = rstandard(reg_minus_penc),
  xlab = "Y duga", ylab = "Sisaan Baku",
  pch = 19, main = "Y duga vs Sisaan Baku Data Tanpa Pencilan"
)

Titik leverage
# ---- Leverage points ----
# Hat values (leverage) of every observation, printed as a one-column
# data frame.
as.data.frame(
  hatvalues(reg_ganda, infl = influence(reg_ganda))
)
## hatvalues(reg_ganda, infl = influence(reg_ganda))
## 1 0.10180178
## 2 0.07070164
## 3 0.09873476
## 4 0.08537479
## 5 0.07501050
## 6 0.04286693
## 7 0.08179867
## 8 0.06372559
## 9 0.49829216
## 10 0.19629595
## 11 0.08613260
## 12 0.11365570
## 13 0.06112463
## 14 0.07824332
## 15 0.04111077
## 16 0.16594043
## 17 0.05943202
## 18 0.09626046
## 19 0.09644857
## 20 0.10168486
## 21 0.16527689
## 22 0.39157522
## 23 0.04126005
## 24 0.12060826
## 25 0.06664345
# Leverage cutoff 2 * p / n.
# p = number of estimated coefficients (intercept included) and n = number of
# observations used in the fit. Both are read from the fitted model rather
# than typed by hand, so the cutoff stays correct if the formula or the data
# change. For the current model this gives p = 3 and n = 25.
p <- length(coef(reg_ganda))
n <- nobs(reg_ganda)
batas_hat <- 2 * (p / n)
batas_hat
## [1] 0.24
# olsrr diagnostic plot of residuals against leverage for the full-data model.
ols_plot_resid_lev(reg_ganda)

Amatan Berpengaruh
# ---- Influential observations ----
# Cook's distance of every observation, printed as a one-column data frame.
as.data.frame(
  cooks.distance(reg_ganda, infl = influence(reg_ganda))
)
## cooks.distance(reg_ganda, infl = influence(reg_ganda))
## 1 1.000921e-01
## 2 3.375704e-03
## 3 9.455785e-06
## 4 7.764718e-02
## 5 5.432217e-04
## 6 1.231067e-04
## 7 2.171604e-03
## 8 3.051135e-03
## 9 3.419318e+00
## 10 5.384516e-02
## 11 1.619975e-02
## 12 1.596392e-03
## 13 2.294737e-03
## 14 3.292786e-03
## 15 6.319880e-04
## 16 3.289086e-03
## 17 4.013419e-04
## 18 4.397807e-02
## 19 1.191868e-02
## 20 1.324449e-01
## 21 5.086063e-02
## 22 4.510455e-01
## 23 2.989892e-02
## 24 1.023224e-01
## 25 1.084694e-04
# Critical F value used as the Cook's distance cutoff, with df1 = p and
# df2 = n - p. The degrees of freedom reuse the p and n defined earlier in
# the script instead of repeating them as literals.
# NOTE(review): some texts compare Cook's distance with the median,
# qf(0.5, p, n - p) -- confirm that the 0.95 quantile is the intended cutoff.
Ftabel <- qf(0.95, df1 = p, df2 = n - p)
Ftabel
## [1] 3.049125
# olsrr Cook's distance chart for the full-data model.
ols_plot_cooksd_chart(reg_ganda)

# olsrr Cook's distance bar plot for the same model.
ols_plot_cooksd_bar(reg_ganda)

# DFBETAS: one row per observation, one column per coefficient
# (intercept, x1, x2).
dfbetas(
  reg_ganda,
  infl = lm.influence(reg_ganda)
)
## (Intercept) x1 x2
## 1 -0.187267279 0.4113118750 -0.434862094
## 2 0.089793299 -0.0477642427 0.014414155
## 3 -0.003515177 0.0039483525 -0.002846468
## 4 0.451964743 0.0882802920 -0.273373097
## 5 -0.031674102 -0.0133001129 0.024240457
## 6 -0.014681480 0.0017921068 0.001078986
## 7 0.078071285 -0.0222783194 -0.011018802
## 8 0.071202807 0.0333823324 -0.053823961
## 9 -2.575739806 0.9287433421 1.507550618
## 10 0.107919369 -0.3381628707 0.341326746
## 11 -0.034274535 0.0925271540 -0.002686252
## 12 -0.030268935 -0.0486664488 0.053973390
## 13 0.072366473 -0.0356212226 0.011335105
## 14 0.049516699 -0.0670868604 0.061816778
## 15 0.022279094 -0.0047895025 0.006838236
## 16 -0.002693186 0.0644208340 -0.084187552
## 17 0.028855555 0.0064876499 -0.015696507
## 18 0.248558020 0.1897331043 -0.272430555
## 19 0.172558506 0.0235737344 -0.098968842
## 20 0.168036548 -0.2149950233 -0.092915080
## 21 -0.161928685 -0.2971750929 0.336406248
## 22 0.398566309 -1.0254140704 0.573140240
## 23 -0.159852248 0.0372930389 -0.052651959
## 24 -0.119720216 0.4046225960 -0.465446949
## 25 -0.016816024 0.0008498979 0.005592192
# DFBETAS cutoff 2 / sqrt(n), using the sample size n defined earlier.
batas_dfbetas <- 2 / sqrt(n)
batas_dfbetas
## [1] 0.4
# olsrr DFBETAS plots for the full-data model.
ols_plot_dfbetas(reg_ganda)

# DFFITS of every observation, printed as a one-column data frame.
as.data.frame(
  dffits(reg_ganda, infl = influence(reg_ganda))
)
## dffits(reg_ganda, infl = influence(reg_ganda))
## 1 -0.570850478
## 2 0.098618619
## 3 -0.005203676
## 4 0.500801817
## 5 -0.039458989
## 6 -0.018779374
## 7 0.078990030
## 8 0.093760764
## 9 4.296080927
## 10 0.398713071
## 11 0.217953207
## 12 -0.067670223
## 13 0.081259033
## 14 0.097362643
## 15 0.042584374
## 16 -0.097159801
## 17 0.033915978
## 18 0.365309285
## 19 0.186167873
## 20 -0.671771402
## 21 -0.388501185
## 22 -1.195036104
## 23 -0.307538544
## 24 -0.571139627
## 25 -0.017626149
# DFFITS cutoff 2 * sqrt(p / n), using the p and n defined earlier.
batas_dffits <- 2 * sqrt(p / n)
batas_dffits
## [1] 0.6928203
# olsrr DFFITS plot for the full-data model.
ols_plot_dffits(reg_ganda)

# Print all influence measures at once: DFBETAS, DFFITS, covariance ratio,
# Cook's distance and hat values, with flagged observations marked.
influence.measures(
  reg_ganda,
  infl = influence(reg_ganda)
)
## Influence measures of
## lm(formula = y ~ x1 + x2, data = antar) :
##
## dfb.1_ dfb.x1 dfb.x2 dffit cov.r cook.d hat inf
## 1 -0.18727 0.41131 -0.43486 -0.5709 0.871 1.00e-01 0.1018
## 2 0.08979 -0.04776 0.01441 0.0986 1.215 3.38e-03 0.0707
## 3 -0.00352 0.00395 -0.00285 -0.0052 1.276 9.46e-06 0.0987
## 4 0.45196 0.08828 -0.27337 0.5008 0.876 7.76e-02 0.0854
## 5 -0.03167 -0.01330 0.02424 -0.0395 1.240 5.43e-04 0.0750
## 6 -0.01468 0.00179 0.00108 -0.0188 1.200 1.23e-04 0.0429
## 7 0.07807 -0.02228 -0.01102 0.0790 1.240 2.17e-03 0.0818
## 8 0.07120 0.03338 -0.05382 0.0938 1.206 3.05e-03 0.0637
## 9 -2.57574 0.92874 1.50755 4.2961 0.342 3.42e+00 0.4983 *
## 10 0.10792 -0.33816 0.34133 0.3987 1.305 5.38e-02 0.1963
## 11 -0.03427 0.09253 -0.00269 0.2180 1.172 1.62e-02 0.0861
## 12 -0.03027 -0.04867 0.05397 -0.0677 1.291 1.60e-03 0.1137
## 13 0.07237 -0.03562 0.01134 0.0813 1.207 2.29e-03 0.0611
## 14 0.04952 -0.06709 0.06182 0.0974 1.228 3.29e-03 0.0782
## 15 0.02228 -0.00479 0.00684 0.0426 1.192 6.32e-04 0.0411
## 16 -0.00269 0.06442 -0.08419 -0.0972 1.369 3.29e-03 0.1659
## 17 0.02886 0.00649 -0.01570 0.0339 1.219 4.01e-04 0.0594
## 18 0.24856 0.18973 -0.27243 0.3653 1.069 4.40e-02 0.0963
## 19 0.17256 0.02357 -0.09897 0.1862 1.215 1.19e-02 0.0964
## 20 0.16804 -0.21500 -0.09292 -0.6718 0.760 1.32e-01 0.1017
## 21 -0.16193 -0.29718 0.33641 -0.3885 1.238 5.09e-02 0.1653
## 22 0.39857 -1.02541 0.57314 -1.1950 1.398 4.51e-01 0.3916 *
## 23 -0.15985 0.03729 -0.05265 -0.3075 0.890 2.99e-02 0.0413
## 24 -0.11972 0.40462 -0.46545 -0.5711 0.948 1.02e-01 0.1206
## 25 -0.01682 0.00085 0.00559 -0.0176 1.231 1.08e-04 0.0666
Model regresi tanpa amatan berpengaruh
# Refit the model without the flagged observations: row 9 only, row 22 only,
# and both rows together.
reg_tanpa9 <- lm(y ~ x1 + x2, data = antar[-9, ])
reg_tanpa22 <- lm(y ~ x1 + x2, data = antar[-22, ])
reg_tanpa9dan22 <- lm(y ~ x1 + x2, data = antar[-c(9, 22), ])

# ANOVA table of the full-data model as the baseline for comparison.
anova(object = reg_ganda)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 5382.4 5382.4 506.619 < 2.2e-16 ***
## x2 1 168.4 168.4 15.851 0.0006312 ***
## Residuals 22 233.7 10.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table of the model fitted without row 9.
anova(object = reg_tanpa9)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 2215.95 2215.95 375.275 7.092e-15 ***
## x2 1 77.29 77.29 13.089 0.001614 **
## Residuals 21 124.00 5.90
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table of the model fitted without row 22.
anova(object = reg_tanpa22)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 4531.2 4531.2 450.130 1.148e-15 ***
## x2 1 108.5 108.5 10.774 0.003553 **
## Residuals 21 211.4 10.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table of the model fitted without rows 9 and 22.
anova(object = reg_tanpa9dan22)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 1128.14 1128.14 183.038 1.588e-11 ***
## x2 1 76.85 76.85 12.468 0.002099 **
## Residuals 20 123.27 6.16
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1