Load Packages
library(readxl)
#install.packages("leaps"") #instal package leaps
library(leaps)
Preparing Data
Data 1
getwd()
## [1] "C:/Users/Ny 07/Documents/By dokument/R"
setwd("E:/") #file save located
#=================================DATA CEMENT=================================
cement=read_excel("Praktikum 14.xlsx",sheet="Sheet3")
cement
## # A tibble: 13 x 5
## y x1 x2 x3 x4
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 78.5 7 26 6 60
## 2 74.3 1 29 15 52
## 3 104. 11 56 8 20
## 4 87.6 11 31 8 47
## 5 95.9 7 52 6 33
## 6 109. 11 55 9 22
## 7 103. 3 71 17 6
## 8 72.5 1 31 22 44
## 9 93.1 2 54 18 22
## 10 116. 21 47 4 26
## 11 83.8 1 40 23 34
## 12 113. 11 66 9 12
## 13 109. 10 68 8 12
Model Accuracy
regsubsets.cement <- regsubsets(y ~ x1 + x2 + x3 + x4,
                                data = cement,
                                nbest = 2,       # keep the 2 best models for each number of predictors
                                nvmax = NULL,    # NULL: no limit on the number of variables
                                force.in = NULL, force.out = NULL,
                                method = "exhaustive")
sum.reg.cement=summary(regsubsets.cement)
output.cement=data.frame(rsq=sum.reg.cement$rsq,
adjr2=sum.reg.cement$adjr2,
cp=sum.reg.cement$cp,
outmat=sum.reg.cement$outmat)
output.cement
## rsq adjr2 cp outmat.x1 outmat.x2 outmat.x3 outmat.x4
## 1 ( 1 ) 0.6745420 0.6449549 138.730833 *
## 1 ( 2 ) 0.6662683 0.6359290 142.486407 *
## 2 ( 1 ) 0.9786784 0.9744140 2.678242 * *
## 2 ( 2 ) 0.9724710 0.9669653 5.495851 * *
## 3 ( 1 ) 0.9823355 0.9764473 3.018233 * * *
## 3 ( 2 ) 0.9822847 0.9763796 3.041280 * * *
## 4 ( 1 ) 0.9823756 0.9735634 5.000000 * * * *
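The same information can be pulled out programmatically. As a small sketch using only the summary components stored above, which.max() and which.min() give the row of the table that is best under each criterion:
which.max(sum.reg.cement$adjr2)                        # row with the largest adjusted R-squared
which.min(sum.reg.cement$cp)                           # row with the smallest Mallows' Cp
sum.reg.cement$outmat[which.min(sum.reg.cement$cp), ]  # predictors included in the Cp-best subset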
# Model validation: PRESS (prediction sum of squares)
lm.cement = lm(y ~ x1 + x2, data = cement)  # selected two-predictor model
press.cement <- sum((lm.cement$residuals / (1 - hatvalues(lm.cement)))^2)  # sum of squared deleted residuals e_i/(1 - h_ii)
press.cement # PRESS value
## [1] 93.88255
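The identical PRESS computation is repeated for the second data set below, so it can be wrapped in a small helper. press() here is only a convenience sketch, not a leaps function; it repackages the sum of squared deleted residuals e_i/(1 - h_ii) used above:
# convenience sketch: PRESS statistic for any fitted lm object
press <- function(model) {
  sum((residuals(model) / (1 - hatvalues(model)))^2)
}
press(lm.cement)  # reproduces press.cement above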
Data 2
#=================================DATA PDB=================================
pdb=read_excel("E:/Praktikum13.xlsx",sheet="Sheet1")
## New names:
## * `` -> ...1
pdb
## # A tibble: 13 x 6
## ...1 x1 x2 x3 x4 y
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1999 379558. 3.31 7100 210611000 24003.
## 2 2000 1389770. 10.3 9595 213395000 33515.
## 3 2001 1440406. 11.8 20266 216203000 30962.
## 4 2002 1505216. 9 9261 219026000 31289.
## 5 2003 1577171. 5.92 8571 221839000 32551.
## 6 2004 1656517. 6.35 9030 224607000 46525.
## 7 2005 1750815. 15.6 9751 227303000 57701.
## 8 2006 1847127. 5.66 9141 229919000 61066.
## 9 2007 1964327. 6.48 9142 232462000 74473.
## 10 2008 2082456. 4.15 9772 234951000 129197.
## 11 2009 2177742. 3.45 10356 237414000 96856.
## 12 2010 2310690. 5.96 9078 239871000 135606.
## 13 2011 1838058. 4.23 8731 242326000 156130.
regsubsets.pdb <- regsubsets(y ~ x1 + x2 + x3 + x4,
                             data = pdb,
                             nbest = 4,       # keep the 4 best models for each number of predictors
                             nvmax = NULL,    # NULL: no limit on the number of variables
                             force.in = NULL, force.out = NULL,
                             method = "exhaustive")
sum.reg.pdb=summary(regsubsets.pdb)
output.pdb=data.frame(rsq=sum.reg.pdb$rsq,
adjr2=sum.reg.pdb$adjr2,
cp=sum.reg.pdb$cp,
outmat=sum.reg.pdb$outmat)
output.pdb
## rsq adjr2 cp outmat.x1 outmat.x2 outmat.x3
## 1 ( 1 ) 0.84014847 0.82561651 0.987447
## 1 ( 2 ) 0.47710760 0.42957193 23.670066 *
## 1 ( 3 ) 0.17509492 0.10010355 42.539673 *
## 1 ( 4 ) 0.03302386 -0.05488306 51.416204 *
## 2 ( 1 ) 0.86603890 0.83924668 1.369825 *
## 2 ( 2 ) 0.84818149 0.81781779 2.485548 *
## 2 ( 3 ) 0.84015190 0.80818228 2.987232 *
## 2 ( 4 ) 0.61883231 0.54259877 16.815174 * *
## 3 ( 1 ) 0.87097825 0.82797099 3.061217 * *
## 3 ( 2 ) 0.86614908 0.82153211 3.362941 * *
## 3 ( 3 ) 0.85004824 0.80006432 4.368914 * *
## 3 ( 4 ) 0.62022376 0.49363167 18.728237 * * *
## 4 ( 1 ) 0.87195804 0.80793706 5.000000 * * *
## outmat.x4
## 1 ( 1 ) *
## 1 ( 2 )
## 1 ( 3 )
## 1 ( 4 )
## 2 ( 1 ) *
## 2 ( 2 ) *
## 2 ( 3 ) *
## 2 ( 4 )
## 3 ( 1 ) *
## 3 ( 2 ) *
## 3 ( 3 ) *
## 3 ( 4 )
## 4 ( 1 ) *
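The validation model fitted below, y ~ x1 + x4, is the top-ranked two-predictor subset in this table (largest adjusted R-squared). As a sketch, its refitted coefficients can also be extracted directly from the regsubsets object, where id is the row index of the summary table:
id.best <- which.max(sum.reg.pdb$adjr2)  # row "2 ( 1 )" in the table above
coef(regsubsets.pdb, id = id.best)       # intercept and coefficients of that subset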
# Model validation: PRESS (prediction sum of squares)
lm.pdb = lm(y ~ x1 + x4, data = pdb)  # selected two-predictor model
as.data.frame((lm.pdb$residuals / (1 - hatvalues(lm.pdb)))^2)  # squared deleted residual for each observation
## (lm.pdb$residuals/(1 - hatvalues(lm.pdb)))^2
## 1 37499486
## 2 1231572639
## 3 135147526
## 4 13852989
## 5 259303149
## 6 184141129
## 7 183065049
## 8 470976604
## 9 328097637
## 10 1126264589
## 11 256700510
## 12 454312056
## 13 554646370
press.pdb=sum((lm.pdb$residuals/(1-hatvalues(lm.pdb)))^2)
press.pdb
## [1] 5235579732
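Observations 2 and 10 contribute the two largest squared deleted residuals in the listing above. They can be identified programmatically before deciding which rows to drop; a small sketch using the objects already defined:
e.del.sq <- (lm.pdb$residuals / (1 - hatvalues(lm.pdb)))^2  # squared deleted residuals
sort(e.del.sq, decreasing = TRUE)[1:3]                      # largest values, with observation numbers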
Removing Observations with Large Squared Deleted Residuals e(i,-i)
lm.pdb1 = lm(y ~ x1 + x4, data = pdb[-c(2, 10), ])  # refit without observations 2 and 10
press.pdb1=sum((lm.pdb1$residuals/(1-hatvalues(lm.pdb1)))^2)
press.pdb1
## [1] 2653453516
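PRESS values computed on different numbers of observations are easier to compare after rescaling by the total sum of squares; 1 - PRESS/SST is the usual prediction R-squared. The following is an optional sketch, not part of the original analysis, and pred.r2 is a hypothetical helper name:
# hypothetical helper: prediction R-squared = 1 - PRESS / SST
pred.r2 <- function(model) {
  y <- model.response(model.frame(model))  # response used in the fit
  press <- sum((residuals(model) / (1 - hatvalues(model)))^2)
  1 - press / sum((y - mean(y))^2)
}
pred.r2(lm.pdb)   # full data
pred.r2(lm.pdb1)  # with observations 2 and 10 removed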
Model Comparison
summary(lm.pdb)
##
## Call:
## lm(formula = y ~ x1 + x4, data = pdb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19833 -13297 -3171 11072 29035
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.051e+06 1.888e+05 -5.567 0.000238 ***
## x1 -2.828e-02 2.034e-02 -1.390 0.194630
## x4 5.151e-03 9.559e-04 5.388 0.000306 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18160 on 10 degrees of freedom
## Multiple R-squared: 0.866, Adjusted R-squared: 0.8392
## F-statistic: 32.32 on 2 and 10 DF, p-value: 4.314e-05
qqnorm(residuals(lm.pdb), main = "Normal QQ Plot of Residuals")
abline(a = mean(residuals(lm.pdb)), b = sd(residuals(lm.pdb)), col = "red")  # reference line: intercept = mean, slope = sd of residuals

summary(lm.pdb1)
##
## Call:
## lm(formula = y ~ x1 + x4, data = pdb[-c(2, 10), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -14353 -7709 -5756 6396 21355
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.198e+06 1.574e+05 -7.609 6.25e-05 ***
## x1 -4.242e-02 1.573e-02 -2.697 0.0272 *
## x4 5.878e-03 7.878e-04 7.462 7.19e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13310 on 8 degrees of freedom
## Multiple R-squared: 0.9282, Adjusted R-squared: 0.9102
## F-statistic: 51.69 on 2 and 8 DF, p-value: 2.661e-05
qqnorm(residuals(lm.pdb1), main = "Normal QQ Plot of Residuals")
abline(a = mean(residuals(lm.pdb1)), b = sd(residuals(lm.pdb1)), col = "blue")  # reference line: intercept = mean, slope = sd of residuals
