Load Package

library(readxl)
#install.packages("leaps"") #instal package leaps
library(leaps) 

Preparing Data

Data 1

getwd()
## [1] "C:/Users/Ny  07/Documents/By dokument/R"
setwd("E:/") #file save located

#=================================DATA CEMENT=================================
cement=read_excel("Praktikum 14.xlsx",sheet="Sheet3")
cement
## # A tibble: 13 x 5
##        y    x1    x2    x3    x4
##    <dbl> <dbl> <dbl> <dbl> <dbl>
##  1  78.5     7    26     6    60
##  2  74.3     1    29    15    52
##  3 104.     11    56     8    20
##  4  87.6    11    31     8    47
##  5  95.9     7    52     6    33
##  6 109.     11    55     9    22
##  7 103.      3    71    17     6
##  8  72.5     1    31    22    44
##  9  93.1     2    54    18    22
## 10 116.     21    47     4    26
## 11  83.8     1    40    23    34
## 12 113.     11    66     9    12
## 13 109.     10    68     8    12

Akurasi Model

regsubsets.cement <-regsubsets(y ~ x1 + x2 + x3 + x4,
             data = cement,
             nbest = 2,       # 1 best model for each number of predictors
             nvmax = NULL,    # NULL for no limit on number of variables
             force.in = NULL, force.out = NULL,
             method = "exhaustive")
sum.reg.cement=summary(regsubsets.cement)
output.cement=data.frame(rsq=sum.reg.cement$rsq,
                         adjr2=sum.reg.cement$adjr2,
                         cp=sum.reg.cement$cp,
                         outmat=sum.reg.cement$outmat)
output.cement
##                rsq     adjr2         cp outmat.x1 outmat.x2 outmat.x3 outmat.x4
## 1  ( 1 ) 0.6745420 0.6449549 138.730833                                       *
## 1  ( 2 ) 0.6662683 0.6359290 142.486407                   *                    
## 2  ( 1 ) 0.9786784 0.9744140   2.678242         *         *                    
## 2  ( 2 ) 0.9724710 0.9669653   5.495851         *                             *
## 3  ( 1 ) 0.9823355 0.9764473   3.018233         *         *                   *
## 3  ( 2 ) 0.9822847 0.9763796   3.041280         *         *         *          
## 4  ( 1 ) 0.9823756 0.9735634   5.000000         *         *         *         *
#validasi model
lm.cement=lm(y~x1+x2,data=cement)
press.cement<-sum((lm.cement$residuals/(1-hatvalues(lm.cement)))^2)
press.cement #nilai Press
## [1] 93.88255

Data 2

#=================================DATA PDB=================================
pdb=read_excel("E:/Praktikum13.xlsx",sheet="Sheet1")
## New names:
## * `` -> ...1
pdb
## # A tibble: 13 x 6
##     ...1       x1    x2    x3        x4       y
##    <dbl>    <dbl> <dbl> <dbl>     <dbl>   <dbl>
##  1  1999  379558.  3.31  7100 210611000  24003.
##  2  2000 1389770. 10.3   9595 213395000  33515.
##  3  2001 1440406. 11.8  20266 216203000  30962.
##  4  2002 1505216.  9     9261 219026000  31289.
##  5  2003 1577171.  5.92  8571 221839000  32551.
##  6  2004 1656517.  6.35  9030 224607000  46525.
##  7  2005 1750815. 15.6   9751 227303000  57701.
##  8  2006 1847127.  5.66  9141 229919000  61066.
##  9  2007 1964327.  6.48  9142 232462000  74473.
## 10  2008 2082456.  4.15  9772 234951000 129197.
## 11  2009 2177742.  3.45 10356 237414000  96856.
## 12  2010 2310690.  5.96  9078 239871000 135606.
## 13  2011 1838058.  4.23  8731 242326000 156130.
regsubsets.pdb <-regsubsets(y ~ x1 + x2 + x3 + x4,
             data = pdb,
             nbest = 4,       # 1 best model for each number of predictors
             nvmax = NULL,    # NULL for no limit on number of variables
             force.in = NULL, force.out = NULL,
             method = "exhaustive")
sum.reg.pdb=summary(regsubsets.pdb)
output.pdb=data.frame(rsq=sum.reg.pdb$rsq,
                      adjr2=sum.reg.pdb$adjr2,
                      cp=sum.reg.pdb$cp,
                      outmat=sum.reg.pdb$outmat)
output.pdb
##                 rsq       adjr2        cp outmat.x1 outmat.x2 outmat.x3
## 1  ( 1 ) 0.84014847  0.82561651  0.987447                              
## 1  ( 2 ) 0.47710760  0.42957193 23.670066         *                    
## 1  ( 3 ) 0.17509492  0.10010355 42.539673                   *          
## 1  ( 4 ) 0.03302386 -0.05488306 51.416204                             *
## 2  ( 1 ) 0.86603890  0.83924668  1.369825         *                    
## 2  ( 2 ) 0.84818149  0.81781779  2.485548                   *          
## 2  ( 3 ) 0.84015190  0.80818228  2.987232                             *
## 2  ( 4 ) 0.61883231  0.54259877 16.815174         *         *          
## 3  ( 1 ) 0.87097825  0.82797099  3.061217         *                   *
## 3  ( 2 ) 0.86614908  0.82153211  3.362941         *         *          
## 3  ( 3 ) 0.85004824  0.80006432  4.368914                   *         *
## 3  ( 4 ) 0.62022376  0.49363167 18.728237         *         *         *
## 4  ( 1 ) 0.87195804  0.80793706  5.000000         *         *         *
##          outmat.x4
## 1  ( 1 )         *
## 1  ( 2 )          
## 1  ( 3 )          
## 1  ( 4 )          
## 2  ( 1 )         *
## 2  ( 2 )         *
## 2  ( 3 )         *
## 2  ( 4 )          
## 3  ( 1 )         *
## 3  ( 2 )         *
## 3  ( 3 )         *
## 3  ( 4 )          
## 4  ( 1 )         *
#validasi model
lm.pdb=lm(y~x1+x4,data=pdb)
as.data.frame((lm.pdb$residuals/(1-hatvalues(lm.pdb)))^2)
##    (lm.pdb$residuals/(1 - hatvalues(lm.pdb)))^2
## 1                                      37499486
## 2                                    1231572639
## 3                                     135147526
## 4                                      13852989
## 5                                     259303149
## 6                                     184141129
## 7                                     183065049
## 8                                     470976604
## 9                                     328097637
## 10                                   1126264589
## 11                                    256700510
## 12                                    454312056
## 13                                    554646370
press.pdb=sum((lm.pdb$residuals/(1-hatvalues(lm.pdb)))^2)
press.pdb
## [1] 5235579732

membuang amatan yang memiliki e(i,-i) kuadrat yang tinggi

lm.pdb1=lm(y~x1+x4,data=pdb[-c(2,10),])
press.pdb1=sum((lm.pdb1$residuals/(1-hatvalues(lm.pdb1)))^2)
press.pdb1
## [1] 2653453516

perbandingan model

summary(lm.pdb)
## 
## Call:
## lm(formula = y ~ x1 + x4, data = pdb)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -19833 -13297  -3171  11072  29035 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.051e+06  1.888e+05  -5.567 0.000238 ***
## x1          -2.828e-02  2.034e-02  -1.390 0.194630    
## x4           5.151e-03  9.559e-04   5.388 0.000306 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18160 on 10 degrees of freedom
## Multiple R-squared:  0.866,  Adjusted R-squared:  0.8392 
## F-statistic: 32.32 on 2 and 10 DF,  p-value: 4.314e-05
qqnorm(residuals(lm.pdb),main="QQ Plot Sisaan")
abline(a=mean(residuals(lm.pdb)),b=sd(residuals(lm.pdb)),col="red")

summary(lm.pdb1)
## 
## Call:
## lm(formula = y ~ x1 + x4, data = pdb[-c(2, 10), ])
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -14353  -7709  -5756   6396  21355 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.198e+06  1.574e+05  -7.609 6.25e-05 ***
## x1          -4.242e-02  1.573e-02  -2.697   0.0272 *  
## x4           5.878e-03  7.878e-04   7.462 7.19e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13310 on 8 degrees of freedom
## Multiple R-squared:  0.9282, Adjusted R-squared:  0.9102 
## F-statistic: 51.69 on 2 and 8 DF,  p-value: 2.661e-05
qqnorm(residuals(lm.pdb1),main="QQ Plot Sisaan")
abline(a=mean(residuals(lm.pdb1)),b=sd(residuals(lm.pdb1)),col="blue")