Here we are going to see the below methods:
model <- step(lm(mpg ~ ., data=mtcars), direction = "forward")
## Start: AIC=70.9
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb
The above model is built using all independent variables in the data. We got AIC value of 70.9.
Now, we use independent variable to check the AIC value.
model <- step(lm(mpg ~ cyl, data=mtcars), direction = "forward")
## Start: AIC=76.49
## mpg ~ cyl
model <- step(lm(mpg ~ cyl+disp, data=mtcars), direction = "forward")
## Start: AIC=74.33
## mpg ~ cyl + disp
model <- step(lm(mpg ~ cyl+disp+hp, data=mtcars), direction = "forward")
## Start: AIC=75.21
## mpg ~ cyl + disp + hp
model <- step(lm(mpg ~ cyl+disp+hp+drat, data=mtcars), direction = "forward")
## Start: AIC=75.12
## mpg ~ cyl + disp + hp + drat
model <- step(lm(mpg ~ cyl+disp+hp+drat+wt, data=mtcars), direction = "forward")
## Start: AIC=64.95
## mpg ~ cyl + disp + hp + drat + wt
model <- step(lm(mpg ~ cyl+disp+hp+drat+wt+qsec, data=mtcars), direction = "forward")
## Start: AIC=66.19
## mpg ~ cyl + disp + hp + drat + wt + qsec
model <- step(lm(mpg ~ cyl+disp+hp+drat+wt+qsec+vs, data=mtcars), direction = "forward")
## Start: AIC=68.16
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs
model <- step(lm(mpg ~ cyl+disp+hp+drat+wt+qsec+vs+am, data=mtcars), direction = "forward")
## Start: AIC=67.2
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am
model <- step(lm(mpg ~ cyl+disp+hp+drat+wt+qsec+vs+am+gear, data=mtcars), direction = "forward")
## Start: AIC=68.99
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear
model <- step(lm(mpg ~ cyl+disp+hp+drat+wt+qsec+vs+am+gear+carb, data=mtcars), direction = "forward")
## Start: AIC=70.9
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb
model <- step(lm(mpg ~ cyl+disp+hp+drat+wt+qsec+vs+am+gear+carb, data=mtcars), direction = "forward")
## Start: AIC=70.9
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb
model <- step(lm(mpg ~ ., data=mtcars), direction="backward")
## Start: AIC=70.9
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb
##
## Df Sum of Sq RSS AIC
## - cyl 1 0.0799 147.57 68.915
## - vs 1 0.1601 147.66 68.932
## - carb 1 0.4067 147.90 68.986
## - gear 1 1.3531 148.85 69.190
## - drat 1 1.6270 149.12 69.249
## - disp 1 3.9167 151.41 69.736
## - hp 1 6.8399 154.33 70.348
## - qsec 1 8.8641 156.36 70.765
## <none> 147.49 70.898
## - am 1 10.5467 158.04 71.108
## - wt 1 27.0144 174.51 74.280
##
## Step: AIC=68.92
## mpg ~ disp + hp + drat + wt + qsec + vs + am + gear + carb
##
## Df Sum of Sq RSS AIC
## - vs 1 0.2685 147.84 66.973
## - carb 1 0.5201 148.09 67.028
## - gear 1 1.8211 149.40 67.308
## - drat 1 1.9826 149.56 67.342
## - disp 1 3.9009 151.47 67.750
## - hp 1 7.3632 154.94 68.473
## <none> 147.57 68.915
## - qsec 1 10.0933 157.67 69.032
## - am 1 11.8359 159.41 69.384
## - wt 1 27.0280 174.60 72.297
##
## Step: AIC=66.97
## mpg ~ disp + hp + drat + wt + qsec + am + gear + carb
##
## Df Sum of Sq RSS AIC
## - carb 1 0.6855 148.53 65.121
## - gear 1 2.1437 149.99 65.434
## - drat 1 2.2139 150.06 65.449
## - disp 1 3.6467 151.49 65.753
## - hp 1 7.1060 154.95 66.475
## <none> 147.84 66.973
## - am 1 11.5694 159.41 67.384
## - qsec 1 15.6830 163.53 68.200
## - wt 1 27.3799 175.22 70.410
##
## Step: AIC=65.12
## mpg ~ disp + hp + drat + wt + qsec + am + gear
##
## Df Sum of Sq RSS AIC
## - gear 1 1.565 150.09 63.457
## - drat 1 1.932 150.46 63.535
## <none> 148.53 65.121
## - disp 1 10.110 158.64 65.229
## - am 1 12.323 160.85 65.672
## - hp 1 14.826 163.35 66.166
## - qsec 1 26.408 174.94 68.358
## - wt 1 69.127 217.66 75.350
##
## Step: AIC=63.46
## mpg ~ disp + hp + drat + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - drat 1 3.345 153.44 62.162
## - disp 1 8.545 158.64 63.229
## <none> 150.09 63.457
## - hp 1 13.285 163.38 64.171
## - am 1 20.036 170.13 65.466
## - qsec 1 25.574 175.67 66.491
## - wt 1 67.572 217.66 73.351
##
## Step: AIC=62.16
## mpg ~ disp + hp + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - disp 1 6.629 160.07 61.515
## <none> 153.44 62.162
## - hp 1 12.572 166.01 62.682
## - qsec 1 26.470 179.91 65.255
## - am 1 32.198 185.63 66.258
## - wt 1 69.043 222.48 72.051
##
## Step: AIC=61.52
## mpg ~ hp + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - hp 1 9.219 169.29 61.307
## <none> 160.07 61.515
## - qsec 1 20.225 180.29 63.323
## - am 1 25.993 186.06 64.331
## - wt 1 78.494 238.56 72.284
##
## Step: AIC=61.31
## mpg ~ wt + qsec + am
##
## Df Sum of Sq RSS AIC
## <none> 169.29 61.307
## - am 1 26.178 195.46 63.908
## - qsec 1 109.034 278.32 75.217
## - wt 1 183.347 352.63 82.790
model <- step(lm(mpg ~ ., data=mtcars), direction="both")
## Start: AIC=70.9
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb
##
## Df Sum of Sq RSS AIC
## - cyl 1 0.0799 147.57 68.915
## - vs 1 0.1601 147.66 68.932
## - carb 1 0.4067 147.90 68.986
## - gear 1 1.3531 148.85 69.190
## - drat 1 1.6270 149.12 69.249
## - disp 1 3.9167 151.41 69.736
## - hp 1 6.8399 154.33 70.348
## - qsec 1 8.8641 156.36 70.765
## <none> 147.49 70.898
## - am 1 10.5467 158.04 71.108
## - wt 1 27.0144 174.51 74.280
##
## Step: AIC=68.92
## mpg ~ disp + hp + drat + wt + qsec + vs + am + gear + carb
##
## Df Sum of Sq RSS AIC
## - vs 1 0.2685 147.84 66.973
## - carb 1 0.5201 148.09 67.028
## - gear 1 1.8211 149.40 67.308
## - drat 1 1.9826 149.56 67.342
## - disp 1 3.9009 151.47 67.750
## - hp 1 7.3632 154.94 68.473
## <none> 147.57 68.915
## - qsec 1 10.0933 157.67 69.032
## - am 1 11.8359 159.41 69.384
## + cyl 1 0.0799 147.49 70.898
## - wt 1 27.0280 174.60 72.297
##
## Step: AIC=66.97
## mpg ~ disp + hp + drat + wt + qsec + am + gear + carb
##
## Df Sum of Sq RSS AIC
## - carb 1 0.6855 148.53 65.121
## - gear 1 2.1437 149.99 65.434
## - drat 1 2.2139 150.06 65.449
## - disp 1 3.6467 151.49 65.753
## - hp 1 7.1060 154.95 66.475
## <none> 147.84 66.973
## - am 1 11.5694 159.41 67.384
## - qsec 1 15.6830 163.53 68.200
## + vs 1 0.2685 147.57 68.915
## + cyl 1 0.1883 147.66 68.932
## - wt 1 27.3799 175.22 70.410
##
## Step: AIC=65.12
## mpg ~ disp + hp + drat + wt + qsec + am + gear
##
## Df Sum of Sq RSS AIC
## - gear 1 1.565 150.09 63.457
## - drat 1 1.932 150.46 63.535
## <none> 148.53 65.121
## - disp 1 10.110 158.64 65.229
## - am 1 12.323 160.85 65.672
## - hp 1 14.826 163.35 66.166
## + carb 1 0.685 147.84 66.973
## + vs 1 0.434 148.09 67.028
## + cyl 1 0.414 148.11 67.032
## - qsec 1 26.408 174.94 68.358
## - wt 1 69.127 217.66 75.350
##
## Step: AIC=63.46
## mpg ~ disp + hp + drat + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - drat 1 3.345 153.44 62.162
## - disp 1 8.545 158.64 63.229
## <none> 150.09 63.457
## - hp 1 13.285 163.38 64.171
## + gear 1 1.565 148.53 65.121
## + cyl 1 1.003 149.09 65.242
## + vs 1 0.645 149.45 65.319
## + carb 1 0.107 149.99 65.434
## - am 1 20.036 170.13 65.466
## - qsec 1 25.574 175.67 66.491
## - wt 1 67.572 217.66 73.351
##
## Step: AIC=62.16
## mpg ~ disp + hp + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - disp 1 6.629 160.07 61.515
## <none> 153.44 62.162
## - hp 1 12.572 166.01 62.682
## + drat 1 3.345 150.09 63.457
## + gear 1 2.977 150.46 63.535
## + cyl 1 2.447 150.99 63.648
## + vs 1 1.121 152.32 63.927
## + carb 1 0.011 153.43 64.160
## - qsec 1 26.470 179.91 65.255
## - am 1 32.198 185.63 66.258
## - wt 1 69.043 222.48 72.051
##
## Step: AIC=61.52
## mpg ~ hp + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - hp 1 9.219 169.29 61.307
## <none> 160.07 61.515
## + disp 1 6.629 153.44 62.162
## + carb 1 3.227 156.84 62.864
## + drat 1 1.428 158.64 63.229
## - qsec 1 20.225 180.29 63.323
## + cyl 1 0.249 159.82 63.465
## + vs 1 0.249 159.82 63.466
## + gear 1 0.171 159.90 63.481
## - am 1 25.993 186.06 64.331
## - wt 1 78.494 238.56 72.284
##
## Step: AIC=61.31
## mpg ~ wt + qsec + am
##
## Df Sum of Sq RSS AIC
## <none> 169.29 61.307
## + hp 1 9.219 160.07 61.515
## + carb 1 8.036 161.25 61.751
## + disp 1 3.276 166.01 62.682
## + cyl 1 1.501 167.78 63.022
## + drat 1 1.400 167.89 63.042
## + gear 1 0.123 169.16 63.284
## + vs 1 0.000 169.29 63.307
## - am 1 26.178 195.46 63.908
## - qsec 1 109.034 278.32 75.217
## - wt 1 183.347 352.63 82.790
Note: AIC gives good model from a bunch of bad models. But it does not tell the best model.
library(leaps)
## Warning: package 'leaps' was built under R version 3.5.2
model <- regsubsets(mpg ~ ., data=mtcars)
model
## Subset selection object
## Call: regsubsets.formula(mpg ~ ., data = mtcars)
## 10 Variables (and intercept)
## Forced in Forced out
## cyl FALSE FALSE
## disp FALSE FALSE
## hp FALSE FALSE
## drat FALSE FALSE
## wt FALSE FALSE
## qsec FALSE FALSE
## vs FALSE FALSE
## am FALSE FALSE
## gear FALSE FALSE
## carb FALSE FALSE
## 1 subsets of each size up to 8
## Selection Algorithm: exhaustive
summary(model)
## Subset selection object
## Call: regsubsets.formula(mpg ~ ., data = mtcars)
## 10 Variables (and intercept)
## Forced in Forced out
## cyl FALSE FALSE
## disp FALSE FALSE
## hp FALSE FALSE
## drat FALSE FALSE
## wt FALSE FALSE
## qsec FALSE FALSE
## vs FALSE FALSE
## am FALSE FALSE
## gear FALSE FALSE
## carb FALSE FALSE
## 1 subsets of each size up to 8
## Selection Algorithm: exhaustive
## cyl disp hp drat wt qsec vs am gear carb
## 1 ( 1 ) " " " " " " " " "*" " " " " " " " " " "
## 2 ( 1 ) "*" " " " " " " "*" " " " " " " " " " "
## 3 ( 1 ) " " " " " " " " "*" "*" " " "*" " " " "
## 4 ( 1 ) " " " " "*" " " "*" "*" " " "*" " " " "
## 5 ( 1 ) " " "*" "*" " " "*" "*" " " "*" " " " "
## 6 ( 1 ) " " "*" "*" "*" "*" "*" " " "*" " " " "
## 7 ( 1 ) " " "*" "*" "*" "*" "*" " " "*" "*" " "
## 8 ( 1 ) " " "*" "*" "*" "*" "*" " " "*" "*" "*"
plot(model, scale = "bic")
We can simply find out the most significant variable from the above plot.
#Display adjusted r-squared values on the y bar.
plot(model, scale="adjr2")
# build bic with 2 subsets
model1 <- regsubsets(mpg ~ ., data=mtcars, nbest=2)
model1
## Subset selection object
## Call: regsubsets.formula(mpg ~ ., data = mtcars, nbest = 2)
## 10 Variables (and intercept)
## Forced in Forced out
## cyl FALSE FALSE
## disp FALSE FALSE
## hp FALSE FALSE
## drat FALSE FALSE
## wt FALSE FALSE
## qsec FALSE FALSE
## vs FALSE FALSE
## am FALSE FALSE
## gear FALSE FALSE
## carb FALSE FALSE
## 2 subsets of each size up to 8
## Selection Algorithm: exhaustive
plot(model1, scale="bic")
model <- regsubsets(mpg ~ ., data=mtcars)
plot(model, scale = "Cp")
Anova is used to compare simpler model with complex model.
#simple model
model.s <- lm(mpg ~ wt + cyl, data=mtcars)
#complex model
model.c <- lm(mpg ~ ., data = mtcars)
# Compare which model from the above is good, we use Anova test
anova(model.s, model.c)
## Analysis of Variance Table
##
## Model 1: mpg ~ wt + cyl
## Model 2: mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 29 191.17
## 2 21 147.49 8 43.678 0.7773 0.6269
We accepted the null hypothesis means both models give same results, so we use simpler model between both of them.