cut-off p-value: \(\alpha_{crit}\)
library(faraway)
## Warning: 套件 'faraway' 是用 R 版本 4.3.1 來建造的
data(state)
statedata <- data.frame(state.x77,row.names=state.abb)
lmod <- lm(Life.Exp ~ ., statedata)
summary(lmod)
##
## Call:
## lm(formula = Life.Exp ~ ., data = statedata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.48895 -0.51232 -0.02747 0.57002 1.49447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.094e+01 1.748e+00 40.586 < 2e-16 ***
## Population 5.180e-05 2.919e-05 1.775 0.0832 .
## Income -2.180e-05 2.444e-04 -0.089 0.9293
## Illiteracy 3.382e-02 3.663e-01 0.092 0.9269
## Murder -3.011e-01 4.662e-02 -6.459 8.68e-08 ***
## HS.Grad 4.893e-02 2.332e-02 2.098 0.0420 *
## Frost -5.735e-03 3.143e-03 -1.825 0.0752 .
## Area -7.383e-08 1.668e-06 -0.044 0.9649
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7448 on 42 degrees of freedom
## Multiple R-squared: 0.7362, Adjusted R-squared: 0.6922
## F-statistic: 16.74 on 7 and 42 DF, p-value: 2.534e-10
Remove the predictor with the largest p-value over 0.05.
lmod2 <- update(lmod, .~. - Area)
summary(lmod2)
##
## Call:
## lm(formula = Life.Exp ~ Population + Income + Illiteracy + Murder +
## HS.Grad + Frost, data = statedata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.49047 -0.52533 -0.02546 0.57160 1.50374
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.099e+01 1.387e+00 51.165 < 2e-16 ***
## Population 5.188e-05 2.879e-05 1.802 0.0785 .
## Income -2.444e-05 2.343e-04 -0.104 0.9174
## Illiteracy 2.846e-02 3.416e-01 0.083 0.9340
## Murder -3.018e-01 4.334e-02 -6.963 1.45e-08 ***
## HS.Grad 4.847e-02 2.067e-02 2.345 0.0237 *
## Frost -5.776e-03 2.970e-03 -1.945 0.0584 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7361 on 43 degrees of freedom
## Multiple R-squared: 0.7361, Adjusted R-squared: 0.6993
## F-statistic: 19.99 on 6 and 43 DF, p-value: 5.362e-11
lmod3 <- update(lmod2, .~. - Illiteracy)
summary(lmod3)
##
## Call:
## lm(formula = Life.Exp ~ Population + Income + Murder + HS.Grad +
## Frost, data = statedata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4892 -0.5122 -0.0329 0.5645 1.5166
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.107e+01 1.029e+00 69.067 < 2e-16 ***
## Population 5.115e-05 2.709e-05 1.888 0.0657 .
## Income -2.477e-05 2.316e-04 -0.107 0.9153
## Murder -3.000e-01 3.704e-02 -8.099 2.91e-10 ***
## HS.Grad 4.776e-02 1.859e-02 2.569 0.0137 *
## Frost -5.910e-03 2.468e-03 -2.395 0.0210 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7277 on 44 degrees of freedom
## Multiple R-squared: 0.7361, Adjusted R-squared: 0.7061
## F-statistic: 24.55 on 5 and 44 DF, p-value: 1.019e-11
lmod4 <- update(lmod3, . ~ . - Income)
sumary(lmod4)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.02712853 0.95285296 74.5415 < 2.2e-16
## Population 0.00005014 0.00002512 1.9960 0.052005
## Murder -0.30014880 0.03660946 -8.1987 1.775e-10
## HS.Grad 0.04658225 0.01482706 3.1417 0.002968
## Frost -0.00594329 0.00242087 -2.4550 0.018018
##
## n = 50, p = 5, Residual SE = 0.71969, R-Squared = 0.74
lmod5 <- update(lmod4, . ~ . - Population)
sumary(lmod5)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.0363788 0.9832622 72.2456 < 2.2e-16
## Murder -0.2830652 0.0367313 -7.7064 8.039e-10
## HS.Grad 0.0499487 0.0152011 3.2859 0.001950
## Frost -0.0069117 0.0024475 -2.8240 0.006988
##
## n = 50, p = 4, Residual SE = 0.74267, R-Squared = 0.71
step( ):Choose a model by AIC in a Stepwise Algorithm
lmod<-lm(Life.Exp ~ ., data=statedata)
step(lmod)
## Start: AIC=-22.18
## Life.Exp ~ Population + Income + Illiteracy + Murder + HS.Grad +
## Frost + Area
##
## Df Sum of Sq RSS AIC
## - Area 1 0.0011 23.298 -24.182
## - Income 1 0.0044 23.302 -24.175
## - Illiteracy 1 0.0047 23.302 -24.174
## <none> 23.297 -22.185
## - Population 1 1.7472 25.044 -20.569
## - Frost 1 1.8466 25.144 -20.371
## - HS.Grad 1 2.4413 25.738 -19.202
## - Murder 1 23.1411 46.438 10.305
##
## Step: AIC=-24.18
## Life.Exp ~ Population + Income + Illiteracy + Murder + HS.Grad +
## Frost
##
## Df Sum of Sq RSS AIC
## - Illiteracy 1 0.0038 23.302 -26.174
## - Income 1 0.0059 23.304 -26.170
## <none> 23.298 -24.182
## - Population 1 1.7599 25.058 -22.541
## - Frost 1 2.0488 25.347 -21.968
## - HS.Grad 1 2.9804 26.279 -20.163
## - Murder 1 26.2721 49.570 11.569
##
## Step: AIC=-26.17
## Life.Exp ~ Population + Income + Murder + HS.Grad + Frost
##
## Df Sum of Sq RSS AIC
## - Income 1 0.006 23.308 -28.161
## <none> 23.302 -26.174
## - Population 1 1.887 25.189 -24.280
## - Frost 1 3.037 26.339 -22.048
## - HS.Grad 1 3.495 26.797 -21.187
## - Murder 1 34.739 58.041 17.456
##
## Step: AIC=-28.16
## Life.Exp ~ Population + Murder + HS.Grad + Frost
##
## Df Sum of Sq RSS AIC
## <none> 23.308 -28.161
## - Population 1 2.064 25.372 -25.920
## - Frost 1 3.122 26.430 -23.877
## - HS.Grad 1 5.112 28.420 -20.246
## - Murder 1 34.816 58.124 15.528
##
## Call:
## lm(formula = Life.Exp ~ Population + Murder + HS.Grad + Frost,
## data = statedata)
##
## Coefficients:
## (Intercept) Population Murder HS.Grad Frost
## 7.103e+01 5.014e-05 -3.001e-01 4.658e-02 -5.943e-03
lmod_aic<-lm(Life.Exp ~ Population + Murder + HS.Grad + Frost, data=statedata)
summary(lmod_aic)
##
## Call:
## lm(formula = Life.Exp ~ Population + Murder + HS.Grad + Frost,
## data = statedata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.47095 -0.53464 -0.03701 0.57621 1.50683
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.103e+01 9.529e-01 74.542 < 2e-16 ***
## Population 5.014e-05 2.512e-05 1.996 0.05201 .
## Murder -3.001e-01 3.661e-02 -8.199 1.77e-10 ***
## HS.Grad 4.658e-02 1.483e-02 3.142 0.00297 **
## Frost -5.943e-03 2.421e-03 -2.455 0.01802 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7197 on 45 degrees of freedom
## Multiple R-squared: 0.736, Adjusted R-squared: 0.7126
## F-statistic: 31.37 on 4 and 45 DF, p-value: 1.696e-12
Q: 利用 summary(lmod_aic) 的 output, 找出此 model AIC, BIC, \(R_a^2\)。
regsubsets( ) (Library leaps) : functions for model selection
#install.packages("leaps")
require(leaps)
## 載入需要的套件:leaps
## Warning: 套件 'leaps' 是用 R 版本 4.3.1 來建造的
b <- regsubsets(Life.Exp~.,data=statedata)
rs <-summary(b)
rs$which
## (Intercept) Population Income Illiteracy Murder HS.Grad Frost Area
## 1 TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## 2 TRUE FALSE FALSE FALSE TRUE TRUE FALSE FALSE
## 3 TRUE FALSE FALSE FALSE TRUE TRUE TRUE FALSE
## 4 TRUE TRUE FALSE FALSE TRUE TRUE TRUE FALSE
## 5 TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE
## 6 TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## 7 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
AIC <- 50*log(rs$rss/50)+ (2:8)*2
plot(AIC ~ I(1:7), ylab="AIC", xlab="Number of Predictors")
plot(2:8 ,rs$cp, ylab="Cp statistic", xlab="Number of Parameters")
abline(0,1)
plot(2:8 , rs$adjr2, ylab="Adjusted R-square", xlab="Number of Parameters")