Leg shows to have the lowest P value of all variables.
data(seatpos, package="faraway")
lmod<-lm(hipcenter ~ ., data=seatpos)
summary(lmod)
##
## Call:
## lm(formula = hipcenter ~ ., data = seatpos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -73.827 -22.833 -3.678 25.017 62.337
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 436.43213 166.57162 2.620 0.0138 *
## Age 0.77572 0.57033 1.360 0.1843
## Weight 0.02631 0.33097 0.080 0.9372
## HtShoes -2.69241 9.75304 -0.276 0.7845
## Ht 0.60134 10.12987 0.059 0.9531
## Seated 0.53375 3.76189 0.142 0.8882
## Arm -1.32807 3.90020 -0.341 0.7359
## Thigh -1.14312 2.66002 -0.430 0.6706
## Leg -6.43905 4.71386 -1.366 0.1824
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.72 on 29 degrees of freedom
## Multiple R-squared: 0.6866, Adjusted R-squared: 0.6001
## F-statistic: 7.94 on 8 and 29 DF, p-value: 1.306e-05
The regsubset confirms that Leg can be a strong predictor
require(leaps)
r<-regsubsets(hipcenter~.,data=seatpos)
rs<-summary(r)
rs$which
## (Intercept) Age Weight HtShoes Ht Seated Arm Thigh Leg
## 1 TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## 2 TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE
## 3 TRUE TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE
## 4 TRUE TRUE FALSE TRUE FALSE FALSE FALSE TRUE TRUE
## 5 TRUE TRUE FALSE TRUE FALSE FALSE TRUE TRUE TRUE
## 6 TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE
## 7 TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
## 8 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
Run Prediction with all predictors.
x <-model.matrix(lmod)
(x1 <- apply(x,2,function(x,mean) quantile(x,0.95)))
## (Intercept) Age Weight HtShoes Ht Seated
## 1.000 65.300 197.250 188.005 185.615 96.960
## Arm Thigh Leg
## 37.535 45.215 41.995
predict(lmod, new=data.frame(t(x1)), interval ="prediction")
## fit lwr upr
## 1 -222.4813 -311.5539 -133.4086
predict(lmod, new=data.frame(t(x1)), interval ="confidence")
## fit lwr upr
## 1 -222.4813 -267.0047 -177.9578
Find the minimum with all predictors
require(leaps)
b <-regsubsets(hipcenter~.,data=seatpos)
rs<-summary(b)
rs$which
## (Intercept) Age Weight HtShoes Ht Seated Arm Thigh Leg
## 1 TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## 2 TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE
## 3 TRUE TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE
## 4 TRUE TRUE FALSE TRUE FALSE FALSE FALSE TRUE TRUE
## 5 TRUE TRUE FALSE TRUE FALSE FALSE TRUE TRUE TRUE
## 6 TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE
## 7 TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
## 8 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
AIC Plots
AIC<-50*log(rs$rss/50)+ (2:9)*2
plot(AIC - I(1:8), ylab="AIC", xlab="Number of Predictors")
plot(2:9,rs$adjr2,xlab="No. of Parameters", ylab="Adjusted R-Square")
which.max(rs$adjr2)
## [1] 3
plot(2:9,rs$cp,xlab="No. of Parameters",ylab="Cp Statistic")
abline(0,1)
AIC Step
lmod<-lm(hipcenter ~ ., data=seatpos)
step(lmod)
## Start: AIC=283.62
## hipcenter ~ Age + Weight + HtShoes + Ht + Seated + Arm + Thigh +
## Leg
##
## Df Sum of Sq RSS AIC
## - Ht 1 5.01 41267 281.63
## - Weight 1 8.99 41271 281.63
## - Seated 1 28.64 41290 281.65
## - HtShoes 1 108.43 41370 281.72
## - Arm 1 164.97 41427 281.78
## - Thigh 1 262.76 41525 281.87
## <none> 41262 283.62
## - Age 1 2632.12 43894 283.97
## - Leg 1 2654.85 43917 283.99
##
## Step: AIC=281.63
## hipcenter ~ Age + Weight + HtShoes + Seated + Arm + Thigh + Leg
##
## Df Sum of Sq RSS AIC
## - Weight 1 11.10 41278 279.64
## - Seated 1 30.52 41297 279.66
## - Arm 1 160.50 41427 279.78
## - Thigh 1 269.08 41536 279.88
## - HtShoes 1 971.84 42239 280.51
## <none> 41267 281.63
## - Leg 1 2664.65 43931 282.01
## - Age 1 2808.52 44075 282.13
##
## Step: AIC=279.64
## hipcenter ~ Age + HtShoes + Seated + Arm + Thigh + Leg
##
## Df Sum of Sq RSS AIC
## - Seated 1 35.10 41313 277.67
## - Arm 1 156.47 41434 277.78
## - Thigh 1 285.16 41563 277.90
## - HtShoes 1 975.48 42253 278.53
## <none> 41278 279.64
## - Leg 1 2661.39 43939 280.01
## - Age 1 3011.86 44290 280.31
##
## Step: AIC=277.67
## hipcenter ~ Age + HtShoes + Arm + Thigh + Leg
##
## Df Sum of Sq RSS AIC
## - Arm 1 172.02 41485 275.83
## - Thigh 1 344.61 41658 275.99
## - HtShoes 1 1853.43 43166 277.34
## <none> 41313 277.67
## - Leg 1 2871.07 44184 278.22
## - Age 1 2976.77 44290 278.31
##
## Step: AIC=275.83
## hipcenter ~ Age + HtShoes + Thigh + Leg
##
## Df Sum of Sq RSS AIC
## - Thigh 1 472.8 41958 274.26
## <none> 41485 275.83
## - HtShoes 1 2340.7 43826 275.92
## - Age 1 3501.0 44986 276.91
## - Leg 1 3591.7 45077 276.98
##
## Step: AIC=274.26
## hipcenter ~ Age + HtShoes + Leg
##
## Df Sum of Sq RSS AIC
## <none> 41958 274.26
## - Age 1 3108.8 45067 274.98
## - Leg 1 3476.3 45434 275.28
## - HtShoes 1 4218.6 46176 275.90
##
## Call:
## lm(formula = hipcenter ~ Age + HtShoes + Leg, data = seatpos)
##
## Coefficients:
## (Intercept) Age HtShoes Leg
## 456.2137 0.5998 -2.3023 -6.8297
Use AIC results as new model for Predition
lmod<-lm( hipcenter ~ Age + HtShoes + Leg, data = seatpos)
x <-model.matrix(lmod)
(x1 <- apply(x,2,function(x,mean) quantile(x,0.95)))
## (Intercept) Age HtShoes Leg
## 1.000 65.300 188.005 41.995
predict(lmod, new=data.frame(t(x1)), interval ="prediction")
## fit lwr upr
## 1 -224.268 -302.9393 -145.5967
predict(lmod, new=data.frame(t(x1)), interval ="confidence")
## fit lwr upr
## 1 -224.268 -257.3212 -191.2148