library(faraway)
data(seatpos, package="faraway")
head(seatpos)
## Age Weight HtShoes Ht Seated Arm Thigh Leg hipcenter
## 1 46 180 187.2 184.9 95.2 36.1 45.3 41.3 -206.300
## 2 31 175 167.5 165.5 83.8 32.9 36.5 35.9 -178.210
## 3 23 100 153.6 152.2 82.9 26.0 36.6 31.0 -71.673
## 4 19 185 190.3 187.4 97.3 37.4 44.1 41.0 -257.720
## 5 23 159 178.0 174.1 93.9 29.5 40.1 36.9 -173.230
## 6 47 170 178.7 177.0 92.4 36.0 43.2 37.4 -185.150
lmod<-lm(hipcenter~.,data=seatpos)
summary(lmod)
##
## Call:
## lm(formula = hipcenter ~ ., data = seatpos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -73.827 -22.833 -3.678 25.017 62.337
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 436.43213 166.57162 2.620 0.0138 *
## Age 0.77572 0.57033 1.360 0.1843
## Weight 0.02631 0.33097 0.080 0.9372
## HtShoes -2.69241 9.75304 -0.276 0.7845
## Ht 0.60134 10.12987 0.059 0.9531
## Seated 0.53375 3.76189 0.142 0.8882
## Arm -1.32807 3.90020 -0.341 0.7359
## Thigh -1.14312 2.66002 -0.430 0.6706
## Leg -6.43905 4.71386 -1.366 0.1824
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.72 on 29 degrees of freedom
## Multiple R-squared: 0.6866, Adjusted R-squared: 0.6001
## F-statistic: 7.94 on 8 and 29 DF, p-value: 1.306e-05
from the model, none of these predictors are stastically significant. It appears that for every unit of leg length that increases, -6.439 units are decreased in hipcenter.
predict(lmod, interval = "confidence", level=0.99)
## fit lwr upr
## 1 -230.82470 -274.4850 -187.16440
## 2 -158.22231 -213.2942 -103.15042
## 3 -96.85463 -143.8490 -49.86024
## 4 -255.78273 -308.3126 -203.25288
## 5 -188.59572 -248.9292 -128.26221
## 6 -186.02614 -223.9008 -148.15146
## 7 -153.98285 -202.0740 -105.89168
## 8 -244.79086 -300.7474 -188.83431
## 9 -139.71030 -185.2041 -94.21650
## 10 -112.98566 -160.3563 -65.61503
## 11 -163.72509 -194.3278 -133.12243
## 12 -89.14799 -130.7538 -47.54214
## 13 -194.10261 -269.2897 -118.91549
## 14 -128.43355 -167.9806 -88.88649
## 15 -186.44972 -240.0984 -132.80105
## 16 -177.90902 -231.3331 -124.48495
## 17 -201.58090 -267.4155 -135.74635
## 18 -98.43069 -156.9425 -39.91888
## 19 -145.80244 -184.1022 -107.50270
## 20 -167.75364 -210.6388 -124.86850
## 21 -178.41491 -227.2462 -129.58362
## 22 -279.07627 -356.4743 -201.67821
## 23 -245.56763 -299.5664 -191.56886
## 24 -81.55529 -125.9380 -37.17254
## 25 -141.13605 -176.7815 -105.49058
## 26 -222.49965 -256.4882 -188.51112
## 27 -156.83929 -193.6699 -120.00871
## 28 -128.68145 -185.2962 -72.06674
## 29 -193.00256 -236.4407 -149.56446
## 30 -93.20235 -137.1367 -49.26799
## 31 -102.96051 -180.7826 -25.13843
## 32 -182.39983 -235.8347 -128.96498
## 33 -166.93549 -218.6980 -115.17301
## 34 -102.63962 -141.3244 -63.95482
## 35 -194.49288 -238.9459 -150.03987
## 36 -142.50545 -199.7835 -85.22743
## 37 -178.52201 -217.9924 -139.05168
## 38 -154.08219 -197.7120 -110.45244
sumary(lmod)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 436.432128 166.571619 2.6201 0.01384
## Age 0.775716 0.570329 1.3601 0.18427
## Weight 0.026313 0.330970 0.0795 0.93718
## HtShoes -2.692408 9.753035 -0.2761 0.78446
## Ht 0.601345 10.129874 0.0594 0.95307
## Seated 0.533752 3.761894 0.1419 0.88815
## Arm -1.328069 3.900197 -0.3405 0.73592
## Thigh -1.143119 2.660024 -0.4297 0.67056
## Leg -6.439046 4.713860 -1.3660 0.18245
##
## n = 38, p = 9, Residual SE = 37.72029, R-Squared = 0.69
lmod <- update(lmod, . ~ . - Ht)
sumary(lmod)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 436.84207 163.64104 2.6695 0.01214
## Age 0.76574 0.53590 1.4289 0.16337
## Weight 0.02897 0.32244 0.0898 0.92901
## HtShoes -2.13409 2.53896 -0.8405 0.40726
## Seated 0.54959 3.68958 0.1490 0.88258
## Arm -1.30087 3.80833 -0.3416 0.73504
## Thigh -1.09039 2.46534 -0.4423 0.66145
## Leg -6.40612 4.60272 -1.3918 0.17421
##
## n = 38, p = 8, Residual SE = 37.08854, R-Squared = 0.69
lmod <- update(lmod, . ~ . -Weight)
sumary(lmod)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 427.50734 124.38767 3.4369 0.001696
## Age 0.77573 0.51579 1.5040 0.142709
## HtShoes -2.08235 2.43289 -0.8559 0.398613
## Seated 0.58582 3.60833 0.1624 0.872083
## Arm -1.28258 3.74154 -0.3428 0.734067
## Thigh -1.11535 2.41013 -0.4628 0.646758
## Leg -6.35716 4.49663 -1.4138 0.167396
##
## n = 38, p = 7, Residual SE = 36.49034, R-Squared = 0.69
lmod <- update(lmod, . ~ . -Seated)
sumary(lmod)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 436.54632 109.52662 3.9858 0.0003645
## Age 0.76671 0.50493 1.5185 0.1387169
## HtShoes -1.77157 1.47856 -1.1982 0.2396483
## Arm -1.33899 3.66826 -0.3650 0.7174980
## Thigh -1.19826 2.31930 -0.5166 0.6089553
## Leg -6.49097 4.35267 -1.4913 0.1456860
##
## n = 38, p = 6, Residual SE = 35.93092, R-Squared = 0.69
lmod <- update(lmod, . ~ . -Arm)
sumary(lmod)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 445.79773 105.14516 4.2398 0.0001695
## Age 0.65246 0.39097 1.6688 0.1046161
## HtShoes -1.91711 1.40497 -1.3645 0.1816360
## Thigh -1.37321 2.23923 -0.6133 0.5439130
## Leg -6.95019 4.11181 -1.6903 0.1003972
##
## n = 38, p = 5, Residual SE = 35.45591, R-Squared = 0.68
lmod <- update(lmod, . ~ . -Thigh)
sumary(lmod)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 456.21365 102.80779 4.4375 9.092e-05
## Age 0.59983 0.37792 1.5872 0.12173
## HtShoes -2.30226 1.24520 -1.8489 0.07318
## Leg -6.82975 4.06926 -1.6784 0.10244
##
## n = 38, p = 4, Residual SE = 35.12909, R-Squared = 0.68
lmod <- update(lmod, . ~ . -Age)
sumary(lmod)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 493.7938 102.1923 4.8320 2.662e-05
## HtShoes -2.4955 1.2658 -1.9714 0.05662
## Leg -6.3693 4.1461 -1.5362 0.13347
##
## n = 38, p = 3, Residual SE = 35.88337, R-Squared = 0.66
the SE increased after removing age. We can stop here.
AIC:
require(leaps)
## Loading required package: leaps
b <- regsubsets(hipcenter~.,data=seatpos)
rs <- summary(b)
rs$which
## (Intercept) Age Weight HtShoes Ht Seated Arm Thigh Leg
## 1 TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## 2 TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE
## 3 TRUE TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE
## 4 TRUE TRUE FALSE TRUE FALSE FALSE FALSE TRUE TRUE
## 5 TRUE TRUE FALSE TRUE FALSE FALSE TRUE TRUE TRUE
## 6 TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE
## 7 TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
## 8 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
AIC <- nrow(seatpos)*log(rs$rss/nrow(seatpos)) + (2:9)*2
plot(AIC ~ I(1:8), ylab="AIC", xlab="Number of Predictors")
AIC
## [1] 275.0667 274.7798 274.2418 275.8291 277.6712 279.6389 281.6286 283.6240
lm1 <- lm(hipcenter ~ Age + Ht + Leg, data=seatpos)
summary(lm1)
##
## Call:
## lm(formula = hipcenter ~ Age + Ht + Leg, data = seatpos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -79.715 -22.758 -4.102 21.394 60.576
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 452.1976 100.9482 4.480 8.04e-05 ***
## Age 0.5807 0.3790 1.532 0.1347
## Ht -2.3254 1.2545 -1.854 0.0725 .
## Leg -6.7390 4.1050 -1.642 0.1099
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 35.12 on 34 degrees of freedom
## Multiple R-squared: 0.6814, Adjusted R-squared: 0.6533
## F-statistic: 24.24 on 3 and 34 DF, p-value: 1.426e-08
from the graph and the values of AIC, the value is lowest at the 3 predictor variables. Intercept+Age+Ht+Leg.
Adjusted R2:
plot(2:9,rs$adjr2,xlab="No. of Parameters",ylab="Adjusted R-square")
which.max(rs$adjr2)
## [1] 3
We see that the Age+Ht+Leg model has the largest R2a.
Stepwise Selection:
lmod <- lm(hipcenter ~ ., data=seatpos)
step(lmod)
## Start: AIC=283.62
## hipcenter ~ Age + Weight + HtShoes + Ht + Seated + Arm + Thigh +
## Leg
##
## Df Sum of Sq RSS AIC
## - Ht 1 5.01 41267 281.63
## - Weight 1 8.99 41271 281.63
## - Seated 1 28.64 41290 281.65
## - HtShoes 1 108.43 41370 281.72
## - Arm 1 164.97 41427 281.78
## - Thigh 1 262.76 41525 281.87
## <none> 41262 283.62
## - Age 1 2632.12 43894 283.97
## - Leg 1 2654.85 43917 283.99
##
## Step: AIC=281.63
## hipcenter ~ Age + Weight + HtShoes + Seated + Arm + Thigh + Leg
##
## Df Sum of Sq RSS AIC
## - Weight 1 11.10 41278 279.64
## - Seated 1 30.52 41297 279.66
## - Arm 1 160.50 41427 279.78
## - Thigh 1 269.08 41536 279.88
## - HtShoes 1 971.84 42239 280.51
## <none> 41267 281.63
## - Leg 1 2664.65 43931 282.01
## - Age 1 2808.52 44075 282.13
##
## Step: AIC=279.64
## hipcenter ~ Age + HtShoes + Seated + Arm + Thigh + Leg
##
## Df Sum of Sq RSS AIC
## - Seated 1 35.10 41313 277.67
## - Arm 1 156.47 41434 277.78
## - Thigh 1 285.16 41563 277.90
## - HtShoes 1 975.48 42253 278.53
## <none> 41278 279.64
## - Leg 1 2661.39 43939 280.01
## - Age 1 3011.86 44290 280.31
##
## Step: AIC=277.67
## hipcenter ~ Age + HtShoes + Arm + Thigh + Leg
##
## Df Sum of Sq RSS AIC
## - Arm 1 172.02 41485 275.83
## - Thigh 1 344.61 41658 275.99
## - HtShoes 1 1853.43 43166 277.34
## <none> 41313 277.67
## - Leg 1 2871.07 44184 278.22
## - Age 1 2976.77 44290 278.31
##
## Step: AIC=275.83
## hipcenter ~ Age + HtShoes + Thigh + Leg
##
## Df Sum of Sq RSS AIC
## - Thigh 1 472.8 41958 274.26
## <none> 41485 275.83
## - HtShoes 1 2340.7 43826 275.92
## - Age 1 3501.0 44986 276.91
## - Leg 1 3591.7 45077 276.98
##
## Step: AIC=274.26
## hipcenter ~ Age + HtShoes + Leg
##
## Df Sum of Sq RSS AIC
## <none> 41958 274.26
## - Age 1 3108.8 45067 274.98
## - Leg 1 3476.3 45434 275.28
## - HtShoes 1 4218.6 46176 275.90
##
## Call:
## lm(formula = hipcenter ~ Age + HtShoes + Leg, data = seatpos)
##
## Coefficients:
## (Intercept) Age HtShoes Leg
## 456.2137 0.5998 -2.3023 -6.8297
According to this method, Age + HtShoes + Leg seems like a best model.
#Model selected by AIC :
lm1 <- lm(hipcenter ~ Age + Ht + Leg, data=seatpos)
summary(lm1)
##
## Call:
## lm(formula = hipcenter ~ Age + Ht + Leg, data = seatpos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -79.715 -22.758 -4.102 21.394 60.576
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 452.1976 100.9482 4.480 8.04e-05 ***
## Age 0.5807 0.3790 1.532 0.1347
## Ht -2.3254 1.2545 -1.854 0.0725 .
## Leg -6.7390 4.1050 -1.642 0.1099
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 35.12 on 34 degrees of freedom
## Multiple R-squared: 0.6814, Adjusted R-squared: 0.6533
## F-statistic: 24.24 on 3 and 34 DF, p-value: 1.426e-08
predict(lm1, interval = "confidence", level=0.99)
## fit lwr upr
## 1 -229.37038 -260.1341 -198.60668
## 2 -156.57871 -175.2138 -137.94360
## 3 -97.27613 -129.7232 -64.82904
## 4 -248.84217 -282.3338 -215.35051
## 5 -187.96187 -210.0575 -165.86626
## 6 -184.13715 -210.7663 -157.50801
## 7 -157.08830 -179.6183 -134.55834
## 8 -246.83806 -289.9492 -203.72694
## 9 -143.21452 -171.0712 -115.35781
## 10 -121.03680 -144.4909 -97.58270
## 11 -162.39717 -182.9554 -141.83894
## 12 -88.19429 -117.3117 -59.07686
## 13 -201.14838 -232.2414 -170.05539
## 14 -136.49654 -156.8167 -116.17640
## 15 -191.86929 -220.5638 -163.17474
## 16 -180.69932 -220.0115 -141.38718
## 17 -190.16525 -216.4992 -163.83126
## 18 -98.10839 -136.5217 -59.69511
## 19 -146.90849 -171.0965 -122.72049
## 20 -171.99133 -195.7509 -148.23176
## 21 -182.34850 -224.2672 -140.42980
## 22 -280.44279 -326.9605 -233.92503
## 23 -246.44634 -278.2857 -214.60702
## 24 -77.94233 -110.2150 -45.66960
## 25 -141.64372 -169.7095 -113.57792
## 26 -221.48259 -248.3394 -194.62580
## 27 -153.45945 -178.2131 -128.70581
## 28 -129.45660 -171.4760 -87.43724
## 29 -189.00940 -217.4021 -160.61666
## 30 -90.72558 -120.1323 -61.31889
## 31 -102.49003 -148.3792 -56.60088
## 32 -181.34669 -205.5591 -157.13426
## 33 -166.98953 -209.8179 -124.16118
## 34 -101.67033 -132.6128 -70.72785
## 35 -188.60502 -218.6970 -158.51302
## 36 -133.36391 -155.2307 -111.49710
## 37 -184.56284 -208.6539 -160.47181
## 38 -153.31684 -185.7326 -120.90112
It appears that for every unit of leg length that increases, -6.739 units are decreased in hipcenter. Also the adjusted R2 value has been improved as compared to the original model with all predictors.