Body Fat with measurements from body

s188026 Yifei Liu

bodyfat: http://garthtarr.github.io/mplot/reference/bodyfat.html

(Interpret and comments are in every step, conlusion is at the last)

extract data

#install.packages("mplot")
library(mplot)
data(bodyfat)
Fat   = bodyfat$Bodyfat
Neck  = bodyfat$Neck
Chest = bodyfat$Chest
Abdo  = bodyfat$Abdo
Hip   = bodyfat$Hip
Thigh = bodyfat$Thigh
Knee  = bodyfat$Knee
Ankle = bodyfat$Ankle
Bic   = bodyfat$Bic
Fore  = bodyfat$Fore
Wrist = bodyfat$Wrist
Bodyfat<-data.frame(Fat, Neck, Chest, Abdo, Hip, Thigh, Knee, Ankle, Bic, Fore, Wrist)

cor()

corResult<-cor(Bodyfat)
corResult[,1]
##       Fat      Neck     Chest      Abdo       Hip     Thigh      Knee     Ankle 
## 1.0000000 0.4647812 0.7379384 0.8382541 0.6273400 0.5225635 0.4876076 0.3595300 
##       Bic      Fore     Wrist 
## 0.4915120 0.3403827 0.4200907

the correlation between the body fat and other parameters, some of them have strong correlation, the smallest is also has 0.34.

build the model

Model<-lm(Fat ~ Neck+Chest+Abdo+Hip+Thigh+Knee+Ankle+Bic+Fore+Wrist)
summary(Model)
## 
## Call:
## lm(formula = Fat ~ Neck + Chest + Abdo + Hip + Thigh + Knee + 
##     Ankle + Bic + Fore + Wrist)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.0414 -2.5037 -0.1783  2.7166  9.5073 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -10.34859    9.07550  -1.140   0.2565    
## Neck         -0.68341    0.30776  -2.221   0.0283 *  
## Chest        -0.03297    0.11898  -0.277   0.7822    
## Abdo          0.96013    0.10562   9.090 3.01e-15 ***
## Hip          -0.27401    0.16515  -1.659   0.0998 .  
## Thigh         0.03553    0.17675   0.201   0.8410    
## Knee         -0.12257    0.27596  -0.444   0.6577    
## Ankle        -0.24841    0.47587  -0.522   0.6027    
## Bic           0.03025    0.23880   0.127   0.8994    
## Fore          0.33694    0.26239   1.284   0.2016    
## Wrist        -0.26845    0.67340  -0.399   0.6909    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.054 on 117 degrees of freedom
## Multiple R-squared:  0.7487, Adjusted R-squared:  0.7272 
## F-statistic: 34.86 on 10 and 117 DF,  p-value: < 2.2e-16

Neck’s and Abdo’s coefficient was significantly not 0 at the level of P <0.05.(Hip should be considered, too)

Multiple R-squared: 0.7487 shows that predict variable explained 74.87% of the variance in bodyfat.

Residual standard error: 4.054 shows that the average estimation error of bodyfat is 4.054% in this model.

We need optimize it later.

diagnose

library(car)
## Loading required package: carData
plot(Model)

qqPlot(Model,id.method='identify',simulate = TRUE,labels=row.names(Bodyfat),main='Q-Q plot')

## [1] 60 73

Residuals vs Fitted: normal distribution —-> OK

Normal QQ: combine with qqplot, Within the confidence interval —-> OK

Scale-Location: The variance is basically a constant —-> OK

Residuals vs Leverage: cook’s distance is in the 0.5 —-> OK

independence

durbinWatsonTest(Model)
##  lag Autocorrelation D-W Statistic p-value
##    1     -0.06669996      2.112668   0.484
##  Alternative hypothesis: rho != 0

p=0.532>0.05. No autocorrelation.

homoscedasticity

ncvTest(Model)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.9315466, Df = 1, p = 0.33446

p value shows the variance is constant.

VIF multicollinearity

vif(Model)
##     Neck    Chest     Abdo      Hip    Thigh     Knee    Ankle      Bic 
## 3.560842 6.996379 8.031253 8.139304 5.841558 3.274917 3.011799 3.963420 
##     Fore    Wrist 
## 2.209205 3.031608

smaller than 10 —-> OK

Computing best subsets regression

1.regression

2.plot

3.Model selection criteria: Adjusted R2, Cp and BIC

library(leaps)
leaps1<-regsubsets(Fat ~ Neck+Chest+Abdo+Hip+Thigh+Knee+Ankle+Bic+Fore+Wrist, data = Bodyfat, nvmax = 10)
leaps2<-regsubsets(Fat ~ Neck+Chest+Abdo+Hip+Thigh+Knee+Ankle+Bic+Fore+Wrist, data = Bodyfat, nbest = 10)
res.sum <- summary(leaps1)
res.sum
## Subset selection object
## Call: regsubsets.formula(Fat ~ Neck + Chest + Abdo + Hip + Thigh + 
##     Knee + Ankle + Bic + Fore + Wrist, data = Bodyfat, nvmax = 10)
## 10 Variables  (and intercept)
##       Forced in Forced out
## Neck      FALSE      FALSE
## Chest     FALSE      FALSE
## Abdo      FALSE      FALSE
## Hip       FALSE      FALSE
## Thigh     FALSE      FALSE
## Knee      FALSE      FALSE
## Ankle     FALSE      FALSE
## Bic       FALSE      FALSE
## Fore      FALSE      FALSE
## Wrist     FALSE      FALSE
## 1 subsets of each size up to 10
## Selection Algorithm: exhaustive
##           Neck Chest Abdo Hip Thigh Knee Ankle Bic Fore Wrist
## 1  ( 1 )  " "  " "   "*"  " " " "   " "  " "   " " " "  " "  
## 2  ( 1 )  "*"  " "   "*"  " " " "   " "  " "   " " " "  " "  
## 3  ( 1 )  "*"  " "   "*"  "*" " "   " "  " "   " " " "  " "  
## 4  ( 1 )  "*"  " "   "*"  "*" " "   " "  " "   " " "*"  " "  
## 5  ( 1 )  "*"  " "   "*"  "*" " "   " "  "*"   " " "*"  " "  
## 6  ( 1 )  "*"  " "   "*"  "*" " "   " "  "*"   " " "*"  "*"  
## 7  ( 1 )  "*"  " "   "*"  "*" " "   "*"  "*"   " " "*"  "*"  
## 8  ( 1 )  "*"  " "   "*"  "*" "*"   "*"  "*"   " " "*"  "*"  
## 9  ( 1 )  "*"  "*"   "*"  "*" "*"   "*"  "*"   " " "*"  "*"  
## 10  ( 1 ) "*"  "*"   "*"  "*" "*"   "*"  "*"   "*" "*"  "*"
data.frame(
  Adj.R2 = which.max(res.sum$adjr2),
  CP = which.min(res.sum$cp),
  BIC = which.min(res.sum$bic)
)
##   Adj.R2 CP BIC
## 1      4  3   3
plot(leaps1,scale = 'adjr2')

leaps1 for maximum size of subsets to examine, shows choose 4 when consider R2 and choose 3 for CP and BIC.

4 is Neck+Abdo+Hip+Fore

3 is -Fore

res.sum <- summary(leaps2)
res.sum
## Subset selection object
## Call: regsubsets.formula(Fat ~ Neck + Chest + Abdo + Hip + Thigh + 
##     Knee + Ankle + Bic + Fore + Wrist, data = Bodyfat, nbest = 10)
## 10 Variables  (and intercept)
##       Forced in Forced out
## Neck      FALSE      FALSE
## Chest     FALSE      FALSE
## Abdo      FALSE      FALSE
## Hip       FALSE      FALSE
## Thigh     FALSE      FALSE
## Knee      FALSE      FALSE
## Ankle     FALSE      FALSE
## Bic       FALSE      FALSE
## Fore      FALSE      FALSE
## Wrist     FALSE      FALSE
## 10 subsets of each size up to 8
## Selection Algorithm: exhaustive
##           Neck Chest Abdo Hip Thigh Knee Ankle Bic Fore Wrist
## 1  ( 1 )  " "  " "   "*"  " " " "   " "  " "   " " " "  " "  
## 1  ( 2 )  " "  "*"   " "  " " " "   " "  " "   " " " "  " "  
## 1  ( 3 )  " "  " "   " "  "*" " "   " "  " "   " " " "  " "  
## 1  ( 4 )  " "  " "   " "  " " "*"   " "  " "   " " " "  " "  
## 1  ( 5 )  " "  " "   " "  " " " "   " "  " "   "*" " "  " "  
## 1  ( 6 )  " "  " "   " "  " " " "   "*"  " "   " " " "  " "  
## 1  ( 7 )  "*"  " "   " "  " " " "   " "  " "   " " " "  " "  
## 1  ( 8 )  " "  " "   " "  " " " "   " "  " "   " " " "  "*"  
## 1  ( 9 )  " "  " "   " "  " " " "   " "  "*"   " " " "  " "  
## 1  ( 10 ) " "  " "   " "  " " " "   " "  " "   " " "*"  " "  
## 2  ( 1 )  "*"  " "   "*"  " " " "   " "  " "   " " " "  " "  
## 2  ( 2 )  " "  " "   "*"  "*" " "   " "  " "   " " " "  " "  
## 2  ( 3 )  " "  " "   "*"  " " " "   " "  " "   " " " "  "*"  
## 2  ( 4 )  " "  " "   "*"  " " " "   " "  "*"   " " " "  " "  
## 2  ( 5 )  " "  " "   "*"  " " " "   "*"  " "   " " " "  " "  
## 2  ( 6 )  " "  " "   "*"  " " "*"   " "  " "   " " " "  " "  
## 2  ( 7 )  " "  " "   "*"  " " " "   " "  " "   "*" " "  " "  
## 2  ( 8 )  " "  "*"   "*"  " " " "   " "  " "   " " " "  " "  
## 2  ( 9 )  " "  " "   "*"  " " " "   " "  " "   " " "*"  " "  
## 2  ( 10 ) "*"  "*"   " "  " " " "   " "  " "   " " " "  " "  
## 3  ( 1 )  "*"  " "   "*"  "*" " "   " "  " "   " " " "  " "  
## 3  ( 2 )  " "  " "   "*"  "*" " "   " "  " "   " " " "  "*"  
## 3  ( 3 )  "*"  " "   "*"  " " " "   " "  "*"   " " " "  " "  
## 3  ( 4 )  "*"  " "   "*"  " " " "   "*"  " "   " " " "  " "  
## 3  ( 5 )  "*"  " "   "*"  " " "*"   " "  " "   " " " "  " "  
## 3  ( 6 )  "*"  " "   "*"  " " " "   " "  " "   " " " "  "*"  
## 3  ( 7 )  "*"  " "   "*"  " " " "   " "  " "   " " "*"  " "  
## 3  ( 8 )  " "  " "   "*"  "*" " "   " "  "*"   " " " "  " "  
## 3  ( 9 )  " "  " "   "*"  "*" " "   "*"  " "   " " " "  " "  
## 3  ( 10 ) " "  " "   "*"  " " "*"   " "  " "   " " " "  "*"  
## 4  ( 1 )  "*"  " "   "*"  "*" " "   " "  " "   " " "*"  " "  
## 4  ( 2 )  "*"  " "   "*"  "*" " "   " "  " "   "*" " "  " "  
## 4  ( 3 )  "*"  " "   "*"  "*" " "   "*"  " "   " " " "  " "  
## 4  ( 4 )  "*"  " "   "*"  "*" " "   " "  "*"   " " " "  " "  
## 4  ( 5 )  "*"  " "   "*"  "*" " "   " "  " "   " " " "  "*"  
## 4  ( 6 )  "*"  " "   "*"  "*" "*"   " "  " "   " " " "  " "  
## 4  ( 7 )  "*"  "*"   "*"  "*" " "   " "  " "   " " " "  " "  
## 4  ( 8 )  "*"  " "   "*"  " " " "   " "  "*"   " " "*"  " "  
## 4  ( 9 )  "*"  " "   "*"  " " " "   "*"  " "   " " "*"  " "  
## 4  ( 10 ) "*"  " "   "*"  " " " "   "*"  "*"   " " " "  " "  
## 5  ( 1 )  "*"  " "   "*"  "*" " "   " "  "*"   " " "*"  " "  
## 5  ( 2 )  "*"  " "   "*"  "*" " "   "*"  " "   " " "*"  " "  
## 5  ( 3 )  "*"  " "   "*"  "*" " "   " "  " "   " " "*"  "*"  
## 5  ( 4 )  "*"  "*"   "*"  "*" " "   " "  " "   " " "*"  " "  
## 5  ( 5 )  "*"  " "   "*"  "*" " "   " "  " "   "*" "*"  " "  
## 5  ( 6 )  "*"  " "   "*"  "*" "*"   " "  " "   " " "*"  " "  
## 5  ( 7 )  "*"  " "   "*"  "*" " "   "*"  " "   "*" " "  " "  
## 5  ( 8 )  "*"  " "   "*"  "*" " "   " "  "*"   "*" " "  " "  
## 5  ( 9 )  "*"  " "   "*"  "*" " "   " "  " "   "*" " "  "*"  
## 5  ( 10 ) "*"  " "   "*"  "*" " "   "*"  " "   " " " "  "*"  
## 6  ( 1 )  "*"  " "   "*"  "*" " "   " "  "*"   " " "*"  "*"  
## 6  ( 2 )  "*"  " "   "*"  "*" " "   "*"  "*"   " " "*"  " "  
## 6  ( 3 )  "*"  " "   "*"  "*" " "   "*"  " "   " " "*"  "*"  
## 6  ( 4 )  "*"  " "   "*"  "*" "*"   " "  "*"   " " "*"  " "  
## 6  ( 5 )  "*"  "*"   "*"  "*" " "   " "  "*"   " " "*"  " "  
## 6  ( 6 )  "*"  " "   "*"  "*" " "   " "  "*"   "*" "*"  " "  
## 6  ( 7 )  "*"  " "   "*"  "*" "*"   "*"  " "   " " "*"  " "  
## 6  ( 8 )  "*"  "*"   "*"  "*" " "   "*"  " "   " " "*"  " "  
## 6  ( 9 )  "*"  " "   "*"  "*" " "   "*"  " "   "*" "*"  " "  
## 6  ( 10 ) "*"  "*"   "*"  "*" " "   " "  " "   " " "*"  "*"  
## 7  ( 1 )  "*"  " "   "*"  "*" " "   "*"  "*"   " " "*"  "*"  
## 7  ( 2 )  "*"  " "   "*"  "*" "*"   "*"  "*"   " " "*"  " "  
## 7  ( 3 )  "*"  "*"   "*"  "*" " "   " "  "*"   " " "*"  "*"  
## 7  ( 4 )  "*"  " "   "*"  "*" "*"   " "  "*"   " " "*"  "*"  
## 7  ( 5 )  "*"  "*"   "*"  "*" " "   "*"  "*"   " " "*"  " "  
## 7  ( 6 )  "*"  " "   "*"  "*" " "   " "  "*"   "*" "*"  "*"  
## 7  ( 7 )  "*"  " "   "*"  "*" " "   "*"  "*"   "*" "*"  " "  
## 7  ( 8 )  "*"  "*"   "*"  "*" " "   "*"  " "   " " "*"  "*"  
## 7  ( 9 )  "*"  " "   "*"  "*" " "   "*"  " "   "*" "*"  "*"  
## 7  ( 10 ) "*"  "*"   "*"  "*" "*"   " "  "*"   " " "*"  " "  
## 8  ( 1 )  "*"  " "   "*"  "*" "*"   "*"  "*"   " " "*"  "*"  
## 8  ( 2 )  "*"  "*"   "*"  "*" " "   "*"  "*"   " " "*"  "*"  
## 8  ( 3 )  "*"  " "   "*"  "*" " "   "*"  "*"   "*" "*"  "*"  
## 8  ( 4 )  "*"  "*"   "*"  "*" "*"   "*"  "*"   " " "*"  " "  
## 8  ( 5 )  "*"  "*"   "*"  "*" "*"   " "  "*"   " " "*"  "*"  
## 8  ( 6 )  "*"  "*"   "*"  "*" " "   " "  "*"   "*" "*"  "*"  
## 8  ( 7 )  "*"  " "   "*"  "*" "*"   "*"  "*"   "*" "*"  " "  
## 8  ( 8 )  "*"  "*"   "*"  "*" " "   "*"  "*"   "*" "*"  " "  
## 8  ( 9 )  "*"  " "   "*"  "*" "*"   " "  "*"   "*" "*"  "*"  
## 8  ( 10 ) "*"  "*"   "*"  "*" " "   "*"  " "   "*" "*"  "*"
data.frame(
  Adj.R2 = which.max(res.sum$adjr2),
  CP = which.min(res.sum$cp),
  BIC = which.min(res.sum$bic)
)
##   Adj.R2 CP BIC
## 1     31 21  21
plot(leaps2,scale = 'adjr2')

leaps2 for number of subsets of each size to record, shows choose 31 when consider R2 and choose 21 for CP and BIC.

31 is Neck+Abdo+Hip+Fore

21 is -Fore

So all information said that the best choice is Neck+Abdo+Hip (+Fore(if consider R2))

stepwise method

AIC | backward

library(MASS)
stepAIC(Model,direction = "backward")
## Start:  AIC=368.81
## Fat ~ Neck + Chest + Abdo + Hip + Thigh + Knee + Ankle + Bic + 
##     Fore + Wrist
## 
##         Df Sum of Sq    RSS    AIC
## - Bic    1      0.26 1923.0 366.83
## - Thigh  1      0.66 1923.4 366.86
## - Chest  1      1.26 1924.0 366.89
## - Wrist  1      2.61 1925.3 366.98
## - Knee   1      3.24 1926.0 367.03
## - Ankle  1      4.48 1927.2 367.11
## - Fore   1     27.10 1949.8 368.60
## <none>               1922.7 368.81
## - Hip    1     45.24 1967.9 369.79
## - Neck   1     81.03 2003.7 372.09
## - Abdo   1   1357.97 3280.7 435.20
## 
## Step:  AIC=366.83
## Fat ~ Neck + Chest + Abdo + Hip + Thigh + Knee + Ankle + Fore + 
##     Wrist
## 
##         Df Sum of Sq    RSS    AIC
## - Chest  1      1.10 1924.1 364.90
## - Thigh  1      1.24 1924.2 364.91
## - Wrist  1      2.49 1925.5 364.99
## - Knee   1      3.20 1926.2 365.04
## - Ankle  1      5.05 1928.0 365.16
## <none>               1923.0 366.83
## - Fore   1     34.28 1957.3 367.09
## - Hip    1     44.98 1968.0 367.79
## - Neck   1     80.79 2003.8 370.10
## - Abdo   1   1369.74 3292.7 433.67
## 
## Step:  AIC=364.9
## Fat ~ Neck + Abdo + Hip + Thigh + Knee + Ankle + Fore + Wrist
## 
##         Df Sum of Sq    RSS    AIC
## - Thigh  1      1.45 1925.5 363.00
## - Wrist  1      2.58 1926.7 363.07
## - Knee   1      3.19 1927.3 363.11
## - Ankle  1      5.01 1929.1 363.23
## <none>               1924.1 364.90
## - Fore   1     33.19 1957.3 365.09
## - Hip    1     46.45 1970.5 365.95
## - Neck   1     86.00 2010.1 368.50
## - Abdo   1   2546.66 4470.7 470.82
## 
## Step:  AIC=363
## Fat ~ Neck + Abdo + Hip + Knee + Ankle + Fore + Wrist
## 
##         Df Sum of Sq    RSS    AIC
## - Knee   1      2.62 1928.1 361.17
## - Wrist  1      3.70 1929.2 361.24
## - Ankle  1      4.36 1929.9 361.29
## <none>               1925.5 363.00
## - Fore   1     35.85 1961.4 363.36
## - Hip    1     56.44 1982.0 364.70
## - Neck   1     84.61 2010.1 366.50
## - Abdo   1   2556.02 4481.5 469.13
## 
## Step:  AIC=361.17
## Fat ~ Neck + Abdo + Hip + Ankle + Fore + Wrist
## 
##         Df Sum of Sq    RSS    AIC
## - Wrist  1      3.79 1931.9 359.42
## - Ankle  1      9.03 1937.2 359.77
## <none>               1928.1 361.17
## - Fore   1     36.30 1964.4 361.56
## - Hip    1     67.91 1996.1 363.60
## - Neck   1     87.51 2015.7 364.85
## - Abdo   1   2570.36 4498.5 467.61
## 
## Step:  AIC=359.42
## Fat ~ Neck + Abdo + Hip + Ankle + Fore
## 
##         Df Sum of Sq    RSS    AIC
## - Ankle  1     14.86 1946.8 358.40
## <none>               1931.9 359.42
## - Fore   1     36.06 1968.0 359.79
## - Hip    1     67.31 1999.2 361.81
## - Neck   1    140.04 2072.0 366.38
## - Abdo   1   2574.16 4506.1 465.83
## 
## Step:  AIC=358.4
## Fat ~ Neck + Abdo + Hip + Fore
## 
##        Df Sum of Sq    RSS    AIC
## - Fore  1     28.63 1975.4 358.27
## <none>              1946.8 358.40
## - Hip   1    126.23 2073.0 364.45
## - Neck  1    147.87 2094.7 365.78
## - Abdo  1   2670.93 4617.7 466.96
## 
## Step:  AIC=358.27
## Fat ~ Neck + Abdo + Hip
## 
##        Df Sum of Sq    RSS    AIC
## <none>              1975.4 358.27
## - Hip   1    107.53 2083.0 363.06
## - Neck  1    119.24 2094.7 363.78
## - Abdo  1   2642.74 4618.2 464.97
## 
## Call:
## lm(formula = Fat ~ Neck + Abdo + Hip)
## 
## Coefficients:
## (Intercept)         Neck         Abdo          Hip  
##    -14.2955      -0.6266       0.9290      -0.2863

least AIC: Neck+Abdo+Hip

anova and AIC test AIC for fore (and chest for interest)

Model1<-lm(Fat ~ Neck+Abdo+Hip)
Model2<-lm(Fat ~ Neck+Abdo+Hip+Fore)

#test for interest
#Because in cor() the chest has a high correlation.
#But all result shows we can delete it
Model3<-lm(Fat ~ Chest+Abdo+Hip)

anova(Model1,Model2)
## Analysis of Variance Table
## 
## Model 1: Fat ~ Neck + Abdo + Hip
## Model 2: Fat ~ Neck + Abdo + Hip + Fore
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    124 1975.4                           
## 2    123 1946.8  1    28.626 1.8086 0.1812
AIC(Model1,Model2,Model3)
##        df      AIC
## Model1  5 723.5212
## Model2  6 723.6528
## Model3  5 730.3488

p = 0.1812 means we can delete fore. And AIC has same result

Result

summary(Model1)
## 
## Call:
## lm(formula = Fat ~ Neck + Abdo + Hip)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.0590 -2.5669 -0.0023  2.6107  8.7289 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -14.29546    7.49107  -1.908  0.05866 .  
## Neck         -0.62659    0.22903  -2.736  0.00713 ** 
## Abdo          0.92901    0.07213  12.880  < 2e-16 ***
## Hip          -0.28631    0.11020  -2.598  0.01051 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.991 on 124 degrees of freedom
## Multiple R-squared:  0.7418, Adjusted R-squared:  0.7356 
## F-statistic: 118.8 on 3 and 124 DF,  p-value: < 2.2e-16

So the regression equation:

bodyfat = -14.29546 - 0.62659 * Neck + 0.92901 * Abdomen - 0.28631 * Hip