Using the fat data, fit the model described in section 4.2 of the Faraway text using the following methods: Least squares, Least absolute deviations, and Huber method.

require(faraway)
## Loading required package: faraway
data(fat, package="faraway")
head(fat)
##   brozek siri density age weight height adipos  free neck chest abdom   hip
## 1   12.6 12.3  1.0708  23 154.25  67.75   23.7 134.9 36.2  93.1  85.2  94.5
## 2    6.9  6.1  1.0853  22 173.25  72.25   23.4 161.3 38.5  93.6  83.0  98.7
## 3   24.6 25.3  1.0414  22 154.00  66.25   24.7 116.0 34.0  95.8  87.9  99.2
## 4   10.9 10.4  1.0751  26 184.75  72.25   24.9 164.7 37.4 101.8  86.4 101.2
## 5   27.8 28.7  1.0340  24 184.25  71.25   25.6 133.1 34.4  97.3 100.0 101.9
## 6   20.6 20.9  1.0502  24 210.25  74.75   26.5 167.0 39.0 104.5  94.4 107.8
##   thigh knee ankle biceps forearm wrist
## 1  59.0 37.3  21.9   32.0    27.4  17.1
## 2  58.7 37.3  23.4   30.5    28.9  18.2
## 3  59.6 38.9  24.0   28.8    25.2  16.6
## 4  60.1 37.3  22.8   32.4    29.4  18.2
## 5  63.2 42.2  24.0   32.2    27.7  17.7
## 6  66.0 42.0  25.6   35.7    30.6  18.8
lmod<-lm(brozek ~ age + weight + height + neck + chest + abdom + hip + thigh + knee + ankle + biceps + forearm + wrist, data=fat)
summary(lmod)
## 
## Call:
## lm(formula = brozek ~ age + weight + height + neck + chest + 
##     abdom + hip + thigh + knee + ankle + biceps + forearm + wrist, 
##     data = fat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.264  -2.572  -0.097   2.898   9.327 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -15.29255   16.06992  -0.952  0.34225    
## age           0.05679    0.02996   1.895  0.05929 .  
## weight       -0.08031    0.04958  -1.620  0.10660    
## height       -0.06460    0.08893  -0.726  0.46830    
## neck         -0.43754    0.21533  -2.032  0.04327 *  
## chest        -0.02360    0.09184  -0.257  0.79740    
## abdom         0.88543    0.08008  11.057  < 2e-16 ***
## hip          -0.19842    0.13516  -1.468  0.14341    
## thigh         0.23190    0.13372   1.734  0.08418 .  
## knee         -0.01168    0.22414  -0.052  0.95850    
## ankle         0.16354    0.20514   0.797  0.42614    
## biceps        0.15280    0.15851   0.964  0.33605    
## forearm       0.43049    0.18445   2.334  0.02044 *  
## wrist        -1.47654    0.49552  -2.980  0.00318 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.988 on 238 degrees of freedom
## Multiple R-squared:  0.749,  Adjusted R-squared:  0.7353 
## F-statistic: 54.63 on 13 and 238 DF,  p-value: < 2.2e-16
cor(residuals(lmod)[-1],residuals(lmod)[-length(residuals(lmod))])
## [1] 0.1030356
##least squares
require(nlme)
## Loading required package: nlme
glmod <-  gls(brozek ~ age + weight + height + neck + chest + abdom + hip + thigh + knee + ankle + biceps + forearm + wrist, data=na.omit(fat))
summary(glmod)
## Generalized least squares fit by REML
##   Model: brozek ~ age + weight + height + neck + chest + abdom + hip +      thigh + knee + ankle + biceps + forearm + wrist 
##   Data: na.omit(fat) 
##        AIC      BIC    logLik
##   1464.835 1516.919 -717.4177
## 
## Coefficients:
##                  Value Std.Error   t-value p-value
## (Intercept) -15.292549 16.069921 -0.951626  0.3423
## age           0.056786  0.029965  1.895105  0.0593
## weight       -0.080310  0.049581 -1.619787  0.1066
## height       -0.064600  0.088930 -0.726414  0.4683
## neck         -0.437541  0.215334 -2.031920  0.0433
## chest        -0.023603  0.091839 -0.257007  0.7974
## abdom         0.885429  0.080077 11.057242  0.0000
## hip          -0.198419  0.135156 -1.468068  0.1434
## thigh         0.231895  0.133718  1.734211  0.0842
## knee         -0.011677  0.224143 -0.052095  0.9585
## ankle         0.163536  0.205143  0.797178  0.4261
## biceps        0.152799  0.158513  0.963954  0.3360
## forearm       0.430489  0.184452  2.333874  0.0204
## wrist        -1.476537  0.495519 -2.979779  0.0032
## 
##  Correlation: 
##         (Intr) age    weight height neck   chest  abdom  hip    thigh  knee  
## age      0.041                                                               
## weight   0.898  0.207                                                        
## height  -0.587  0.133 -0.454                                                 
## neck    -0.299 -0.091 -0.292  0.025                                          
## chest   -0.429 -0.035 -0.408  0.214 -0.032                                   
## abdom    0.001 -0.451 -0.202  0.110 -0.026 -0.469                            
## hip     -0.604  0.080 -0.541  0.252  0.174  0.181 -0.247                     
## thigh   -0.075  0.366 -0.056  0.203 -0.106  0.157 -0.137 -0.324              
## knee    -0.232 -0.251 -0.255 -0.086  0.150  0.088  0.069 -0.043 -0.278       
## ankle   -0.179  0.104 -0.183  0.062  0.104  0.025  0.093  0.053  0.010 -0.168
## biceps  -0.067 -0.072 -0.177  0.049 -0.082 -0.080  0.119  0.079 -0.268  0.076
## forearm -0.082  0.163 -0.011  0.021 -0.114 -0.118  0.053  0.095 -0.010 -0.090
## wrist   -0.181 -0.339 -0.143 -0.099 -0.290  0.017  0.149 -0.020  0.064 -0.075
##         ankle  biceps forerm
## age                         
## weight                      
## height                      
## neck                        
## chest                       
## abdom                       
## hip                         
## thigh                       
## knee                        
## ankle                       
## biceps   0.036              
## forearm  0.021 -0.295       
## wrist   -0.240 -0.039 -0.177
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.57362104 -0.64497638 -0.02432769  0.72670538  2.33871677 
## 
## Residual standard error: 3.987973 
## Degrees of freedom: 252 total; 238 residual
intervals(glmod,which="var-cov")
## Approximate 95% confidence intervals
## 
##  Residual standard error:
##    lower     est.    upper 
## 3.659628 3.987973 4.381550
##Huber
require(MASS)
## Loading required package: MASS
rlmod <- rlm(brozek ~ age + weight + height + neck + chest + abdom + hip + thigh + knee + ankle + biceps + forearm + wrist, data=fat)
summary(rlmod)
## 
## Call: rlm(formula = brozek ~ age + weight + height + neck + chest + 
##     abdom + hip + thigh + knee + ankle + biceps + forearm + wrist, 
##     data = fat)
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3964  -2.7352  -0.1171   2.8008   9.4446 
## 
## Coefficients:
##             Value    Std. Error t value 
## (Intercept) -11.3460  17.1216    -0.6627
## age           0.0650   0.0319     2.0368
## weight       -0.0643   0.0528    -1.2163
## height       -0.0625   0.0948    -0.6595
## neck         -0.4553   0.2294    -1.9846
## chest        -0.0256   0.0978    -0.2614
## abdom         0.8778   0.0853    10.2891
## hip          -0.2142   0.1440    -1.4872
## thigh         0.2632   0.1425     1.8473
## knee         -0.1076   0.2388    -0.4505
## ankle         0.1815   0.2186     0.8306
## biceps        0.1367   0.1689     0.8091
## forearm       0.4152   0.1965     2.1126
## wrist        -1.5739   0.5279    -2.9812
## 
## Residual standard error: 4.073 on 238 degrees of freedom

(a)Comment on any substantial differences between the least squares fit and the robust methods. The values of the intercepts are different and most of the cases its lower. The significanct variables forund in the both models are same. In the huber model, age, appears to be significant, while it doesn’t in the basic lm model. The standard error is not greatly affected.

(b)Identify which two cases have the lowest weights in the Huber fit. What is surprising about these two points?

wts <- rlmod$w
names(wts) <- row.names(fat)
head(sort(wts),2)
##       224       207 
## 0.5269652 0.5800712
colMeans(fat)
##     brozek       siri    density        age     weight     height     adipos 
##  18.938492  19.150794   1.055574  44.884921 178.924405  70.148810  25.436905 
##       free       neck      chest      abdom        hip      thigh       knee 
## 143.713889  37.992063 100.824206  92.555952  99.904762  59.405952  38.590476 
##      ankle     biceps    forearm      wrist 
##  23.102381  32.273413  28.663889  18.229762

Cases 224 and 207 have lowest weights. The values of body fat are closer to the lowest and highest values of the data. Values of other variables for this row is close to the men values of the other variables of the data.

(c)Plot weight (of the man) against height. Identify the two outlying cases. Are these the same as those identified in the previous question?

plot(weight ~ height, fat)

These outliers are not the same as found in question b. Points 39, 42 were identified as outliers. One has the lowest height and the other the highest weight and are substantially separate from the rest of the points.

test <- fat[-c(42),] 
test1<- test[-c(39),] 
glmod1 <-  gls(brozek ~ age + weight + height + neck + chest + abdom + hip + thigh + knee + ankle + biceps + forearm + wrist, data=na.omit(test1))
summary(glmod)
## Generalized least squares fit by REML
##   Model: brozek ~ age + weight + height + neck + chest + abdom + hip +      thigh + knee + ankle + biceps + forearm + wrist 
##   Data: na.omit(fat) 
##        AIC      BIC    logLik
##   1464.835 1516.919 -717.4177
## 
## Coefficients:
##                  Value Std.Error   t-value p-value
## (Intercept) -15.292549 16.069921 -0.951626  0.3423
## age           0.056786  0.029965  1.895105  0.0593
## weight       -0.080310  0.049581 -1.619787  0.1066
## height       -0.064600  0.088930 -0.726414  0.4683
## neck         -0.437541  0.215334 -2.031920  0.0433
## chest        -0.023603  0.091839 -0.257007  0.7974
## abdom         0.885429  0.080077 11.057242  0.0000
## hip          -0.198419  0.135156 -1.468068  0.1434
## thigh         0.231895  0.133718  1.734211  0.0842
## knee         -0.011677  0.224143 -0.052095  0.9585
## ankle         0.163536  0.205143  0.797178  0.4261
## biceps        0.152799  0.158513  0.963954  0.3360
## forearm       0.430489  0.184452  2.333874  0.0204
## wrist        -1.476537  0.495519 -2.979779  0.0032
## 
##  Correlation: 
##         (Intr) age    weight height neck   chest  abdom  hip    thigh  knee  
## age      0.041                                                               
## weight   0.898  0.207                                                        
## height  -0.587  0.133 -0.454                                                 
## neck    -0.299 -0.091 -0.292  0.025                                          
## chest   -0.429 -0.035 -0.408  0.214 -0.032                                   
## abdom    0.001 -0.451 -0.202  0.110 -0.026 -0.469                            
## hip     -0.604  0.080 -0.541  0.252  0.174  0.181 -0.247                     
## thigh   -0.075  0.366 -0.056  0.203 -0.106  0.157 -0.137 -0.324              
## knee    -0.232 -0.251 -0.255 -0.086  0.150  0.088  0.069 -0.043 -0.278       
## ankle   -0.179  0.104 -0.183  0.062  0.104  0.025  0.093  0.053  0.010 -0.168
## biceps  -0.067 -0.072 -0.177  0.049 -0.082 -0.080  0.119  0.079 -0.268  0.076
## forearm -0.082  0.163 -0.011  0.021 -0.114 -0.118  0.053  0.095 -0.010 -0.090
## wrist   -0.181 -0.339 -0.143 -0.099 -0.290  0.017  0.149 -0.020  0.064 -0.075
##         ankle  biceps forerm
## age                         
## weight                      
## height                      
## neck                        
## chest                       
## abdom                       
## hip                         
## thigh                       
## knee                        
## ankle                       
## biceps   0.036              
## forearm  0.021 -0.295       
## wrist   -0.240 -0.039 -0.177
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.57362104 -0.64497638 -0.02432769  0.72670538  2.33871677 
## 
## Residual standard error: 3.987973 
## Degrees of freedom: 252 total; 238 residual
intervals(glmod1,which="var-cov")
## Approximate 95% confidence intervals
## 
##  Residual standard error:
##    lower     est.    upper 
## 3.616618 3.942473 4.333366

due to removal of potential outliers, there is not much effect on the residual error of the model.