Question 7.8

Use the fat data, fitting the model described in Section 4.2.

library(faraway)
data(fat, package="faraway")
lmod1<- lm(brozek ~ age + weight + height + neck + chest + abdom + hip + thigh + knee + ankle + biceps + forearm + wrist, fat)
summary(lmod1)

(a) Compute the condition numbers and variance inflation factors. Comment on the degree of collinearity observed in the data.

x <- model.matrix(lmod1) [,-1]
e <- eigen(t(x) %*% x)
e_val <- e$val
c_no <- sqrt(e$val[1]/e$val)
data.frame(condition.no= t(c_no))

Using the condition number greater than 30 is considered large and represents multicollinearity, we see a high condition number for the model.

data.frame(vif = t(vif(lmod1)))

Using the rule considering vif is greater than 10 to be a cause for concern, we see that weight, abdom and hip indicate collinearity with other predictors.

(b) Cases 39 and 42 are unusual. Refit the model without these two cases and recompute the collinearity diagnostics. Comment on the differences observed from the full data fit.

fat_b <- fat[-c(39,42),]
lmod_b <- lm(brozek ~ age + weight + height + neck + chest + abdom + hip + thigh + knee + ankle + biceps + forearm + wrist, fat_b)
summary(lmod_b)
x_b <- model.matrix(lmod_b)[,-1]
e_b <- eigen(t(x_b) %*% x_b)
e_val_b <- e_b$val
c_no_b <- sqrt(e_b$val[1]/e_b$val)
data.frame(condition.no= t(c_no_b))

We can see a high condition number for the model.

data.frame(vif = t(vif(lmod_b)))

We can see that weight, chest, abdom and hip have vifs indicating collinearity with other predictors.

(c) Fit a model with brozek as the response and just age, weight and height as predictors. Compute the collinearity diagnostics and compare to the full data fit.

lmod_c <- lm(brozek ~ age + weight + height, fat)
summary(lmod_c)
x_c <- model.matrix(lmod_c)[,-1]
e_c <- eigen(t(x_c) %*% x_c)
e_val_c <- e_c$val
c_no_c <- sqrt(e_c$val[1]/e_c$val)
data.frame(condition.no= t(c_no_c))

We can know that the condition numbers are not high.

data.frame(vif = t(vif(lmod_c)))

We can see that the vifs indicate no collinearity among the predictors.

(d) Compute a 95% prediction interval for brozek for the median values of age, weight and height.

x <- model.matrix(lmod_c)
median <- apply(x,2,median)
median
pred_d <- predict(lmod_c, data.frame(t(median)), interval="prediction")
pred_d
pred_d_width <- pred_d[3]-pred_d[2]
pred_d_width 

(e) Compute a 95% prediction interval for brozek for age=40, weight=200 and height=73. How does the interval compare to the previous prediction?

new_e <- data.frame(age=40, weight=200, height=73)
pred_e <- predict(lmod_c, data.frame(new_e), interval="prediction")
pred_e

We can see that the lower and upper bounds of the prediction interval in (e) is larger than those in (d).

(f) Compute a 95% prediction interval for brozek for age=40, weight=130 and height=73. Are the values of predictors unusual? Comment on how the interval compares to the previous two answers.

new_f <- data.frame(age=40, weight=130, height=73)
pred_f <- predict(lmod_c, data.frame(new_f), interval="prediction")
pred_f

We can see that the lower and upper bounds of the prediction interval in (f) is much smaller than those in (d) and (e),which means that it is considered unusual.

summary(fat$weight)
hist(fat$weight)

Since 130 is below the 1st Quantile, we can wonder it is unusual.

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyMgUXVlc3Rpb24gNy44CiMjIyBVc2UgdGhlIGZhdCBkYXRhLCBmaXR0aW5nIHRoZSBtb2RlbCBkZXNjcmliZWQgaW4gU2VjdGlvbiA0LjIuCgpgYGB7cn0KbGlicmFyeShmYXJhd2F5KQpkYXRhKGZhdCwgcGFja2FnZT0iZmFyYXdheSIpCmxtb2QxPC0gbG0oYnJvemVrIH4gYWdlICsgd2VpZ2h0ICsgaGVpZ2h0ICsgbmVjayArIGNoZXN0ICsgYWJkb20gKyBoaXAgKyB0aGlnaCArIGtuZWUgKyBhbmtsZSArIGJpY2VwcyArIGZvcmVhcm0gKyB3cmlzdCwgZmF0KQpzdW1tYXJ5KGxtb2QxKQpgYGAKCiMjIyAoYSkgQ29tcHV0ZSB0aGUgY29uZGl0aW9uIG51bWJlcnMgYW5kIHZhcmlhbmNlIGluZmxhdGlvbiBmYWN0b3JzLiBDb21tZW50IG9uIHRoZSBkZWdyZWUgb2YgY29sbGluZWFyaXR5IG9ic2VydmVkIGluIHRoZSBkYXRhLgoKYGBge3J9CnggPC0gbW9kZWwubWF0cml4KGxtb2QxKSBbLC0xXQplIDwtIGVpZ2VuKHQoeCkgJSolIHgpCmVfdmFsIDwtIGUkdmFsCmNfbm8gPC0gc3FydChlJHZhbFsxXS9lJHZhbCkKZGF0YS5mcmFtZShjb25kaXRpb24ubm89IHQoY19ubykpCmBgYApVc2luZyB0aGUgY29uZGl0aW9uIG51bWJlciBncmVhdGVyIHRoYW4gMzAgaXMgY29uc2lkZXJlZCBsYXJnZSBhbmQgcmVwcmVzZW50cyBtdWx0aWNvbGxpbmVhcml0eSwgd2Ugc2VlIGEgaGlnaCBjb25kaXRpb24gbnVtYmVyIGZvciB0aGUgbW9kZWwuCgpgYGB7cn0KZGF0YS5mcmFtZSh2aWYgPSB0KHZpZihsbW9kMSkpKQpgYGAKVXNpbmcgdGhlIHJ1bGUgY29uc2lkZXJpbmcgdmlmIGlzIGdyZWF0ZXIgdGhhbiAxMCB0byBiZSBhIGNhdXNlIGZvciBjb25jZXJuLCB3ZSBzZWUgdGhhdCBgd2VpZ2h0YCwgYGFiZG9tYCBhbmQgIGBoaXBgIGluZGljYXRlIGNvbGxpbmVhcml0eSB3aXRoIG90aGVyIHByZWRpY3RvcnMuCgojIyMgKGIpIENhc2VzIDM5IGFuZCA0MiBhcmUgdW51c3VhbC4gUmVmaXQgdGhlIG1vZGVsIHdpdGhvdXQgdGhlc2UgdHdvIGNhc2VzIGFuZCByZWNvbXB1dGUgdGhlIGNvbGxpbmVhcml0eSBkaWFnbm9zdGljcy4gQ29tbWVudCBvbiB0aGUgZGlmZmVyZW5jZXMgb2JzZXJ2ZWQgZnJvbSB0aGUgZnVsbCBkYXRhIGZpdC4KCmBgYHtyfQpmYXRfYiA8LSBmYXRbLWMoMzksNDIpLF0KbG1vZF9iIDwtIGxtKGJyb3playB+IGFnZSArIHdlaWdodCArIGhlaWdodCArIG5lY2sgKyBjaGVzdCArIGFiZG9tICsgaGlwICsgdGhpZ2ggKyBrbmVlICsgYW5rbGUgKyBiaWNlcHMgKyBmb3JlYXJtICsgd3Jpc3QsIGZhdF9iKQpzdW1tYXJ5KGxtb2RfYikKYGBgCmBgYHtyfQp4X2IgPC0gbW9kZWwubWF0cml4KGxtb2RfYilbLC0xXQplX2IgPC0gZWlnZW4odCh4X2IpICUqJSB4X2IpCmVfdmFsX2IgPC0gZV9iJHZhbApjX25vX2IgPC0gc3FydChlX2IkdmFsWzFdL2VfYiR2YWwpCmRhdGEuZnJhbWUoY29uZGl0aW9uLm5vPSB0KGNfbm9fYikpCmBgYApXZSBjYW4gc2VlIGEgaGlnaCBjb25kaXRpb24gbnVtYmVyIGZvciB0aGUgbW9kZWwuCgpgYGB7cn0KZGF0YS5mcmFtZSh2aWYgPSB0KHZpZihsbW9kX2IpKSkKYGBgCldlIGNhbiBzZWUgdGhhdCBgd2VpZ2h0YCwgYGNoZXN0YCwgIGBhYmRvbWAgYW5kIGBoaXBgIGhhdmUgdmlmcyBpbmRpY2F0aW5nIGNvbGxpbmVhcml0eSB3aXRoIG90aGVyIHByZWRpY3RvcnMuCgoKIyMjIChjKSBGaXQgYSBtb2RlbCB3aXRoIGJyb3playBhcyB0aGUgcmVzcG9uc2UgYW5kIGp1c3QgYWdlLCB3ZWlnaHQgYW5kIGhlaWdodCBhcyBwcmVkaWN0b3JzLiBDb21wdXRlIHRoZSBjb2xsaW5lYXJpdHkgZGlhZ25vc3RpY3MgYW5kIGNvbXBhcmUgdG8gdGhlIGZ1bGwgZGF0YSBmaXQuCgpgYGB7cn0KbG1vZF9jIDwtIGxtKGJyb3playB+IGFnZSArIHdlaWdodCArIGhlaWdodCwgZmF0KQpzdW1tYXJ5KGxtb2RfYykKYGBgCmBgYHtyfQp4X2MgPC0gbW9kZWwubWF0cml4KGxtb2RfYylbLC0xXQplX2MgPC0gZWlnZW4odCh4X2MpICUqJSB4X2MpCmVfdmFsX2MgPC0gZV9jJHZhbApjX25vX2MgPC0gc3FydChlX2MkdmFsWzFdL2VfYyR2YWwpCmRhdGEuZnJhbWUoY29uZGl0aW9uLm5vPSB0KGNfbm9fYykpCmBgYApXZSBjYW4ga25vdyB0aGF0IHRoZSBjb25kaXRpb24gbnVtYmVycyBhcmUgbm90IGhpZ2guCmBgYHtyfQpkYXRhLmZyYW1lKHZpZiA9IHQodmlmKGxtb2RfYykpKQpgYGAKV2UgY2FuIHNlZSB0aGF0IHRoZSB2aWZzIGluZGljYXRlIG5vIGNvbGxpbmVhcml0eSBhbW9uZyB0aGUgcHJlZGljdG9ycy4KCiMjIyAoZCkgQ29tcHV0ZSBhIDk1JSBwcmVkaWN0aW9uIGludGVydmFsIGZvciBicm96ZWsgZm9yIHRoZSBtZWRpYW4gdmFsdWVzIG9mIGFnZSwgd2VpZ2h0IGFuZCBoZWlnaHQuCmBgYHtyfQp4IDwtIG1vZGVsLm1hdHJpeChsbW9kX2MpCm1lZGlhbiA8LSBhcHBseSh4LDIsbWVkaWFuKQptZWRpYW4KcHJlZF9kIDwtIHByZWRpY3QobG1vZF9jLCBkYXRhLmZyYW1lKHQobWVkaWFuKSksIGludGVydmFsPSJwcmVkaWN0aW9uIikKcHJlZF9kCmBgYAoKYGBge3J9CnByZWRfZF93aWR0aCA8LSBwcmVkX2RbM10tcHJlZF9kWzJdCnByZWRfZF93aWR0aCAKYGBgCiMjIyAoZSkgQ29tcHV0ZSBhIDk1JSBwcmVkaWN0aW9uIGludGVydmFsIGZvciBicm96ZWsgZm9yIGFnZT00MCwgd2VpZ2h0PTIwMCBhbmQgaGVpZ2h0PTczLiBIb3cgZG9lcyB0aGUgaW50ZXJ2YWwgY29tcGFyZSB0byB0aGUgcHJldmlvdXMgcHJlZGljdGlvbj8KCmBgYHtyfQpuZXdfZSA8LSBkYXRhLmZyYW1lKGFnZT00MCwgd2VpZ2h0PTIwMCwgaGVpZ2h0PTczKQpwcmVkX2UgPC0gcHJlZGljdChsbW9kX2MsIGRhdGEuZnJhbWUobmV3X2UpLCBpbnRlcnZhbD0icHJlZGljdGlvbiIpCnByZWRfZQpgYGAKV2UgY2FuIHNlZSB0aGF0IHRoZSBsb3dlciBhbmQgdXBwZXIgYm91bmRzIG9mIHRoZSBwcmVkaWN0aW9uIGludGVydmFsIGluIChlKSBpcyBsYXJnZXIgdGhhbiB0aG9zZSBpbiAoZCkuIAoKIyMjIChmKSBDb21wdXRlIGEgOTUlIHByZWRpY3Rpb24gaW50ZXJ2YWwgZm9yIGJyb3playBmb3IgYWdlPTQwLCB3ZWlnaHQ9MTMwIGFuZCBoZWlnaHQ9NzMuIEFyZSB0aGUgdmFsdWVzIG9mIHByZWRpY3RvcnMgdW51c3VhbD8gQ29tbWVudCBvbiBob3cgdGhlIGludGVydmFsIGNvbXBhcmVzIHRvIHRoZSBwcmV2aW91cyB0d28gYW5zd2Vycy4KCmBgYHtyfQpuZXdfZiA8LSBkYXRhLmZyYW1lKGFnZT00MCwgd2VpZ2h0PTEzMCwgaGVpZ2h0PTczKQpwcmVkX2YgPC0gcHJlZGljdChsbW9kX2MsIGRhdGEuZnJhbWUobmV3X2YpLCBpbnRlcnZhbD0icHJlZGljdGlvbiIpCnByZWRfZgpgYGAKV2UgY2FuIHNlZSB0aGF0IHRoZSBsb3dlciBhbmQgdXBwZXIgYm91bmRzIG9mIHRoZSBwcmVkaWN0aW9uIGludGVydmFsIGluIChmKSBpcyBtdWNoIHNtYWxsZXIgdGhhbiB0aG9zZSBpbiAoZCkgYW5kIChlKSx3aGljaCBtZWFucyB0aGF0IGl0IGlzIGNvbnNpZGVyZWQgdW51c3VhbC4KCmBgYHtyfQpzdW1tYXJ5KGZhdCR3ZWlnaHQpCmBgYAoKYGBge3J9Cmhpc3QoZmF0JHdlaWdodCkKYGBgClNpbmNlIDEzMCBpcyBiZWxvdyB0aGUgMXN0IFF1YW50aWxlLCB3ZSBjYW4gd29uZGVyIGl0IGlzIHVudXN1YWwuCg==