BIO232 Homework 1
## Chunk defaults for the knitted report; option reference:
## http://yihui.name/knitr/options#chunk_options
## `knitr::` is spelled out so the call does not depend on knitr being attached,
## and `TRUE` replaces the reassignable `T` alias.
knitr::opts_chunk$set(
  comment = "", warning = FALSE, message = FALSE, tidy = FALSE,
  echo = TRUE, fig.width = 5, fig.height = 5
)
## Wider console output, and a penalty that discourages scientific notation.
options(width = 116, scipen = 10)
## NOTE(review): hard-coded working directory; the data file is later read
## through a relative path ("./hers.txt"), so that read depends on this call.
setwd("~/statistics/bio232/")
2.
## Read the data file; its first row supplies the column names.
hers <- read.table(file = "./hers.txt", header = TRUE)
## Keep only the rows whose age is exactly 63 (rows with a missing age are
## dropped), then average `sbp` over those rows.
hers.subset.age.63 <- hers[
  !is.na(hers[["age"]]) & hers[["age"]] == 63, ,
  drop = FALSE
]
mean(hers.subset.age.63[["sbp"]])
[1] 142.1
## Model-based alternative: regress `sbp` on `age` using every row, then read
## the fitted value at age 63 off the regression line.
lm.sbp.by.age <- lm(formula = sbp ~ age, data = hers)
predict(
  object = lm.sbp.by.age,
  newdata = data.frame(age = 63),
  type = "response"
)
1
133.5
3.
## a.
## Simple linear regression of log-transformed `ldl` on `bmi`, followed by the
## coefficient table and overall fit statistics.
lm.logLDL.by.bmi <- lm(formula = log(ldl) ~ bmi, data = hers)
summary(object = lm.logLDL.by.bmi)
Call:
lm(formula = log(ldl) ~ bmi, data = hers)
Residuals:
Min 1Q Median 3Q Max
-0.6936 -0.1512 0.0155 0.1792 0.8073
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.84011 0.07899 61.27 <2e-16 ***
bmi 0.00294 0.00269 1.09 0.28
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.262 on 274 degrees of freedom
Multiple R-squared: 0.00433, Adjusted R-squared: 0.000699
F-statistic: 1.19 on 1 and 274 DF, p-value: 0.276
## Confidence limits for the intercept and the bmi slope
## (confint()'s default level).
confint(object = lm.logLDL.by.bmi)
2.5 % 97.5 %
(Intercept) 4.684597 4.995621
bmi -0.002362 0.008247
## c.
## Fitted log(ldl) at bmi = 30 with a confidence interval for the mean response.
predict(
  object = lm.logLDL.by.bmi,
  newdata = data.frame(bmi = 30),
  interval = "confidence"
)
fit lwr upr
1 4.928 4.897 4.96
## d.
## Fitted log(ldl) at bmi = 30 with a prediction interval for a single new
## observation.
predict(
  object = lm.logLDL.by.bmi,
  newdata = data.frame(bmi = 30),
  interval = "prediction"
)
fit lwr upr
1 4.928 4.411 5.446
## e.
## Scatter plot of log(ldl) against bmi.
## The plotting functions are namespace-qualified so the chunk does not depend
## on ggplot2 having been attached earlier in the document.
ggplot2::ggplot(data = hers, mapping = ggplot2::aes(x = bmi, y = log(ldl))) +
  ggplot2::geom_point()
## f.
## Five-number summary plus mean of the residuals from the log(ldl) ~ bmi fit.
summary(residuals(lm.logLDL.by.bmi))
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.6940 -0.1510 0.0155 0.0000 0.1790 0.8070
4.
## Draw two independent random sets of row indices (roughly 10% and 20% of the
## rows, without replacement) and refit the model on the 10% set.
## A fixed seed makes the draws, and therefore the fitted coefficients,
## repeatable from one knit to the next.
set.seed(232)
## sample() silently truncates a fractional size; floor() makes that explicit.
sample10percent <- sample(seq_len(nrow(hers)), size = floor(nrow(hers) / 10))
sample20percent <- sample(seq_len(nrow(hers)), size = floor(nrow(hers) / 5))
summary(lm(log(ldl) ~ bmi, data = hers[sample10percent, ]))
Call:
lm(formula = log(ldl) ~ bmi, data = hers[sample10percent, ])
Residuals:
Min 1Q Median 3Q Max
-0.6007 -0.1087 0.0273 0.2314 0.5317
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.03075 0.25721 19.56 <2e-16 ***
bmi -0.00499 0.00884 -0.56 0.58
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.261 on 25 degrees of freedom
Multiple R-squared: 0.0126, Adjusted R-squared: -0.0269
F-statistic: 0.319 on 1 and 25 DF, p-value: 0.577
## Same log(ldl) ~ bmi model, refit on the rows indexed by sample20percent.
summary(
  object = lm(formula = log(ldl) ~ bmi, data = hers[sample20percent, ])
)
Call:
lm(formula = log(ldl) ~ bmi, data = hers[sample20percent, ])
Residuals:
Min 1Q Median 3Q Max
-0.653 -0.151 -0.015 0.192 0.495
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.98443 0.19484 25.58 <2e-16 ***
bmi -0.00161 0.00654 -0.25 0.81
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.269 on 53 degrees of freedom
Multiple R-squared: 0.00114, Adjusted R-squared: -0.0177
F-statistic: 0.0606 on 1 and 53 DF, p-value: 0.806