BIO232 Homework 1

## knitr chunk defaults; see https://yihui.org/knitr/options/
## Namespace-qualified so the call does not depend on knitr being attached,
## and TRUE spelled out because T is an ordinary, reassignable variable.
knitr::opts_chunk$set(comment = "", warning = FALSE, message = FALSE,
    tidy = FALSE, echo = TRUE, fig.width = 5, fig.height = 5)

## Wider console output; scipen = 10 biases printing toward fixed notation.
options(width = 116, scipen = 10)

## Evaluate the following chunks relative to the course directory.
## knitr discourages setwd() inside a chunk; root.dir is the documented
## mechanism and takes effect for the chunks after this one.
## NOTE(review): when running this script outside knitr, set the working
## directory to this path manually before reading ./hers.txt.
knitr::opts_knit$set(root.dir = "~/statistics/bio232/")

2.

## Read the data set; header = TRUE takes column names from the first line
hers <- read.table(file = "./hers.txt", header = TRUE)

## Mean of sbp over the rows whose age is exactly 63.
## which() drops rows with a missing age, as subset() does.
hers.subset.age.63 <- hers[which(hers$age == 63), , drop = FALSE]
mean(hers.subset.age.63[["sbp"]])
[1] 142.1

## Same quantity from a simple linear regression of sbp on age,
## evaluated at age = 63
lm.sbp.by.age <- lm(formula = sbp ~ age, data = hers)
predict(object = lm.sbp.by.age,
        newdata = data.frame(age = 63),
        type = "response")
    1 
133.5 

3.

## a. Regress log(ldl) on bmi and print the model summary
lm.logLDL.by.bmi <- lm(formula = log(ldl) ~ bmi, data = hers)
summary(object = lm.logLDL.by.bmi)

Call:
lm(formula = log(ldl) ~ bmi, data = hers)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.6936 -0.1512  0.0155  0.1792  0.8073 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  4.84011    0.07899   61.27   <2e-16 ***
bmi          0.00294    0.00269    1.09     0.28    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Residual standard error: 0.262 on 274 degrees of freedom
Multiple R-squared: 0.00433,    Adjusted R-squared: 0.000699 
F-statistic: 1.19 on 1 and 274 DF,  p-value: 0.276 

## 95% confidence intervals for the intercept and the bmi slope
confint(object = lm.logLDL.by.bmi, level = 0.95)
                2.5 %   97.5 %
(Intercept)  4.684597 4.995621
bmi         -0.002362 0.008247

## c. Fitted log(ldl) at bmi = 30 with a confidence interval for the mean
predict(object = lm.logLDL.by.bmi,
        newdata = data.frame(bmi = 30),
        interval = "confidence")
    fit   lwr  upr
1 4.928 4.897 4.96

## d. Fitted log(ldl) at bmi = 30 with a prediction interval for a new
##    observation
predict(object = lm.logLDL.by.bmi,
        newdata = data.frame(bmi = 30),
        interval = "prediction")
    fit   lwr   upr
1 4.928 4.411 5.446

## e. Scatter plot of log(ldl) against bmi.
## The original qplot() call failed with "could not find function" because
## ggplot2 was never attached; the base-graphics formula interface draws the
## same scatter plot without requiring an extra package.
plot(log(ldl) ~ bmi, data = hers)

## f. Five-number summary and mean of the residuals from the
##    log(ldl) ~ bmi fit
summary(residuals(lm.logLDL.by.bmi))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-0.6940 -0.1510  0.0155  0.0000  0.1790  0.8070 

4.

## Row indices for random 10% and 20% subsamples of hers.
## The seed is fixed so the subsamples, and the fits computed from them below,
## are reproducible from run to run.
set.seed(232)

## Integer division makes the sample size an explicit whole number instead
## of relying on sample() silently truncating a fractional size.
sample10percent <- sample(seq_len(nrow(hers)), size = nrow(hers) %/% 10)
sample20percent <- sample(seq_len(nrow(hers)), size = nrow(hers) %/% 5)

## Refit log(ldl) ~ bmi on the 10% subsample
summary(
  lm(formula = log(ldl) ~ bmi, data = hers[sample10percent, ])
)

Call:
lm(formula = log(ldl) ~ bmi, data = hers[sample10percent, ])

Residuals:
    Min      1Q  Median      3Q     Max 
-0.6007 -0.1087  0.0273  0.2314  0.5317 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  5.03075    0.25721   19.56   <2e-16 ***
bmi         -0.00499    0.00884   -0.56     0.58    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Residual standard error: 0.261 on 25 degrees of freedom
Multiple R-squared: 0.0126, Adjusted R-squared: -0.0269 
F-statistic: 0.319 on 1 and 25 DF,  p-value: 0.577 

## Refit log(ldl) ~ bmi on the 20% subsample
summary(
  lm(formula = log(ldl) ~ bmi, data = hers[sample20percent, ])
)

Call:
lm(formula = log(ldl) ~ bmi, data = hers[sample20percent, ])

Residuals:
   Min     1Q Median     3Q    Max 
-0.653 -0.151 -0.015  0.192  0.495 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  4.98443    0.19484   25.58   <2e-16 ***
bmi         -0.00161    0.00654   -0.25     0.81    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Residual standard error: 0.269 on 53 degrees of freedom
Multiple R-squared: 0.00114,    Adjusted R-squared: -0.0177 
F-statistic: 0.0606 on 1 and 53 DF,  p-value: 0.806