Biostatistics 213: Homework 6
## Settings for RMarkdown http://yihui.name/knitr/options#chunk_options
## knitr chunk defaults: no output prefix, warnings/messages hidden, code echoed,
## fixed 5 x 5 figure size
opts_chunk$set(comment = "", warning = FALSE, message = FALSE, tidy = FALSE,
               echo = TRUE, fig.width = 5, fig.height = 5)
## Console width and number formatting used for all printed output
options(width = 116, scipen = 5, digits = 5)
## NOTE(review): setwd() ties the script to one machine; the data file below is
## already read through its full path, so confirm whether this is still needed
setwd("~/statistics/bio213/")
## gdata provides read.xls() for reading the Excel data file
library(gdata)
lbw <- read.xls("~/statistics/bio213/lbw.xls")
## Same data available online: http://www.umass.edu/statdata/statdata/data/index.html
## Cases 10 and 39 need to be fixed to make them identical to Dr. Orav's dataset
## lbw2 <- read.xls("http://www.umass.edu/statdata/statdata/data/lowbwt.xls")
## lbw2[c(10,39),"BWT"] <- c(2655,3035)
## Recoding
lbw <- within(lbw, {
    ## race: numeric codes 1-3 become a labelled factor (White is the reference level)
    race.cat <- factor(race, levels = 1:3, labels = c("White","Black","Other"))
    ## ftv (frequency of visit): bin into <= 0, (0, 2] and > 2
    ## (cut() intervals are right-closed by default)
    ftv.cat <- cut(ftv, breaks = c(-Inf, 0, 2, Inf), labels = c("None","Normal","Many"))
    ## Use "Normal" as the reference level in regression contrasts
    ftv.cat <- relevel(ftv.cat, ref = "Normal")
    ## ptl: dichotomise into ptl < 1 (labelled "=0") versus ptl >= 1
    ## FALSE/TRUE are spelled out because T and F are ordinary, reassignable variables
    preterm <- factor(ptl >= 1, levels = c(FALSE, TRUE), labels = c("=0",">=1"))
})
a. Build a multiple regression model for infant birthweight, identifying all factors that affect the birthweight of the infant. You should consider all of the covariates as possible predictors.
## Load MASS package
library(MASS)
## Null model: intercept only
model.null <- lm(bwt ~ 1, data = lbw)
## Full model: all candidate predictors entered together
model.full <- lm(bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, data = lbw)
## Full model result, including the correlation matrix of the coefficient estimates
## (argument name written in full rather than relying on partial matching of `cor`)
summary(model.full, correlation = TRUE)
Call:
lm(formula = bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat +
preterm, data = lbw)
Residuals:
Min 1Q Median 3Q Max
-1896.7 -443.3 53.2 466.1 1654.4
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2947.32 320.48 9.20 < 2e-16 ***
age -2.91 9.67 -0.30 0.76354
lwt 4.22 1.72 2.46 0.01488 *
smoke -307.34 109.13 -2.82 0.00541 **
ht -568.63 200.88 -2.83 0.00518 **
ui -494.11 137.23 -3.60 0.00041 ***
ftv.catNone -56.50 105.36 -0.54 0.59245
ftv.catMany -185.86 203.19 -0.91 0.36158
race.catBlack -467.30 149.78 -3.12 0.00211 **
race.catOther -322.81 117.40 -2.75 0.00658 **
preterm>=1 -207.91 136.35 -1.52 0.12907
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 647 on 178 degrees of freedom
Multiple R-squared: 0.255, Adjusted R-squared: 0.213
F-statistic: 6.08 on 10 and 178 DF, p-value: 0.0000000609
Correlation of Coefficients:
(Intercept) age lwt smoke ht ui ftv.catNone ftv.catMany race.catBlack race.catOther
age -0.63
lwt -0.58 -0.19
smoke -0.24 0.07 0.08
ht 0.11 0.04 -0.23 -0.02
ui -0.14 0.06 0.08 -0.03 0.09
ftv.catNone -0.24 0.20 -0.03 -0.22 -0.07 -0.04
ftv.catMany -0.02 -0.01 -0.04 -0.13 -0.03 0.03 0.29
race.catBlack -0.13 0.21 -0.17 0.13 -0.03 0.01 -0.05 -0.05
race.catOther -0.35 0.11 0.17 0.41 -0.04 -0.02 -0.22 -0.10 0.31
preterm>=1 0.02 -0.15 0.10 -0.21 -0.05 -0.17 0.06 0.02 -0.08 -0.12
## Single-term deletions from the full model: F test for removing each term in turn
## (multi-level factors such as ftv.cat and race.cat are tested as a whole)
drop1(model.full, test = "F")
Single term deletions
Model:
bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm
Df Sum of Sq RSS AIC F value Pr(>F)
<none> 74475274 2457
age 1 37981 74513254 2455 0.09 0.76354
lwt 1 2530480 77005754 2461 6.05 0.01488 *
smoke 1 3318310 77793584 2463 7.93 0.00541 **
ht 1 3352598 77827871 2463 8.01 0.00518 **
ui 1 5424365 79899638 2468 12.96 0.00041 ***
ftv.cat 2 383274 74858548 2454 0.46 0.63328
race.cat 2 5559887 80035161 2467 6.64 0.00165 **
preterm 1 972851 75448125 2458 2.33 0.12907
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Single-term additions to the null (intercept-only) model: F test for adding
## each candidate term from the scope formula on its own
add1(model.null, scope = ~age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, test = "F")
Single term additions
Model:
bwt ~ 1
Df Sum of Sq RSS AIC F value Pr(>F)
<none> 99927264 2493
age 1 812010 99115254 2493 1.53 0.2174
lwt 1 3473052 96454213 2488 6.73 0.0102 *
smoke 1 3566553 96360711 2488 6.92 0.0092 **
ht 1 2133121 97794143 2491 4.08 0.0448 *
ui 1 8032186 91895078 2479 16.34 0.000077 ***
ftv.cat 2 2091437 97835828 2493 1.99 0.1399
race.cat 2 5076973 94850291 2487 4.98 0.0078 **
preterm 1 4760280 95166985 2485 9.35 0.0026 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Forward selection by AIC: start from the intercept-only model and add, one at
## a time, terms from the scope formula
stepAIC(model.null, scope = ~age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, direction = "forward")
Start: AIC=2492.7
bwt ~ 1
Df Sum of Sq RSS AIC
+ ui 1 8032186 91895078 2479
+ preterm 1 4760280 95166985 2485
+ race.cat 2 5076973 94850291 2487
+ smoke 1 3566553 96360711 2488
+ lwt 1 3473052 96454213 2488
+ ht 1 2133121 97794143 2491
<none> 99927264 2493
+ ftv.cat 2 2091437 97835828 2493
+ age 1 812010 99115254 2493
Step: AIC=2478.8
bwt ~ ui
Df Sum of Sq RSS AIC
+ race.cat 2 4773411 87121667 2473
+ ht 1 3164060 88731018 2474
+ smoke 1 2943572 88951506 2475
+ preterm 1 2838829 89056249 2475
+ lwt 1 2095708 89799370 2476
<none> 91895078 2479
+ ftv.cat 2 1856011 90039067 2479
+ age 1 476194 91418884 2480
Step: AIC=2472.8
bwt ~ ui + race.cat
Df Sum of Sq RSS AIC
+ smoke 1 6072463 81049204 2461
+ ht 1 2690006 84431661 2469
+ preterm 1 2653197 84468471 2469
+ lwt 1 2254369 84867299 2470
<none> 87121667 2473
+ ftv.cat 2 1164466 85957202 2474
+ age 1 58269 87063398 2475
Step: AIC=2461.1
bwt ~ ui + race.cat + smoke
Df Sum of Sq RSS AIC
+ ht 1 2458993 78590212 2457
+ lwt 1 1562529 79486675 2459
+ preterm 1 1340665 79708539 2460
<none> 81049204 2461
+ age 1 235 81048969 2463
+ ftv.cat 2 334011 80715193 2464
Step: AIC=2457.3
bwt ~ ui + race.cat + smoke + ht
Df Sum of Sq RSS AIC
+ lwt 1 2694958 75895254 2453
+ preterm 1 1268612 77321599 2456
<none> 78590212 2457
+ age 1 728 78589484 2459
+ ftv.cat 2 246592 78343620 2461
Step: AIC=2452.7
bwt ~ ui + race.cat + smoke + ht + lwt
Df Sum of Sq RSS AIC
+ preterm 1 1009276 74885977 2452
<none> 75895254 2453
+ age 1 107841 75787412 2454
+ ftv.cat 2 327846 75567407 2456
Step: AIC=2452.2
bwt ~ ui + race.cat + smoke + ht + lwt + preterm
Df Sum of Sq RSS AIC
<none> 74885977 2452
+ age 1 27429 74858548 2454
+ ftv.cat 2 372723 74513254 2455
Call:
lm(formula = bwt ~ ui + race.cat + smoke + ht + lwt + preterm,
data = lbw)
Coefficients:
(Intercept) ui race.catBlack race.catOther smoke ht lwt
2869.59 -489.94 -466.71 -335.53 -323.19 -574.30 4.06
preterm>=1
-208.49
## Backward elimination by AIC: start from the full model and drop terms one at a time
stepAIC(model.full, direction = "backward")
Start: AIC=2457.1
bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm
Df Sum of Sq RSS AIC
- ftv.cat 2 383274 74858548 2454
- age 1 37981 74513254 2455
<none> 74475274 2457
- preterm 1 972851 75448125 2458
- lwt 1 2530480 77005754 2461
- smoke 1 3318310 77793584 2463
- ht 1 3352598 77827871 2463
- race.cat 2 5559887 80035161 2467
- ui 1 5424365 79899638 2468
Step: AIC=2454.1
bwt ~ age + lwt + smoke + ht + ui + race.cat + preterm
Df Sum of Sq RSS AIC
- age 1 27429 74885977 2452
<none> 74858548 2454
- preterm 1 928864 75787412 2454
- lwt 1 2443283 77301831 2458
- ht 1 3471379 78329927 2461
- smoke 1 3947315 78805863 2462
- race.cat 2 6090524 80949072 2465
- ui 1 5403600 80262148 2465
Step: AIC=2452.2
bwt ~ lwt + smoke + ht + ui + race.cat + preterm
Df Sum of Sq RSS AIC
<none> 74885977 2452
- preterm 1 1009276 75895254 2453
- lwt 1 2435622 77321599 2456
- ht 1 3447953 78333930 2459
- smoke 1 3923947 78809925 2460
- ui 1 5376170 80262148 2463
- race.cat 2 6251578 81137556 2463
Call:
lm(formula = bwt ~ lwt + smoke + ht + ui + race.cat + preterm,
data = lbw)
Coefficients:
(Intercept) lwt smoke ht ui race.catBlack race.catOther
2869.59 4.06 -323.19 -574.30 -489.94 -466.71 -335.53
preterm>=1
-208.49
## Stepwise selection by AIC starting from the full model: at each step a term
## may be dropped or a previously dropped term re-added
stepAIC(model.full, direction = "both")
Start: AIC=2457.1
bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm
Df Sum of Sq RSS AIC
- ftv.cat 2 383274 74858548 2454
- age 1 37981 74513254 2455
<none> 74475274 2457
- preterm 1 972851 75448125 2458
- lwt 1 2530480 77005754 2461
- smoke 1 3318310 77793584 2463
- ht 1 3352598 77827871 2463
- race.cat 2 5559887 80035161 2467
- ui 1 5424365 79899638 2468
Step: AIC=2454.1
bwt ~ age + lwt + smoke + ht + ui + race.cat + preterm
Df Sum of Sq RSS AIC
- age 1 27429 74885977 2452
<none> 74858548 2454
- preterm 1 928864 75787412 2454
+ ftv.cat 2 383274 74475274 2457
- lwt 1 2443283 77301831 2458
- ht 1 3471379 78329927 2461
- smoke 1 3947315 78805863 2462
- race.cat 2 6090524 80949072 2465
- ui 1 5403600 80262148 2465
Step: AIC=2452.2
bwt ~ lwt + smoke + ht + ui + race.cat + preterm
Df Sum of Sq RSS AIC
<none> 74885977 2452
- preterm 1 1009276 75895254 2453
+ age 1 27429 74858548 2454
+ ftv.cat 2 372723 74513254 2455
- lwt 1 2435622 77321599 2456
- ht 1 3447953 78333930 2459
- smoke 1 3923947 78809925 2460
- ui 1 5376170 80262148 2463
- race.cat 2 6251578 81137556 2463
Call:
lm(formula = bwt ~ lwt + smoke + ht + ui + race.cat + preterm,
data = lbw)
Coefficients:
(Intercept) lwt smoke ht ui race.catBlack race.catOther
2869.59 4.06 -323.19 -574.30 -489.94 -466.71 -335.53
preterm>=1
-208.49
## Fast backward elimination with rms::fastbw()
## NOTE: rule = "aic" makes the deletion criterion AIC-based, not p-value-based
## (rule = "p" would be needed for a p-value rule)
library(rms)
## fastbw() is applied to an rms::ols() fit, so the full model is refitted with ols()
model.full.ols <- ols(bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, data = lbw)
fastbw(model.full.ols, rule = "aic")
Deleted Chi-Sq d.f. P Residual d.f. P AIC R2
ftv.cat 0.92 2 0.6325 0.92 2 0.6325 -3.08 0.251
age 0.07 1 0.7979 0.98 3 0.8057 -5.02 0.251
preterm 2.41 1 0.1204 3.39 4 0.4942 -4.61 0.240
lwt 6.44 1 0.0112 9.83 5 0.0800 -0.17 0.214
Approximate Estimates after Deleting Factors
Coef S.E. Wald Z P
Intercept 3413.0 87.89 38.832 0.00000000
smoke -384.5 102.91 -3.736 0.00018660
ht -472.4 194.87 -2.424 0.01533883
ui -562.1 134.04 -4.194 0.00002744
race.cat=Black -426.0 144.46 -2.949 0.00318839
race.cat=Other -411.2 109.89 -3.742 0.00018268
Factors in Final Model
[1] smoke ht ui race.cat
## All possible models: exhaustive best-subset search with leaps::regsubsets()
## NOTE: as the summary shows, each dummy column of a factor (e.g. ftv.catNone,
## race.catBlack) is treated as a separate candidate variable, so factor levels
## can enter a subset individually
library(leaps)
leaps <- regsubsets(bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, data = lbw, method = "exhaustive")
summary(leaps)
Subset selection object
Call: regsubsets.formula(bwt ~ age + lwt + smoke + ht + ui + ftv.cat +
race.cat + preterm, data = lbw, method = "exhaustive")
10 Variables (and intercept)
Forced in Forced out
age FALSE FALSE
lwt FALSE FALSE
smoke FALSE FALSE
ht FALSE FALSE
ui FALSE FALSE
ftv.catNone FALSE FALSE
ftv.catMany FALSE FALSE
race.catBlack FALSE FALSE
race.catOther FALSE FALSE
preterm>=1 FALSE FALSE
1 subsets of each size up to 8
Selection Algorithm: exhaustive
age lwt smoke ht ui ftv.catNone ftv.catMany race.catBlack race.catOther preterm>=1
1 ( 1 ) " " " " " " " " "*" " " " " " " " " " "
2 ( 1 ) " " " " " " "*" "*" " " " " " " " " " "
3 ( 1 ) " " "*" " " "*" "*" " " " " " " " " " "
4 ( 1 ) " " " " "*" " " "*" " " " " "*" "*" " "
5 ( 1 ) " " " " "*" "*" "*" " " " " "*" "*" " "
6 ( 1 ) " " "*" "*" "*" "*" " " " " "*" "*" " "
7 ( 1 ) " " "*" "*" "*" "*" " " " " "*" "*" "*"
8 ( 1 ) " " "*" "*" "*" "*" " " "*" "*" "*" "*"
## Plot the best subsets once per criterion, in this order:
## BIC, Mallows' Cp, adjusted R^2, R^2
for (criterion in c("bic", "Cp", "adjr2", "r2")) {
    plot(leaps, scale = criterion)
}
## Inputs for the all-subsets search: outcome name, candidate predictors, data
outcome <- "bwt"
predictors <- c("age","lwt","smoke","ht","ui","ftv.cat","race.cat","preterm")
dataset <- lbw
## Everything below is generic and driven only by the three objects above.
## Build one formula string per non-empty subset of the predictors,
## grouped by subset size (1 predictor, 2 predictors, ..., all predictors)
list.of.models <- lapply(seq_along(predictors), function(subset.size) {
    subsets <- combn(predictors, subset.size, simplify = FALSE)
    rhs <- vapply(subsets, paste, character(1), collapse = " + ")
    paste(outcome, rhs, sep = " ~ ")
})
## Flatten to a single character vector of formula strings
vector.of.models <- unlist(list.of.models)
## Fit a linear model (lm) for every formula and record its adjusted R^2
list.of.fits <- lapply(vector.of.models, function(model.string) {
    fitted.model <- lm(as.formula(model.string), data = dataset)
    data.frame(adj.r.squared = summary(fitted.model)$adj.r.squared,
               model = model.string)
})
## Stack the one-row data frames into a single table
result <- do.call(rbind, list.of.fits)
## Sort by adjusted R^2, best model first, and print
library(doBy)
result.sorted <- orderBy(~ -adj.r.squared, result)
result.sorted
adj.r.squared model
242 0.2216126 bwt ~ lwt + smoke + ht + ui + race.cat + preterm
249 0.2175750 bwt ~ age + lwt + smoke + ht + ui + race.cat + preterm
254 0.2168330 bwt ~ lwt + smoke + ht + ui + ftv.cat + race.cat + preterm
199 0.2154564 bwt ~ lwt + smoke + ht + ui + race.cat
255 0.2128347 bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm
220 0.2122428 bwt ~ age + lwt + smoke + ht + ui + race.cat
240 0.2101659 bwt ~ lwt + smoke + ht + ui + ftv.cat + race.cat
247 0.2070072 bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat
215 0.2007120 bwt ~ smoke + ht + ui + race.cat + preterm
236 0.1965015 bwt ~ age + smoke + ht + ui + race.cat + preterm
246 0.1949936 bwt ~ smoke + ht + ui + ftv.cat + race.cat + preterm
149 0.1920375 bwt ~ smoke + ht + ui + race.cat
253 0.1906358 bwt ~ age + smoke + ht + ui + ftv.cat + race.cat + preterm
206 0.1902473 bwt ~ lwt + smoke + ui + race.cat + preterm
184 0.1876056 bwt ~ age + smoke + ht + ui + race.cat
245 0.1868551 bwt ~ lwt + ht + ui + ftv.cat + race.cat + preterm
244 0.1863839 bwt ~ lwt + smoke + ui + ftv.cat + race.cat + preterm
227 0.1858152 bwt ~ age + lwt + smoke + ui + race.cat + preterm
213 0.1856729 bwt ~ smoke + ht + ui + ftv.cat + race.cat
210 0.1853269 bwt ~ lwt + ht + ui + race.cat + preterm
133 0.1828212 bwt ~ lwt + smoke + ui + race.cat
252 0.1823553 bwt ~ age + lwt + ht + ui + ftv.cat + race.cat + preterm
251 0.1819950 bwt ~ age + lwt + smoke + ui + ftv.cat + race.cat + preterm
234 0.1811637 bwt ~ age + smoke + ht + ui + ftv.cat + race.cat
231 0.1808682 bwt ~ age + lwt + ht + ui + race.cat + preterm
156 0.1805403 bwt ~ smoke + ui + race.cat + preterm
168 0.1789097 bwt ~ age + lwt + smoke + ui + race.cat
204 0.1781133 bwt ~ lwt + smoke + ui + ftv.cat + race.cat
191 0.1762921 bwt ~ age + smoke + ui + race.cat + preterm
217 0.1758191 bwt ~ smoke + ui + ftv.cat + race.cat + preterm
225 0.1743878 bwt ~ age + lwt + smoke + ui + ftv.cat + race.cat
238 0.1713632 bwt ~ age + smoke + ui + ftv.cat + race.cat + preterm
78 0.1712858 bwt ~ smoke + ui + race.cat
203 0.1703150 bwt ~ lwt + smoke + ht + race.cat + preterm
208 0.1700535 bwt ~ lwt + ht + ui + ftv.cat + race.cat
139 0.1693584 bwt ~ lwt + ht + ui + race.cat
241 0.1672007 bwt ~ lwt + smoke + ht + ui + ftv.cat + preterm
113 0.1667597 bwt ~ age + smoke + ui + race.cat
229 0.1661444 bwt ~ age + lwt + ht + ui + ftv.cat + race.cat
200 0.1658490 bwt ~ lwt + smoke + ht + ui + preterm
224 0.1657311 bwt ~ age + lwt + smoke + ht + race.cat + preterm
154 0.1656318 bwt ~ smoke + ui + ftv.cat + race.cat
174 0.1649990 bwt ~ age + lwt + ht + ui + race.cat
243 0.1648530 bwt ~ lwt + smoke + ht + ftv.cat + race.cat + preterm
248 0.1634688 bwt ~ age + lwt + smoke + ht + ui + ftv.cat + preterm
221 0.1632141 bwt ~ age + lwt + smoke + ht + ui + preterm
189 0.1610482 bwt ~ age + smoke + ui + ftv.cat + race.cat
250 0.1602198 bwt ~ age + lwt + smoke + ht + ftv.cat + race.cat + preterm
218 0.1589744 bwt ~ ht + ui + ftv.cat + race.cat + preterm
160 0.1578804 bwt ~ ht + ui + race.cat + preterm
209 0.1570694 bwt ~ lwt + ht + ui + ftv.cat + preterm
128 0.1557114 bwt ~ lwt + smoke + ht + ui
198 0.1556962 bwt ~ lwt + smoke + ht + ui + ftv.cat
239 0.1551879 bwt ~ age + ht + ui + ftv.cat + race.cat + preterm
195 0.1550982 bwt ~ age + ht + ui + race.cat + preterm
212 0.1549621 bwt ~ lwt + ui + ftv.cat + race.cat + preterm
130 0.1547006 bwt ~ lwt + smoke + ht + race.cat
230 0.1535029 bwt ~ age + lwt + ht + ui + ftv.cat + preterm
163 0.1517779 bwt ~ age + lwt + smoke + ht + ui
219 0.1511978 bwt ~ age + lwt + smoke + ht + ui + ftv.cat
140 0.1511559 bwt ~ lwt + ht + ui + preterm
146 0.1506860 bwt ~ lwt + ui + race.cat + preterm
165 0.1505386 bwt ~ age + lwt + smoke + ht + race.cat
233 0.1502694 bwt ~ age + lwt + ui + ftv.cat + race.cat + preterm
175 0.1492173 bwt ~ age + lwt + ht + ui + preterm
201 0.1481357 bwt ~ lwt + smoke + ht + ftv.cat + race.cat
137 0.1463874 bwt ~ lwt + smoke + race.cat + preterm
181 0.1463563 bwt ~ age + lwt + ui + race.cat + preterm
222 0.1440908 bwt ~ age + lwt + smoke + ht + ftv.cat + race.cat
214 0.1424030 bwt ~ smoke + ht + ui + ftv.cat + preterm
207 0.1420619 bwt ~ lwt + smoke + ftv.cat + race.cat + preterm
153 0.1418467 bwt ~ smoke + ht + race.cat + preterm
185 0.1417711 bwt ~ age + smoke + ht + ui + preterm
172 0.1417665 bwt ~ age + lwt + smoke + race.cat + preterm
235 0.1413648 bwt ~ age + smoke + ht + ui + ftv.cat + preterm
150 0.1405816 bwt ~ smoke + ht + ui + preterm
162 0.1398698 bwt ~ ui + ftv.cat + race.cat + preterm
138 0.1389040 bwt ~ lwt + ht + ui + ftv.cat
188 0.1384769 bwt ~ age + smoke + ht + race.cat + preterm
228 0.1372956 bwt ~ age + lwt + smoke + ftv.cat + race.cat + preterm
84 0.1367008 bwt ~ ht + ui + race.cat
158 0.1366900 bwt ~ ht + ui + ftv.cat + race.cat
91 0.1363244 bwt ~ ui + race.cat + preterm
197 0.1359613 bwt ~ age + ui + ftv.cat + race.cat + preterm
216 0.1355230 bwt ~ smoke + ht + ftv.cat + race.cat + preterm
144 0.1354863 bwt ~ lwt + ui + ftv.cat + race.cat
173 0.1343472 bwt ~ age + lwt + ht + ui + ftv.cat
63 0.1341086 bwt ~ lwt + ht + ui
126 0.1336348 bwt ~ age + ui + race.cat + preterm
119 0.1325002 bwt ~ age + ht + ui + race.cat
211 0.1324511 bwt ~ lwt + ht + ftv.cat + race.cat + preterm
68 0.1322464 bwt ~ lwt + ui + race.cat
193 0.1320310 bwt ~ age + ht + ui + ftv.cat + race.cat
205 0.1318847 bwt ~ lwt + smoke + ui + ftv.cat + preterm
159 0.1317160 bwt ~ ht + ui + ftv.cat + preterm
237 0.1316976 bwt ~ age + smoke + ht + ftv.cat + race.cat + preterm
179 0.1310935 bwt ~ age + lwt + ui + ftv.cat + race.cat
194 0.1310746 bwt ~ age + ht + ui + ftv.cat + preterm
98 0.1303810 bwt ~ age + lwt + ht + ui
143 0.1303785 bwt ~ lwt + ht + race.cat + preterm
61 0.1298550 bwt ~ lwt + smoke + race.cat
82 0.1298145 bwt ~ smoke + race.cat + preterm
226 0.1286753 bwt ~ age + lwt + smoke + ui + ftv.cat + preterm
232 0.1277049 bwt ~ age + lwt + ht + ftv.cat + race.cat + preterm
103 0.1275166 bwt ~ age + lwt + ui + race.cat
120 0.1274309 bwt ~ age + ht + ui + preterm
134 0.1271745 bwt ~ lwt + smoke + ui + preterm
148 0.1264394 bwt ~ smoke + ht + ui + ftv.cat
117 0.1264033 bwt ~ age + smoke + race.cat + preterm
73 0.1263436 bwt ~ smoke + ht + ui
178 0.1262251 bwt ~ age + lwt + ht + race.cat + preterm
169 0.1258528 bwt ~ age + lwt + smoke + ui + preterm
96 0.1253013 bwt ~ age + lwt + smoke + race.cat
108 0.1250526 bwt ~ age + smoke + ht + ui
85 0.1249448 bwt ~ ht + ui + preterm
157 0.1245419 bwt ~ smoke + ftv.cat + race.cat + preterm
135 0.1240867 bwt ~ lwt + smoke + ftv.cat + race.cat
183 0.1235866 bwt ~ age + smoke + ht + ui + ftv.cat
145 0.1217401 bwt ~ lwt + ui + ftv.cat + preterm
75 0.1211884 bwt ~ smoke + ht + race.cat
192 0.1205753 bwt ~ age + smoke + ftv.cat + race.cat + preterm
155 0.1201496 bwt ~ smoke + ui + ftv.cat + preterm
170 0.1196895 bwt ~ age + lwt + smoke + ftv.cat + race.cat
190 0.1191399 bwt ~ age + smoke + ui + ftv.cat + preterm
180 0.1187606 bwt ~ age + lwt + ui + ftv.cat + preterm
132 0.1179623 bwt ~ lwt + smoke + ui + ftv.cat
114 0.1171787 bwt ~ age + smoke + ui + preterm
110 0.1166071 bwt ~ age + smoke + ht + race.cat
89 0.1162996 bwt ~ ui + ftv.cat + race.cat
79 0.1154136 bwt ~ smoke + ui + preterm
59 0.1149997 bwt ~ lwt + smoke + ui
32 0.1140111 bwt ~ ui + race.cat
202 0.1136176 bwt ~ lwt + smoke + ht + ftv.cat + preterm
167 0.1135897 bwt ~ age + lwt + smoke + ui + ftv.cat
151 0.1135238 bwt ~ smoke + ht + ftv.cat + race.cat
94 0.1117611 bwt ~ age + lwt + smoke + ui
69 0.1115457 bwt ~ lwt + ui + preterm
124 0.1115307 bwt ~ age + ui + ftv.cat + race.cat
131 0.1114619 bwt ~ lwt + smoke + ht + preterm
104 0.1111918 bwt ~ age + lwt + ui + preterm
223 0.1107095 bwt ~ age + lwt + smoke + ht + ftv.cat + preterm
166 0.1104947 bwt ~ age + lwt + smoke + ht + preterm
53 0.1097917 bwt ~ age + ui + race.cat
90 0.1095875 bwt ~ ui + ftv.cat + preterm
25 0.1092786 bwt ~ smoke + race.cat
125 0.1089822 bwt ~ age + ui + ftv.cat + preterm
186 0.1087521 bwt ~ age + smoke + ht + ftv.cat + race.cat
147 0.1085069 bwt ~ lwt + ftv.cat + race.cat + preterm
83 0.1078920 bwt ~ ht + ui + ftv.cat
118 0.1050696 bwt ~ age + ht + ui + ftv.cat
46 0.1046592 bwt ~ age + smoke + race.cat
182 0.1037973 bwt ~ age + lwt + ftv.cat + race.cat + preterm
72 0.1036477 bwt ~ lwt + race.cat + preterm
77 0.1030084 bwt ~ smoke + ui + ftv.cat
142 0.1026039 bwt ~ lwt + ht + ftv.cat + preterm
27 0.1024960 bwt ~ ht + ui
80 0.1023926 bwt ~ smoke + ftv.cat + race.cat
54 0.1023716 bwt ~ age + ui + preterm
48 0.1019043 bwt ~ age + ht + ui
141 0.1006251 bwt ~ lwt + ht + ftv.cat + race.cat
67 0.1005686 bwt ~ lwt + ui + ftv.cat
23 0.1002658 bwt ~ smoke + ui
112 0.1001019 bwt ~ age + smoke + ui + ftv.cat
177 0.0999716 bwt ~ age + lwt + ht + ftv.cat + preterm
107 0.0999348 bwt ~ age + lwt + race.cat + preterm
65 0.0999044 bwt ~ lwt + ht + race.cat
44 0.0993390 bwt ~ age + smoke + ui
33 0.0992064 bwt ~ ui + preterm
115 0.0975489 bwt ~ age + smoke + ftv.cat + race.cat
102 0.0961447 bwt ~ age + lwt + ui + ftv.cat
176 0.0959133 bwt ~ age + lwt + ht + ftv.cat + race.cat
161 0.0956868 bwt ~ ht + ftv.cat + race.cat + preterm
101 0.0953298 bwt ~ age + lwt + ht + preterm
66 0.0952435 bwt ~ lwt + ht + preterm
100 0.0949860 bwt ~ age + lwt + ht + race.cat
88 0.0936415 bwt ~ ht + race.cat + preterm
123 0.0932655 bwt ~ age + ht + race.cat + preterm
196 0.0932504 bwt ~ age + ht + ftv.cat + race.cat + preterm
18 0.0916898 bwt ~ lwt + ui
129 0.0895848 bwt ~ lwt + smoke + ht + ftv.cat
58 0.0895453 bwt ~ lwt + smoke + ht
39 0.0888304 bwt ~ age + lwt + ui
136 0.0865312 bwt ~ lwt + smoke + ftv.cat + preterm
93 0.0861829 bwt ~ age + lwt + smoke + ht
164 0.0851242 bwt ~ age + lwt + smoke + ht + ftv.cat
92 0.0850307 bwt ~ ftv.cat + race.cat + preterm
31 0.0843424 bwt ~ ui + ftv.cat
171 0.0842649 bwt ~ age + lwt + smoke + ftv.cat + preterm
127 0.0823832 bwt ~ age + ftv.cat + race.cat + preterm
97 0.0815054 bwt ~ age + lwt + smoke + preterm
52 0.0814688 bwt ~ age + ui + ftv.cat
111 0.0814437 bwt ~ age + smoke + ht + preterm
187 0.0810947 bwt ~ age + smoke + ht + ftv.cat + preterm
62 0.0809854 bwt ~ lwt + smoke + preterm
36 0.0807709 bwt ~ race.cat + preterm
57 0.0803858 bwt ~ age + race.cat + preterm
152 0.0795477 bwt ~ smoke + ht + ftv.cat + preterm
76 0.0761567 bwt ~ smoke + ht + preterm
71 0.0755631 bwt ~ lwt + ftv.cat + preterm
5 0.0754626 bwt ~ ui
12 0.0753086 bwt ~ age + ui
70 0.0746184 bwt ~ lwt + ftv.cat + race.cat
106 0.0736264 bwt ~ age + lwt + ftv.cat + preterm
20 0.0712617 bwt ~ lwt + race.cat
122 0.0699027 bwt ~ age + ht + ftv.cat + preterm
105 0.0696432 bwt ~ age + lwt + ftv.cat + race.cat
64 0.0688565 bwt ~ lwt + ht + ftv.cat
116 0.0678816 bwt ~ age + smoke + ftv.cat + preterm
87 0.0677688 bwt ~ ht + ftv.cat + preterm
81 0.0664606 bwt ~ smoke + ftv.cat + preterm
41 0.0662880 bwt ~ age + lwt + race.cat
47 0.0660935 bwt ~ age + smoke + preterm
42 0.0658056 bwt ~ age + lwt + preterm
51 0.0657645 bwt ~ age + ht + preterm
99 0.0643409 bwt ~ age + lwt + ht + ftv.cat
21 0.0639874 bwt ~ lwt + preterm
17 0.0630931 bwt ~ lwt + ht
60 0.0605331 bwt ~ lwt + smoke + ftv.cat
26 0.0604591 bwt ~ smoke + preterm
38 0.0601456 bwt ~ age + lwt + ht
30 0.0586109 bwt ~ ht + preterm
16 0.0574397 bwt ~ lwt + smoke
56 0.0568642 bwt ~ age + ftv.cat + preterm
95 0.0563368 bwt ~ age + lwt + smoke + ftv.cat
37 0.0549725 bwt ~ age + lwt + smoke
35 0.0548609 bwt ~ ftv.cat + preterm
86 0.0538430 bwt ~ ht + ftv.cat + race.cat
29 0.0534996 bwt ~ ht + race.cat
50 0.0503216 bwt ~ age + ht + race.cat
15 0.0501052 bwt ~ age + preterm
121 0.0494340 bwt ~ age + ht + ftv.cat + race.cat
43 0.0474546 bwt ~ age + smoke + ht
74 0.0469723 bwt ~ smoke + ht + ftv.cat
22 0.0461610 bwt ~ smoke + ht
109 0.0455331 bwt ~ age + smoke + ht + ftv.cat
34 0.0430365 bwt ~ ftv.cat + race.cat
8 0.0425446 bwt ~ preterm
7 0.0406003 bwt ~ race.cat
19 0.0394495 bwt ~ lwt + ftv.cat
55 0.0384957 bwt ~ age + ftv.cat + race.cat
14 0.0374012 bwt ~ age + race.cat
40 0.0352106 bwt ~ age + lwt + ftv.cat
24 0.0337310 bwt ~ smoke + ftv.cat
45 0.0321803 bwt ~ age + smoke + ftv.cat
10 0.0320935 bwt ~ age + smoke
3 0.0305348 bwt ~ smoke
2 0.0295941 bwt ~ lwt
9 0.0277202 bwt ~ age + lwt
28 0.0234537 bwt ~ ht + ftv.cat
49 0.0221477 bwt ~ age + ht + ftv.cat
11 0.0186227 bwt ~ age + ht
4 0.0161133 bwt ~ ht
6 0.0104020 bwt ~ ftv.cat
13 0.0089838 bwt ~ age + ftv.cat
1 0.0028219 bwt ~ age
## Final model: the predictor set returned by the stepAIC searches above, which is
## also the model with the highest adjusted R^2 in the all-subsets table
model.best <- lm(bwt ~ lwt + smoke + ht + ui + race.cat + preterm, data = lbw)
summary(model.best)
Call:
lm(formula = bwt ~ lwt + smoke + ht + ui + race.cat + preterm,
data = lbw)
Residuals:
Min 1Q Median 3Q Max
-1886.4 -440.8 53.6 494.4 1621.1
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2869.59 243.65 11.78 <2e-16 ***
lwt 4.06 1.67 2.43 0.0162 *
smoke -323.19 104.94 -3.08 0.0024 **
ht -574.30 198.94 -2.89 0.0044 **
ui -489.94 135.91 -3.60 0.0004 ***
race.catBlack -466.71 145.12 -3.22 0.0015 **
race.catOther -335.53 112.26 -2.99 0.0032 **
preterm>=1 -208.49 133.49 -1.56 0.1201
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 643 on 181 degrees of freedom
Multiple R-squared: 0.251, Adjusted R-squared: 0.222
F-statistic: 8.65 on 7 and 181 DF, p-value: 0.00000000385
## Confidence intervals for the coefficients of the final model
## (confint() defaults to the 95% level, hence the 2.5 % / 97.5 % columns)
confint(model.best)
2.5 % 97.5 %
(Intercept) 2388.84008 3350.342
lwt 0.75818 7.361
smoke -530.26176 -116.119
ht -966.83781 -181.763
ui -758.11797 -221.757
race.catBlack -753.04399 -180.366
race.catOther -557.03948 -114.015
preterm>=1 -471.88237 54.902
b. In your model-building process, run the Tolerance and Variance Inflation Factors and interpret them. These do not have to dictate your final model.
library(rms)
## Variance Inflation Factors (VIF) and Tolerance (1/VIF) for the full model.
## rms::vif() is called with an explicit namespace so that car::vif() cannot mask
## it, and the VIFs are computed once and reused for both columns.
vif.full <- rms::vif(model.full)
table.tol.vif <- data.frame(Tolerance = 1 / vif.full, VIF = vif.full)
table.tol.vif
Tolerance VIF
age 0.84720 1.1804
lwt 0.80710 1.2390
smoke 0.78020 1.2817
ht 0.92264 1.0838
ui 0.93151 1.0735
ftv.catNone 0.80043 1.2493
ftv.catMany 0.90178 1.1089
race.catBlack 0.83177 1.2023
race.catOther 0.70197 1.4246
preterm>=1 0.89177 1.1214