http://www.umass.edu/statdata/statdata/data/lowbwt.txt
SOURCE: Hosmer and Lemeshow (2000) Applied Logistic Regression: Second Edition. These data are copyrighted by John Wiley & Sons Inc. and must be acknowledged and used accordingly. Data were collected at Baystate Medical Center, Springfield, Massachusetts during 1986.
## Fix cases 10 and 39 to make them identical to Dr. Orav's dataset
lbw <- read.table("http://www.umass.edu/statdata/statdata/data/lowbwt.dat", head = T, skip = 4)
lbw[c(10,39),"BWT"] <- c(2655,3035)
## Change variable names to lower cases
names(lbw) <- tolower(names(lbw))
head(lbw)
id low age lwt race smoke ptl ht ui ftv bwt
1 85 0 19 182 2 0 0 0 1 0 2523
2 86 0 33 155 3 0 0 0 0 3 2551
3 87 0 20 105 1 1 0 0 0 1 2557
4 88 0 21 108 1 1 0 0 1 2 2594
5 89 0 18 107 1 1 0 0 1 0 2600
6 91 0 21 124 3 0 0 0 0 0 2622
library(gpairs)
gpairs(lbw)
## Recoding
lbw <- within(lbw, {
## Relabel race
race.cat <- factor(race, levels = 1:3, labels = c("White","Black","Other"))
## Categorize ftv (frequency of visit)
ftv.cat <- cut(ftv, breaks = c(-Inf, 0, 2, Inf), labels = c("None","Normal","Many"))
ftv.cat <- relevel(ftv.cat, ref = "Normal")
## Dichotomize ptl
preterm <- factor(ptl >= 1, levels = c(FALSE,TRUE), labels = c("0","1+"))
## Categorize smoke ht ui
smoke <- factor(smoke, levels = 0:1, labels = c("No","Yes"))
ht <- factor(ht, levels = 0:1, labels = c("No","Yes"))
ui <- factor(ui, levels = 0:1, labels = c("No","Yes"))
})
Alternative to the “## Categorize smoke ht ui” part.
## Categorize smoke ht ui
lbw[,c("smoke","ht","ui")] <-
lapply(lbw[,c("smoke","ht","ui")],
function(var) {
var <- factor(var, levels = 0:1, labels = c("No","Yes"))
})
lm.full <- lm(bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, data = lbw)
lm.null <- lm(bwt ~ 1, data = lbw)
lm.full
Call:
lm(formula = bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat +
preterm, data = lbw)
Coefficients:
(Intercept) age lwt smokeYes htYes uiYes ftv.catNone
2947.32 -2.91 4.22 -307.34 -568.63 -494.11 -56.50
ftv.catMany race.catBlack race.catOther preterm1+
-185.86 -467.30 -322.81 -207.91
summary(lm.full)
Call:
lm(formula = bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat +
preterm, data = lbw)
Residuals:
Min 1Q Median 3Q Max
-1896.7 -443.3 53.2 466.1 1654.4
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2947.32 320.48 9.20 < 2e-16 ***
age -2.91 9.67 -0.30 0.76354
lwt 4.22 1.72 2.46 0.01488 *
smokeYes -307.34 109.13 -2.82 0.00541 **
htYes -568.63 200.88 -2.83 0.00518 **
uiYes -494.11 137.23 -3.60 0.00041 ***
ftv.catNone -56.50 105.36 -0.54 0.59245
ftv.catMany -185.86 203.19 -0.91 0.36158
race.catBlack -467.30 149.78 -3.12 0.00211 **
race.catOther -322.81 117.40 -2.75 0.00658 **
preterm1+ -207.91 136.35 -1.52 0.12907
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 647 on 178 degrees of freedom
Multiple R-squared: 0.255, Adjusted R-squared: 0.213
F-statistic: 6.08 on 10 and 178 DF, p-value: 0.0000000609
confint(lm.full)
2.5 % 97.5 %
(Intercept) 2314.9019 3579.741
age -22.0027 16.174
lwt 0.8344 7.612
smokeYes -522.7049 -91.979
htYes -965.0356 -172.218
uiYes -764.9083 -223.303
ftv.catNone -264.4122 151.414
ftv.catMany -586.8257 215.111
race.catBlack -762.8701 -171.736
race.catOther -554.4737 -91.143
preterm1+ -476.9680 61.155
anova(lm.full)
Analysis of Variance Table
Response: bwt
Df Sum Sq Mean Sq F value Pr(>F)
age 1 812010 812010 1.94 0.1653
lwt 1 2991583 2991583 7.15 0.0082 **
smoke 1 3196588 3196588 7.64 0.0063 **
ht 1 3554719 3554719 8.50 0.0040 **
ui 1 6866122 6866122 16.41 0.000076 ***
ftv.cat 2 845895 422947 1.01 0.3660
race.cat 2 6212222 3106111 7.42 0.0008 ***
preterm 1 972851 972851 2.33 0.1291
Residuals 178 74475274 418400
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(car)
Anova(lm.full, type = 3)
Anova Table (Type III tests)
Response: bwt
Sum Sq Df F value Pr(>F)
(Intercept) 35388198 1 84.58 < 2e-16 ***
age 37981 1 0.09 0.76354
lwt 2530480 1 6.05 0.01488 *
smoke 3318310 1 7.93 0.00541 **
ht 3352598 1 8.01 0.00518 **
ui 5424365 1 12.96 0.00041 ***
ftv.cat 383274 2 0.46 0.63328
race.cat 5559887 2 6.64 0.00165 **
preterm 972851 1 2.33 0.12907
Residuals 74475274 178
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Effect of a given predictor after other variables are typical values.
library(effects)
plot(allEffects(lm.full), ylim = c(2000,4000))
More complex model for interaction plot
The model itself is not very meaningful. This is for demonstration purpose only.
lm.full.int <- lm(bwt ~ age*lwt + smoke + ht + ui + age*ftv.cat + race.cat*preterm, data = lbw)
Anova(lm.full.int, type = 3)
Anova Table (Type III tests)
Response: bwt
Sum Sq Df F value Pr(>F)
(Intercept) 303077 1 0.76 0.3834
age 1589252 1 4.00 0.0469 *
lwt 1537542 1 3.87 0.0506 .
smoke 3675193 1 9.26 0.0027 **
ht 2991978 1 7.54 0.0067 **
ui 6382035 1 16.08 0.00009 ***
ftv.cat 4446524 2 5.60 0.0044 **
race.cat 2954002 2 3.72 0.0261 *
preterm 77945 1 0.20 0.6582
age:lwt 750299 1 1.89 0.1709
age:ftv.cat 4965938 2 6.26 0.0024 **
race.cat:preterm 335867 2 0.42 0.6556
Residuals 68649992 173
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Continuous x Continuous interaction
plot(effect("age:lwt", lm.full.int))
plot(effect("age:lwt", lm.full.int, xlevels = list(lwt = c(100,150,200,250))), multiline = T)
Continuous x Categorical interaction
plot(effect("age:ftv.cat", lm.full.int), multiline = TRUE)
Categorical x Categorical interaction
Use x.var and z.var to override the default X-axis variable and grouping variable assignment. The default is the predictor with the largest number of levels or values for x.var, which is not useful in categorical x categorical interactions.
plot(effect(c("race.cat*preterm"), lm.full.int), multiline = TRUE)
plot(effect(c("race.cat*preterm"), lm.full.int), x.var = "preterm", z.var = "race.cat", multiline = TRUE)