## Load dataset directly from the internet (requires network access).
## skip = 4 discards the first four lines of the file; header = TRUE then
## takes the column names from the next line.
lbw <- read.table(
  "http://www.umass.edu/statdata/statdata/data/lowbwt.dat",
  header = TRUE,
  skip = 4
)
## Change variable names to lower case
names(lbw) <- tolower(names(lbw))
## Recode inside within() so that columns can be referenced by bare name
lbw <- within(lbw, {
  ## Race codes 1, 2, 3 become the labels White, Black, Other
  race.cat <- factor(race, levels = c(1, 2, 3), labels = c("White", "Black", "Other"))
  ## Dichotomize ptl: values >= 1 become "1+", everything else "0"
  preterm <- factor(ifelse(ptl >= 1, "1+", "0"), levels = c("0", "1+"))
  ## You can also use cut:
  ## preterm <- cut(ptl, breaks = c(-Inf, 0, Inf), labels = c("0","1+"))
  ## Turn the 0/1 indicator columns into No/Yes factors
  smoke <- factor(smoke, levels = 0:1, labels = c("No", "Yes"))
  ht <- factor(ht, levels = 0:1, labels = c("No", "Yes"))
  ui <- factor(ui, levels = 0:1, labels = c("No", "Yes"))
  low <- factor(low, levels = 0:1, labels = c("No", "Yes"))
  ## Bin ftv (frequency of visit) into None (0), Normal (1-2), Many (>= 3),
  ## then move Normal to the front so it serves as the reference level
  ftv.cat <- relevel(
    cut(ftv, breaks = c(-Inf, 0, 2, Inf), labels = c("None", "Normal", "Many")),
    ref = "Normal"
  )
})
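The functions demonstrated below (tapply, by, doBy::summaryBy, and plyr::ddply) each take data, split it by one or more specified grouping variables, and apply a specified summary function to every group.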
## Mean of age within each level of ftv.cat
with(lbw, tapply(age, ftv.cat, mean))
Normal None Many
24.69 21.95 24.67
## Mean of age for every ftv.cat x smoke combination (rows: ftv.cat, columns: smoke)
with(lbw, tapply(age, list(ftv.cat, smoke), mean))
No Yes
Normal 24.31 25.57
None 22.33 21.49
Many 25.50 23.83
## Mean of age within each level of ftv.cat, computed with by().
## The grouping argument is kept as lbw$ftv.cat because the printed
## header of each group repeats that expression.
by(lbw$age, lbw$ftv.cat, FUN = mean)
lbw$ftv.cat: Normal
[1] 24.69
---------------------------------------------------------------------------------------
lbw$ftv.cat: None
[1] 21.95
---------------------------------------------------------------------------------------
lbw$ftv.cat: Many
[1] 24.67
## Six-number summary() of age within each level of ftv.cat
by(lbw$age, lbw$ftv.cat, FUN = summary)
lbw$ftv.cat: Normal
Min. 1st Qu. Median Mean 3rd Qu. Max.
14.0 21.0 24.0 24.7 29.0 45.0
---------------------------------------------------------------------------------------
lbw$ftv.cat: None
Min. 1st Qu. Median Mean 3rd Qu. Max.
14 19 21 22 25 36
---------------------------------------------------------------------------------------
lbw$ftv.cat: Many
Min. 1st Qu. Median Mean 3rd Qu. Max.
16.0 20.0 23.0 24.7 31.2 33.0
## summary() of age for every ftv.cat x smoke combination;
## multiple grouping factors are passed to by() as a list
by(
  lbw$age,
  list(lbw$ftv.cat, lbw$smoke),
  FUN = summary
)
: Normal
: No
Min. 1st Qu. Median Mean 3rd Qu. Max.
14.0 20.2 24.0 24.3 28.8 45.0
---------------------------------------------------------------------------------------
: None
: No
Min. 1st Qu. Median Mean 3rd Qu. Max.
14.0 19.0 22.0 22.3 25.0 36.0
---------------------------------------------------------------------------------------
: Many
: No
Min. 1st Qu. Median Mean 3rd Qu. Max.
16.0 20.2 26.0 25.5 31.8 33.0
---------------------------------------------------------------------------------------
: Normal
: Yes
Min. 1st Qu. Median Mean 3rd Qu. Max.
18.0 21.0 24.0 25.6 29.5 35.0
---------------------------------------------------------------------------------------
: None
: Yes
Min. 1st Qu. Median Mean 3rd Qu. Max.
14.0 18.0 20.0 21.5 25.0 34.0
---------------------------------------------------------------------------------------
: Many
: Yes
Min. 1st Qu. Median Mean 3rd Qu. Max.
17.0 20.2 23.0 23.8 27.2 32.0
## Simpler expression: with() evaluates the call inside lbw, so the
## columns can be written without the lbw$ prefix
with(
  lbw,
  by(age, list(ftv.cat, smoke), FUN = summary)
)
: Normal
: No
Min. 1st Qu. Median Mean 3rd Qu. Max.
14.0 20.2 24.0 24.3 28.8 45.0
---------------------------------------------------------------------------------------
: None
: No
Min. 1st Qu. Median Mean 3rd Qu. Max.
14.0 19.0 22.0 22.3 25.0 36.0
---------------------------------------------------------------------------------------
: Many
: No
Min. 1st Qu. Median Mean 3rd Qu. Max.
16.0 20.2 26.0 25.5 31.8 33.0
---------------------------------------------------------------------------------------
: Normal
: Yes
Min. 1st Qu. Median Mean 3rd Qu. Max.
18.0 21.0 24.0 25.6 29.5 35.0
---------------------------------------------------------------------------------------
: None
: Yes
Min. 1st Qu. Median Mean 3rd Qu. Max.
14.0 18.0 20.0 21.5 25.0 34.0
---------------------------------------------------------------------------------------
: Many
: Yes
Min. 1st Qu. Median Mean 3rd Qu. Max.
17.0 20.2 23.0 23.8 27.2 32.0
library(doBy)
## Mean of age per ftv.cat level; the formula reads response ~ grouping variable
summaryBy(data = lbw, formula = age ~ ftv.cat, FUN = mean)
ftv.cat age.mean
1 Normal 24.69
2 None 21.95
3 Many 24.67
## summary() of age per ftv.cat level, one output column per statistic
summaryBy(data = lbw, formula = age ~ ftv.cat, FUN = summary)
ftv.cat age.Min. age.1st Qu. age.Median age.Mean age.3rd Qu. age.Max.
1 Normal 14 21 24 24.7 29.0 45
2 None 14 19 21 22.0 25.0 36
3 Many 16 20 23 24.7 31.2 33
## summary() of age for every ftv.cat x smoke combination;
## additional grouping variables are joined with + on the right-hand side
summaryBy(data = lbw, formula = age ~ ftv.cat + smoke, FUN = summary)
ftv.cat smoke age.Min. age.1st Qu. age.Median age.Mean age.3rd Qu. age.Max.
1 Normal No 14 20.2 24 24.3 28.8 45
2 Normal Yes 18 21.0 24 25.6 29.5 35
3 None No 14 19.0 22 22.3 25.0 36
4 None Yes 14 18.0 20 21.5 25.0 34
5 Many No 16 20.2 26 25.5 31.8 33
6 Many Yes 17 20.2 23 23.8 27.2 32
library(plyr)
## Mean of age per ftv.cat level; summarise builds the mean.of.age column
## separately for each group
ddply(lbw, "ftv.cat", summarise, mean.of.age = mean(age))
ftv.cat mean.of.age
1 Normal 24.69
2 None 21.95
3 Many 24.67
## Mean of age for every ftv.cat x smoke combination
ddply(lbw, c("ftv.cat", "smoke"), summarise, mean.of.age = mean(age))
ftv.cat smoke mean.of.age
1 Normal No 24.31
2 Normal Yes 25.57
3 None No 22.33
4 None Yes 21.49
5 Many No 25.50
6 Many Yes 23.83