http://www.hsph.harvard.edu/fitzmaur/ala2e/tlc.txt
Data import
library(foreign)
tlc <- read.dta("http://www.hsph.harvard.edu/fitzmaur/ala2e/tlc.dta")
tlc$trt <- factor(tlc$trt, levels = c("Succimer","Placebo"))
head(tlc, 8)
id trt y0 y1 y4 y6
1 1 Placebo 30.8 26.9 25.8 23.8
2 2 Succimer 26.5 14.8 19.5 21.0
3 3 Succimer 25.8 23.0 19.1 23.2
4 4 Placebo 24.7 24.5 22.0 22.5
5 5 Succimer 20.4 2.8 3.2 9.4
6 6 Succimer 20.4 5.4 4.5 11.9
7 7 Placebo 28.6 20.8 19.2 18.4
8 8 Placebo 33.7 31.6 28.5 25.1
Summary
library(plyr)
tlc.summary <- ddply(tlc[,-1],
"trt",
function(X) {
MEANS <- colMeans(X[,-1])
SDS <- apply(X[,-1], 2, sd)
res <- rbind(MEANS, SDS)
res <- round(res, 1)
res <- cbind(var = c("mean","sd"), res)
res
})
tlc.summary
trt var y0 y1 y4 y6
1 Succimer mean 26.5 13.5 15.5 20.8
2 Succimer sd 5 7.7 7.9 9.2
3 Placebo mean 26.3 24.7 24.1 23.6
4 Placebo sd 5 5.5 5.8 5.6
Long format conversion
library(reshape2)
tlc.melt <- melt(tlc, id.vars = c("id","trt"), variable.name = "time", value.name = "blood.lead")
tlc.melt$time <- as.numeric(gsub("y","", tlc.melt$time))
## Load ggplot2
library(ggplot2)
## Map time to X, blood lead level to Y, and treatment assignment to Color
gg.tlc <- ggplot(data = tlc.melt, mapping = aes(x = time, y = blood.lead, color = trt))
summary(gg.tlc)
data: id, trt, time, blood.lead [400x4]
mapping: x = time, y = blood.lead, colour = trt
faceting: facet_null()
## Create mean values for each group at each time point (use same variable names as original data set)
tlc.means <- ddply(tlc.melt,
c("trt","time"),
function(DF) {
data.frame(blood.lead = mean(DF$blood.lead))
})
tlc.means
trt time blood.lead
1 Succimer 0 26.54
2 Succimer 1 13.52
3 Succimer 4 15.51
4 Succimer 6 20.76
5 Placebo 0 26.27
6 Placebo 1 24.66
7 Placebo 4 24.07
8 Placebo 6 23.65
## Add a layer of a line plot using the mean data set
gg.tlc +
layer(geom = "point") +
layer(geom = "line", data = tlc.means)
http://www.hsph.harvard.edu/fitzmaur/ala2e/fev1.txt
fev1 <- read.dta("http://www.hsph.harvard.edu/fitzmaur/ala2e/fev1.dta")
ggplot(data = fev1, mapping = aes(x = age, y = logfev1, group = id)) +
layer(geom = "point", pch = 1, alpha = 1/10) +
layer(geom = "line", alpha = 1/10) +
scale_x_continuous(breaks = c(6,10,14,18))
http://www.hsph.harvard.edu/fitzmaur/ala2e/fat.txt
fat <- read.dta("http://www.hsph.harvard.edu/fitzmaur/ala2e/fat.dta")
head(fat)
id age agemen time pbf
1 1 9.32 13.19 -3.87 7.94
2 1 10.33 13.19 -2.86 15.65
3 1 11.24 13.19 -1.95 13.51
4 1 12.19 13.19 -1.00 23.23
5 1 13.24 13.19 0.05 10.52
6 1 14.24 13.19 1.05 20.45
## Create menarche marker
fat <- within(fat, {
menarche <- factor(time >= 0, c(FALSE,TRUE), c("pre","post"))
})
## Plot by age
ggplot(data = fat, mapping = aes(x = age, y = pbf, group = id, shape = menarche)) +
layer(geom = "point") +
layer(geom = "line", lwd = 0.2) +
scale_shape_manual(values = c("pre" = 1, "post" = 16))
## Plot by time
ggplot(data = fat, mapping = aes(x = time, y = pbf, group = id, shape = menarche)) +
layer(geom = "point") +
layer(geom = "line", lwd = 0.2) +
scale_shape_manual(values = c("pre" = 1, "post" = 16))
http://www.hsph.harvard.edu/fitzmaur/ala2e/toenail.txt
toenail <- read.dta("http://www.hsph.harvard.edu/fitzmaur/ala2e/toenail.dta")
head(toenail, 10)
id y trt month visit
1 1 1 1 0.0000 1
2 1 1 1 0.8571 2
3 1 1 1 3.5357 3
4 1 0 1 4.5357 4
5 1 0 1 7.5357 5
6 1 0 1 10.0357 6
7 1 0 1 13.0714 7
8 2 0 0 0.0000 1
9 2 0 0 0.9643 2
10 2 1 0 2.0000 3
http://www.hsph.harvard.edu/fitzmaur/ala2e/epilepsy.txt
epilepsy <- read.dta("http://www.hsph.harvard.edu/fitzmaur/ala2e/epilepsy.dta")
epilepsy$trt <- factor(epilepsy$trt, 0:1, c("Placebo","Progabide"))
head(epilepsy)
id trt age y0 y1 y2 y3 y4
1 1 Placebo 31 11 5 3 3 3
2 2 Placebo 30 11 3 5 3 3
3 3 Placebo 25 6 2 4 0 5
4 4 Placebo 36 8 4 4 1 4
5 5 Placebo 22 66 7 18 9 21
6 6 Placebo 29 27 5 2 8 7
## Change to weekly rate
epilepsy[,c("y0", "y1", "y2", "y3", "y4")] <-
sweep(epilepsy[,c("y0", "y1", "y2", "y3", "y4")], MARGIN = 2, STATS = c(8,2,2,2,2), FUN = "/")
## Get means
epi.means <- ddply(epilepsy,
"trt",
function(X) {
colMeans(X[,c("y0", "y1", "y2", "y3", "y4")])
})
epi.means
trt y0 y1 y2 y3 y4
1 Placebo 3.848 4.679 4.143 4.393 4.000
2 Progabide 3.956 4.290 4.210 4.065 3.371
## Change to long format
epi.melt <- melt(epi.means, id.vars = "trt", value.name = "mean.rate.seizures", variable.name = "time")
epi.melt$time <- as.numeric(gsub("y", "", epi.melt$time))
epi.melt
trt time mean.rate.seizures
1 Placebo 0 3.848
2 Progabide 0 3.956
3 Placebo 1 4.679
4 Progabide 1 4.290
5 Placebo 2 4.143
6 Progabide 2 4.210
7 Placebo 3 4.393
8 Progabide 3 4.065
9 Placebo 4 4.000
10 Progabide 4 3.371
## Plot
ggplot(data = epi.melt, mapping = aes(x = time, y = mean.rate.seizures, group = trt, color = trt)) +
layer("point") +
layer("line") +
scale_y_continuous(name = "Mean rate of seizures (per week)", limits = c(2.5, 5)) +
scale_x_continuous(name = "Time (weeks)", breaks = 0:4, labels = seq(from = 0, to = 8, by = 2))