BIO 226 Applied Longitudinal Analysis: Introduction to Data Sets

References

Example 1: Treatment of Lead-Exposed Children Trial

http://www.hsph.harvard.edu/fitzmaur/ala2e/tlc.txt

Data import

library(foreign)
tlc <- read.dta("http://www.hsph.harvard.edu/fitzmaur/ala2e/tlc.dta")
tlc$trt <- factor(tlc$trt, levels = c("Succimer","Placebo"))
head(tlc, 8)
  id      trt   y0   y1   y4   y6
1  1  Placebo 30.8 26.9 25.8 23.8
2  2 Succimer 26.5 14.8 19.5 21.0
3  3 Succimer 25.8 23.0 19.1 23.2
4  4  Placebo 24.7 24.5 22.0 22.5
5  5 Succimer 20.4  2.8  3.2  9.4
6  6 Succimer 20.4  5.4  4.5 11.9
7  7  Placebo 28.6 20.8 19.2 18.4
8  8  Placebo 33.7 31.6 28.5 25.1

Summary

library(plyr)
tlc.summary <- ddply(tlc[,-1],
                     "trt",
                     function(X) {
                         MEANS <- colMeans(X[,-1])
                         SDS <- apply(X[,-1], 2, sd)

                         res <- rbind(MEANS, SDS)
                         res <- round(res, 1)
                         res <- cbind(var = c("mean","sd"), res)
                         res
                     })
tlc.summary
       trt  var   y0   y1   y4   y6
1 Succimer mean 26.5 13.5 15.5 20.8
2 Succimer   sd    5  7.7  7.9  9.2
3  Placebo mean 26.3 24.7 24.1 23.6
4  Placebo   sd    5  5.5  5.8  5.6

Long format conversion

library(reshape2)
tlc.melt <- melt(tlc, id.vars =  c("id","trt"), variable.name = "time", value.name = "blood.lead")
tlc.melt$time <- as.numeric(gsub("y","", tlc.melt$time))

## Load ggplot2
library(ggplot2)

## Map time to X, blood lead level to Y, and treatment assignment to Color
gg.tlc <- ggplot(data = tlc.melt, mapping = aes(x = time, y = blood.lead, color = trt))
summary(gg.tlc)
data: id, trt, time, blood.lead [400x4]
mapping:  x = time, y = blood.lead, colour = trt
faceting: facet_null() 

## Create mean values for each group at each time point (use same variable names as original data set)
tlc.means <- ddply(tlc.melt,
                   c("trt","time"),
                   function(DF) {
                       data.frame(blood.lead = mean(DF$blood.lead))
                   })
tlc.means
       trt time blood.lead
1 Succimer    0      26.54
2 Succimer    1      13.52
3 Succimer    4      15.51
4 Succimer    6      20.76
5  Placebo    0      26.27
6  Placebo    1      24.66
7  Placebo    4      24.07
8  Placebo    6      23.65

## Add a layer of a line plot using the mean data set
gg.tlc +
    layer(geom = "point") +
    layer(geom = "line", data = tlc.means)

plot of chunk unnamed-chunk-4

Example 2: Six Cities Studies of Air Pollution and Health

http://www.hsph.harvard.edu/fitzmaur/ala2e/fev1.txt

fev1 <- read.dta("http://www.hsph.harvard.edu/fitzmaur/ala2e/fev1.dta")

ggplot(data = fev1, mapping = aes(x = age, y = logfev1, group = id)) +
    layer(geom = "point", pch = 1, alpha = 1/10) +
    layer(geom = "line", alpha = 1/10) +
    scale_x_continuous(breaks = c(6,10,14,18))

plot of chunk unnamed-chunk-5

Example 3: Influence of Menarche on Changes in Body Fat

http://www.hsph.harvard.edu/fitzmaur/ala2e/fat.txt

fat <- read.dta("http://www.hsph.harvard.edu/fitzmaur/ala2e/fat.dta")
head(fat)
  id   age agemen  time   pbf
1  1  9.32  13.19 -3.87  7.94
2  1 10.33  13.19 -2.86 15.65
3  1 11.24  13.19 -1.95 13.51
4  1 12.19  13.19 -1.00 23.23
5  1 13.24  13.19  0.05 10.52
6  1 14.24  13.19  1.05 20.45

## Create menarche marker
fat <- within(fat, {
    menarche <- factor(time >= 0, c(FALSE,TRUE), c("pre","post"))
})

## Plot by age
ggplot(data = fat, mapping = aes(x = age, y = pbf, group = id, shape = menarche)) +
    layer(geom = "point") +
    layer(geom = "line", lwd = 0.2) +
    scale_shape_manual(values = c("pre" = 1, "post" = 16))

plot of chunk unnamed-chunk-6



## Plot by time
ggplot(data = fat, mapping = aes(x = time, y = pbf, group = id, shape = menarche)) +
    layer(geom = "point") +
    layer(geom = "line", lwd = 0.2) +
    scale_shape_manual(values = c("pre" = 1, "post" = 16))

plot of chunk unnamed-chunk-6

Example 4: Oral Treatment of Toenail Infection

http://www.hsph.harvard.edu/fitzmaur/ala2e/toenail.txt

toenail <- read.dta("http://www.hsph.harvard.edu/fitzmaur/ala2e/toenail.dta")
head(toenail, 10)
   id y trt   month visit
1   1 1   1  0.0000     1
2   1 1   1  0.8571     2
3   1 1   1  3.5357     3
4   1 0   1  4.5357     4
5   1 0   1  7.5357     5
6   1 0   1 10.0357     6
7   1 0   1 13.0714     7
8   2 0   0  0.0000     1
9   2 0   0  0.9643     2
10  2 1   0  2.0000     3

Example 5: Clinical Trial of Anti-Epileptic Drug Progabide

http://www.hsph.harvard.edu/fitzmaur/ala2e/epilepsy.txt

epilepsy <- read.dta("http://www.hsph.harvard.edu/fitzmaur/ala2e/epilepsy.dta")
epilepsy$trt <- factor(epilepsy$trt, 0:1, c("Placebo","Progabide"))
head(epilepsy)
  id     trt age y0 y1 y2 y3 y4
1  1 Placebo  31 11  5  3  3  3
2  2 Placebo  30 11  3  5  3  3
3  3 Placebo  25  6  2  4  0  5
4  4 Placebo  36  8  4  4  1  4
5  5 Placebo  22 66  7 18  9 21
6  6 Placebo  29 27  5  2  8  7

## Change to weekly rate
epilepsy[,c("y0", "y1", "y2", "y3", "y4")] <-
    sweep(epilepsy[,c("y0", "y1", "y2", "y3", "y4")], MARGIN = 2, STATS = c(8,2,2,2,2), FUN = "/")

## Get means
epi.means <- ddply(epilepsy,
                     "trt",
                     function(X) {
                         colMeans(X[,c("y0", "y1", "y2", "y3", "y4")])
                     })
epi.means
        trt    y0    y1    y2    y3    y4
1   Placebo 3.848 4.679 4.143 4.393 4.000
2 Progabide 3.956 4.290 4.210 4.065 3.371

## Change to long format
epi.melt <- melt(epi.means, id.vars = "trt", value.name = "mean.rate.seizures", variable.name = "time")
epi.melt$time <- as.numeric(gsub("y", "", epi.melt$time))
epi.melt
         trt time mean.rate.seizures
1    Placebo    0              3.848
2  Progabide    0              3.956
3    Placebo    1              4.679
4  Progabide    1              4.290
5    Placebo    2              4.143
6  Progabide    2              4.210
7    Placebo    3              4.393
8  Progabide    3              4.065
9    Placebo    4              4.000
10 Progabide    4              3.371

## Plot
ggplot(data = epi.melt, mapping = aes(x = time, y = mean.rate.seizures, group = trt, color = trt)) +
    layer("point") +
    layer("line") +
    scale_y_continuous(name = "Mean rate of seizures (per week)", limits = c(2.5, 5)) +
    scale_x_continuous(name = "Time (weeks)", breaks = 0:4, labels = seq(from = 0, to = 8, by = 2))

plot of chunk unnamed-chunk-8