Modern Epidemiology Chapter 10: Demonstration of p-value functions

References

Rothman. Modern Epidemiology. Chapter 10 Precision and Statistics in Epidemioloic Studies

Data preparation

Student’s Sleep Data
     Data which show the effect of two soporific drugs (increase in
     hours of sleep compared to control) on 10 patients.

## Load sleep data
data(sleep)
sleep

   extra group ID
1    0.7     1  1
2   -1.6     1  2
3   -0.2     1  3
4   -1.2     1  4
5   -0.1     1  5
6    3.4     1  6
7    3.7     1  7
8    0.8     1  8
9    0.0     1  9
10   2.0     1 10
11   1.9     2  1
12   0.8     2  2
13   1.1     2  3
14   0.1     2  4
15  -0.1     2  5
16   4.4     2  6
17   5.5     2  7
18   1.6     2  8
19   4.6     2  9
20   3.4     2 10

## Define a two group resampling function
re.sample <- function(data, index.n, index.m, times) {
    index.n <- sample(x = index.n, size = length(index.n)*times, replace = TRUE)
    index.m <- sample(x = index.m, size = length(index.m)*times, replace = TRUE)

    data[c(index.n, index.m), ]
}

## Create a dataset with the same size (1st resample)
sleep1a <- re.sample(data = sleep, index.n = 1:10, index.m = 11:20, times = 1)
## Create a dataset with the same size (2nd resample)
sleep1b <- re.sample(data = sleep, index.n = 1:10, index.m = 11:20, times = 1)
## Create a dataset five times as big
sleep5a <- re.sample(data = sleep, index.n = 1:10, index.m = 11:20, times = 5)
## Create a dataset 100 times as big
sleep100a <- re.sample(data = sleep, index.n = 1:10, index.m = 11:20, times = 100)
## Combine as a list
list.sleep <- list(sleep1a = sleep1a, sleep1b = sleep1b, sleep5a = sleep5a, sleep100a = sleep100a)

Two-sample t-test in each sample

## Plot each sample by group
layout(matrix(seq_along(list.sleep), nrow = 2, byrow = TRUE))
junk <- lapply(list.sleep, function(DF) {
    plot(extra ~ group, data = DF)
    title(paste0("total sample size ", nrow(DF)))
})
layout(1)

plot of chunk unnamed-chunk-4


## Show t-test results
res.t.test <- lapply(list.sleep, function(DF) {
    t.test(extra ~ group, data = DF)
})
res.t.test

$sleep1a

    Welch Two Sample t-test

data:  extra by group
t = -0.9135, df = 16.25, p-value = 0.3743
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.9907  0.7907
sample estimates:
mean in group 1 mean in group 2 
           0.86            1.46 


$sleep1b

    Welch Two Sample t-test

data:  extra by group
t = -2.492, df = 16.44, p-value = 0.02371
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -4.1966 -0.3434
sample estimates:
mean in group 1 mean in group 2 
           0.51            2.78 


$sleep5a

    Welch Two Sample t-test

data:  extra by group
t = -5.787, df = 89.4, p-value = 0.0000001046
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -2.692 -1.316
sample estimates:
mean in group 1 mean in group 2 
          0.356           2.360 


$sleep100a

    Welch Two Sample t-test

data:  extra by group
t = -19.55, df = 1976, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.724 -1.410
sample estimates:
mean in group 1 mean in group 2 
         0.7474          2.3144

p-value functions for each sample

## Create a p-value extractor
p.fun1 <- function(data, mu) {
    res.t.test <- t.test(extra ~ group, data = data, mu = mu)
    res.t.test$p.val
}

## All axis numbers horizontal, wider margins
par(las = 1, mar = c(5.1, 4.1, 4.1, 4.1))

## Plot an empty plot
plot(x = 0, y = 0, pch = "",
     xlab = "effect size", ylab = "p-value", main = "p-value functions",
     xlim = c(-4,2), ylim = c(0,1))

## Plot the p-value function for each sample
junk <- lapply(seq_along(list.sleep), function(i) {

    curve(expr = sapply(x, function(x) p.fun1(data = list.sleep[[i]], mu = x)),
          col = i, lwd = 2,
          from = -4.5, to = 2.5, n = 5001, add = TRUE)

})

## Add legend
legend("topright", col = seq_along(list.sleep), lwd = 2, legend = names(list.sleep))

## Add a line indicating 95% confidence intervals (at p=0.05, intersections are 95% confidence limits)
abline(h = 0.05, lty = 2)
mtext(text = "p=0.05", side = 2, at = 0.05, las = 1)

## Add a line indicating the usual p-value (intersections are the usual p-values)
abline(v = 0, lty = 3)

## Add axis right y-axis labeling
axis(side = 4, at = seq(0,1,0.2), labels = seq(1,0,-0.2))
mtext(text = "confidence level", side = 4,  las = 0, line = 3)

plot of chunk unnamed-chunk-5


## Extract numerical results and show
## p-values
res.p.value        <- sapply(res.t.test, function(htest) htest$p.value)
## Point estimates
res.est            <- sapply(res.t.test, function(htest) -diff(htest$estimate))
## Confidence limits
res.conf           <- sapply(res.t.test, function(htest) htest$conf)
rownames(res.conf) <- c("lower bound", "upper bound")
## Combine and show after transposition
res.combo          <- rbind(p.value = res.p.value, estimate = res.est, res.conf)
t(res.combo)

            p.value estimate lower bound upper bound
sleep1a   3.743e-01   -0.600      -1.991      0.7907
sleep1b   2.371e-02   -2.270      -4.197     -0.3434
sleep5a   1.046e-07   -2.004      -2.692     -1.3159
sleep100a 5.847e-78   -1.567      -1.724     -1.4098

Table of Contents: http://rpubs.com/kaz_yos/toc
Table of Contents (Japanese): http://rpubs.com/kaz_yos/toc-j
Top Page: http://rpubs.com/kaz_yos/
SlideShare: http://www.slideshare.net/kaz_yos