Modern Epidemiology Chapter 10: Demonstration of p-value functions

References

Data preparation

Student’s Sleep Data
     Data which show the effect of two soporific drugs (increase in
     hours of sleep compared to control) on 10 patients.
## Load sleep data
data(sleep)
sleep
   extra group ID
1    0.7     1  1
2   -1.6     1  2
3   -0.2     1  3
4   -1.2     1  4
5   -0.1     1  5
6    3.4     1  6
7    3.7     1  7
8    0.8     1  8
9    0.0     1  9
10   2.0     1 10
11   1.9     2  1
12   0.8     2  2
13   1.1     2  3
14   0.1     2  4
15  -0.1     2  5
16   4.4     2  6
17   5.5     2  7
18   1.6     2  8
19   4.6     2  9
20   3.4     2 10
## Define a two group resampling function
re.sample <- function(data, index.n, index.m, times) {
    index.n <- sample(x = index.n, size = length(index.n)*times, replace = TRUE)
    index.m <- sample(x = index.m, size = length(index.m)*times, replace = TRUE)

    data[c(index.n, index.m), ]
}

## Create a dataset with the same size (1st resample)
sleep1a <- re.sample(data = sleep, index.n = 1:10, index.m = 11:20, times = 1)
## Create a dataset with the same size (2nd resample)
sleep1b <- re.sample(data = sleep, index.n = 1:10, index.m = 11:20, times = 1)
## Create a dataset five times as big
sleep5a <- re.sample(data = sleep, index.n = 1:10, index.m = 11:20, times = 5)
## Create a dataset 100 times as big
sleep100a <- re.sample(data = sleep, index.n = 1:10, index.m = 11:20, times = 100)
## Combine as a list
list.sleep <- list(sleep1a = sleep1a, sleep1b = sleep1b, sleep5a = sleep5a, sleep100a = sleep100a)

Two-sample t-test in each sample

## Plot each sample by group
layout(matrix(seq_along(list.sleep), nrow = 2, byrow = TRUE))
junk <- lapply(list.sleep, function(DF) {
    plot(extra ~ group, data = DF)
    title(paste0("total sample size ", nrow(DF)))
})
layout(1)

plot of chunk unnamed-chunk-4


## Show t-test results
res.t.test <- lapply(list.sleep, function(DF) {
    t.test(extra ~ group, data = DF)
})
res.t.test
$sleep1a

    Welch Two Sample t-test

data:  extra by group
t = -0.9135, df = 16.25, p-value = 0.3743
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.9907  0.7907
sample estimates:
mean in group 1 mean in group 2 
           0.86            1.46 


$sleep1b

    Welch Two Sample t-test

data:  extra by group
t = -2.492, df = 16.44, p-value = 0.02371
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -4.1966 -0.3434
sample estimates:
mean in group 1 mean in group 2 
           0.51            2.78 


$sleep5a

    Welch Two Sample t-test

data:  extra by group
t = -5.787, df = 89.4, p-value = 0.0000001046
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -2.692 -1.316
sample estimates:
mean in group 1 mean in group 2 
          0.356           2.360 


$sleep100a

    Welch Two Sample t-test

data:  extra by group
t = -19.55, df = 1976, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.724 -1.410
sample estimates:
mean in group 1 mean in group 2 
         0.7474          2.3144 

p-value functions for each sample

## Create a p-value extractor
p.fun1 <- function(data, mu) {
    res.t.test <- t.test(extra ~ group, data = data, mu = mu)
    res.t.test$p.val
}

## All axis numbers horizontal, wider margins
par(las = 1, mar = c(5.1, 4.1, 4.1, 4.1))

## Plot an empty plot
plot(x = 0, y = 0, pch = "",
     xlab = "effect size", ylab = "p-value", main = "p-value functions",
     xlim = c(-4,2), ylim = c(0,1))

## Plot the p-value function for each sample
junk <- lapply(seq_along(list.sleep), function(i) {

    curve(expr = sapply(x, function(x) p.fun1(data = list.sleep[[i]], mu = x)),
          col = i, lwd = 2,
          from = -4.5, to = 2.5, n = 5001, add = TRUE)

})

## Add legend
legend("topright", col = seq_along(list.sleep), lwd = 2, legend = names(list.sleep))

## Add a line indicating 95% confidence intervals (at p=0.05, intersections are 95% confidence limits)
abline(h = 0.05, lty = 2)
mtext(text = "p=0.05", side = 2, at = 0.05, las = 1)

## Add a line indicating the usual p-value (intersections are the usual p-values)
abline(v = 0, lty = 3)

## Add axis right y-axis labeling
axis(side = 4, at = seq(0,1,0.2), labels = seq(1,0,-0.2))
mtext(text = "confidence level", side = 4,  las = 0, line = 3)

plot of chunk unnamed-chunk-5


## Extract numerical results and show
## p-values
res.p.value        <- sapply(res.t.test, function(htest) htest$p.value)
## Point estimates
res.est            <- sapply(res.t.test, function(htest) -diff(htest$estimate))
## Confidence limits
res.conf           <- sapply(res.t.test, function(htest) htest$conf)
rownames(res.conf) <- c("lower bound", "upper bound")
## Combine and show after transposition
res.combo          <- rbind(p.value = res.p.value, estimate = res.est, res.conf)
t(res.combo)
            p.value estimate lower bound upper bound
sleep1a   3.743e-01   -0.600      -1.991      0.7907
sleep1b   2.371e-02   -2.270      -4.197     -0.3434
sleep5a   1.046e-07   -2.004      -2.692     -1.3159
sleep100a 5.847e-78   -1.567      -1.724     -1.4098