Student’s Sleep Data
Data which show the effect of two soporific drugs (increase in
hours of sleep compared to control) on 10 patients.
## Load sleep data
data(sleep)
sleep
extra group ID
1 0.7 1 1
2 -1.6 1 2
3 -0.2 1 3
4 -1.2 1 4
5 -0.1 1 5
6 3.4 1 6
7 3.7 1 7
8 0.8 1 8
9 0.0 1 9
10 2.0 1 10
11 1.9 2 1
12 0.8 2 2
13 1.1 2 3
14 0.1 2 4
15 -0.1 2 5
16 4.4 2 6
17 5.5 2 7
18 1.6 2 8
19 4.6 2 9
20 3.4 2 10
## Define a two group resampling function
re.sample <- function(data, index.n, index.m, times) {
index.n <- sample(x = index.n, size = length(index.n)*times, replace = TRUE)
index.m <- sample(x = index.m, size = length(index.m)*times, replace = TRUE)
data[c(index.n, index.m), ]
}
## Create a dataset with the same size (1st resample)
sleep1a <- re.sample(data = sleep, index.n = 1:10, index.m = 11:20, times = 1)
## Create a dataset with the same size (2nd resample)
sleep1b <- re.sample(data = sleep, index.n = 1:10, index.m = 11:20, times = 1)
## Create a dataset five times as big
sleep5a <- re.sample(data = sleep, index.n = 1:10, index.m = 11:20, times = 5)
## Create a dataset 100 times as big
sleep100a <- re.sample(data = sleep, index.n = 1:10, index.m = 11:20, times = 100)
## Combine as a list
list.sleep <- list(sleep1a = sleep1a, sleep1b = sleep1b, sleep5a = sleep5a, sleep100a = sleep100a)
## Plot each sample by group
layout(matrix(seq_along(list.sleep), nrow = 2, byrow = TRUE))
junk <- lapply(list.sleep, function(DF) {
plot(extra ~ group, data = DF)
title(paste0("total sample size ", nrow(DF)))
})
layout(1)
## Show t-test results
res.t.test <- lapply(list.sleep, function(DF) {
t.test(extra ~ group, data = DF)
})
res.t.test
$sleep1a
Welch Two Sample t-test
data: extra by group
t = -0.9135, df = 16.25, p-value = 0.3743
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.9907 0.7907
sample estimates:
mean in group 1 mean in group 2
0.86 1.46
$sleep1b
Welch Two Sample t-test
data: extra by group
t = -2.492, df = 16.44, p-value = 0.02371
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-4.1966 -0.3434
sample estimates:
mean in group 1 mean in group 2
0.51 2.78
$sleep5a
Welch Two Sample t-test
data: extra by group
t = -5.787, df = 89.4, p-value = 0.0000001046
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-2.692 -1.316
sample estimates:
mean in group 1 mean in group 2
0.356 2.360
$sleep100a
Welch Two Sample t-test
data: extra by group
t = -19.55, df = 1976, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.724 -1.410
sample estimates:
mean in group 1 mean in group 2
0.7474 2.3144
## Create a p-value extractor
p.fun1 <- function(data, mu) {
res.t.test <- t.test(extra ~ group, data = data, mu = mu)
res.t.test$p.val
}
## All axis numbers horizontal, wider margins
par(las = 1, mar = c(5.1, 4.1, 4.1, 4.1))
## Plot an empty plot
plot(x = 0, y = 0, pch = "",
xlab = "effect size", ylab = "p-value", main = "p-value functions",
xlim = c(-4,2), ylim = c(0,1))
## Plot the p-value function for each sample
junk <- lapply(seq_along(list.sleep), function(i) {
curve(expr = sapply(x, function(x) p.fun1(data = list.sleep[[i]], mu = x)),
col = i, lwd = 2,
from = -4.5, to = 2.5, n = 5001, add = TRUE)
})
## Add legend
legend("topright", col = seq_along(list.sleep), lwd = 2, legend = names(list.sleep))
## Add a line indicating 95% confidence intervals (at p=0.05, intersections are 95% confidence limits)
abline(h = 0.05, lty = 2)
mtext(text = "p=0.05", side = 2, at = 0.05, las = 1)
## Add a line indicating the usual p-value (intersections are the usual p-values)
abline(v = 0, lty = 3)
## Add axis right y-axis labeling
axis(side = 4, at = seq(0,1,0.2), labels = seq(1,0,-0.2))
mtext(text = "confidence level", side = 4, las = 0, line = 3)
## Extract numerical results and show
## p-values
res.p.value <- sapply(res.t.test, function(htest) htest$p.value)
## Point estimates
res.est <- sapply(res.t.test, function(htest) -diff(htest$estimate))
## Confidence limits
res.conf <- sapply(res.t.test, function(htest) htest$conf)
rownames(res.conf) <- c("lower bound", "upper bound")
## Combine and show after transposition
res.combo <- rbind(p.value = res.p.value, estimate = res.est, res.conf)
t(res.combo)
p.value estimate lower bound upper bound
sleep1a 3.743e-01 -0.600 -1.991 0.7907
sleep1b 2.371e-02 -2.270 -4.197 -0.3434
sleep5a 1.046e-07 -2.004 -2.692 -1.3159
sleep100a 5.847e-78 -1.567 -1.724 -1.4098