There are two types of confidence intervals:
Pivotal quantities are essential for deriving confidence intervals, either exact or asymptotic.
Definition: An exact pivotal quantity, \(Q(Y_1,Y_2,...,Y_n; \theta)\):
\[P\left(q_1 \le Q(Y_1,Y_2,...,Y_n;\theta)\le q_2\right) = 0.95\]
\[(\tau(LCL), \tau(UCL))\]
forms a 95% CI for \(\tau(\theta)\) if \(\tau(\cdot)\) is increasing; and:
\[(\tau(UCL), \tau(LCL))\]
forms a 95% CI for \(\tau(\theta)\) if \(\tau(\cdot)\) is decreasing.
\[ Q(Y_1,Y_2,...,Y_n; \mu) = \frac{\bar Y - \mu}{S/\sqrt{n}}\sim t_{n-1}\]
q1 <- qt(0.025, n-1)q2 <- qt(0.975, n-1)\[0.95 = P\left(q_1 \le \frac{\bar Y - \mu}{S/\sqrt{n}} \le q_2\right)\]
\[0.95 = P\left(\bar Y - q_2\cdot \frac{S}{\sqrt{n}} \le \mu \le \bar Y - q_1\cdot \frac{S}{\sqrt{n}}\right)\]
\[ Q(Y_1,Y_2,...,Y_n; \sigma^2) = \frac{(n-1)S^2}{\sigma^2}\sim \chi^2_{n-1}\]
q1 <- qchisq(0.025, n-1)q2 <- qchisq(0.975, n-1)\[0.95 = P\left(q_1 \le \frac{(n-1)S^2}{\sigma^2} \le q_2\right)\]
\[0.95 = P\left(\frac{(n-1)S^2}{q_2} \le \sigma^2 \le \frac{(n-1)S^2}{q_1}\right)\]
| Patient | Pre | Post | (\(d\) = Pre-Post) |
|---|---|---|---|
| 1 | 148 | 140 | 8 |
| 2 | 152 | 144 | 8 |
| 3 | 145 | 138 | 7 |
| 4 | 150 | 143 | 7 |
| 5 | 147 | 139 | 8 |
| 6 | 155 | 146 | 9 |
| 7 | 149 | 141 | 8 |
| Summary | (\(\bar Y_d\) = 7.86; \(S_d\) = 0.69) |
\[ Q(Y_1,Y_2,...,Y_n; \beta) = \frac{\bar Y}{\beta}\sim GAM(n,1/n)\]
q1 <- qgamma(0.025, shape = n, scale = 1/n)q2 <- qgamma(0.975, shape = n, scale = 1/n)\[0.95 = P\left(q_1 \le \frac{\bar Y}{\beta} \le q_2\right)\]
\[0.95 = P\left(\frac{\bar Y}{q_2} \le \beta \le \frac{\bar Y}{q_1}\right)\]
\[ Q_{asymptotic}(Y_1,Y_2,...,Y_n; \beta) = \frac{\bar Y-\beta}{\sigma/\sqrt{n}}\sim AN(0,1)\]
q1 <- qnorm(0.025, mean = 0, sd = 1) = -1.96q2 <- qnorm(0.975, mean = 0, sd = 1) = 1.96\[0.95 \approx P\left(-1.96 \le \frac{\bar Y-\beta}{\sigma/\sqrt{n}} \le 1.96\right)\]
\[0.95 \approx P\left(\bar Y -1.96\frac{\sigma}{\sqrt{n}} \le \beta \le \bar Y + 1.96 \frac{\sigma}{\sqrt{n}}\right)\]
To simulate confidence interval coverage of a parameter \(\theta\), we want to:
library(tidyverse)
library(purrrfect)
N <- 10000
(many_exponential_CIs <- parameters(~n, ~beta, c(5, 10, 20, 40, 80, 160), c(2, 4, 6))
%>% add_trials(N)
%>% mutate(Ysample = pmap(list(n,beta), .f = \(n,b) rexp(n, rate = 1/b)))
%>% mutate(Ybar = map_dbl(Ysample, mean), S2 = map_dbl(Ysample, var))
%>% mutate(LCL_exact = Ybar/qgamma(0.975, shape = n, scale = 1/n),
UCL_exact = Ybar/qgamma(0.025, shape = n, scale = 1/n),
LCL_asymptotic = Ybar - 1.96*sqrt(S2/n),
UCL_asymptotic = Ybar + 1.96*sqrt(S2/n))
%>% mutate(exact_covers = ifelse(LCL_exact < beta & UCL_exact > beta, 1, 0))
%>%mutate(asymptotic_covers = ifelse(LCL_asymptotic < beta & UCL_asymptotic > beta, 1, 0))
) %>% head# A tibble: 6 × 12
n beta .trial Ysample Ybar S2 LCL_exact UCL_exact LCL_asymptotic UCL_asymptotic exact_covers asymptotic_covers
<dbl> <dbl> <dbl> <list> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 5 2 1 <dbl [5]> 0.484 0.164 0.237 1.49 0.130 0.839 0 0
2 5 2 2 <dbl [5]> 1.50 1.92 0.734 4.63 0.291 2.72 1 1
3 5 2 3 <dbl [5]> 1.62 3.13 0.791 4.99 0.0690 3.17 1 1
4 5 2 4 <dbl [5]> 1.70 2.23 0.830 5.23 0.389 3.01 1 1
5 5 2 5 <dbl [5]> 2.20 3.05 1.08 6.79 0.672 3.74 1 1
6 5 2 6 <dbl [5]> 1.11 3.09 0.540 3.40 -0.436 2.65 1 1
library(patchwork)
ggplot(data = many_exponential_CIs %>% slice(1:200)) +
geom_segment(aes(x = LCL_exact, xend = UCL_exact,
y = .trial, yend = .trial, color = factor(exact_covers))) +
geom_vline(aes(xintercept = 2)) +
guides(color='none') +
theme_classic() + xlab('95% CI endpoints') +
ggtitle('Coverage of exact 95% CI, n = 5') -> p1
ggplot(data = many_exponential_CIs %>% slice(1:200)) +
geom_segment(aes(x = LCL_asymptotic, xend = UCL_asymptotic,
y = .trial, yend = .trial, color = factor(asymptotic_covers))) +
geom_vline(aes(xintercept = 2)) +
guides(color='none') +
theme_classic() + xlab('95% CI endpoints') +
ggtitle('Coverage of asymptotic 95% CI, n = 5') -> p2
p1+p2(exponential_coverage <- many_exponential_CIs
%>% group_by(beta, n)
%>% summarize(exact_coverage = mean(exact_covers),
asymptotic_coverage = mean(asymptotic_covers)
)
) # A tibble: 18 × 4
# Groups: beta [3]
beta n exact_coverage asymptotic_coverage
<dbl> <dbl> <dbl> <dbl>
1 2 5 0.953 0.813
2 2 10 0.951 0.871
3 2 20 0.948 0.898
4 2 40 0.951 0.925
5 2 80 0.944 0.933
6 2 160 0.949 0.942
7 4 5 0.948 0.805
8 4 10 0.950 0.868
9 4 20 0.947 0.904
10 4 40 0.951 0.928
11 4 80 0.952 0.936
12 4 160 0.954 0.946
13 6 5 0.949 0.814
14 6 10 0.950 0.866
15 6 20 0.949 0.906
16 6 40 0.948 0.922
17 6 80 0.954 0.940
18 6 160 0.947 0.941
ggplot(data = exponential_coverage) +
geom_line(aes(x = n, y = exact_coverage, color = 'exact')) +
geom_line(aes(x = n, y = asymptotic_coverage, color = 'asymptotic')) +
geom_hline(aes(yintercept = 0.95)) +
scale_color_discrete(name='') + ylab('Coverage') +
theme_classic() +
ggtitle('Simulated coverage of nominal 95% CIs for exponential scale parameter') +
facet_wrap(~beta, labeller = label_both)\[\left(LCL_{exact}, UCL_{exact}\right)=\left(\frac{\bar Y}{q_2}, \frac{\bar Y}{q_1}\right)\]
\[\left(\tau(LCL_{exact}), \tau(UCL_{exact})\right)=\left(\left(\frac{\bar Y}{q_2}\right)^2, \left(\frac{\bar Y}{q_1}\right)^2\right)\]
\[\left(LCL_{asymptotic}, UCL_{asymptotic}\right)=\left(\bar Y -1.96\frac{\sigma}{\sqrt{n}}, \bar Y + 1.96 \frac{\sigma}{\sqrt{n}}\right)\]
\[\left(\tau(LCL_{asymptotic}), \tau(UCL_{asymptotic})\right)=\left(\left(\bar Y -1.96\frac{\sigma}{\sqrt{n}}\right)^2, \left(\bar Y + 1.96 \frac{\sigma}{\sqrt{n}}\right)^2\right)\]