library (purrrfect)
library (tidyverse)
N <- 10000
# UNIF(0,8)
(many_UNIF_CIs <- parameters (~ n, c (5 ,20 ,50 ,100 ))
%>% add_trials (N)
%>% mutate (Ysample = map (n, .f = \(n) runif (n,0 ,8 )))
%>% mutate (Ybar = map_dbl (Ysample, mean),
S2 = map_dbl (Ysample, var),
SE = sqrt (S2/ n))
%>% mutate (LCL_mean = Ybar - qt (0.975 , n-1 ) * SE,
UCL_mean = Ybar + qt (0.975 , n-1 )* SE,
LCL_var = ((n-1 )* S2) / qchisq (0.975 , n-1 ),
UCL_var = ((n-1 )* S2) / qchisq (0.025 , n-1 ))
%>% mutate (asymptotic_covers_mean = ifelse (LCL_mean < 4 & UCL_mean > 4 , 1 , 0 ),
asymptotic_covers_var = ifelse (LCL_var < 16 / 3 & UCL_var > 16 / 3 , 1 , 0 ))
)
# A tibble: 40,000 × 12
n .trial Ysample Ybar S2 SE LCL_mean UCL_mean LCL_var UCL_var
<dbl> <dbl> <list> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 5 1 <dbl [5]> 4.06 4.92 0.992 1.31 6.81 1.76 40.6
2 5 2 <dbl [5]> 2.55 5.66 1.06 -0.400 5.51 2.03 46.7
3 5 3 <dbl [5]> 3.24 8.70 1.32 -0.421 6.90 3.12 71.8
4 5 4 <dbl [5]> 1.14 0.562 0.335 0.210 2.07 0.202 4.64
5 5 5 <dbl [5]> 2.55 5.78 1.08 -0.438 5.53 2.08 47.8
6 5 6 <dbl [5]> 3.37 5.78 1.08 0.389 6.36 2.07 47.7
7 5 7 <dbl [5]> 6.13 5.30 1.03 3.27 8.99 1.90 43.8
8 5 8 <dbl [5]> 3.58 7.25 1.20 0.239 6.93 2.60 59.9
9 5 9 <dbl [5]> 3.66 3.55 0.843 1.32 6.00 1.27 29.3
10 5 10 <dbl [5]> 3.56 3.21 0.801 1.34 5.79 1.15 26.5
# ℹ 39,990 more rows
# ℹ 2 more variables: asymptotic_covers_mean <dbl>, asymptotic_covers_var <dbl>
(UNIFs_coverage <- many_UNIF_CIs
%>% group_by (n)
%>% summarize (asymptotic_coverage_mean = mean (asymptotic_covers_mean),
asymptotic_coverage_var = mean (asymptotic_covers_var))
)
# A tibble: 4 × 3
n asymptotic_coverage_mean asymptotic_coverage_var
<dbl> <dbl> <dbl>
1 5 0.936 0.986
2 20 0.950 0.997
3 50 0.944 0.997
4 100 0.948 0.998
## EXP(B = 4)
(many_EXP_CIs <- parameters (~ n, c (5 ,20 ,50 ,100 ))
%>% add_trials (N)
%>% mutate (Ysample = map (n, .f = \(n) rexp (n, rate = 1 / 4 )))
%>% mutate (Ybar = map_dbl (Ysample, mean),
S2 = map_dbl (Ysample, var),
SE = sqrt (S2/ n))
%>% mutate (LCL_mean = Ybar - qt (0.975 , n-1 ) * SE,
UCL_mean = Ybar + qt (0.975 , n-1 )* SE,
LCL_var = ((n-1 )* S2) / qchisq (0.975 , n-1 ),
UCL_var = ((n-1 )* S2) / qchisq (0.025 , n-1 ))
%>% mutate (asymptotic_covers_mean = ifelse (LCL_mean < 4 & UCL_mean > 4 , 1 , 0 ),
asymptotic_covers_var = ifelse (LCL_var < 16 & UCL_var > 16 , 1 , 0 ))
)
# A tibble: 40,000 × 12
n .trial Ysample Ybar S2 SE LCL_mean UCL_mean LCL_var UCL_var
<dbl> <dbl> <list> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 5 1 <dbl [5]> 0.514 0.177 0.188 -0.00819 1.04 0.0634 1.46
2 5 2 <dbl [5]> 3.03 2.41 0.694 1.10 4.96 0.865 19.9
3 5 3 <dbl [5]> 1.68 3.12 0.790 -0.516 3.87 1.12 25.8
4 5 4 <dbl [5]> 3.95 21.1 2.05 -1.75 9.66 7.57 174.
5 5 5 <dbl [5]> 4.71 20.3 2.02 -0.887 10.3 7.29 168.
6 5 6 <dbl [5]> 2.11 0.803 0.401 0.993 3.22 0.288 6.63
7 5 7 <dbl [5]> 3.05 8.27 1.29 -0.525 6.62 2.97 68.3
8 5 8 <dbl [5]> 4.53 5.95 1.09 1.50 7.56 2.14 49.2
9 5 9 <dbl [5]> 3.69 14.5 1.71 -1.04 8.43 5.22 120.
10 5 10 <dbl [5]> 4.30 46.1 3.04 -4.13 12.7 16.6 381.
# ℹ 39,990 more rows
# ℹ 2 more variables: asymptotic_covers_mean <dbl>, asymptotic_covers_var <dbl>
(EXPs_coverage <- many_EXP_CIs
%>% group_by (n)
%>% summarize (asymptotic_coverage_mean = mean (asymptotic_covers_mean),
asymptotic_coverage_var = mean (asymptotic_covers_var))
)
# A tibble: 4 × 3
n asymptotic_coverage_mean asymptotic_coverage_var
<dbl> <dbl> <dbl>
1 5 0.877 0.821
2 20 0.918 0.727
3 50 0.935 0.697
4 100 0.945 0.696
## GEO(p = 0.2)
(many_GEO_CIs <- parameters (~ n, c (5 ,20 ,50 ,100 ))
%>% add_trials (N)
%>% mutate (Ysample = map (n, .f = \(n) rgeom (n, 0.2 )))
%>% mutate (Ybar = map_dbl (Ysample, mean),
S2 = map_dbl (Ysample, var),
SE = sqrt (S2/ n))
%>% mutate (LCL_mean = Ybar - qt (0.975 , n-1 ) * SE,
UCL_mean = Ybar + qt (0.975 , n-1 )* SE,
LCL_var = ((n-1 )* S2) / qchisq (0.975 , n-1 ),
UCL_var = ((n-1 )* S2) / qchisq (0.025 , n-1 ))
%>% mutate (asymptotic_covers_mean = ifelse (LCL_mean < 4 & UCL_mean > 4 , 1 , 0 ),
asymptotic_covers_var = ifelse (LCL_var < (1 -0.2 ) / 0.2 ^ 2 & UCL_var > (1 -0.2 ) / 0.2 ^ 2 , 1 , 0 ))
)
# A tibble: 40,000 × 12
n .trial Ysample Ybar S2 SE LCL_mean UCL_mean LCL_var UCL_var
<dbl> <dbl> <list> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 5 1 <int [5]> 5.6 25.3 2.25 -0.645 11.8 9.08 209.
2 5 2 <int [5]> 6.4 66.3 3.64 -3.71 16.5 23.8 547.
3 5 3 <int [5]> 3.2 16.7 1.83 -1.87 8.27 5.99 138.
4 5 4 <int [5]> 5.6 43.8 2.96 -2.62 13.8 15.7 362.
5 5 5 <int [5]> 1.8 1.7 0.583 0.181 3.42 0.610 14.0
6 5 6 <int [5]> 3 12.5 1.58 -1.39 7.39 4.49 103.
7 5 7 <int [5]> 3.4 11.8 1.54 -0.865 7.67 4.24 97.4
8 5 8 <int [5]> 2.6 22.8 2.14 -3.33 8.53 8.18 188.
9 5 9 <int [5]> 3.2 19.2 1.96 -2.24 8.64 6.89 159.
10 5 10 <int [5]> 7.2 55.7 3.34 -2.07 16.5 20.0 460.
# ℹ 39,990 more rows
# ℹ 2 more variables: asymptotic_covers_mean <dbl>, asymptotic_covers_var <dbl>
(GEOs_coverage <- many_GEO_CIs
%>% group_by (n)
%>% summarize (asymptotic_coverage_mean = mean (asymptotic_covers_mean),
asymptotic_coverage_var = mean (asymptotic_covers_var))
)
# A tibble: 4 × 3
n asymptotic_coverage_mean asymptotic_coverage_var
<dbl> <dbl> <dbl>
1 5 0.881 0.825
2 20 0.918 0.731
3 50 0.937 0.699
4 100 0.944 0.692
(UNIFs_coverage <- UNIFs_coverage %>% mutate (distribution = 'UNIF(0,8)' ))
# A tibble: 4 × 4
n asymptotic_coverage_mean asymptotic_coverage_var distribution
<dbl> <dbl> <dbl> <chr>
1 5 0.936 0.986 UNIF(0,8)
2 20 0.950 0.997 UNIF(0,8)
3 50 0.944 0.997 UNIF(0,8)
4 100 0.948 0.998 UNIF(0,8)
(EXPs_coverage <- EXPs_coverage %>% mutate (distribution = 'EXP(B = 4)' ))
# A tibble: 4 × 4
n asymptotic_coverage_mean asymptotic_coverage_var distribution
<dbl> <dbl> <dbl> <chr>
1 5 0.877 0.821 EXP(B = 4)
2 20 0.918 0.727 EXP(B = 4)
3 50 0.935 0.697 EXP(B = 4)
4 100 0.945 0.696 EXP(B = 4)
(GEOs_coverage <- GEOs_coverage %>% mutate (distribution = 'GEO(p = 0.2)' ))
# A tibble: 4 × 4
n asymptotic_coverage_mean asymptotic_coverage_var distribution
<dbl> <dbl> <dbl> <chr>
1 5 0.881 0.825 GEO(p = 0.2)
2 20 0.918 0.731 GEO(p = 0.2)
3 50 0.937 0.699 GEO(p = 0.2)
4 100 0.944 0.692 GEO(p = 0.2)
(mu_table <- bind_rows (
UNIFs_coverage
%>% select (n, asymptotic_coverage_mean)
%>% rename ('Y_i ~ UNIF(0,8)' = asymptotic_coverage_mean),
EXPs_coverage
%>% select (n, asymptotic_coverage_mean)
%>% rename ('Y_i ~ EXP(B = 4)' = asymptotic_coverage_mean),
GEOs_coverage
%>% select (n, asymptotic_coverage_mean)
%>% rename ('Y_i ~ GEO(p = 0.2)' = asymptotic_coverage_mean),
)
%>% arrange (n)
%>% mutate (across (- n, ~ round (.,3 )))
)
# A tibble: 12 × 4
n `Y_i ~ UNIF(0,8)` `Y_i ~ EXP(B = 4)` `Y_i ~ GEO(p = 0.2)`
<dbl> <dbl> <dbl> <dbl>
1 5 0.936 NA NA
2 5 NA 0.877 NA
3 5 NA NA 0.881
4 20 0.95 NA NA
5 20 NA 0.918 NA
6 20 NA NA 0.918
7 50 0.944 NA NA
8 50 NA 0.935 NA
9 50 NA NA 0.937
10 100 0.948 NA NA
11 100 NA 0.945 NA
12 100 NA NA 0.944
library (knitr)
kable (mu_table)
5
0.936
NA
NA
5
NA
0.877
NA
5
NA
NA
0.881
20
0.950
NA
NA
20
NA
0.918
NA
20
NA
NA
0.918
50
0.944
NA
NA
50
NA
0.935
NA
50
NA
NA
0.937
100
0.948
NA
NA
100
NA
0.945
NA
100
NA
NA
0.944
(variance_table <- bind_rows (
UNIFs_coverage
%>% select (n, asymptotic_coverage_var)
%>% rename ('Y_i ~ UNIF(0,8)' = asymptotic_coverage_var),
EXPs_coverage
%>% select (n, asymptotic_coverage_var)
%>% rename ('Y_i ~ EXP(B = 4)' = asymptotic_coverage_var),
GEOs_coverage
%>% select (n, asymptotic_coverage_var)
%>% rename ('Y_i ~ GEO(p = 0.2)' = asymptotic_coverage_var),
)
%>% arrange (n)
%>% mutate (across (- n, ~ round (.,3 )))
)
# A tibble: 12 × 4
n `Y_i ~ UNIF(0,8)` `Y_i ~ EXP(B = 4)` `Y_i ~ GEO(p = 0.2)`
<dbl> <dbl> <dbl> <dbl>
1 5 0.986 NA NA
2 5 NA 0.821 NA
3 5 NA NA 0.825
4 20 0.997 NA NA
5 20 NA 0.727 NA
6 20 NA NA 0.731
7 50 0.997 NA NA
8 50 NA 0.697 NA
9 50 NA NA 0.699
10 100 0.998 NA NA
11 100 NA 0.696 NA
12 100 NA NA 0.692
library (knitr)
kable (variance_table)
5
0.986
NA
NA
5
NA
0.821
NA
5
NA
NA
0.825
20
0.997
NA
NA
20
NA
0.727
NA
20
NA
NA
0.731
50
0.997
NA
NA
50
NA
0.697
NA
50
NA
NA
0.699
100
0.998
NA
NA
100
NA
0.696
NA
100
NA
NA
0.692
When looking specifically at our means table, n = 5 is problematic for both the EXP and GEO distribution. When switching gears to the variance table, we see extreme under-coverage as n increases for both EXP and GEO, with n = 100 having concerningly low variance. For both the mean and variance CIs, the consequences are the least severe for the UNIF distribution. Lastly, we see that the variance CIs are most affected by the consequences of the normality assumption.