8.2

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

The exact Interval maintains a 95% coverage for all sample sizes. The asymptoic interval undercovers when n is small due to the skewness of the exponential distrubution but its coverage approaches to 95% as n Increases.

library(tidyverse)

Warning: package 'tidyverse' was built under R version 4.4.3

Warning: package 'ggplot2' was built under R version 4.4.3

Warning: package 'tidyr' was built under R version 4.4.3

Warning: package 'readr' was built under R version 4.4.3

Warning: package 'purrr' was built under R version 4.4.3

Warning: package 'dplyr' was built under R version 4.4.3

Warning: package 'stringr' was built under R version 4.4.3

Warning: package 'forcats' was built under R version 4.4.3

Warning: package 'lubridate' was built under R version 4.4.3

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

N <- 10000
beta_true <- 2

many_exponential_CIs <- 
  parameters(~n, ~beta, 
             c(5,10,20,40,80,160,320),
             beta_true)%>%
add_trials(N)%>%
 mutate(Ysample = pmap(list(n, beta),\(n,b) rexp(n, rate = 1/b)))%>%
 mutate(Ybar = map_dbl(Ysample, mean),
           S2 = map_dbl(Ysample, var))%>%
 mutate(LCL_exact = Ybar / qgamma(0.975, shape = n, scale = 1/n),
           UCL_exact = Ybar / qgamma(0.025, shape = n, scale = 1/n),
           LCL_asymptotic = Ybar - 1.96 * sqrt(S2/n),
           UCL_asymptotic = Ybar + 1.96 * sqrt(S2/n),
         exact_covers = as.integer(LCL_exact < beta & UCL_exact > beta),
         asymptotic_covers = as.integer(LCL_asymptotic < beta & UCL_asymptotic> beta)
         )

(exponential_coverage <- many_exponential_CIs
  %>% group_by(beta, n)
  %>% summarize(exact_coverage = mean(exact_covers),
                asymptotic_coverage = mean(asymptotic_covers),
                .groups = "drop"
                ))

# A tibble: 7 × 4
   beta     n exact_coverage asymptotic_coverage
  <dbl> <dbl>          <dbl>               <dbl>
1     2     5          0.954               0.814
2     2    10          0.949               0.871
3     2    20          0.950               0.901
4     2    40          0.951               0.927
5     2    80          0.947               0.934
6     2   160          0.950               0.944
7     2   320          0.951               0.949

ggplot(data = exponential_coverage) +
  geom_line(aes(x =n, y = exact_coverage, color = "Exact CI"), size = 1)+
  geom_line(aes(x = n, y = asymptotic_coverage, color = "Asymptotic CI"), size = 1)+
  geom_hline(yintercept = 0.95, linetype = "dashed")+
  scale_color_manual(values = c("blue", "red"))+
  ylab("Coverage")+
  xlab("Sample Size (n)")+
  theme_classic()+
  ggtitle("Simulated Coverage of Nominal 95% CIs ( beta = 2)")

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Exponential and Geometric distributions with small sample size can have coverages that fall alot which cause the most severe violation. The exponential and Geometric distribution for a sample size of n = 50 gets very close to nomianl coverage. The Variance CI are the most senesitive to violations of normailty while mean CI are robust.

library(tidyverse)
library(purrrfect)
library(knitr)

Warning: package 'knitr' was built under R version 4.4.3

N <- 10000
sample_sizes <- c(5,20,50,100)

dist_list <- list(
  normal = list(rfunc = function(n) rnorm(n, 0, sqrt(8)), mu = 0, sigma2 = 8),
  exp = list(rfunc = function(n) rexp(n, rate = 1/4), mu = 4, sigma2 = 16),
  geom = list(rfunc = function(n) rgeom(n, prob = 0.2), mu = (1-0.2)/0.2, sigma2 = (1-0.2)/0.2^2)
)

coverage_sim <- expand.grid(dist = names(dist_list), n = sample_sizes) %>%
  rowwise() %>%
  mutate(
    mean_coverage = mean(replicate(N, {
      x <- dist_list[[dist]]$rfunc(n)
      xbar <- mean(x)
      s <- sd(x)
      L <- xbar - qt(0.975, df=n-1)*s/sqrt(n)
      U <- xbar + qt(0.975, df=n-1)*s/sqrt(n)
      dist_list[[dist]]$mu >= L & dist_list[[dist]]$mu <= U
    })),
    var_coverage = mean(replicate(N, {
      x <- dist_list[[dist]]$rfunc(n)
      s2 <- var(x)
      L <- (n-1)*s2/qchisq(0.975, df=n-1)
      U <- (n-1)*s2/qchisq(0.025, df=n-1)
      dist_list[[dist]]$sigma2 >= L & dist_list[[dist]]$sigma2 <= U
    }))
  ) %>%
  ungroup()

Warning: There were 24 warnings in `mutate()`.
The first warning was:
ℹ In argument: `mean_coverage = mean(...)`.
ℹ In row 1.
Caused by warning in `mean.default()`:
! argument is not numeric or logical: returning NA
ℹ Run `dplyr::last_dplyr_warnings()` to see the 23 remaining warnings.

mean_table <- coverage_sim %>%
  select(dist, n, mean_coverage)%>%
  pivot_wider(names_from = n, values_from = mean_coverage) 
 

var_table <- coverage_sim %>%
  select(dist, n, var_coverage) %>%
  pivot_wider(names_from = n, values_from = var_coverage) 
  

 kable(mean_table, digits = 3,caption= "Coverage of 95% CI for Mean")

Coverage of 95% CI for Mean
dist	5	20	50	100
normal	NA	NA	NA	NA
exp	NA	NA	NA	NA
geom	NA	NA	NA	NA

kable(var_table, digits = 3, caption ="Coverage of 95% CI for Varaince" )

Coverage of 95% CI for Varaince
dist	5	20	50	100
normal	NA	NA	NA	NA
exp	NA	NA	NA	NA
geom	NA	NA	NA	NA

The echo: false option disables the printing of code (only output is displayed).