PS 7.7

Uniform (0,8)

library(purrrfect)

Attaching package: 'purrrfect'
The following objects are masked from 'package:base':

    replicate, tabulate
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Number of simulated samples drawn for every sample size.
N <- 10000

# Mean and standard deviation of a single Uniform(0, 8) observation:
# (0 + 8) / 2 = 4 and sqrt((8 - 0)^2 / 12).
mu <- 4
sigma <- sqrt(64 / 12)

# For each sample size n, draw Uniform(0, 8) samples, reduce each sample to its
# mean, and evaluate the CLT normal approximation at that mean.
# NOTE(review): parameters()/add_trials() come from purrrfect; presumably they
# expand to N rows per value of n -- confirm against the package docs.
clt_sims_unif <- parameters(~n, c(5, 10, 20, 40, 80, 160)) %>%
  add_trials(N) %>%
  mutate(
    # One list-column entry per row: a sample of size n.
    Y_sample = map(n, \(size) runif(size, min = 0, max = 8)),
    Ybar = map_dbl(Y_sample, mean)
  ) %>%
  mutate(
    # Normal density and CDF with mean mu and standard error sigma / sqrt(n).
    fU = dnorm(Ybar, mean = mu, sd = sigma / sqrt(n)),
    FU = pnorm(Ybar, mean = mu, sd = sigma / sqrt(n)),
    # Empirical CDF of the sample means, ranked within each sample size.
    Fhat = cume_dist(Ybar),
    .by = n
  )
# Histogram of the simulated sample means (on the density scale) with the
# normal approximation overlaid, one facet row per sample size n.
ggplot(data = clt_sims_unif, aes(x = Ybar)) +
  # after_stat(density) puts the bars on the same scale as the dnorm() curve.
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = .05, fill = 'goldenrod') +
  geom_line(aes(y = fU), col = 'cornflowerblue') +
  # `scales` is spelled out in full; the abbreviated `scale` only worked
  # through partial argument matching.
  facet_grid(n ~ ., labeller = label_both, scales = 'free_y') +
  labs(x = expression(bar(Y))) +
  theme_classic()

# Empirical CDF of the sample means against the normal-approximation CDF,
# one facet row per sample size n with independent axes.
ggplot(clt_sims_unif, mapping = aes(x = Ybar)) +
  # Constant strings inside aes() create the two legend entries.
  geom_step(mapping = aes(y = FU, colour = "Analytic Normal CDF")) +
  geom_step(mapping = aes(y = Fhat, colour = "Empirical CDF")) +
  facet_grid(n ~ ., scales = "free", labeller = label_both) +
  labs(
    x = expression(bar(Y)),
    y = "CDF",
    colour = ""  # blank legend title
  ) +
  theme_classic()

Gamma (2,2)

# Mean and standard deviation of a single Gamma(shape = 2, scale = 2)
# observation: shape * scale = 4 and sqrt(shape * scale^2) = sqrt(8).
mu2 <- 4
sigma2 <- sqrt(8)

# For each sample size n, draw Gamma(2, 2) samples, reduce each sample to its
# mean, and evaluate the CLT normal approximation at that mean.
# NOTE(review): parameters()/add_trials() come from purrrfect; presumably they
# expand to N rows per value of n -- confirm against the package docs.
clt_sims_gamma <- parameters(~n, c(5, 10, 20, 40, 80, 160)) %>%
  add_trials(N) %>%
  mutate(
    # One list-column entry per row: a sample of size n.
    Y_sample = map(n, \(size) rgamma(size, shape = 2, scale = 2)),
    Ybar = map_dbl(Y_sample, mean)
  ) %>%
  mutate(
    # Normal density and CDF with mean mu2 and standard error sigma2 / sqrt(n).
    fU = dnorm(Ybar, mean = mu2, sd = sigma2 / sqrt(n)),
    FU = pnorm(Ybar, mean = mu2, sd = sigma2 / sqrt(n)),
    # Empirical CDF of the sample means, ranked within each sample size.
    Fhat = cume_dist(Ybar),
    .by = n
  )
# Histogram of the simulated sample means (on the density scale) with the
# normal approximation overlaid, one facet row per sample size n.
ggplot(data = clt_sims_gamma, aes(x = Ybar)) +
  # after_stat(density) puts the bars on the same scale as the dnorm() curve.
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = .05, fill = 'goldenrod') +
  geom_line(aes(y = fU), col = 'cornflowerblue') +
  # `scales` is spelled out in full; the abbreviated `scale` only worked
  # through partial argument matching.
  facet_grid(n ~ ., labeller = label_both, scales = 'free_y') +
  labs(x = expression(bar(Y))) +
  theme_classic()

# Empirical CDF of the sample means against the normal-approximation CDF,
# one facet row per sample size n with independent axes.
ggplot(clt_sims_gamma, mapping = aes(x = Ybar)) +
  # Constant strings inside aes() create the two legend entries.
  geom_step(mapping = aes(y = FU, colour = "Analytic Normal CDF")) +
  geom_step(mapping = aes(y = Fhat, colour = "Empirical CDF")) +
  facet_grid(n ~ ., scales = "free", labeller = label_both) +
  labs(
    x = expression(bar(Y)),
    y = "CDF",
    colour = ""  # blank legend title
  ) +
  theme_classic()

Poisson (4)

# Mean and standard deviation of a single Poisson(lambda = 4) observation:
# lambda = 4 and sqrt(lambda) = 2.
mu3 <- 4
sigma3 <- 2

# For each sample size n, draw Poisson(4) samples, reduce each sample to its
# mean, and evaluate the CLT normal approximation at that mean.
# NOTE(review): parameters()/add_trials() come from purrrfect; presumably they
# expand to N rows per value of n -- confirm against the package docs.
clt_sims_pois <- (
  parameters(~n, c(5, 10, 20, 40, 80, 160)) %>%
    add_trials(N) %>%
    mutate(Y_sample = map(n, \(n) rpois(n, lambda = 4))) %>%
    mutate(Ybar = map_dbl(Y_sample, mean)) %>%
    # The approximation must use the Poisson moments (mu3, sigma3). The
    # Uniform section's mu/sigma would give sd sqrt(64 / 12) instead of 2 and
    # make the overlaid normal curve too wide.
    mutate(fU = dnorm(Ybar, mean = mu3, sd = sigma3 / sqrt(n)),
           FU = pnorm(Ybar, mean = mu3, sd = sigma3 / sqrt(n)),
           # Empirical CDF of the sample means within each sample size.
           Fhat = cume_dist(Ybar),
           .by = n
    )
)
# Histogram of the simulated sample means (on the density scale) with the
# normal approximation overlaid, one facet row per sample size n.
ggplot(data = clt_sims_pois, aes(x = Ybar)) +
  # after_stat(density) puts the bars on the same scale as the dnorm() curve.
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = .2, fill = 'goldenrod') +
  geom_line(aes(y = fU), col = 'cornflowerblue') +
  # `scales` is spelled out in full; the abbreviated `scale` only worked
  # through partial argument matching.
  facet_grid(n ~ ., labeller = label_both, scales = 'free_y') +
  labs(x = expression(bar(Y))) +
  theme_classic()

# Empirical CDF of the sample means against the normal-approximation CDF,
# one facet row per sample size n with independent axes.
ggplot(clt_sims_pois, mapping = aes(x = Ybar)) +
  # Constant strings inside aes() create the two legend entries.
  geom_step(mapping = aes(y = FU, colour = "Analytic Normal CDF")) +
  geom_step(mapping = aes(y = Fhat, colour = "Empirical CDF")) +
  facet_grid(n ~ ., scales = "free", labeller = label_both) +
  labs(
    x = expression(bar(Y)),
    y = "CDF",
    colour = ""  # blank legend title
  ) +
  theme_classic()

The distribution for which normality appears to kick in the quickest is the uniform. Its empirical CDF overlays the normal CDF almost seamlessly, and its histogram of sample means matches the normal density at smaller sample sizes than the other two distributions. All three distributions appear to be approximately normal by n = 40.