PS_7_7

Question 5

library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.5.2
Warning: package 'ggplot2' was built under R version 4.5.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(purrrfect)

Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate
# 3 Pops : UNIF(0,8) ; GAM(alpha = 2, beta = 2) ; POI(4)
# Unif Sim
head(unif_sim <- parameters(~n, c(5, 10, 20, 40, 80, 160)) %>%
  add_trials(10000) %>%
  mutate(
    y_sample = map(n, ~runif(.x, 0, 8)),
    y_bar = map_dbl(y_sample, mean)
  ) %>%
  mutate(
    fU = dnorm(y_bar, mean = 4, sd = 2.3094/sqrt(n)),
    FU = pnorm(y_bar, mean = 4, sd = 2.3094/sqrt(n)),
    Fhat = cume_dist(y_bar),
    .by = n
  )
)
# A tibble: 6 × 7
      n .trial y_sample  y_bar     fU     FU   Fhat
  <dbl>  <dbl> <list>    <dbl>  <dbl>  <dbl>  <dbl>
1     5      1 <dbl [5]>  4.77 0.292  0.773  0.766 
2     5      2 <dbl [5]>  6.68 0.0135 0.995  0.998 
3     5      3 <dbl [5]>  1.78 0.0385 0.0159 0.0123
4     5      4 <dbl [5]>  4.05 0.386  0.520  0.522 
5     5      5 <dbl [5]>  3.74 0.374  0.400  0.403 
6     5      6 <dbl [5]>  2.94 0.228  0.152  0.158 
# UNIF(0,8) pdf and CDF Overlay
#pdf
ggplot(unif_sim, aes(x = y_bar)) +
  geom_histogram(aes(y = after_stat(density)), binwidth = .1, fill = 'cornflowerblue') +
  geom_line(aes(y = fU), col = 'red') +
  facet_wrap(~n, scales = 'free_y') +
  labs(title = "Uniform(0,8)", x = expression(bar(Y))) + theme_classic()

#cdf
ggplot(unif_sim, aes(x = y_bar)) +
  geom_line(aes(y = Fhat), col = 'cornflowerblue', linewidth = 1) +
  geom_line(aes(y = FU), col = 'red', linetype = "dashed") +
  facet_wrap(~n, scales = 'free_x') +
  labs(title = "Uniform(0,8): CDF", x = 'Ybar') + 
  theme_classic()

# GAM Sim
head(gam_sim <- parameters(~n, c(5, 10, 20, 40, 80, 160)) %>%
  add_trials(10000) %>%
  mutate(
    y_sample = map(n, ~rgamma(.x, shape = 2, rate = 2)),
    y_bar = map_dbl(y_sample, mean)
  ) %>%
  mutate(
    fU = dnorm(y_bar, mean = 1, sd = 0.7071/sqrt(n)),
    FU = pnorm(y_bar, mean = 1, sd = 0.7071/sqrt(n)),
    Fhat = cume_dist(y_bar),
    .by = n
  )
)
# A tibble: 6 × 7
      n .trial y_sample  y_bar    fU     FU   Fhat
  <dbl>  <dbl> <list>    <dbl> <dbl>  <dbl>  <dbl>
1     5      1 <dbl [5]> 1.54  0.293 0.956  0.944 
2     5      2 <dbl [5]> 0.889 1.19  0.363  0.394 
3     5      3 <dbl [5]> 0.621 0.615 0.115  0.0921
4     5      4 <dbl [5]> 0.479 0.325 0.0498 0.0232
5     5      5 <dbl [5]> 0.723 0.860 0.191  0.193 
6     5      6 <dbl [5]> 0.751 0.925 0.215  0.222 
# GAM(shape = 2, rate = 2) pdf and CDF Overlay
#pdf
ggplot(gam_sim, aes(x = y_bar)) +
  geom_histogram(aes(y = after_stat(density)), binwidth = .05, fill = 'cornflowerblue') +
  geom_line(aes(y = fU), col = 'red') +
  facet_wrap(~n, scales = 'free_y') +
  labs(title = "Gamma(2,2)", x = 'Ybar') +
  theme_classic()

#cdf
ggplot(gam_sim, aes(x = y_bar)) +
  geom_line(aes(y = Fhat), col = 'cornflowerblue', linewidth = 1) +
  geom_line(aes(y = FU), col = 'red', linetype = "dashed") +
  facet_wrap(~n, scales = 'free_x') +
  labs(title = "GAM(2,2): CDF", x = 'Ybar') + 
  theme_classic()

# POI Sim
head(poi_sim <- parameters(~n, c(5, 10, 20, 40, 80, 160)) %>%
  add_trials(10000) %>%
  mutate(
    y_sample = map(n, ~rpois(.x, lambda = 4)),
    y_bar = map_dbl(y_sample, mean)
  ) %>%
  mutate(
    fU = dnorm(y_bar, mean = 4, sd = 2/sqrt(n)),
    FU = pnorm(y_bar, mean = 4, sd = 2/sqrt(n)),
    Fhat = cume_dist(y_bar),
    .by = n
  )
)
# A tibble: 6 × 7
      n .trial y_sample  y_bar     fU     FU   Fhat
  <dbl>  <dbl> <list>    <dbl>  <dbl>  <dbl>  <dbl>
1     5      1 <int [5]>   3.8 0.435  0.412  0.478 
2     5      2 <int [5]>   2.8 0.181  0.0899 0.105 
3     5      3 <int [5]>   5.6 0.0901 0.963  0.968 
4     5      4 <int [5]>   4.2 0.435  0.588  0.648 
5     5      5 <int [5]>   4.6 0.356  0.749  0.79  
6     5      6 <int [5]>   2.2 0.0589 0.0221 0.0221
# POI(4) pdf and CDF Overlay
#pdf
ggplot(poi_sim, aes( x = y_bar)) +
  geom_histogram(aes(y = after_stat(density)), binwidth = 0.2, fill = 'cornflowerblue') +
  geom_line(aes(y=fU), col= 'red') +
  facet_wrap(~n, scales = 'free_y') +
  labs(title = 'POI(4)', x = 'Ybar') +
  theme_classic()

#cdf
ggplot(poi_sim, aes(x = y_bar)) +
  geom_line(aes(y = Fhat), col = 'cornflowerblue', linewidth = 1) +
  geom_line(aes(y = FU), col = 'red', linetype = "dashed") +
  facet_wrap(~n, scales = 'free_x') +
  labs(title = "POI(4): CDF", x = 'Ybar') + 
  theme_classic()

It seems like UNIF is pretty good even at low n, which makes sense, as it is a symmetric distribution. GAM does a bit worse, low n are a bit skewed left, but the skewness seems to mostly go away at n=20. POI(4) seems to mostly settle around n=20 as well, being a bit skewed left at low n.