7.4

library(tidyverse)

Warning: package 'tidyverse' was built under R version 4.4.3

Warning: package 'ggplot2' was built under R version 4.4.3

Warning: package 'tidyr' was built under R version 4.4.3

Warning: package 'readr' was built under R version 4.4.3

Warning: package 'purrr' was built under R version 4.4.3

Warning: package 'dplyr' was built under R version 4.4.3

Warning: package 'stringr' was built under R version 4.4.3

Warning: package 'forcats' was built under R version 4.4.3

Warning: package 'lubridate' was built under R version 4.4.3

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

(Chi_sq_sim <- parameters(~n, ~ sigma,
            c(4,8,15),
            c(1,2,3))
  
 %>% add_trials(10000)
 %>% mutate(x_sample = pmap(list(n, sigma),
      \(n,s) rnorm(n, mean = 0, sd = s)))
  %>% mutate(S2 = map_dbl(x_sample, var),
             chi_stat = (n - 1) * S2 / sigma^2)
  %>% mutate(f_chi = dchisq(chi_stat, df = n - 1))
  
  
  )

# A tibble: 90,000 × 7
       n sigma .trial x_sample     S2 chi_stat  f_chi
   <dbl> <dbl>  <dbl> <list>    <dbl>    <dbl>  <dbl>
 1     4     1      1 <dbl [4]> 0.384     1.15 0.241 
 2     4     1      2 <dbl [4]> 0.932     2.80 0.165 
 3     4     1      3 <dbl [4]> 0.554     1.66 0.224 
 4     4     1      4 <dbl [4]> 1.55      4.65 0.0843
 5     4     1      5 <dbl [4]> 0.666     2.00 0.208 
 6     4     1      6 <dbl [4]> 1.22      3.66 0.122 
 7     4     1      7 <dbl [4]> 0.538     1.61 0.226 
 8     4     1      8 <dbl [4]> 2.19      6.58 0.0382
 9     4     1      9 <dbl [4]> 1.58      4.74 0.0811
10     4     1     10 <dbl [4]> 0.574     1.72 0.221 
# ℹ 89,990 more rows

ggplot(Chi_sq_sim, aes(x = chi_stat))+
  geom_histogram(aes(y = after_stat(density)),
                 fill = "goldenrod",
                 binwidth = 0.3, 
                 color = "white")+
  geom_density(color = "black", linewidth = 1)+
  stat_function(fun = dchisq, 
                args = list(df = Chi_sq_sim$n[1]-1),
                color = "cornflowerblue",
                linewidth = 1,
                linetype = 2)+
  facet_grid(n ~ sigma, labeller = label_both, scales = "free")+
  theme_classic(base_size = 16)+
  labs(
    title = expression("Simulated and analytic densities"),
    x = expression((n-1)*S^2/sigma^2),
    y = "Density"
  )

moment_check <- Chi_sq_sim %>%
group_by(n, sigma)%>%
 reframe(
  sim_mean_S2 = mean(S2),
  theo_mean_S2 = sigma^2,
  sim_var_S2 = var(S2),
  theo_var_S2 = 2 * sigma^4 / (n - 1),
  .groups = "drop"
)
moment_check

# A tibble: 90,000 × 7
       n sigma sim_mean_S2 theo_mean_S2 sim_var_S2 theo_var_S2 .groups
   <dbl> <dbl>       <dbl>        <dbl>      <dbl>       <dbl> <chr>  
 1     4     1       0.984            1      0.648       0.667 drop   
 2     4     1       0.984            1      0.648       0.667 drop   
 3     4     1       0.984            1      0.648       0.667 drop   
 4     4     1       0.984            1      0.648       0.667 drop   
 5     4     1       0.984            1      0.648       0.667 drop   
 6     4     1       0.984            1      0.648       0.667 drop   
 7     4     1       0.984            1      0.648       0.667 drop   
 8     4     1       0.984            1      0.648       0.667 drop   
 9     4     1       0.984            1      0.648       0.667 drop   
10     4     1       0.984            1      0.648       0.667 drop   
# ℹ 89,990 more rows

You can add options to executable code like this

library(tidyverse)
library(purrrfect)



N <- 1000
(many_poisson_samples <- parameters(~n, ~ lambda,
                                    c(10,20,30),
                                    c(1,3,6)
            )
  %>% add_trials(N)
  %>% mutate(ysample = pmap(list(n, lambda),
      \(nn,l) rpois(nn, lambda = l)))
  %>% mutate(ybar = map_dbl(ysample, mean))
  %>% mutate (S2 = map_dbl(ysample, var))
  
  
  )

# A tibble: 9,000 × 6
       n lambda .trial ysample     ybar    S2
   <dbl>  <dbl>  <dbl> <list>     <dbl> <dbl>
 1    10      1      1 <int [10]>   0.9 0.767
 2    10      1      2 <int [10]>   0.8 0.4  
 3    10      1      3 <int [10]>   1.2 0.622
 4    10      1      4 <int [10]>   0.4 0.267
 5    10      1      5 <int [10]>   0.8 0.844
 6    10      1      6 <int [10]>   0.9 0.544
 7    10      1      7 <int [10]>   1.1 0.989
 8    10      1      8 <int [10]>   0.6 0.489
 9    10      1      9 <int [10]>   1.4 0.489
10    10      1     10 <int [10]>   1.4 1.16 
# ℹ 8,990 more rows

ggplot(data = many_poisson_samples)+
  geom_point(aes(x = ybar, y = S2),
             shape = '.', alpha = 0.6)+
  labs(
    x = expression(bar(Y)),
    y = expression(S^2),
    title = expression("S2 vs YBar")
  )+ 
  facet_grid(lambda ~ n, labeller = label_both, scales = "free")+
  theme_classic(base_size = 16)

many_poisson_samples %>%
  group_by(n, lambda) %>%
  summarise(
    cor_ybar_S2 = cor(ybar, S2),
    .groups = "drop"
  )

# A tibble: 9 × 3
      n lambda cor_ybar_S2
  <dbl>  <dbl>       <dbl>
1    10      1       0.527
2    10      3       0.331
3    10      6       0.255
4    20      1       0.574
5    20      3       0.374
6    20      6       0.282
7    30      1       0.591
8    30      3       0.299
9    30      6       0.292