Warning: package 'tidyverse' was built under R version 4.4.3
Warning: package 'ggplot2' was built under R version 4.4.3
Warning: package 'tidyr' was built under R version 4.4.3
Warning: package 'readr' was built under R version 4.4.3
Warning: package 'purrr' was built under R version 4.4.3
Warning: package 'dplyr' was built under R version 4.4.3
Warning: package 'stringr' was built under R version 4.4.3
Warning: package 'forcats' was built under R version 4.4.3
Warning: package 'lubridate' was built under R version 4.4.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 4.0.1 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.0.4
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Attaching package: 'purrrfect'
The following objects are masked from 'package:base':
replicate, tabulate
(Chi_sq_sim <- parameters(~n, ~ sigma,
c(4,8,15),
c(1,2,3))
%>% add_trials(10000)
%>% mutate(x_sample = pmap(list(n, sigma),
\(n,s) rnorm(n, mean = 0, sd = s)))
%>% mutate(S2 = map_dbl(x_sample, var),
chi_stat = (n - 1) * S2 / sigma^2)
%>% mutate(f_chi = dchisq(chi_stat, df = n - 1))
)
# A tibble: 90,000 × 7
n sigma .trial x_sample S2 chi_stat f_chi
<dbl> <dbl> <dbl> <list> <dbl> <dbl> <dbl>
1 4 1 1 <dbl [4]> 0.384 1.15 0.241
2 4 1 2 <dbl [4]> 0.932 2.80 0.165
3 4 1 3 <dbl [4]> 0.554 1.66 0.224
4 4 1 4 <dbl [4]> 1.55 4.65 0.0843
5 4 1 5 <dbl [4]> 0.666 2.00 0.208
6 4 1 6 <dbl [4]> 1.22 3.66 0.122
7 4 1 7 <dbl [4]> 0.538 1.61 0.226
8 4 1 8 <dbl [4]> 2.19 6.58 0.0382
9 4 1 9 <dbl [4]> 1.58 4.74 0.0811
10 4 1 10 <dbl [4]> 0.574 1.72 0.221
# ℹ 89,990 more rows
ggplot(Chi_sq_sim, aes(x = chi_stat))+
geom_histogram(aes(y = after_stat(density)),
fill = "goldenrod",
binwidth = 0.3,
color = "white")+
geom_density(color = "black", linewidth = 1)+
stat_function(fun = dchisq,
args = list(df = Chi_sq_sim$n[1]-1),
color = "cornflowerblue",
linewidth = 1,
linetype = 2)+
facet_grid(n ~ sigma, labeller = label_both, scales = "free")+
theme_classic(base_size = 16)+
labs(
title = expression("Simulated and analytic densities"),
x = expression((n-1)*S^2/sigma^2),
y = "Density"
)
moment_check <- Chi_sq_sim %>%
group_by(n, sigma)%>%
reframe(
sim_mean_S2 = mean(S2),
theo_mean_S2 = sigma^2,
sim_var_S2 = var(S2),
theo_var_S2 = 2 * sigma^4 / (n - 1),
.groups = "drop"
)
moment_check
# A tibble: 90,000 × 7
n sigma sim_mean_S2 theo_mean_S2 sim_var_S2 theo_var_S2 .groups
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
1 4 1 0.984 1 0.648 0.667 drop
2 4 1 0.984 1 0.648 0.667 drop
3 4 1 0.984 1 0.648 0.667 drop
4 4 1 0.984 1 0.648 0.667 drop
5 4 1 0.984 1 0.648 0.667 drop
6 4 1 0.984 1 0.648 0.667 drop
7 4 1 0.984 1 0.648 0.667 drop
8 4 1 0.984 1 0.648 0.667 drop
9 4 1 0.984 1 0.648 0.667 drop
10 4 1 0.984 1 0.648 0.667 drop
# ℹ 89,990 more rows
You can add options to executable code like this
library(tidyverse)
library(purrrfect)
N <- 1000
(many_poisson_samples <- parameters(~n, ~ lambda,
c(10,20,30),
c(1,3,6)
)
%>% add_trials(N)
%>% mutate(ysample = pmap(list(n, lambda),
\(nn,l) rpois(nn, lambda = l)))
%>% mutate(ybar = map_dbl(ysample, mean))
%>% mutate (S2 = map_dbl(ysample, var))
)
# A tibble: 9,000 × 6
n lambda .trial ysample ybar S2
<dbl> <dbl> <dbl> <list> <dbl> <dbl>
1 10 1 1 <int [10]> 0.9 0.767
2 10 1 2 <int [10]> 0.8 0.4
3 10 1 3 <int [10]> 1.2 0.622
4 10 1 4 <int [10]> 0.4 0.267
5 10 1 5 <int [10]> 0.8 0.844
6 10 1 6 <int [10]> 0.9 0.544
7 10 1 7 <int [10]> 1.1 0.989
8 10 1 8 <int [10]> 0.6 0.489
9 10 1 9 <int [10]> 1.4 0.489
10 10 1 10 <int [10]> 1.4 1.16
# ℹ 8,990 more rows
ggplot(data = many_poisson_samples)+
geom_point(aes(x = ybar, y = S2),
shape = '.', alpha = 0.6)+
labs(
x = expression(bar(Y)),
y = expression(S^2),
title = expression("S2 vs YBar")
)+
facet_grid(lambda ~ n, labeller = label_both, scales = "free")+
theme_classic(base_size = 16)
many_poisson_samples %>%
group_by(n, lambda) %>%
summarise(
cor_ybar_S2 = cor(ybar, S2),
.groups = "drop"
)
# A tibble: 9 × 3
n lambda cor_ybar_S2
<dbl> <dbl> <dbl>
1 10 1 0.527
2 10 3 0.331
3 10 6 0.255
4 20 1 0.574
5 20 3 0.374
6 20 6 0.282
7 30 1 0.591
8 30 3 0.299
9 30 6 0.292