Question 5
library (tidyverse)
library (purrrfect)
(verify_chi_square <- parameters (~ n, ~ sigma,
c (4 ,8 ,15 ), c (1 ,2 ,3 )
)
%>% add_trials (10000 )
%>% mutate (ysample = pmap (list (n, sigma), \(n,s) rnorm (n,0 ,s)))
%>% mutate (S2 = map_dbl (ysample, var))
%>% mutate (sim = (n-1 )* S2 / sigma^ 2 )
%>% mutate (analytic = dchisq (sim, df = n-1 ))
)
# A tibble: 90,000 × 7
n sigma .trial ysample S2 sim analytic
<dbl> <dbl> <dbl> <list> <dbl> <dbl> <dbl>
1 4 1 1 <dbl [4]> 0.609 1.83 0.216
2 4 1 2 <dbl [4]> 0.202 0.607 0.229
3 4 1 3 <dbl [4]> 2.52 7.57 0.0249
4 4 1 4 <dbl [4]> 1.19 3.56 0.127
5 4 1 5 <dbl [4]> 0.997 2.99 0.155
6 4 1 6 <dbl [4]> 0.237 0.711 0.236
7 4 1 7 <dbl [4]> 0.204 0.613 0.230
8 4 1 8 <dbl [4]> 0.514 1.54 0.229
9 4 1 9 <dbl [4]> 2.42 7.26 0.0284
10 4 1 10 <dbl [4]> 0.637 1.91 0.212
# ℹ 89,990 more rows
(ggplot (data = verify_chi_square, aes (x = sim))
+ geom_histogram (aes (y = after_stat (density)),
fill = 'goldenrod' ,
center = 0.1 , binwidth = 0.2 )
+ geom_line (aes (y = analytic), col = 'cornflowerblue' )
+ facet_grid (n~ sigma, labeller = label_both)
+ theme_classic ()
+ labs (title = 'Verifying chi square distribution' )
)
(summary_table <- verify_chi_square
%>% group_by (n,sigma)
%>% summarise (sim_mean_S2 = mean (S2),
analytic_mean_S2 = sigma^ 2 ,
sim_var_S2 = var (S2),
analytic_var_S2 = 2 * sigma^ 4 / (n-1 ),
.groups = 'drop' )
)
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
always returns an ungrouped data frame and adjust accordingly.
# A tibble: 90,000 × 6
n sigma sim_mean_S2 analytic_mean_S2 sim_var_S2 analytic_var_S2
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4 1 0.999 1 0.678 0.667
2 4 1 0.999 1 0.678 0.667
3 4 1 0.999 1 0.678 0.667
4 4 1 0.999 1 0.678 0.667
5 4 1 0.999 1 0.678 0.667
6 4 1 0.999 1 0.678 0.667
7 4 1 0.999 1 0.678 0.667
8 4 1 0.999 1 0.678 0.667
9 4 1 0.999 1 0.678 0.667
10 4 1 0.999 1 0.678 0.667
# ℹ 89,990 more rows
Question 6
(many_exp_samples <- parameters (~ n, ~ lambda,
c (10 ,20 ,30 ), c (0.5 ,1 ,1.5 ))
%>% add_trials (1000 )
%>% mutate (ysample = pmap (list (n, lambda), \(n,l) rexp (n, rate = l)))
%>% mutate (ybar = map_dbl (ysample, mean),
S2 = map_dbl (ysample, var)
)
)
# A tibble: 9,000 × 6
n lambda .trial ysample ybar S2
<dbl> <dbl> <dbl> <list> <dbl> <dbl>
1 10 0.5 1 <dbl [10]> 1.15 0.503
2 10 0.5 2 <dbl [10]> 2.21 4.09
3 10 0.5 3 <dbl [10]> 1.29 0.758
4 10 0.5 4 <dbl [10]> 2.34 4.60
5 10 0.5 5 <dbl [10]> 2.16 10.1
6 10 0.5 6 <dbl [10]> 2.03 4.61
7 10 0.5 7 <dbl [10]> 2.16 3.77
8 10 0.5 8 <dbl [10]> 1.69 1.20
9 10 0.5 9 <dbl [10]> 1.31 1.12
10 10 0.5 10 <dbl [10]> 1.53 1.39
# ℹ 8,990 more rows
(ggplot (data = many_exp_samples)
+ geom_point (aes (x = ybar,y = S2),
shape = '.' )
+ labs (x = expression (bar (Y)),
y = expression (S^ 2 ),
title = 'Plots of sample mean vs sample variance' )
+ facet_grid (n~ lambda,labeller = label_both)
+ theme_classic ()
)
We can see a clear pattern between ybar and S2, indiciating that they are NOT independent