Chapter 7.4

Question 5

library(tidyverse)
library(purrrfect)

(verify_chi_square <- parameters(~n, ~sigma,
                                 c(4,8,15), c(1,2,3)
                                 )
  %>% add_trials(10000)
  %>% mutate(ysample = pmap(list(n, sigma), \(n,s) rnorm(n,0,s)))
  %>% mutate(S2 = map_dbl(ysample, var))
  %>% mutate(sim = (n-1)*S2 / sigma^2)
  %>% mutate(analytic = dchisq(sim, df = n-1))
)
# A tibble: 90,000 × 7
       n sigma .trial ysample      S2   sim analytic
   <dbl> <dbl>  <dbl> <list>    <dbl> <dbl>    <dbl>
 1     4     1      1 <dbl [4]> 0.609 1.83    0.216 
 2     4     1      2 <dbl [4]> 0.202 0.607   0.229 
 3     4     1      3 <dbl [4]> 2.52  7.57    0.0249
 4     4     1      4 <dbl [4]> 1.19  3.56    0.127 
 5     4     1      5 <dbl [4]> 0.997 2.99    0.155 
 6     4     1      6 <dbl [4]> 0.237 0.711   0.236 
 7     4     1      7 <dbl [4]> 0.204 0.613   0.230 
 8     4     1      8 <dbl [4]> 0.514 1.54    0.229 
 9     4     1      9 <dbl [4]> 2.42  7.26    0.0284
10     4     1     10 <dbl [4]> 0.637 1.91    0.212 
# ℹ 89,990 more rows
(ggplot(data = verify_chi_square, aes(x = sim))
 + geom_histogram(aes(y = after_stat(density)),
                  fill = 'goldenrod',
                  center = 0.1, binwidth = 0.2)
 + geom_line(aes(y = analytic), col = 'cornflowerblue')
 + facet_grid(n~sigma, labeller = label_both)
 + theme_classic()
 + labs(title = 'Verifying chi square distribution')
 )

(summary_table <- verify_chi_square
 %>% group_by(n,sigma)
 %>% summarise(sim_mean_S2 = mean(S2),
               analytic_mean_S2 = sigma^2,
               sim_var_S2 = var(S2),
               analytic_var_S2 = 2*sigma^4 / (n-1),
               .groups = 'drop')
)
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
  always returns an ungrouped data frame and adjust accordingly.
# A tibble: 90,000 × 6
       n sigma sim_mean_S2 analytic_mean_S2 sim_var_S2 analytic_var_S2
   <dbl> <dbl>       <dbl>            <dbl>      <dbl>           <dbl>
 1     4     1       0.999                1      0.678           0.667
 2     4     1       0.999                1      0.678           0.667
 3     4     1       0.999                1      0.678           0.667
 4     4     1       0.999                1      0.678           0.667
 5     4     1       0.999                1      0.678           0.667
 6     4     1       0.999                1      0.678           0.667
 7     4     1       0.999                1      0.678           0.667
 8     4     1       0.999                1      0.678           0.667
 9     4     1       0.999                1      0.678           0.667
10     4     1       0.999                1      0.678           0.667
# ℹ 89,990 more rows

Question 6

(many_exp_samples <- parameters(~n, ~lambda,
                                c(10,20,30), c(0.5,1,1.5))
 %>% add_trials(1000)
 %>% mutate(ysample = pmap(list(n, lambda), \(n,l) rexp(n, rate = l)))
 %>%  mutate(ybar = map_dbl(ysample, mean),
             S2 = map_dbl(ysample, var)
             )
 )
# A tibble: 9,000 × 6
       n lambda .trial ysample     ybar     S2
   <dbl>  <dbl>  <dbl> <list>     <dbl>  <dbl>
 1    10    0.5      1 <dbl [10]>  1.15  0.503
 2    10    0.5      2 <dbl [10]>  2.21  4.09 
 3    10    0.5      3 <dbl [10]>  1.29  0.758
 4    10    0.5      4 <dbl [10]>  2.34  4.60 
 5    10    0.5      5 <dbl [10]>  2.16 10.1  
 6    10    0.5      6 <dbl [10]>  2.03  4.61 
 7    10    0.5      7 <dbl [10]>  2.16  3.77 
 8    10    0.5      8 <dbl [10]>  1.69  1.20 
 9    10    0.5      9 <dbl [10]>  1.31  1.12 
10    10    0.5     10 <dbl [10]>  1.53  1.39 
# ℹ 8,990 more rows
(ggplot(data = many_exp_samples)
 + geom_point(aes(x = ybar,y = S2),
              shape = '.')
 + labs(x = expression(bar(Y)),
        y = expression(S^2),
        title = 'Plots of sample mean vs sample variance')
 + facet_grid(n~lambda,labeller = label_both)
 + theme_classic()
 )

We can see a clear pattern between ybar and S2, indiciating that they are NOT independent