Chapter 9.1

Question 8

library(tidyverse)
library(purrrfect)

(two_normal_estimators <- parameters(~mu, ~n,
                                     c(-2, -1, 0, 1, 2), c(5,10,20,40,80))
  %>% add_trials(10000)
  %>% mutate(Ysample = pmap(list(mu,n), .f = \(m,n) rnorm(n,m,1)))
  %>% mutate(theta1 = map2_dbl(Ysample, n, \(y, n) (sum(y)/n)^2 - 1/n),
             theta2 = map2_dbl(Ysample, n, \(y, n) sum(y^2)/n - 1))
)

# A tibble: 250,000 × 6
      mu     n .trial Ysample   theta1 theta2
   <dbl> <dbl>  <dbl> <list>     <dbl>  <dbl>
 1    -2     5      1 <dbl [5]>   3.51   3.40
 2    -2     5      2 <dbl [5]>   4.63   4.06
 3    -2     5      3 <dbl [5]>   2.37   1.99
 4    -2     5      4 <dbl [5]>   5.62   5.58
 5    -2     5      5 <dbl [5]>   1.46   1.33
 6    -2     5      6 <dbl [5]>   3.22   3.57
 7    -2     5      7 <dbl [5]>   3.53   3.44
 8    -2     5      8 <dbl [5]>   2.67   3.58
 9    -2     5      9 <dbl [5]>   3.13   2.70
10    -2     5     10 <dbl [5]>   8.07   8.92
# ℹ 249,990 more rows

(sample_bias <- two_normal_estimators
  %>% summarize(bias_theta1 = mean(theta1 - mu^2),
                bias_theta2 = mean(theta2 - mu^2),
                var_theta1 = var(theta1),
                var_theta2 = var(theta2),
                .by = c(n, mu))
  )

# A tibble: 25 × 6
       n    mu bias_theta1 bias_theta2 var_theta1 var_theta2
   <dbl> <dbl>       <dbl>       <dbl>      <dbl>      <dbl>
 1     5    -2   -0.0115     -0.0152       3.25       3.57  
 2    10    -2   -0.0182     -0.0121       1.62       1.78  
 3    20    -2   -0.00752    -0.0119       0.812      0.912 
 4    40    -2   -0.00683    -0.00429      0.398      0.447 
 5    80    -2    0.00936     0.0121       0.199      0.224 
 6     5    -1    0.00173     0.00218      0.906      1.23  
 7    10    -1   -0.00374     0.00387      0.422      0.599 
 8    20    -1   -0.00406    -0.00200      0.204      0.300 
 9    40    -1    0.000190    0.000528     0.0994     0.148 
10    80    -1   -0.00144    -0.00137      0.0508     0.0752
# ℹ 15 more rows

(ggplot(data = sample_bias, aes(x = n))
  + geom_point(aes(y = bias_theta1, color = 'theta1'))
  + geom_line(aes(y = bias_theta1, color = 'theta1'))
  + geom_point(aes(y = bias_theta2, color = 'theta2'))
  + geom_line(aes(y = bias_theta2, color = 'theta2'))
  + geom_hline(yintercept = 0)
  + facet_wrap(~mu, labeller = label_bquote(mu == .(mu)))
  + labs(y = 'Bias',
         x = 'Sample size (n)',
         color = '',
         title = expression(paste('Simulated bias of two different estimators of ', mu^2)))
  + theme_classic()
)

(ggplot(data = sample_bias, aes(x = n))
  + geom_point(aes(y = var_theta1, color = 'theta1'))
  + geom_line(aes(y = var_theta1, color = 'theta1'))
  + geom_point(aes(y = var_theta2, color = 'theta2'))
  + geom_line(aes(y = var_theta2, color = 'theta2'))
  + geom_hline(yintercept = 0)
  + facet_wrap(~mu, labeller = label_bquote(mu == .(mu)))
  + labs(y = 'Variance',
         x = 'Sample size (n)',
         color = '',
         title = expression(paste('Simulated variance of two different estimators of ', mu^2)))
  + theme_classic()
)