Chapter 8.1

Question 9

library(purrrfect)
library(tidyverse)

(sim_study <- parameters(~n, c(3,11,51))
  %>% add_trials(10000)
  %>% mutate(Ysample = map(n, .f = \(n) rexp(n, rate = 1/5)))
  %>% mutate(ybar = map_dbl(Ysample, mean),
             median = map_dbl(Ysample, median)) 
  %>% mutate(theta1 = ybar,
             theta2 = ybar * log(2),
             theta3 = median)
)

# A tibble: 30,000 × 8
       n .trial Ysample    ybar median theta1 theta2 theta3
   <dbl>  <dbl> <list>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1     3      1 <dbl [3]>  2.59   2.81   2.59   1.80   2.81
 2     3      2 <dbl [3]>  9.31   6.58   9.31   6.45   6.58
 3     3      3 <dbl [3]>  6.04   2.80   6.04   4.18   2.80
 4     3      4 <dbl [3]>  4.95   3.69   4.95   3.43   3.69
 5     3      5 <dbl [3]>  1.76   2.35   1.76   1.22   2.35
 6     3      6 <dbl [3]>  5.91   7.82   5.91   4.09   7.82
 7     3      7 <dbl [3]>  5.11   4.62   5.11   3.54   4.62
 8     3      8 <dbl [3]>  4.85   3.87   4.85   3.36   3.87
 9     3      9 <dbl [3]>  3.09   3.99   3.09   2.14   3.99
10     3     10 <dbl [3]>  8.49   8.59   8.49   5.89   8.59
# ℹ 29,990 more rows

(sim_study
  %>% pivot_longer(theta1 : theta3,
                   names_to = 'estimator',
                   values_to = 'estimate')
  %>% select(-Ysample)
  %>% summarize(bias = mean(estimate - 5*log(2)),
                variance = var(estimate),
                .by = c(n, estimator)
                )
  %>% mutate(mse = bias^2 + variance)
) -> summary_table

(final_table <- summary_table
  %>% pivot_longer(c(bias, variance, mse),
                   names_to = 'stat',
                   values_to = 'value')
  %>% mutate(row = case_when(
    stat == 'bias' ~ paste0("B(", estimator, ")"),
    stat == 'variance' ~ paste0("Var(", estimator, ")"),
    stat == 'mse' ~ paste0("MSE", estimator, ")")))
  %>% select(row, n, value)
  %>% pivot_wider(names_from = 'n',
                  values_from = 'value')
  %>% arrange(row)
)

# A tibble: 9 × 4
  row              `3`   `11`     `51`
  <chr>          <dbl>  <dbl>    <dbl>
1 B(theta1)    1.53    1.55    1.53   
2 B(theta2)   -0.00395 0.0124 -0.00354
3 B(theta3)    0.707   0.245   0.0479 
4 MSEtheta1)  10.7     4.68    2.83   
5 MSEtheta2)   4.02    1.09    0.239  
6 MSEtheta3)   9.52    2.46    0.510  
7 Var(theta1)  8.37    2.27    0.496  
8 Var(theta2)  4.02    1.09    0.239  
9 Var(theta3)  9.02    2.40    0.507

library(knitr)
kable(final_table, digits = 3,
      col.names = c("", "n = 3", "n = 11", "n = 51"))

	n = 3	n = 11	n = 51
B(theta1)	1.529	1.552	1.529
B(theta2)	-0.004	0.012	-0.004
B(theta3)	0.707	0.245	0.048
MSEtheta1)	10.708	4.680	2.835
MSEtheta2)	4.022	1.091	0.239
MSEtheta3)	9.522	2.461	0.510
Var(theta1)	8.371	2.271	0.496
Var(theta2)	4.022	1.091	0.239
Var(theta3)	9.022	2.401	0.507

When looking at the bias across our three estimators of theta, it appears that theta1 has a large overestimation bias, theta2 is unbiased, theta3 is shows a small overestimation bias. When examining variances, we see that theta1 and theta3 have larger variances for smaller values of n, but the variance decreases as n increases. Theta2 on the other hand appears to have much smaller variances across all levels of n when compared to theta1 and theta3. Lastly, when investigating MSE we see that theta2 has the smallest MSE across all values of n. These discoveries indicate that theta2 is the best estimator of the median.