PS 9.1

Recall question 1. Simulate 10,000 replications of each estimator over a grid of μ∈{-2,-1,0,1,2} and n∈{5,10,20,40,80}. Use visualizations of your simulated results to verify that: θ ̂_1 and θ ̂_2 are both unbiased estimators of θ; and Var(θ ̂_1 )<Var(θ ̂_2 ) for all μ and n.

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

N <- 10000

(mu_sims <- 
  parameters(~mu, ~n, c(-2, -1, 0, 1, 2), c(5, 10, 20, 40, 80)) 
  %>% add_trials(N) 
  %>% mutate(Ysample = pmap(list(n, mu), 
                        \(n, mu) rnorm(n, mean = mu, sd = 1))) 
  %>% mutate(ybar = map_dbl(Ysample, mean),
             y2bar = map_dbl(Ysample, \(y) mean(y^2))) 
  %>% mutate(theta_hat1 = ybar^2 - 1/n,
             theta_hat2 = y2bar - 1)
)

# A tibble: 250,000 × 8
      mu     n .trial Ysample    ybar y2bar theta_hat1 theta_hat2
   <dbl> <dbl>  <dbl> <list>    <dbl> <dbl>      <dbl>      <dbl>
 1    -2     5      1 <dbl [5]> -2.22  5.79       4.71       4.79
 2    -2     5      2 <dbl [5]> -1.56  2.53       2.22       1.53
 3    -2     5      3 <dbl [5]> -1.65  3.44       2.51       2.44
 4    -2     5      4 <dbl [5]> -1.57  3.17       2.25       2.17
 5    -2     5      5 <dbl [5]> -1.87  3.52       3.31       2.52
 6    -2     5      6 <dbl [5]> -2.04  4.66       3.96       3.66
 7    -2     5      7 <dbl [5]> -2.38  5.97       5.45       4.97
 8    -2     5      8 <dbl [5]> -2.21  6.16       4.67       5.16
 9    -2     5      9 <dbl [5]> -1.88  4.47       3.32       3.47
10    -2     5     10 <dbl [5]> -2.23  6.13       4.78       5.13
# ℹ 249,990 more rows

(mu_sims %>%
  summarize(mean1 = mean(theta_hat1),
            mean2 = mean(theta_hat2),
            true_theta = mu^2,
            .by = c(mu, n))) %>%
  pivot_longer(cols = c(mean1, mean2),
               names_to = "estimator",
               values_to = "estimate") %>%
  ggplot(aes(mu, estimate, color = estimator)) +
  geom_point() +
  geom_line() +
  geom_line(aes(y = true_theta), linetype = "dashed") +
  facet_wrap(~n)

Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
  always returns an ungrouped data frame and adjust accordingly.

(mu_sims %>%
  summarize(var1 = var(theta_hat1),
            var2 = var(theta_hat2),
            .by = c(mu, n))) %>%
  pivot_longer(cols = c(var1, var2),
               names_to = "estimator",
               values_to = "variance") %>%
  ggplot(aes(n, variance, color = estimator)) +
  geom_line() +
  geom_point() +
  facet_wrap(~mu)