PS_9_1

Question 8

library(tidyverse)

Warning: package 'tidyverse' was built under R version 4.5.2

Warning: package 'ggplot2' was built under R version 4.5.2

Warning: package 'readr' was built under R version 4.5.2

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.2.0
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

N <- 10000

sim_data <- parameters(
  ~n, ~mu,
  c(5, 10, 20, 40, 80), c(-2, -1, 0, 1, 2)
) %>% 
  add_trials(N) %>% 
  mutate(
    y_sample = pmap(list(n, mu), .f = \(n, m) rnorm(n, mean = m, sd = 1)),
    true_theta = mu^2,
    ybar_1 = map_dbl(y_sample, mean),
    ybar_2 = map_dbl(y_sample, ~mean(.x^2)),
    theta1_hat = ybar_1^2 - (1/n),
    theta2_hat = ybar_2 - 1
  ) %>% 
  pivot_longer(
    cols = c(theta1_hat, theta2_hat),
    names_to = "estimator",
    values_to = "estimate"
  )

ggplot(
  sim_data %>% 
    summarize(
      bias = mean(estimate) - first(true_theta), 
      .by = c(n, mu, estimator)
    ),
  aes(x = factor(n), y = bias, color = estimator, shape = estimator)
) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
  geom_point(size = 3.5, alpha = 0.8, position = position_dodge(width = 0.4)) +
  facet_wrap(~mu, labeller = label_both) +
  labs(
    title = "Verifying Estimator Unbiasedness",
    subtitle = "Simulated bias is near 0",
    x = "Sample Size (n)",
    y = "Empirical Bias",
    color = "Estimator",
    shape = "Estimator"
  ) +
  theme_minimal()

ggplot(
  sim_data %>% summarize(empirical_variance = var(estimate), .by = c(n, mu, estimator)), 
  aes(x = n, y = empirical_variance, color = estimator, group = estimator)
) +
  geom_line(linewidth = 1) +
  geom_point(size = 2.5) +
  facet_wrap(~mu, labeller = label_both) +
  labs(
    title = "Comparison of Estimator Variance",
    subtitle = expression("Checking if Var(" * hat(theta)[1] * ") < Var(" * hat(theta)[2] * ")"),
    x = "Sample Size (n)",
    y = "Empirical Variance",
    color = "Estimator"
  ) +
  theme_minimal() +
  scale_y_log10()