N <- 10000
sim_data <- parameters(
~n, ~mu,
c(5, 10, 20, 40, 80), c(-2, -1, 0, 1, 2)
) %>%
add_trials(N) %>%
mutate(
y_sample = pmap(list(n, mu), .f = \(n, m) rnorm(n, mean = m, sd = 1)),
true_theta = mu^2,
ybar_1 = map_dbl(y_sample, mean),
ybar_2 = map_dbl(y_sample, ~mean(.x^2)),
theta1_hat = ybar_1^2 - (1/n),
theta2_hat = ybar_2 - 1
) %>%
pivot_longer(
cols = c(theta1_hat, theta2_hat),
names_to = "estimator",
values_to = "estimate"
)
ggplot(
sim_data %>%
summarize(
bias = mean(estimate) - first(true_theta),
.by = c(n, mu, estimator)
),
aes(x = factor(n), y = bias, color = estimator, shape = estimator)
) +
geom_hline(yintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
geom_point(size = 3.5, alpha = 0.8, position = position_dodge(width = 0.4)) +
facet_wrap(~mu, labeller = label_both) +
labs(
title = "Verifying Estimator Unbiasedness",
subtitle = "Simulated bias is near 0",
x = "Sample Size (n)",
y = "Empirical Bias",
color = "Estimator",
shape = "Estimator"
) +
theme_minimal()
ggplot(
sim_data %>% summarize(empirical_variance = var(estimate), .by = c(n, mu, estimator)),
aes(x = n, y = empirical_variance, color = estimator, group = estimator)
) +
geom_line(linewidth = 1) +
geom_point(size = 2.5) +
facet_wrap(~mu, labeller = label_both) +
labs(
title = "Comparison of Estimator Variance",
subtitle = expression("Checking if Var(" * hat(theta)[1] * ") < Var(" * hat(theta)[2] * ")"),
x = "Sample Size (n)",
y = "Empirical Variance",
color = "Estimator"
) +
theme_minimal() +
scale_y_log10()