many many sims of 9.4

Author

courtney casey

#sim 9.4.1
one_run_s1 <- function(n, beta, t = 5) {
  y <- rexp(n, rate = 1 / beta)
  beta_hat <- mean(y)
  theta_hat <- exp(-t / beta_hat)
  se_hat <- theta_hat * t / (beta_hat * sqrt(n))
  
  lower <- theta_hat - 1.96 * se_hat
  upper <- theta_hat + 1.96 * se_hat
  
  theta_true <- exp(-t / beta)
  covered <- as.numeric(lower <= theta_true & theta_true <= upper)
  
  data.frame(
    n = n,
    beta = beta,
    theta_true = theta_true,
    theta_hat = theta_hat,
    lower = lower,
    upper = upper,
    covered = covered
  )
}

sim_s1 <- function(n, beta, reps = 10000, t = 5) {
  out <- replicate(reps, one_run_s1(n, beta, t), simplify = FALSE)
  out <- do.call(rbind, out)
  data.frame(
    n = n,
    beta = beta,
    coverage = mean(out$covered)
  )
}

n_vals <- c(20, 40, 80, 160, 320, 640)
beta_vals <- c(0.5, 2, 4)

results_s1 <- do.call(rbind,
  lapply(n_vals, function(n) {
    do.call(rbind, lapply(beta_vals, function(beta) sim_s1(n, beta)))
  })
)

results_s1

     n beta coverage
1   20  0.5   0.7490
2   20  2.0   0.8908
3   20  4.0   0.9239
4   40  0.5   0.7953
5   40  2.0   0.9185
6   40  4.0   0.9388
7   80  0.5   0.8376
8   80  2.0   0.9328
9   80  4.0   0.9450
10 160  0.5   0.8709
11 160  2.0   0.9427
12 160  4.0   0.9463
13 320  0.5   0.9030
14 320  2.0   0.9466
15 320  4.0   0.9541
16 640  0.5   0.9214
17 640  2.0   0.9477
18 640  4.0   0.9476

library(ggplot2)

ggplot(results_s1, aes(x = n, y = coverage, color = factor(beta))) +
  geom_line() +
  geom_point() +
  geom_hline(yintercept = 0.95, linetype = "dashed") +
  labs(color = "beta")

sim 9.4.2

loglik.alpha <- function(alpha, yvals) {
  if (alpha <= 0) return(-Inf)
  
  n <- length(yvals)
  ybar <- mean(yvals)
  beta_hat <- ybar / alpha
  
  ll <- sum(dgamma(yvals, shape = alpha, scale = beta_hat, log = TRUE))
  return(ll)
} 
get.mle <- function(yvals) {
  opt <- optimize(
    f = loglik.alpha,
    interval = c(0.01, 50),
    yvals = yvals,
    maximum = TRUE
  )
  
  alpha_hat <- opt$maximum
  beta_hat <- mean(yvals) / alpha_hat
  
  c(alpha_hat, beta_hat)
}

sim_gamma_mle <- function(n, reps = 10000, alpha = 4, beta = 2) {
  out <- replicate(reps, {
    y <- rgamma(n, shape = alpha, scale = beta)
    est <- get.mle(y)
    c(alpha_hat = est[1], beta_hat = est[2])
  })
  
  out <- as.data.frame(t(out))
  out$n <- n
  out
}

n_vals <- c(5, 20, 50, 100, 150, 200)

gamma_sims <- do.call(rbind, lapply(n_vals, sim_gamma_mle))

library(ggplot2)

ggplot(gamma_sims, aes(x = alpha_hat)) +
  geom_histogram(bins = 30) +
  geom_vline(xintercept = 4, linetype = "dashed") +
  facet_wrap(~ n, scales = "free") +
  coord_cartesian(xlim = c(0, 15))

ggplot(gamma_sims, aes(x = beta_hat)) +
  geom_histogram(bins = 30) +
  geom_vline(xintercept = 2, linetype = "dashed") +
  facet_wrap(~ n, scales = "free") +
  coord_cartesian(xlim = c(0, 5))

9.4.3

one_run_s3 <- function(n, p) {
  y <- rbinom(n, size = 1, prob = p)
  p_hat <- mean(y)
  tau_hat <- p_hat * (1 - p_hat)
  
  crlb_hat <- p_hat * (1 - p_hat) * (1 - 2 * p_hat)^2 / n
  se_hat <- sqrt(crlb_hat)
  
  lower <- tau_hat - 1.96 * se_hat
  upper <- tau_hat + 1.96 * se_hat
  
  tau_true <- p * (1 - p)
  covered <- as.numeric(lower <= tau_true & tau_true <= upper)
  
  data.frame(
    n = n,
    p = p,
    p_hat = p_hat,
    tau_hat = tau_hat,
    lower = lower,
    upper = upper,
    tau_true = tau_true,
    covered = covered
  )
}

sim_s3 <- function(n, p, reps = 10000) {
  out <- replicate(reps, one_run_s3(n, p), simplify = FALSE)
  do.call(rbind, out)
}

n_vals <- c(10, 20, 40, 80, 160, 320, 640, 1280)
p_vals <- c(0.1, 0.2, 0.3, 0.4, 0.5)

s3_all <- do.call(rbind,
  lapply(n_vals, function(n) {
    do.call(rbind, lapply(p_vals, function(p) sim_s3(n, p)))
  })
)

library(ggplot2)

ggplot(s3_all, aes(x = tau_hat)) +
  geom_density() +
  facet_grid(p ~ n, scales = "free") +
  labs(x = expression(hat(tau)), y = "density")

coverage_s3 <- aggregate(covered ~ n + p, data = s3_all, mean)

ggplot(coverage_s3, aes(x = n, y = covered, color = factor(p))) +
  geom_line() +
  geom_point() +
  geom_hline(yintercept = 0.95, linetype = "dashed") +
  labs(y = "coverage", color = "p")

τ(p)=p(1−p),0≤p≤1

this is a parabola opening downward, with maximum at p=0.5p=0.5p=0.5:

τ(0.5)=0.25(0.5)=0.25τ(0.5)=0.25

and minimum 0 at p=0p=0p=0 or p=1p=1p=1. so the parameter space is

[0, 1/4][0,; 1/4][0,1/4]

the issue is that when p=0.5p=0.5p=0.5, the true value τ=1/4/4τ=1/4 is on the boundary of the parameter space, which breaks the regularity condition