#sim 9.4.1
one_run_s1 <- function (n, beta, t = 5 ) {
y <- rexp (n, rate = 1 / beta)
beta_hat <- mean (y)
theta_hat <- exp (- t / beta_hat)
se_hat <- theta_hat * t / (beta_hat * sqrt (n))
lower <- theta_hat - 1.96 * se_hat
upper <- theta_hat + 1.96 * se_hat
theta_true <- exp (- t / beta)
covered <- as.numeric (lower <= theta_true & theta_true <= upper)
data.frame (
n = n,
beta = beta,
theta_true = theta_true,
theta_hat = theta_hat,
lower = lower,
upper = upper,
covered = covered
)
}
sim_s1 <- function (n, beta, reps = 10000 , t = 5 ) {
out <- replicate (reps, one_run_s1 (n, beta, t), simplify = FALSE )
out <- do.call (rbind, out)
data.frame (
n = n,
beta = beta,
coverage = mean (out$ covered)
)
}
n_vals <- c (20 , 40 , 80 , 160 , 320 , 640 )
beta_vals <- c (0.5 , 2 , 4 )
results_s1 <- do.call (rbind,
lapply (n_vals, function (n) {
do.call (rbind, lapply (beta_vals, function (beta) sim_s1 (n, beta)))
})
)
results_s1
n beta coverage
1 20 0.5 0.7490
2 20 2.0 0.8908
3 20 4.0 0.9239
4 40 0.5 0.7953
5 40 2.0 0.9185
6 40 4.0 0.9388
7 80 0.5 0.8376
8 80 2.0 0.9328
9 80 4.0 0.9450
10 160 0.5 0.8709
11 160 2.0 0.9427
12 160 4.0 0.9463
13 320 0.5 0.9030
14 320 2.0 0.9466
15 320 4.0 0.9541
16 640 0.5 0.9214
17 640 2.0 0.9477
18 640 4.0 0.9476
library (ggplot2)
ggplot (results_s1, aes (x = n, y = coverage, color = factor (beta))) +
geom_line () +
geom_point () +
geom_hline (yintercept = 0.95 , linetype = "dashed" ) +
labs (color = "beta" )
sim 9.4.2
loglik.alpha <- function (alpha, yvals) {
if (alpha <= 0 ) return (- Inf )
n <- length (yvals)
ybar <- mean (yvals)
beta_hat <- ybar / alpha
ll <- sum (dgamma (yvals, shape = alpha, scale = beta_hat, log = TRUE ))
return (ll)
}
get.mle <- function (yvals) {
opt <- optimize (
f = loglik.alpha,
interval = c (0.01 , 50 ),
yvals = yvals,
maximum = TRUE
)
alpha_hat <- opt$ maximum
beta_hat <- mean (yvals) / alpha_hat
c (alpha_hat, beta_hat)
}
sim_gamma_mle <- function (n, reps = 10000 , alpha = 4 , beta = 2 ) {
out <- replicate (reps, {
y <- rgamma (n, shape = alpha, scale = beta)
est <- get.mle (y)
c (alpha_hat = est[1 ], beta_hat = est[2 ])
})
out <- as.data.frame (t (out))
out$ n <- n
out
}
n_vals <- c (5 , 20 , 50 , 100 , 150 , 200 )
gamma_sims <- do.call (rbind, lapply (n_vals, sim_gamma_mle))
library (ggplot2)
ggplot (gamma_sims, aes (x = alpha_hat)) +
geom_histogram (bins = 30 ) +
geom_vline (xintercept = 4 , linetype = "dashed" ) +
facet_wrap (~ n, scales = "free" ) +
coord_cartesian (xlim = c (0 , 15 ))
ggplot (gamma_sims, aes (x = beta_hat)) +
geom_histogram (bins = 30 ) +
geom_vline (xintercept = 2 , linetype = "dashed" ) +
facet_wrap (~ n, scales = "free" ) +
coord_cartesian (xlim = c (0 , 5 ))
9.4.3
one_run_s3 <- function (n, p) {
y <- rbinom (n, size = 1 , prob = p)
p_hat <- mean (y)
tau_hat <- p_hat * (1 - p_hat)
crlb_hat <- p_hat * (1 - p_hat) * (1 - 2 * p_hat)^ 2 / n
se_hat <- sqrt (crlb_hat)
lower <- tau_hat - 1.96 * se_hat
upper <- tau_hat + 1.96 * se_hat
tau_true <- p * (1 - p)
covered <- as.numeric (lower <= tau_true & tau_true <= upper)
data.frame (
n = n,
p = p,
p_hat = p_hat,
tau_hat = tau_hat,
lower = lower,
upper = upper,
tau_true = tau_true,
covered = covered
)
}
sim_s3 <- function (n, p, reps = 10000 ) {
out <- replicate (reps, one_run_s3 (n, p), simplify = FALSE )
do.call (rbind, out)
}
n_vals <- c (10 , 20 , 40 , 80 , 160 , 320 , 640 , 1280 )
p_vals <- c (0.1 , 0.2 , 0.3 , 0.4 , 0.5 )
s3_all <- do.call (rbind,
lapply (n_vals, function (n) {
do.call (rbind, lapply (p_vals, function (p) sim_s3 (n, p)))
})
)
library (ggplot2)
ggplot (s3_all, aes (x = tau_hat)) +
geom_density () +
facet_grid (p ~ n, scales = "free" ) +
labs (x = expression (hat (tau)), y = "density" )
coverage_s3 <- aggregate (covered ~ n + p, data = s3_all, mean)
ggplot (coverage_s3, aes (x = n, y = covered, color = factor (p))) +
geom_line () +
geom_point () +
geom_hline (yintercept = 0.95 , linetype = "dashed" ) +
labs (y = "coverage" , color = "p" )
τ(p)=p(1−p),0≤p≤1
this is a parabola opening downward, with maximum at p=0.5p=0.5p=0.5:
τ(0.5)=0.25(0.5)=0.25τ(0.5)=0.25
and minimum 0 at p=0p=0p=0 or p=1p=1p=1. so the parameter space is
[0, 1/4][0,; 1/4][0,1/4]
the issue is that when p=0.5p=0.5p=0.5, the true value τ=1/4/4τ=1/4 is on the boundary of the parameter space, which breaks the regularity condition