#problem 1
set.seed(123)
#a
N <- 1000
p <- 0.5
R <- 5000
g_vals <- numeric(R)
for (r in 1:R) {
x <- rbinom(N, 1, p)
p_hat <- mean(x)
g_vals[r] <- p_hat / (1 - p_hat)
}
hist(g_vals, probability = TRUE, breaks = 50,
main = "Histogram of g(Xbar), N = 1000, p = 0.5",
xlab = expression(g(bar(X))))
xgrid <- seq(min(g_vals), max(g_vals), length.out = 1000)
lines(xgrid, dnorm(xgrid, mean = 1, sd = sqrt(4/N)),
col = "red", lwd = 2)
set.seed(123)
#b)
p <- 0.5
R <- 5000
N_vals <- c(10, 30, 50, 100, 500)
par(mfrow = c(3, 2))
for (N in N_vals) {
g_vals <- rep(NA, R)
for (r in 1:R) {
x <- rbinom(N, 1, p)
p_hat <- mean(x)
if (p_hat < 1) {
g_vals[r] <- p_hat / (1 - p_hat)
}
}
hist(g_vals, probability = TRUE, breaks = 40,
main = paste("N =", N),
xlab = expression(g(bar(X))),
col = "lightgray", border = "white")
xgrid <- seq(min(g_vals, na.rm = TRUE),
max(g_vals, na.rm = TRUE),
length.out = 1000)
lines(xgrid, dnorm(xgrid, mean = 1, sd = sqrt(4/N)),
col = "red", lwd = 2)
}
#problem 2
set.seed(123)
N_vals <- c(10, 30, 100, 1000)
p_vals <- c(0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99)
R <- 100
results_delta <- data.frame()
for (N in N_vals) {
for (p in p_vals) {
for (r in 1:R) {
x <- rbinom(N, 1, p)
p_hat <- mean(x)
fail_delta <- (p_hat == 1)
if (fail_delta) {
E_hat <- NA
V_hat <- NA
} else {
E_hat <- p_hat / (1 - p_hat)
V_hat <- p_hat / (N * (1 - p_hat)^3)
}
results_delta <- rbind(
results_delta,
data.frame(
N = N,
p = p,
rep = r,
p_hat = p_hat,
E_delta = E_hat,
V_delta = V_hat,
fail_delta = fail_delta
)
)
}
}
}
aggregate(fail_delta ~ N + p, data = results_delta, mean)
## N p fail_delta
## 1 10 0.01 0.00
## 2 30 0.01 0.00
## 3 100 0.01 0.00
## 4 1000 0.01 0.00
## 5 10 0.10 0.00
## 6 30 0.10 0.00
## 7 100 0.10 0.00
## 8 1000 0.10 0.00
## 9 10 0.25 0.00
## 10 30 0.25 0.00
## 11 100 0.25 0.00
## 12 1000 0.25 0.00
## 13 10 0.50 0.00
## 14 30 0.50 0.00
## 15 100 0.50 0.00
## 16 1000 0.50 0.00
## 17 10 0.75 0.10
## 18 30 0.75 0.00
## 19 100 0.75 0.00
## 20 1000 0.75 0.00
## 21 10 0.90 0.37
## 22 30 0.90 0.04
## 23 100 0.90 0.00
## 24 1000 0.90 0.00
## 25 10 0.99 0.91
## 26 30 0.99 0.79
## 27 100 0.99 0.37
## 28 1000 0.99 0.00
#b)
set.seed(123)
B <- 1000
results_boot <- data.frame()
for (N in N_vals) {
for (p in p_vals) {
for (r in 1:R) {
x <- rbinom(N, 1, p)
g_boot <- rep(NA, B)
fail_boot_vec <- logical(B)
for (b in 1:B) {
x_boot <- sample(x, N, replace = TRUE)
p_boot <- mean(x_boot)
fail_boot_vec[b] <- (p_boot == 1)
if (!fail_boot_vec[b]) {
g_boot[b] <- p_boot / (1 - p_boot)
}
}
results_boot <- rbind(
results_boot,
data.frame(
N = N,
p = p,
rep = r,
E_boot = mean(g_boot, na.rm = TRUE),
V_boot = var(g_boot, na.rm = TRUE),
n_boot_fail = sum(fail_boot_vec),
prop_boot_fail = mean(fail_boot_vec),
any_boot_fail = any(fail_boot_vec)
)
)
}
}
}
aggregate(prop_boot_fail ~ N + p, data = results_boot, mean)
## N p prop_boot_fail
## 1 10 0.01 0.00000
## 2 30 0.01 0.00000
## 3 100 0.01 0.00000
## 4 1000 0.01 0.00000
## 5 10 0.10 0.00000
## 6 30 0.10 0.00000
## 7 100 0.10 0.00000
## 8 1000 0.10 0.00000
## 9 10 0.25 0.00046
## 10 30 0.25 0.00000
## 11 100 0.25 0.00000
## 12 1000 0.25 0.00000
## 13 10 0.50 0.01182
## 14 30 0.50 0.00000
## 15 100 0.50 0.00000
## 16 1000 0.50 0.00000
## 17 10 0.75 0.15963
## 18 30 0.75 0.00208
## 19 100 0.75 0.00000
## 20 1000 0.75 0.00000
## 21 10 0.90 0.61375
## 22 30 0.90 0.16608
## 23 100 0.90 0.00099
## 24 1000 0.90 0.00000
## 25 10 0.99 0.96735
## 26 30 0.99 0.78867
## 27 100 0.99 0.51581
## 28 1000 0.99 0.00037
#problem 3
set.seed(123)
N_vals <- c(10, 30, 100, 1000)
p_vals <- c(0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99)
R <- 100
results_bayes <- data.frame()
for (N in N_vals) {
for (p in p_vals) {
for (r in 1:R) {
# Simulating sample
x <- rbinom(N, 1, p)
S <- sum(x)
# Posterior parameters
a <- S + 0.5
b <- N - S + 0.5
# Posterior mean of z = p/(1-p)
if (b > 1) {
post_mean <- a / (b - 1)
} else {
post_mean <- NA
}
# Posterior variance of z
if (b > 2) {
post_var <- a * (a + b - 1) / ((b - 2) * (b - 1)^2)
} else {
post_var <- NA
}
# MAP of z
if (S >= 1) {
post_map <- (S - 0.5) / (N - S + 1.5)
} else {
post_map <- 0
}
# 95% credible interval using Beta quantiles for p
p_lower <- qbeta(0.025, a, b)
p_upper <- qbeta(0.975, a, b)
z_lower <- p_lower / (1 - p_lower)
z_upper <- p_upper / (1 - p_upper)
results_bayes <- rbind(
results_bayes,
data.frame(
N = N,
p = p,
rep = r,
S = S,
a = a,
b = b,
bayes_mean = post_mean,
bayes_var = post_var,
bayes_map = post_map,
ci_lower = z_lower,
ci_upper = z_upper,
mean_exists = !is.na(post_mean),
var_exists = !is.na(post_var)
)
)
}
}
}
aggregate(mean_exists ~ N + p, data = results_bayes, mean)
## N p mean_exists
## 1 10 0.01 1.00
## 2 30 0.01 1.00
## 3 100 0.01 1.00
## 4 1000 0.01 1.00
## 5 10 0.10 1.00
## 6 30 0.10 1.00
## 7 100 0.10 1.00
## 8 1000 0.10 1.00
## 9 10 0.25 1.00
## 10 30 0.25 1.00
## 11 100 0.25 1.00
## 12 1000 0.25 1.00
## 13 10 0.50 1.00
## 14 30 0.50 1.00
## 15 100 0.50 1.00
## 16 1000 0.50 1.00
## 17 10 0.75 0.90
## 18 30 0.75 1.00
## 19 100 0.75 1.00
## 20 1000 0.75 1.00
## 21 10 0.90 0.63
## 22 30 0.90 0.96
## 23 100 0.90 1.00
## 24 1000 0.90 1.00
## 25 10 0.99 0.09
## 26 30 0.99 0.21
## 27 100 0.99 0.63
## 28 1000 0.99 1.00
aggregate(var_exists ~ N + p, data = results_bayes, mean)
## N p var_exists
## 1 10 0.01 1.00
## 2 30 0.01 1.00
## 3 100 0.01 1.00
## 4 1000 0.01 1.00
## 5 10 0.10 1.00
## 6 30 0.10 1.00
## 7 100 0.10 1.00
## 8 1000 0.10 1.00
## 9 10 0.25 1.00
## 10 30 0.25 1.00
## 11 100 0.25 1.00
## 12 1000 0.25 1.00
## 13 10 0.50 1.00
## 14 30 0.50 1.00
## 15 100 0.50 1.00
## 16 1000 0.50 1.00
## 17 10 0.75 0.61
## 18 30 0.75 1.00
## 19 100 0.75 1.00
## 20 1000 0.75 1.00
## 21 10 0.90 0.25
## 22 30 0.90 0.77
## 23 100 0.90 1.00
## 24 1000 0.90 1.00
## 25 10 0.99 0.00
## 26 30 0.99 0.02
## 27 100 0.99 0.28
## 28 1000 0.99 1.00
aggregate(cbind(bayes_mean, bayes_var, bayes_map) ~ N + p,
data = results_bayes, mean, na.rm = TRUE)
## N p bayes_mean bayes_var bayes_map
## 1 10 0.01 0.06501548 8.634224e-03 0.004761905
## 2 30 0.01 0.02674205 1.005270e-03 0.005353436
## 3 100 0.01 0.01697502 1.793993e-04 0.007850033
## 4 1000 0.01 0.01113306 1.141590e-05 0.010101478
## 5 10 0.10 0.16973248 3.202819e-02 0.060311072
## 6 30 0.10 0.13417787 6.523420e-03 0.090211370
## 7 100 0.10 0.12059013 1.563154e-03 0.106961778
## 8 1000 0.10 0.11473495 1.429805e-04 0.113367257
## 9 10 0.25 0.46605315 1.542652e-01 0.242570385
## 10 30 0.25 0.38035879 2.812287e-02 0.304929589
## 11 100 0.25 0.35690816 6.880250e-03 0.334139369
## 12 1000 0.25 0.33516822 6.001054e-04 0.332942784
## 13 10 0.50 1.55637696 4.712709e+00 0.797369442
## 14 30 0.50 1.14081712 2.272749e-01 0.928156385
## 15 100 0.50 1.01861377 4.435602e-02 0.959018722
## 16 1000 0.50 1.00497695 4.064507e-03 0.998959723
## 17 10 0.75 3.30601093 2.476746e+01 1.463099742
## 18 30 0.75 3.90612164 1.287914e+01 2.704176829
## 19 100 0.75 3.21665365 6.662436e-01 2.919352618
## 20 1000 0.75 3.06307353 5.142503e-02 3.034240231
## 21 10 0.90 4.34095238 4.289379e+01 1.789841270
## 22 30 0.90 10.23484308 1.982078e+02 5.325113203
## 23 100 0.90 10.80738913 2.823281e+01 8.459935838
## 24 1000 0.90 9.01567294 9.386323e-01 8.827236883
## 25 30 0.99 19.00000000 7.600000e+02 7.857142857
## 26 100 0.99 59.95238095 7.102222e+03 26.482993197
## 27 1000 0.99 125.52629823 1.298774e+04 95.895275412
results_bayes$ci_length <- results_bayes$ci_upper - results_bayes$ci_lower
aggregate(ci_length ~ N + p, data = results_bayes, mean)
## N p ci_length
## 1 10 0.01 3.101900e-01
## 2 30 0.01 1.073456e-01
## 3 100 0.01 4.768982e-02
## 4 1000 0.01 1.296497e-02
## 5 10 0.10 5.700470e-01
## 6 30 0.10 2.872319e-01
## 7 100 0.10 1.510704e-01
## 8 1000 0.10 4.676131e-02
## 9 10 0.25 1.279402e+00
## 10 30 0.25 6.150712e-01
## 11 100 0.25 3.185894e-01
## 12 1000 0.25 9.583893e-02
## 13 10 0.50 4.501711e+00
## 14 30 0.50 1.702021e+00
## 15 100 0.50 8.072527e-01
## 16 1000 0.50 2.494111e-01
## 17 10 0.75 2.119832e+03
## 18 30 0.75 7.833440e+00
## 19 100 0.75 3.046027e+00
## 20 1000 0.75 8.841045e-01
## 21 10 0.90 7.761526e+03
## 22 30 0.90 2.537608e+03
## 23 100 0.90 1.662124e+01
## 24 1000 0.90 3.753537e+00
## 25 10 0.99 1.900545e+04
## 26 30 0.99 4.871229e+04
## 27 100 0.99 7.589646e+04
## 28 1000 0.99 2.032750e+02
#problem 4
# Delta
delta_E <- results_delta[, c("N", "p", "rep", "E_delta")]
names(delta_E)[4] <- "estimate"
delta_E$method <- "Delta"
delta_E$type <- "Expectation"
# Bootstrap
boot_E <- results_boot[, c("N", "p", "rep", "E_boot")]
names(boot_E)[4] <- "estimate"
boot_E$method <- "Bootstrap"
boot_E$type <- "Expectation"
# Bayesian
bayes_E <- results_bayes[, c("N", "p", "rep", "bayes_mean")]
names(bayes_E)[4] <- "estimate"
bayes_E$method <- "Bayesian"
bayes_E$type <- "Expectation"
comparison_E <- rbind(delta_E, boot_E, bayes_E)
comparison_E <- comparison_E[is.finite(comparison_E$estimate), ]
# Delta
delta_V <- results_delta[, c("N", "p", "rep", "V_delta")]
names(delta_V)[4] <- "estimate"
delta_V$method <- "Delta"
delta_V$type <- "Variance"
# Bootstrap
boot_V <- results_boot[, c("N", "p", "rep", "V_boot")]
names(boot_V)[4] <- "estimate"
boot_V$method <- "Bootstrap"
boot_V$type <- "Variance"
# Bayesian
bayes_V <- results_bayes[, c("N", "p", "rep", "bayes_var")]
names(bayes_V)[4] <- "estimate"
bayes_V$method <- "Bayesian"
bayes_V$type <- "Variance"
comparison_V <- rbind(delta_V, boot_V, bayes_V)
comparison_V <- comparison_V[is.finite(comparison_V$estimate), ]
library(ggplot2)
ggplot(comparison_E, aes(x = method, y = estimate, fill = method)) +
geom_boxplot(outlier.size = 0.8) +
facet_grid(N ~ p, scales = "free_y") +
labs(
title = "Comparison of Expectation Estimates",
subtitle = "Delta vs Bootstrap vs Bayesian",
x = "Method",
y = "Estimated Expectation"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplot(comparison_V, aes(x = method, y = estimate, fill = method)) +
geom_boxplot(outlier.size = 0.8) +
facet_grid(N ~ p, scales = "free_y") +
labs(
title = "Comparison of Variance Estimates",
subtitle = "Delta vs Bootstrap vs Bayesian",
x = "Method",
y = "Estimated Variance"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplot(comparison_E, aes(x = method, y = estimate, fill = method)) +
geom_boxplot(outlier.size = 0.8) +
facet_grid(N ~ p, scales = "free_y") +
scale_y_log10() +
labs(
title = "Comparison of Expectation Estimates (log scale)",
x = "Method",
y = "Estimated Expectation (log scale)"
) +
theme_minimal()
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 506 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
ggplot(comparison_V, aes(x = method, y = estimate, fill = method)) +
geom_boxplot(outlier.size = 0.8) +
facet_grid(N ~ p, scales = "free_y") +
scale_y_log10() +
labs(
title = "Comparison of Variance Estimates (log scale)",
x = "Method",
y = "Estimated Variance (log scale)"
) +
theme_minimal()
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## Removed 506 rows containing non-finite outside the scale range
## (`stat_boxplot()`).