library(ggplot2)
# Parameters
N <- 1000
p_true <- 0.5
samples <- 5000
# Simulate g(p_hat)
p_hats <- apply(matrix(rbinom(N * samples, 1, p_true), nrow=samples), 1, mean)
g_p_hats <- p_hats / (1 - p_hats)
g_p_hats <- pmax(pmin(g_p_hats, qnorm(0.999)), qnorm(0.001))
# Theoretical variance of g(p_hat) using the Delta method
var_g_p_hat_theoretical <- (1 / (1 - p_true)^4) * (p_true * (1 - p_true) / N)
std_g_p_hat_theoretical <- sqrt(var_g_p_hat_theoretical)
# Create data frame for plotting
data <- data.frame(g_p_hats)
# Plotting
ggplot(data, aes(x=g_p_hats)) +
geom_histogram(bins=50, aes(y=..density..), fill='purple', alpha=0.6) +
stat_function(fun=function(x) dnorm(x, mean(g_p_hats), std_g_p_hat_theoretical),
color='red', size=1.5) +
labs(x='g(p_hat)', y='Density', title='Histogram of g(p_hat) with Theoretical Normal Overlay') +
theme_minimal()
simulate_and_plot_g_p_hat <- function(N, p_true=0.5, samples=5000) {
# Simulate g(p_hat)
p_hats <- apply(matrix(rbinom(N * samples, 1, p_true), nrow=samples), 1, mean)
g_p_hats <- p_hats / (1 - p_hats)
g_p_hats <- pmax(pmin(g_p_hats, qnorm(0.999)), qnorm(0.001))
# Theoretical variance of g(p_hat) using the Delta method
var_g_p_hat_theoretical <- (1 / (1 - p_true)^4) * (p_true * (1 - p_true) / N)
std_g_p_hat_theoretical <- sqrt(var_g_p_hat_theoretical)
# Plotting
hist(g_p_hats, breaks=50, freq=FALSE, col='lightblue', xlab='g(p_hat)', ylab='Density',
main=paste('Histogram of g(p_hat) for N=', N, ' with Theoretical Normal Overlay'))
# Overlay the normal curve
x <- seq(min(g_p_hats), max(g_p_hats), length.out=1000)
lines(x, dnorm(x, mean(g_p_hats), std_g_p_hat_theoretical), col='red', lwd=2)
legend('topright', legend=c('Empirical', 'Theoretical Normal'), fill=c('lightblue', 'red'))
}
# List of different sample sizes to simulate
sample_sizes <- c(10, 30, 50, 100, 500)
for (N in sample_sizes) {
simulate_and_plot_g_p_hat(N)
}
NA
NA
2(A)
# Define function for calculating variance using Delta method
delta_method_var <- function(p_hat, N) {
g_prime_p_hat <- 1 / (1 - p_hat)^2
var_p_hat <- p_hat * (1 - p_hat) / N
return((g_prime_p_hat ^ 2) * var_p_hat)
}
# Configurations
N_values <- c(10, 30, 100, 1000)
p_values <- c(0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99)
repeats <- 100
# Store results
results <- matrix(NA, nrow = length(N_values) * length(p_values), ncol = 4)
row_index <- 1
for (N in N_values) {
for (p in p_values) {
e_g_p_hats <- numeric(repeats)
var_g_p_hats <- numeric(repeats)
for (i in 1:repeats) {
# Simulate data
data <- rbinom(N, 1, p)
p_hat <- mean(data)
g_p_hat <- p_hat / (1 - p_hat)
# Calculate estimates using Delta method
var_g_p_hat <- delta_method_var(p_hat, N)
e_g_p_hats[i] <- g_p_hat
var_g_p_hats[i] <- var_g_p_hat
}
# Aggregate results
mean_e_g_p_hat <- mean(e_g_p_hats)
mean_var_g_p_hat <- mean(var_g_p_hats)
results[row_index, ] <- c(N, p, mean_e_g_p_hat, mean_var_g_p_hat)
row_index <- row_index + 1
}
}
# Display results
for (i in 1:nrow(results)) {
cat("N=", results[i, 1], ", p=", sprintf("%.2f", results[i, 2]), ", E[g(p_hat)]=", sprintf("%.4f", results[i, 3]), ", Var[g(p_hat)]=", sprintf("%.4f", results[i, 4]), "\n")
}
N= 10 , p= 0.01 , E[g(p_hat)]= 0.0056 , Var[g(p_hat)]= 0.0007
N= 10 , p= 0.10 , E[g(p_hat)]= 0.1117 , Var[g(p_hat)]= 0.0184
N= 10 , p= 0.25 , E[g(p_hat)]= 0.3694 , Var[g(p_hat)]= 0.0940
N= 10 , p= 0.50 , E[g(p_hat)]= 1.2874 , Var[g(p_hat)]= 1.1962
N= 10 , p= 0.75 , E[g(p_hat)]= Inf , Var[g(p_hat)]= NaN
N= 10 , p= 0.90 , E[g(p_hat)]= Inf , Var[g(p_hat)]= NaN
N= 10 , p= 0.99 , E[g(p_hat)]= Inf , Var[g(p_hat)]= NaN
N= 30 , p= 0.01 , E[g(p_hat)]= 0.0115 , Var[g(p_hat)]= 0.0004
N= 30 , p= 0.10 , E[g(p_hat)]= 0.1181 , Var[g(p_hat)]= 0.0054
N= 30 , p= 0.25 , E[g(p_hat)]= 0.3375 , Var[g(p_hat)]= 0.0218
N= 30 , p= 0.50 , E[g(p_hat)]= 1.0774 , Var[g(p_hat)]= 0.1756
N= 30 , p= 0.75 , E[g(p_hat)]= 4.1003 , Var[g(p_hat)]= 14.7857
N= 30 , p= 0.90 , E[g(p_hat)]= Inf , Var[g(p_hat)]= NaN
N= 30 , p= 0.99 , E[g(p_hat)]= Inf , Var[g(p_hat)]= NaN
N= 100 , p= 0.01 , E[g(p_hat)]= 0.0104 , Var[g(p_hat)]= 0.0001
N= 100 , p= 0.10 , E[g(p_hat)]= 0.1160 , Var[g(p_hat)]= 0.0015
N= 100 , p= 0.25 , E[g(p_hat)]= 0.3482 , Var[g(p_hat)]= 0.0065
N= 100 , p= 0.50 , E[g(p_hat)]= 1.0188 , Var[g(p_hat)]= 0.0450
N= 100 , p= 0.75 , E[g(p_hat)]= 3.1650 , Var[g(p_hat)]= 0.6130
N= 100 , p= 0.90 , E[g(p_hat)]= 10.4642 , Var[g(p_hat)]= 30.6328
N= 100 , p= 0.99 , E[g(p_hat)]= Inf , Var[g(p_hat)]= NaN
N= 1000 , p= 0.01 , E[g(p_hat)]= 0.0104 , Var[g(p_hat)]= 0.0000
N= 1000 , p= 0.10 , E[g(p_hat)]= 0.1106 , Var[g(p_hat)]= 0.0001
N= 1000 , p= 0.25 , E[g(p_hat)]= 0.3278 , Var[g(p_hat)]= 0.0006
N= 1000 , p= 0.50 , E[g(p_hat)]= 1.0038 , Var[g(p_hat)]= 0.0041
N= 1000 , p= 0.75 , E[g(p_hat)]= 3.0351 , Var[g(p_hat)]= 0.0500
N= 1000 , p= 0.90 , E[g(p_hat)]= 9.2124 , Var[g(p_hat)]= 0.9847
N= 1000 , p= 0.99 , E[g(p_hat)]= 108.4729 , Var[g(p_hat)]= 2856.9333
#2(b)
bootstrap_g_x <- function(data, B=1000) {
N <- length(data)
g_x_bs <- numeric(B)
for (b in 1:B) {
sample_b <- sample(data, size=N, replace=TRUE)
p_hat_b <- mean(sample_b)
g_x_bs[b] <- p_hat_b / (1 - p_hat_b)
}
g_bar <- mean(g_x_bs)
var_g <- var(g_x_bs) # Use default ddof=1 for sample variance
return(list(g_bar=g_bar, var_g=var_g))
}
# Example usage for a single configuration
N <- 1000
p <- 0.5 # True p value
data <- rbinom(N, 1, p)
B <- 1000 # Number of bootstrap samples
result <- bootstrap_g_x(data, B)
cat("Bootstrap estimates for N=", N, ", p=", p, ":\n")
Bootstrap estimates for N= 1000 , p= 0.5 :
cat("Mean of g(X_bar): ", sprintf("%.4f", result$g_bar), "\n")
Mean of g(X_bar): 1.0027
cat("Variance of g(X_bar): ", sprintf("%.4f", result$var_g), "\n")
Variance of g(X_bar): 0.0038
# Load necessary library
library(ggplot2)
# Define the bootstrap_g_x function
bootstrap_g_x <- function(data, B=1000) {
N <- length(data)
g_x_bs <- numeric(B)
for (b in 1:B) {
sample_b <- sample(data, size=N, replace=TRUE)
p_hat_b <- mean(sample_b)
g_x_bs[b] <- p_hat_b / (1 - p_hat_b)
}
g_bar <- mean(g_x_bs)
var_g <- var(g_x_bs) # Use default ddof=1 for sample variance
return(list(g_bar=g_bar, var_g=var_g, g_x_bs=g_x_bs))
}
# Example usage for a single configuration
N <- 100
p <- 0.5 # True p value
data <- rbinom(N, 1, p)
B <- 1000 # Number of bootstrap samples
result <- bootstrap_g_x(data, B)
# Plot histogram of bootstrap estimates
ggplot(data.frame(g_x_bs = result$g_x_bs), aes(x = g_x_bs)) +
geom_histogram(binwidth = 0.1, fill = "skyblue", color = "black", alpha = 0.8) +
labs(title = "Histogram of Bootstrap Estimates of g(X_bar)",
x = "Bootstrap Estimates of g(X_bar)",
y = "Frequency")
#2(b) this is the code for 28 combinations
bootstrap_g_x <- function(data, B=1000) {
N <- length(data)
g_x_bs <- numeric(B)
for (b in 1:B) {
sample_b <- sample(data, size=N, replace=TRUE)
p_hat_b <- mean(sample_b)
g_x_bs[b] <- p_hat_b / (1 - p_hat_b)
}
g_bar <- mean(g_x_bs)
var_g <- var(g_x_bs) # Use default ddof=1 for sample variance
return(list(g_bar=g_bar, var_g=var_g))
}
# Loop over sample sizes and true parameters
sample_sizes <- c(10, 30, 100, 1000)
true_parameters <- c(0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99)
B <- 1000 # Number of bootstrap samples
for (N in sample_sizes) {
for (p in true_parameters) {
data <- rbinom(N, 1, p) # Generate Bernoulli realizations
result <- bootstrap_g_x(data, B)
cat("Bootstrap estimates for N=", N, ", p=", p, ":\n")
cat("Mean of g(X_bar): ", sprintf("%.4f", result$g_bar), "\n")
cat("Variance of g(X_bar): ", sprintf("%.4f", result$var_g), "\n\n")
}
}
Bootstrap estimates for N= 10 , p= 0.01 :
Mean of g(X_bar): 0.0000
Variance of g(X_bar): 0.0000
Bootstrap estimates for N= 10 , p= 0.1 :
Mean of g(X_bar): 0.1263
Variance of g(X_bar): 0.0186
Bootstrap estimates for N= 10 , p= 0.25 :
Mean of g(X_bar): 0.8087
Variance of g(X_bar): 0.3900
Bootstrap estimates for N= 10 , p= 0.5 :
Mean of g(X_bar): Inf
Variance of g(X_bar): NaN
Bootstrap estimates for N= 10 , p= 0.75 :
Mean of g(X_bar): Inf
Variance of g(X_bar): NaN
Bootstrap estimates for N= 10 , p= 0.9 :
Mean of g(X_bar): Inf
Variance of g(X_bar): NaN
Bootstrap estimates for N= 10 , p= 0.99 :
Mean of g(X_bar): Inf
Variance of g(X_bar): NaN
Bootstrap estimates for N= 30 , p= 0.01 :
Mean of g(X_bar): 0.0000
Variance of g(X_bar): 0.0000
Bootstrap estimates for N= 30 , p= 0.1 :
Mean of g(X_bar): 0.2106
Variance of g(X_bar): 0.0118
Bootstrap estimates for N= 30 , p= 0.25 :
Mean of g(X_bar): 0.4472
Variance of g(X_bar): 0.0340
Bootstrap estimates for N= 30 , p= 0.5 :
Mean of g(X_bar): 1.4269
Variance of g(X_bar): 0.3467
Bootstrap estimates for N= 30 , p= 0.75 :
Mean of g(X_bar): 2.2880
Variance of g(X_bar): 1.2825
Bootstrap estimates for N= 30 , p= 0.9 :
Mean of g(X_bar): Inf
Variance of g(X_bar): NaN
Bootstrap estimates for N= 30 , p= 0.99 :
Mean of g(X_bar): Inf
Variance of g(X_bar): NaN
Bootstrap estimates for N= 100 , p= 0.01 :
Mean of g(X_bar): 0.0209
Variance of g(X_bar): 0.0002
Bootstrap estimates for N= 100 , p= 0.1 :
Mean of g(X_bar): 0.0889
Variance of g(X_bar): 0.0010
Bootstrap estimates for N= 100 , p= 0.25 :
Mean of g(X_bar): 0.3003
Variance of g(X_bar): 0.0055
Bootstrap estimates for N= 100 , p= 0.5 :
Mean of g(X_bar): 0.9807
Variance of g(X_bar): 0.0384
Bootstrap estimates for N= 100 , p= 0.75 :
Mean of g(X_bar): 2.9499
Variance of g(X_bar): 0.5129
Bootstrap estimates for N= 100 , p= 0.9 :
Mean of g(X_bar): 11.3745
Variance of g(X_bar): 22.1051
Bootstrap estimates for N= 100 , p= 0.99 :
Mean of g(X_bar): Inf
Variance of g(X_bar): NaN
Bootstrap estimates for N= 1000 , p= 0.01 :
Mean of g(X_bar): 0.0091
Variance of g(X_bar): 0.0000
Bootstrap estimates for N= 1000 , p= 0.1 :
Mean of g(X_bar): 0.1339
Variance of g(X_bar): 0.0002
Bootstrap estimates for N= 1000 , p= 0.25 :
Mean of g(X_bar): 0.3626
Variance of g(X_bar): 0.0007
Bootstrap estimates for N= 1000 , p= 0.5 :
Mean of g(X_bar): 0.9633
Variance of g(X_bar): 0.0037
Bootstrap estimates for N= 1000 , p= 0.75 :
Mean of g(X_bar): 2.7163
Variance of g(X_bar): 0.0373
Bootstrap estimates for N= 1000 , p= 0.9 :
Mean of g(X_bar): 8.3943
Variance of g(X_bar): 0.7508
Bootstrap estimates for N= 1000 , p= 0.99 :
Mean of g(X_bar): 143.1331
Variance of g(X_bar): 4437.9260
#3(c)
# Define the sample size and observed data
N <- 1000
p <- 0.5 # True value of p
x <- rbinom(N, 1, p) # Simulate Bernoulli trials
# Define the number of Monte Carlo samples
num_samples <- 100000
# Function to calculate z given p
calculate_z <- function(p) {
p / (1 - p)
}
# Function to perform Monte Carlo simulation
monte_carlo_simulation <- function(x, num_samples) {
# Initialize vector to store z samples
z_samples <- numeric(num_samples)
# Perform Monte Carlo simulation
for (i in 1:num_samples) {
# Sample from the posterior distribution of p
p_posterior <- rbeta(1, sum(x) + 0.5, N - sum(x) + 0.5)
# Calculate z from sampled p
z_samples[i] <- calculate_z(p_posterior)
}
return(z_samples)
}
# Perform Monte Carlo simulation
z_samples <- monte_carlo_simulation(x, num_samples)
# Calculate the expected value and variance of z
expected_z <- mean(z_samples)
variance_z <- var(z_samples)
# Print the results
cat("Estimated expected value of z:", expected_z, "\n")
Estimated expected value of z: 0.9743661
cat("Estimated variance of z:", variance_z, "\n")
Estimated variance of z: 0.00376868
# Define the sample size and observed data
N <- 10
p <- 0.5 # True value of p
x <- rbinom(N, 1, p) # Simulate Bernoulli trials
# Define the number of Monte Carlo samples
num_samples <- 10000
# Function to calculate z given p
calculate_z <- function(p) {
p / (1 - p)
}
# Function to perform Monte Carlo simulation
monte_carlo_simulation <- function(x, num_samples) {
# Initialize vector to store z samples
z_samples <- numeric(num_samples)
# Perform Monte Carlo simulation
for (i in 1:num_samples) {
# Sample from the posterior distribution of p
p_posterior <- rbeta(1, sum(x) + 0.5, N - sum(x) + 0.5)
# Calculate z from sampled p
z_samples[i] <- calculate_z(p_posterior)
}
return(z_samples)
}
# Perform Monte Carlo simulation
z_samples <- monte_carlo_simulation(x, num_samples)
# Plot histogram of z samples
hist(z_samples, breaks = 30, col = "skyblue", main = "Histogram of z", xlab = "z", ylab = "Frequency")
# Add vertical lines for mean and standard deviation
abline(v = mean(z_samples), col = "red", lwd = 2)
abline(v = mean(z_samples) + sd(z_samples), col = "blue", lwd = 2, lty = 2)
abline(v = mean(z_samples) - sd(z_samples), col = "blue", lwd = 2, lty = 2)
# Add legend
legend("topright", legend = c("Mean", "Mean + SD", "Mean - SD"), col = c("red", "blue", "blue"), lty = 1:2, cex = 0.8)
# Define the sample sizes and true parameters
sample_sizes <- c(10, 30, 100, 1000)
true_ps <- c(0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99)
# Initialize an empty dataframe to store results
results_df <- data.frame(N = integer(), P = numeric(), Expectation = numeric(), Variance = numeric(), stringsAsFactors = FALSE)
# Function to perform Monte Carlo simulation
monte_carlo_simulation <- function(N, p, num_samples) {
# Simulate Bernoulli trials
x <- rbinom(N, 1, p)
# Calculate z_samples
z_samples <- numeric(num_samples)
for (i in 1:num_samples) {
# Sample from the posterior distribution of p
p_posterior <- rbeta(1, sum(x) + 0.5, N - sum(x) + 0.5)
# Calculate z from sampled p
z_samples[i] <- p_posterior / (1 - p_posterior)
}
return(z_samples)
}
# Loop over each combination of sample size and true parameter
for (N in sample_sizes) {
for (p in true_ps) {
# Perform Monte Carlo simulation
z_samples <- monte_carlo_simulation(N, p, num_samples = 100000)
# Calculate the expected value and variance of z
expected_z <- mean(z_samples)
variance_z <- var(z_samples)
# Store the results
results_df <- rbind(results_df, data.frame(N = N, P = p, Expectation = expected_z, Variance = variance_z))
}
}
# Print the results
print(results_df)
NA
write.csv(results, "delta_method_totalfile.csv", row.names = FALSE)
write.csv(result, "bootstrap_method_totalfile.csv", row.names = FALSE)
write.csv(results_df, "bayesian_method_totalfile.csv", row.names = FALSE)