##Two Proportion CI

n1 <- 50
p1_true <- 0.32 
data_sample1 <- rbinom(n = n1, size = 1, prob = p1_true)

n2 <- 55
p2_true <- 0.88 
data_sample2 <- rbinom(n = n2, size = 1, prob = p2_true)

p1_obs <- mean(data_sample1)
p2_obs <- mean(data_sample2)
observed_diff <- p1_obs - p2_obs

cat(paste("Observed Proportion 1 (p1_obs):", round(p1_obs, 4), "\n"))
## Observed Proportion 1 (p1_obs): 0.34
cat(paste("Observed Proportion 2 (p2_obs):", round(p2_obs, 4), "\n"))
## Observed Proportion 2 (p2_obs): 0.8182
cat(paste("Observed Difference (p1_obs - p2_obs):", round(observed_diff, 4), "\n\n"))
## Observed Difference (p1_obs - p2_obs): -0.4782
R_replicates <- 1000
boot_diffs <- numeric(R_replicates)

for (i in 1:R_replicates) {
    boot_sample1 <- sample(data_sample1, size = n1, replace = TRUE)
    boot_sample2 <- sample(data_sample2, size = n2, replace = TRUE)
    
    p1_boot <- mean(boot_sample1)
    p2_boot <- mean(boot_sample2)
    boot_diffs[i] <- p1_boot - p2_boot
}

boot_mean_diff <- mean(boot_diffs)
boot_std_err <- sd(boot_diffs)
ci_lower_95 <- quantile(boot_diffs, 0.025)
ci_upper_95 <- quantile(boot_diffs, 0.975)

ci_lower_90 <- quantile(boot_diffs, 0.05) 
ci_upper_90 <- quantile(boot_diffs, 0.95)

ci_lower_99 <- quantile(boot_diffs, 0.005) 
ci_upper_99 <- quantile(boot_diffs, 0.995)

print(summary_df <- data.frame(Statistic = c("Observed Diff", "Boot Mean Diff", "95% CI Lower", "95% CI Upper", "90% CI Lower", "90% CI Upper", "99% CI Lower", "99% CI Upper"), Value = c(observed_diff, boot_mean_diff, ci_lower_95, ci_upper_95, ci_lower_90, ci_upper_90, ci_lower_99, ci_upper_99)))
##        Statistic      Value
## 1  Observed Diff -0.4781818
## 2 Boot Mean Diff -0.4763018
## 3   95% CI Lower -0.6419545
## 4   95% CI Upper -0.3054545
## 5   90% CI Lower -0.6145455
## 6   90% CI Upper -0.3254545
## 7   99% CI Lower -0.6764182
## 8   99% CI Upper -0.2654455
boot_data_diff <- data.frame(boot_stat = boot_diffs)

ggplot(boot_data_diff, aes(x = boot_stat)) + geom_histogram(bins = 30, fill = "#ADD8E6", color = "#00008B", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Difference in Proportions (R =", R_replicates, ")"), x = "Bootstrap Difference in Proportions (p1 - p2)", y = "Frequency") + theme_minimal()

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.