First problem (one mean):
data_sample <- rnorm(n = 50, mean = 10, sd = 2)
observed_stat <- median(data_sample)
boot_median <- function(data, indices)
observed_stat_mean <- mean(data_sample)
boot_median <- function(data, indices) {d <- data[indices]
return(median(d))
}
boot_mean <- function(data, indices) {d <- data[indices]
return(mean(d))
}
R_replicates <- 1000
boot_results_mean <- boot(
  data = data_sample, 
  statistic = boot_mean, 
  R = R_replicates
)
cat("Observed Mean:", boot_results_mean$t0, "\n")
## Observed Mean: 9.928656
cat("Mean of Bootstrap Distribution:", mean(boot_results_mean$t), "\n")
## Mean of Bootstrap Distribution: 9.909241
boot_data_mean <- data.frame(boot_stat = boot_results_mean$t)
ggplot(boot_data_mean, aes(x = boot_stat)) + geom_histogram(bins = 15, fill = "lightgreen", color = "darkgreen", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Sample Mean (R =", R_replicates, ")"), x = "Bootstrap Mean Values", y = "Frequency") + theme_minimal()
Second Problem (one proportion):
data_sample_prop <- rbinom(n = 50, size = 1, prob = 0.6)
observed_prop <- mean(data_sample_prop)
boot_prop <- function(data, indices) {d <- data[indices]
return(mean(d))
}
R_replicates <- 1000
cat(paste("Running", R_replicates, "bootstrap replicates...\n"))
## Running 1000 bootstrap replicates...
boot_results_prop <- boot(data = data_sample_prop, statistic = boot_prop, R = R_replicates)
cat("\n--- Bootstrap Results Summary ---\n")
## 
## --- Bootstrap Results Summary ---
print(boot_results_prop)
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = data_sample_prop, statistic = boot_prop, R = R_replicates)
## 
## 
## Bootstrap Statistics :
##     original  bias    std. error
## t1*      0.6 0.00168  0.07136295
cat("\n95% Percentile Confidence Interval:\n")
## 
## 95% Percentile Confidence Interval:
boot.ci(boot_results_prop, type = "perc")
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = boot_results_prop, type = "perc")
## 
## Intervals : 
## Level     Percentile     
## 95%   ( 0.46,  0.74 )  
## Calculations and Intervals on Original Scale
print(ci_lower_90 <- quantile(boot_results_prop$t, 0.05))
##   5% 
## 0.48
print(ci_upper_90 <- quantile(boot_results_prop$t, 0.95))
##  95% 
## 0.72
print(ci_lower_99 <- quantile(boot_results_prop$t, 0.005))
##   0.5% 
## 0.4199
print(ci_upper_99 <- quantile(boot_results_prop$t, 0.995))
## 99.5% 
##  0.78
boot_data_prop <- data.frame( boot_stat = boot_results_prop$t)
ggplot(boot_data_prop, aes(x = boot_stat)) + geom_histogram(bins = 30, fill = "#FFB6C1", color = "#C71585", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Sample Proportion (R =", R_replicates, ")"), subtitle = paste("Observed Proportion:", round(boot_results_prop$t0, 4)), x = "Bootstrap Proportion Values", y = "Frequency") + theme_minimal()
Third Problem (one proportion):
data_sample_prop <- rbinom(n = 50, size = 1, prob = 0.6)
observed_prop <- mean(data_sample_prop)
boot_prop <- function(data, indices) {d <- data[indices]
return(mean(d))
}
R_replicates <- 1000
cat(paste("Running", R_replicates, "bootstrap replicates...\n"))
## Running 1000 bootstrap replicates...
boot_results_prop <- boot(data = data_sample_prop, statistic = boot_prop, R = R_replicates)
cat("\n--- Bootstrap Results Summary ---\n")
## 
## --- Bootstrap Results Summary ---
print(boot_results_prop)
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = data_sample_prop, statistic = boot_prop, R = R_replicates)
## 
## 
## Bootstrap Statistics :
##     original  bias    std. error
## t1*     0.58 0.00068  0.06866916
cat("\n95% Percentile Confidence Interval:\n")
## 
## 95% Percentile Confidence Interval:
boot.ci(boot_results_prop, type = "perc")
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = boot_results_prop, type = "perc")
## 
## Intervals : 
## Level     Percentile     
## 95%   ( 0.44,  0.72 )  
## Calculations and Intervals on Original Scale
print(ci_lower_90 <- quantile(boot_results_prop$t, 0.05))
##   5% 
## 0.46
print(ci_upper_90 <- quantile(boot_results_prop$t, 0.95))
## 95% 
## 0.7
print(ci_lower_99 <- quantile(boot_results_prop$t, 0.005))
## 0.5% 
##  0.4
print(ci_upper_99 <- quantile(boot_results_prop$t, 0.995))
## 99.5% 
##  0.76
boot_data_prop <- data.frame( boot_stat = boot_results_prop$t)
ggplot(boot_data_prop, aes(x = boot_stat)) + geom_histogram(bins = 30, fill = "#FFB6C1", color = "#C71585", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Sample Proportion (R =", R_replicates, ")"), subtitle = paste("Observed Proportion:", round(boot_results_prop$t0, 4)), x = "Bootstrap Proportion Values", y = "Frequency") + theme_minimal()
Fourth Problem (one proportion):
data_sample_prop <- rbinom(n = 50, size = 1, prob = 0.6)
observed_prop <- mean(data_sample_prop)
boot_prop <- function(data, indices) {d <- data[indices]
return(mean(d))
}
R_replicates <- 1000
cat(paste("Running", R_replicates, "bootstrap replicates...\n"))
## Running 1000 bootstrap replicates...
boot_results_prop <- boot(data = data_sample_prop, statistic = boot_prop, R = R_replicates)
cat("\n--- Bootstrap Results Summary ---\n")
## 
## --- Bootstrap Results Summary ---
print(boot_results_prop)
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = data_sample_prop, statistic = boot_prop, R = R_replicates)
## 
## 
## Bootstrap Statistics :
##     original  bias    std. error
## t1*     0.54 0.00132  0.07098236
cat("\n95% Percentile Confidence Interval:\n")
## 
## 95% Percentile Confidence Interval:
boot.ci(boot_results_prop, type = "perc")
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = boot_results_prop, type = "perc")
## 
## Intervals : 
## Level     Percentile     
## 95%   ( 0.40,  0.68 )  
## Calculations and Intervals on Original Scale
print(ci_lower_90 <- quantile(boot_results_prop$t, 0.05))
##   5% 
## 0.42
print(ci_upper_90 <- quantile(boot_results_prop$t, 0.95))
##  95% 
## 0.66
print(ci_lower_99 <- quantile(boot_results_prop$t, 0.005))
## 0.5% 
## 0.34
print(ci_upper_99 <- quantile(boot_results_prop$t, 0.995))
## 99.5% 
##  0.74
boot_data_prop <- data.frame( boot_stat = boot_results_prop$t)
ggplot(boot_data_prop, aes(x = boot_stat)) + geom_histogram(bins = 30, fill = "#FFB6C1", color = "#C71585", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Sample Proportion (R =", R_replicates, ")"), subtitle = paste("Observed Proportion:", round(boot_results_prop$t0, 4)), x = "Bootstrap Proportion Values", y = "Frequency") + theme_minimal()
Fifth Problem (two proportion):
n1 <- 50
p1_true <- 0.70 
data_sample1 <- rbinom(n = n1, size = 1, prob = p1_true)
n2 <- 60
p2_true <- 0.50 
data_sample2 <- rbinom(n = n2, size = 1, prob = p2_true)
p1_obs <- mean(data_sample1)
p2_obs <- mean(data_sample2)
observed_diff <- p1_obs - p2_obs
cat(paste("Observed Proportion 1 (p1_obs):", round(p1_obs, 4), "\n"))
## Observed Proportion 1 (p1_obs): 0.6
cat(paste("Observed Proportion 2 (p2_obs):", round(p2_obs, 4), "\n"))
## Observed Proportion 2 (p2_obs): 0.4667
cat(paste("Observed Difference (p1_obs - p2_obs):", round(observed_diff, 4), "\n\n"))
## Observed Difference (p1_obs - p2_obs): 0.1333
R_replicates <- 1000
boot_diffs <- numeric(R_replicates)
for (i in 1:R_replicates) {
boot_sample1 <- sample(data_sample1, size = n1, replace = TRUE)
boot_sample2 <- sample(data_sample2, size = n2, replace = TRUE)
  
p1_boot <- mean(boot_sample1)
p2_boot <- mean(boot_sample2)
boot_diffs[i] <- p1_boot - p2_boot
}
boot_mean_diff <- mean(boot_diffs)
boot_std_err <- sd(boot_diffs)
ci_lower_95 <- quantile(boot_diffs, 0.025)
ci_upper_95 <- quantile(boot_diffs, 0.975)
ci_lower_90 <- quantile(boot_diffs, 0.05) 
ci_upper_90 <- quantile(boot_diffs, 0.95)
ci_lower_99 <- quantile(boot_diffs, 0.005) 
ci_upper_99 <- quantile(boot_diffs, 0.995)
print(summary_df <- data.frame(Statistic = c("Observed Diff", "Boot Mean Diff", "95% CI Lower", "95% CI Upper", "90% CI Lower", "90% CI Upper", "99% CI Lower", "99% CI Upper"), Value = c(observed_diff, boot_mean_diff, ci_lower_95, ci_upper_95, ci_lower_90, ci_upper_90, ci_lower_99, ci_upper_99)))
##        Statistic       Value
## 1  Observed Diff  0.13333333
## 2 Boot Mean Diff  0.13096667
## 3   95% CI Lower -0.04666667
## 4   95% CI Upper  0.31666667
## 5   90% CI Lower -0.01666667
## 6   90% CI Upper  0.28016667
## 7   99% CI Lower -0.11670000
## 8   99% CI Upper  0.35001667
boot_data_diff <- data.frame(boot_stat = boot_diffs)
ggplot(boot_data_diff, aes(x = boot_stat)) + geom_histogram(bins = 30, fill = "#ADD8E6", color = "#00008B", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Difference in Proportions (R =", R_replicates, ")"), x = "Bootstrap Difference in Proportions (p1 - p2)", y = "Frequency") + theme_minimal()
Sixth Problem (two proportion):
n1 <- 55
p1_true <- 0.60 
data_sample1 <- rbinom(n = n1, size = 1, prob = p1_true)
n2 <- 70
p2_true <- 0.50 
data_sample2 <- rbinom(n = n2, size = 1, prob = p2_true)
p1_obs <- mean(data_sample1)
p2_obs <- mean(data_sample2)
observed_diff <- p1_obs - p2_obs
cat(paste("Observed Proportion 1 (p1_obs):", round(p1_obs, 4), "\n"))
## Observed Proportion 1 (p1_obs): 0.6364
cat(paste("Observed Proportion 2 (p2_obs):", round(p2_obs, 4), "\n"))
## Observed Proportion 2 (p2_obs): 0.4
cat(paste("Observed Difference (p1_obs - p2_obs):", round(observed_diff, 4), "\n\n"))
## Observed Difference (p1_obs - p2_obs): 0.2364
R_replicates <- 1000
boot_diffs <- numeric(R_replicates)
for (i in 1:R_replicates) {
boot_sample1 <- sample(data_sample1, size = n1, replace = TRUE)
boot_sample2 <- sample(data_sample2, size = n2, replace = TRUE)
  
p1_boot <- mean(boot_sample1)
p2_boot <- mean(boot_sample2)
boot_diffs[i] <- p1_boot - p2_boot
}
boot_mean_diff <- mean(boot_diffs)
boot_std_err <- sd(boot_diffs)
ci_lower_95 <- quantile(boot_diffs, 0.025)
ci_upper_95 <- quantile(boot_diffs, 0.975)
ci_lower_90 <- quantile(boot_diffs, 0.05) 
ci_upper_90 <- quantile(boot_diffs, 0.95)
ci_lower_99 <- quantile(boot_diffs, 0.005) 
ci_upper_99 <- quantile(boot_diffs, 0.995)
print(summary_df <- data.frame(Statistic = c("Observed Diff", "Boot Mean Diff", "95% CI Lower", "95% CI Upper", "90% CI Lower", "90% CI Upper", "99% CI Lower", "99% CI Upper"), Value = c(observed_diff, boot_mean_diff, ci_lower_95, ci_upper_95, ci_lower_90, ci_upper_90, ci_lower_99, ci_upper_99)))
##        Statistic        Value
## 1  Observed Diff  0.236363636
## 2 Boot Mean Diff  0.237844156
## 3   95% CI Lower  0.071396104
## 4   95% CI Upper  0.406493506
## 5   90% CI Lower  0.098571429
## 6   90% CI Upper  0.376623377
## 7   99% CI Lower -0.002597403
## 8   99% CI Upper  0.449370130
boot_data_diff <- data.frame(boot_stat = boot_diffs)
ggplot(boot_data_diff, aes(x = boot_stat)) + geom_histogram(bins = 30, fill = "#ADD8E6", color = "#00008B", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Difference in Proportions (R =", R_replicates, ")"), x = "Bootstrap Difference in Proportions (p1 - p2)", y = "Frequency") + theme_minimal()
Seventh Problem (two proportion):
n1 <- 60
p1_true <- 0.80 
data_sample1 <- rbinom(n = n1, size = 1, prob = p1_true)
n2 <- 60
p2_true <- 0.55 
data_sample2 <- rbinom(n = n2, size = 1, prob = p2_true)
p1_obs <- mean(data_sample1)
p2_obs <- mean(data_sample2)
observed_diff <- p1_obs - p2_obs
cat(paste("Observed Proportion 1 (p1_obs):", round(p1_obs, 4), "\n"))
## Observed Proportion 1 (p1_obs): 0.8333
cat(paste("Observed Proportion 2 (p2_obs):", round(p2_obs, 4), "\n"))
## Observed Proportion 2 (p2_obs): 0.4333
cat(paste("Observed Difference (p1_obs - p2_obs):", round(observed_diff, 4), "\n\n"))
## Observed Difference (p1_obs - p2_obs): 0.4
R_replicates <- 1000
boot_diffs <- numeric(R_replicates)
for (i in 1:R_replicates) {
boot_sample1 <- sample(data_sample1, size = n1, replace = TRUE)
boot_sample2 <- sample(data_sample2, size = n2, replace = TRUE)
  
p1_boot <- mean(boot_sample1)
p2_boot <- mean(boot_sample2)
boot_diffs[i] <- p1_boot - p2_boot
}
boot_mean_diff <- mean(boot_diffs)
boot_std_err <- sd(boot_diffs)
ci_lower_95 <- quantile(boot_diffs, 0.025)
ci_upper_95 <- quantile(boot_diffs, 0.975)
ci_lower_90 <- quantile(boot_diffs, 0.05) 
ci_upper_90 <- quantile(boot_diffs, 0.95)
ci_lower_99 <- quantile(boot_diffs, 0.005) 
ci_upper_99 <- quantile(boot_diffs, 0.995)
print(summary_df <- data.frame(Statistic = c("Observed Diff", "Boot Mean Diff", "95% CI Lower", "95% CI Upper", "90% CI Lower", "90% CI Upper", "99% CI Lower", "99% CI Upper"), Value = c(observed_diff, boot_mean_diff, ci_lower_95, ci_upper_95, ci_lower_90, ci_upper_90, ci_lower_99, ci_upper_99)))
##        Statistic     Value
## 1  Observed Diff 0.4000000
## 2 Boot Mean Diff 0.3978833
## 3   95% CI Lower 0.2495833
## 4   95% CI Upper 0.5500000
## 5   90% CI Lower 0.2666667
## 6   90% CI Upper 0.5333333
## 7   99% CI Lower 0.1833333
## 8   99% CI Upper 0.6000000
boot_data_diff <- data.frame(boot_stat = boot_diffs)
ggplot(boot_data_diff, aes(x = boot_stat)) + geom_histogram(bins = 30, fill = "#ADD8E6", color = "#00008B", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Difference in Proportions (R =", R_replicates, ")"), x = "Bootstrap Difference in Proportions (p1 - p2)", y = "Frequency") + theme_minimal()
Eighth problem (one mean):
data_sample <- rnorm(n = 50, mean = 10, sd = 2)
observed_stat <- median(data_sample)
boot_median <- function(data, indices)
observed_stat_mean <- mean(data_sample)
boot_median <- function(data, indices) {d <- data[indices]
return(median(d))
}
boot_mean <- function(data, indices) {d <- data[indices]
return(mean(d))
}
R_replicates <- 1000
boot_results_mean <- boot(
  data = data_sample, 
  statistic = boot_mean, 
  R = R_replicates
)
cat("Observed Mean:", boot_results_mean$t0, "\n")
## Observed Mean: 9.855972
cat("Mean of Bootstrap Distribution:", mean(boot_results_mean$t), "\n")
## Mean of Bootstrap Distribution: 9.862609
boot_data_mean <- data.frame(boot_stat = boot_results_mean$t)
ggplot(boot_data_mean, aes(x = boot_stat)) + geom_histogram(bins = 15, fill = "lightgreen", color = "darkgreen", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Sample Mean (R =", R_replicates, ")"), x = "Bootstrap Mean Values", y = "Frequency") + theme_minimal()
Ninth problem (one mean):
data_sample <- rnorm(n = 50, mean = 10, sd = 2)
observed_stat <- median(data_sample)
boot_median <- function(data, indices)
observed_stat_mean <- mean(data_sample)
boot_median <- function(data, indices) {d <- data[indices]
return(median(d))
}
boot_mean <- function(data, indices) {d <- data[indices]
return(mean(d))
}
R_replicates <- 1000
boot_results_mean <- boot(
  data = data_sample, 
  statistic = boot_mean, 
  R = R_replicates
)
cat("Observed Mean:", boot_results_mean$t0, "\n")
## Observed Mean: 9.824534
cat("Mean of Bootstrap Distribution:", mean(boot_results_mean$t), "\n")
## Mean of Bootstrap Distribution: 9.81579
boot_data_mean <- data.frame(boot_stat = boot_results_mean$t)
ggplot(boot_data_mean, aes(x = boot_stat)) + geom_histogram(bins = 15, fill = "lightgreen", color = "darkgreen", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Sample Mean (R =", R_replicates, ")"), x = "Bootstrap Mean Values", y = "Frequency") + theme_minimal()
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.