Exam 2 Practice

Confidence Interval Problems

First problem (one mean):

data_sample <- rnorm(n = 50, mean = 10, sd = 2)
observed_stat <- median(data_sample)
boot_median <- function(data, indices)
observed_stat_mean <- mean(data_sample)
boot_median <- function(data, indices) {d <- data[indices]
return(median(d))
}
boot_mean <- function(data, indices) {d <- data[indices]
return(mean(d))
}
R_replicates <- 1000
boot_results_mean <- boot(
  data = data_sample, 
  statistic = boot_mean, 
  R = R_replicates
)

cat("Observed Mean:", boot_results_mean$t0, "\n")

## Observed Mean: 9.928656

cat("Mean of Bootstrap Distribution:", mean(boot_results_mean$t), "\n")

## Mean of Bootstrap Distribution: 9.909241

boot_data_mean <- data.frame(boot_stat = boot_results_mean$t)

ggplot(boot_data_mean, aes(x = boot_stat)) + geom_histogram(bins = 15, fill = "lightgreen", color = "darkgreen", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Sample Mean (R =", R_replicates, ")"), x = "Bootstrap Mean Values", y = "Frequency") + theme_minimal()

Second Problem (one proportion):

data_sample_prop <- rbinom(n = 50, size = 1, prob = 0.6)
observed_prop <- mean(data_sample_prop)
boot_prop <- function(data, indices) {d <- data[indices]
return(mean(d))
}

R_replicates <- 1000
cat(paste("Running", R_replicates, "bootstrap replicates...\n"))

## Running 1000 bootstrap replicates...

boot_results_prop <- boot(data = data_sample_prop, statistic = boot_prop, R = R_replicates)

cat("\n--- Bootstrap Results Summary ---\n")

## 
## --- Bootstrap Results Summary ---

print(boot_results_prop)

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = data_sample_prop, statistic = boot_prop, R = R_replicates)
## 
## 
## Bootstrap Statistics :
##     original  bias    std. error
## t1*      0.6 0.00168  0.07136295

cat("\n95% Percentile Confidence Interval:\n")

## 
## 95% Percentile Confidence Interval:

boot.ci(boot_results_prop, type = "perc")

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = boot_results_prop, type = "perc")
## 
## Intervals : 
## Level     Percentile     
## 95%   ( 0.46,  0.74 )  
## Calculations and Intervals on Original Scale

print(ci_lower_90 <- quantile(boot_results_prop$t, 0.05))

##   5% 
## 0.48

print(ci_upper_90 <- quantile(boot_results_prop$t, 0.95))

##  95% 
## 0.72

print(ci_lower_99 <- quantile(boot_results_prop$t, 0.005))

##   0.5% 
## 0.4199

print(ci_upper_99 <- quantile(boot_results_prop$t, 0.995))

## 99.5% 
##  0.78

boot_data_prop <- data.frame( boot_stat = boot_results_prop$t)

ggplot(boot_data_prop, aes(x = boot_stat)) + geom_histogram(bins = 30, fill = "#FFB6C1", color = "#C71585", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Sample Proportion (R =", R_replicates, ")"), subtitle = paste("Observed Proportion:", round(boot_results_prop$t0, 4)), x = "Bootstrap Proportion Values", y = "Frequency") + theme_minimal()

Third Problem (one proportion):

data_sample_prop <- rbinom(n = 50, size = 1, prob = 0.6)
observed_prop <- mean(data_sample_prop)
boot_prop <- function(data, indices) {d <- data[indices]
return(mean(d))
}

R_replicates <- 1000
cat(paste("Running", R_replicates, "bootstrap replicates...\n"))

## Running 1000 bootstrap replicates...

boot_results_prop <- boot(data = data_sample_prop, statistic = boot_prop, R = R_replicates)

cat("\n--- Bootstrap Results Summary ---\n")

## 
## --- Bootstrap Results Summary ---

print(boot_results_prop)

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = data_sample_prop, statistic = boot_prop, R = R_replicates)
## 
## 
## Bootstrap Statistics :
##     original  bias    std. error
## t1*     0.58 0.00068  0.06866916

cat("\n95% Percentile Confidence Interval:\n")

## 
## 95% Percentile Confidence Interval:

boot.ci(boot_results_prop, type = "perc")

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = boot_results_prop, type = "perc")
## 
## Intervals : 
## Level     Percentile     
## 95%   ( 0.44,  0.72 )  
## Calculations and Intervals on Original Scale

print(ci_lower_90 <- quantile(boot_results_prop$t, 0.05))

##   5% 
## 0.46

print(ci_upper_90 <- quantile(boot_results_prop$t, 0.95))

## 95% 
## 0.7

print(ci_lower_99 <- quantile(boot_results_prop$t, 0.005))

## 0.5% 
##  0.4

print(ci_upper_99 <- quantile(boot_results_prop$t, 0.995))

## 99.5% 
##  0.76

boot_data_prop <- data.frame( boot_stat = boot_results_prop$t)

ggplot(boot_data_prop, aes(x = boot_stat)) + geom_histogram(bins = 30, fill = "#FFB6C1", color = "#C71585", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Sample Proportion (R =", R_replicates, ")"), subtitle = paste("Observed Proportion:", round(boot_results_prop$t0, 4)), x = "Bootstrap Proportion Values", y = "Frequency") + theme_minimal()

Fourth Problem (one proportion):

data_sample_prop <- rbinom(n = 50, size = 1, prob = 0.6)
observed_prop <- mean(data_sample_prop)
boot_prop <- function(data, indices) {d <- data[indices]
return(mean(d))
}

R_replicates <- 1000
cat(paste("Running", R_replicates, "bootstrap replicates...\n"))

## Running 1000 bootstrap replicates...

boot_results_prop <- boot(data = data_sample_prop, statistic = boot_prop, R = R_replicates)

cat("\n--- Bootstrap Results Summary ---\n")

## 
## --- Bootstrap Results Summary ---

print(boot_results_prop)

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = data_sample_prop, statistic = boot_prop, R = R_replicates)
## 
## 
## Bootstrap Statistics :
##     original  bias    std. error
## t1*     0.54 0.00132  0.07098236

cat("\n95% Percentile Confidence Interval:\n")

## 
## 95% Percentile Confidence Interval:

boot.ci(boot_results_prop, type = "perc")

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = boot_results_prop, type = "perc")
## 
## Intervals : 
## Level     Percentile     
## 95%   ( 0.40,  0.68 )  
## Calculations and Intervals on Original Scale

print(ci_lower_90 <- quantile(boot_results_prop$t, 0.05))

##   5% 
## 0.42

print(ci_upper_90 <- quantile(boot_results_prop$t, 0.95))

##  95% 
## 0.66

print(ci_lower_99 <- quantile(boot_results_prop$t, 0.005))

## 0.5% 
## 0.34

print(ci_upper_99 <- quantile(boot_results_prop$t, 0.995))

## 99.5% 
##  0.74

boot_data_prop <- data.frame( boot_stat = boot_results_prop$t)

ggplot(boot_data_prop, aes(x = boot_stat)) + geom_histogram(bins = 30, fill = "#FFB6C1", color = "#C71585", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Sample Proportion (R =", R_replicates, ")"), subtitle = paste("Observed Proportion:", round(boot_results_prop$t0, 4)), x = "Bootstrap Proportion Values", y = "Frequency") + theme_minimal()

Fifth Problem (two proportion):

n1 <- 50
p1_true <- 0.70 
data_sample1 <- rbinom(n = n1, size = 1, prob = p1_true)

n2 <- 60
p2_true <- 0.50 
data_sample2 <- rbinom(n = n2, size = 1, prob = p2_true)

p1_obs <- mean(data_sample1)
p2_obs <- mean(data_sample2)
observed_diff <- p1_obs - p2_obs

cat(paste("Observed Proportion 1 (p1_obs):", round(p1_obs, 4), "\n"))

## Observed Proportion 1 (p1_obs): 0.6

cat(paste("Observed Proportion 2 (p2_obs):", round(p2_obs, 4), "\n"))

## Observed Proportion 2 (p2_obs): 0.4667

cat(paste("Observed Difference (p1_obs - p2_obs):", round(observed_diff, 4), "\n\n"))

## Observed Difference (p1_obs - p2_obs): 0.1333

R_replicates <- 1000
boot_diffs <- numeric(R_replicates)

for (i in 1:R_replicates) {
boot_sample1 <- sample(data_sample1, size = n1, replace = TRUE)
boot_sample2 <- sample(data_sample2, size = n2, replace = TRUE)
  
p1_boot <- mean(boot_sample1)
p2_boot <- mean(boot_sample2)
boot_diffs[i] <- p1_boot - p2_boot
}

boot_mean_diff <- mean(boot_diffs)
boot_std_err <- sd(boot_diffs)
ci_lower_95 <- quantile(boot_diffs, 0.025)
ci_upper_95 <- quantile(boot_diffs, 0.975)

ci_lower_90 <- quantile(boot_diffs, 0.05) 
ci_upper_90 <- quantile(boot_diffs, 0.95)

ci_lower_99 <- quantile(boot_diffs, 0.005) 
ci_upper_99 <- quantile(boot_diffs, 0.995)

print(summary_df <- data.frame(Statistic = c("Observed Diff", "Boot Mean Diff", "95% CI Lower", "95% CI Upper", "90% CI Lower", "90% CI Upper", "99% CI Lower", "99% CI Upper"), Value = c(observed_diff, boot_mean_diff, ci_lower_95, ci_upper_95, ci_lower_90, ci_upper_90, ci_lower_99, ci_upper_99)))

##        Statistic       Value
## 1  Observed Diff  0.13333333
## 2 Boot Mean Diff  0.13096667
## 3   95% CI Lower -0.04666667
## 4   95% CI Upper  0.31666667
## 5   90% CI Lower -0.01666667
## 6   90% CI Upper  0.28016667
## 7   99% CI Lower -0.11670000
## 8   99% CI Upper  0.35001667

boot_data_diff <- data.frame(boot_stat = boot_diffs)

ggplot(boot_data_diff, aes(x = boot_stat)) + geom_histogram(bins = 30, fill = "#ADD8E6", color = "#00008B", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Difference in Proportions (R =", R_replicates, ")"), x = "Bootstrap Difference in Proportions (p1 - p2)", y = "Frequency") + theme_minimal()

Sixth Problem (two proportion):

n1 <- 55
p1_true <- 0.60 
data_sample1 <- rbinom(n = n1, size = 1, prob = p1_true)

n2 <- 70
p2_true <- 0.50 
data_sample2 <- rbinom(n = n2, size = 1, prob = p2_true)

p1_obs <- mean(data_sample1)
p2_obs <- mean(data_sample2)
observed_diff <- p1_obs - p2_obs

cat(paste("Observed Proportion 1 (p1_obs):", round(p1_obs, 4), "\n"))

## Observed Proportion 1 (p1_obs): 0.6364

cat(paste("Observed Proportion 2 (p2_obs):", round(p2_obs, 4), "\n"))

## Observed Proportion 2 (p2_obs): 0.4

cat(paste("Observed Difference (p1_obs - p2_obs):", round(observed_diff, 4), "\n\n"))

## Observed Difference (p1_obs - p2_obs): 0.2364

R_replicates <- 1000
boot_diffs <- numeric(R_replicates)

for (i in 1:R_replicates) {
boot_sample1 <- sample(data_sample1, size = n1, replace = TRUE)
boot_sample2 <- sample(data_sample2, size = n2, replace = TRUE)
  
p1_boot <- mean(boot_sample1)
p2_boot <- mean(boot_sample2)
boot_diffs[i] <- p1_boot - p2_boot
}

boot_mean_diff <- mean(boot_diffs)
boot_std_err <- sd(boot_diffs)
ci_lower_95 <- quantile(boot_diffs, 0.025)
ci_upper_95 <- quantile(boot_diffs, 0.975)

ci_lower_90 <- quantile(boot_diffs, 0.05) 
ci_upper_90 <- quantile(boot_diffs, 0.95)

ci_lower_99 <- quantile(boot_diffs, 0.005) 
ci_upper_99 <- quantile(boot_diffs, 0.995)

print(summary_df <- data.frame(Statistic = c("Observed Diff", "Boot Mean Diff", "95% CI Lower", "95% CI Upper", "90% CI Lower", "90% CI Upper", "99% CI Lower", "99% CI Upper"), Value = c(observed_diff, boot_mean_diff, ci_lower_95, ci_upper_95, ci_lower_90, ci_upper_90, ci_lower_99, ci_upper_99)))

##        Statistic        Value
## 1  Observed Diff  0.236363636
## 2 Boot Mean Diff  0.237844156
## 3   95% CI Lower  0.071396104
## 4   95% CI Upper  0.406493506
## 5   90% CI Lower  0.098571429
## 6   90% CI Upper  0.376623377
## 7   99% CI Lower -0.002597403
## 8   99% CI Upper  0.449370130

boot_data_diff <- data.frame(boot_stat = boot_diffs)

ggplot(boot_data_diff, aes(x = boot_stat)) + geom_histogram(bins = 30, fill = "#ADD8E6", color = "#00008B", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Difference in Proportions (R =", R_replicates, ")"), x = "Bootstrap Difference in Proportions (p1 - p2)", y = "Frequency") + theme_minimal()

Seventh Problem (two proportion):

n1 <- 60
p1_true <- 0.80 
data_sample1 <- rbinom(n = n1, size = 1, prob = p1_true)

n2 <- 60
p2_true <- 0.55 
data_sample2 <- rbinom(n = n2, size = 1, prob = p2_true)

p1_obs <- mean(data_sample1)
p2_obs <- mean(data_sample2)
observed_diff <- p1_obs - p2_obs

cat(paste("Observed Proportion 1 (p1_obs):", round(p1_obs, 4), "\n"))

## Observed Proportion 1 (p1_obs): 0.8333

cat(paste("Observed Proportion 2 (p2_obs):", round(p2_obs, 4), "\n"))

## Observed Proportion 2 (p2_obs): 0.4333

cat(paste("Observed Difference (p1_obs - p2_obs):", round(observed_diff, 4), "\n\n"))

## Observed Difference (p1_obs - p2_obs): 0.4

R_replicates <- 1000
boot_diffs <- numeric(R_replicates)

for (i in 1:R_replicates) {
boot_sample1 <- sample(data_sample1, size = n1, replace = TRUE)
boot_sample2 <- sample(data_sample2, size = n2, replace = TRUE)
  
p1_boot <- mean(boot_sample1)
p2_boot <- mean(boot_sample2)
boot_diffs[i] <- p1_boot - p2_boot
}

boot_mean_diff <- mean(boot_diffs)
boot_std_err <- sd(boot_diffs)
ci_lower_95 <- quantile(boot_diffs, 0.025)
ci_upper_95 <- quantile(boot_diffs, 0.975)

ci_lower_90 <- quantile(boot_diffs, 0.05) 
ci_upper_90 <- quantile(boot_diffs, 0.95)

ci_lower_99 <- quantile(boot_diffs, 0.005) 
ci_upper_99 <- quantile(boot_diffs, 0.995)

print(summary_df <- data.frame(Statistic = c("Observed Diff", "Boot Mean Diff", "95% CI Lower", "95% CI Upper", "90% CI Lower", "90% CI Upper", "99% CI Lower", "99% CI Upper"), Value = c(observed_diff, boot_mean_diff, ci_lower_95, ci_upper_95, ci_lower_90, ci_upper_90, ci_lower_99, ci_upper_99)))

##        Statistic     Value
## 1  Observed Diff 0.4000000
## 2 Boot Mean Diff 0.3978833
## 3   95% CI Lower 0.2495833
## 4   95% CI Upper 0.5500000
## 5   90% CI Lower 0.2666667
## 6   90% CI Upper 0.5333333
## 7   99% CI Lower 0.1833333
## 8   99% CI Upper 0.6000000

boot_data_diff <- data.frame(boot_stat = boot_diffs)

ggplot(boot_data_diff, aes(x = boot_stat)) + geom_histogram(bins = 30, fill = "#ADD8E6", color = "#00008B", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Difference in Proportions (R =", R_replicates, ")"), x = "Bootstrap Difference in Proportions (p1 - p2)", y = "Frequency") + theme_minimal()

Eighth problem (one mean):

data_sample <- rnorm(n = 50, mean = 10, sd = 2)
observed_stat <- median(data_sample)
boot_median <- function(data, indices)
observed_stat_mean <- mean(data_sample)
boot_median <- function(data, indices) {d <- data[indices]
return(median(d))
}
boot_mean <- function(data, indices) {d <- data[indices]
return(mean(d))
}
R_replicates <- 1000
boot_results_mean <- boot(
  data = data_sample, 
  statistic = boot_mean, 
  R = R_replicates
)

cat("Observed Mean:", boot_results_mean$t0, "\n")

## Observed Mean: 9.855972

cat("Mean of Bootstrap Distribution:", mean(boot_results_mean$t), "\n")

## Mean of Bootstrap Distribution: 9.862609

boot_data_mean <- data.frame(boot_stat = boot_results_mean$t)

ggplot(boot_data_mean, aes(x = boot_stat)) + geom_histogram(bins = 15, fill = "lightgreen", color = "darkgreen", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Sample Mean (R =", R_replicates, ")"), x = "Bootstrap Mean Values", y = "Frequency") + theme_minimal()

Ninth problem (one mean):

data_sample <- rnorm(n = 50, mean = 10, sd = 2)
observed_stat <- median(data_sample)
boot_median <- function(data, indices)
observed_stat_mean <- mean(data_sample)
boot_median <- function(data, indices) {d <- data[indices]
return(median(d))
}
boot_mean <- function(data, indices) {d <- data[indices]
return(mean(d))
}
R_replicates <- 1000
boot_results_mean <- boot(
  data = data_sample, 
  statistic = boot_mean, 
  R = R_replicates
)

cat("Observed Mean:", boot_results_mean$t0, "\n")

## Observed Mean: 9.824534

cat("Mean of Bootstrap Distribution:", mean(boot_results_mean$t), "\n")

## Mean of Bootstrap Distribution: 9.81579

boot_data_mean <- data.frame(boot_stat = boot_results_mean$t)

ggplot(boot_data_mean, aes(x = boot_stat)) + geom_histogram(bins = 15, fill = "lightgreen", color = "darkgreen", alpha = 0.8) + labs(title = paste("Bootstrap Distribution of the Sample Mean (R =", R_replicates, ")"), x = "Bootstrap Mean Values", y = "Frequency") + theme_minimal()

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Exam 2 Practice

K. Rose

2025-10-29

Confidence Interval Problems