11) Write a computer program to simulate 10,000 Bernoulli trials with probability .3 for success on each trial. Have the program compute the 95 percent confidence interval for the probability of success based on the proportion of successes. Repeat the experiment 100 times and see how many times the true value of .3 is included within the confidence limits.
set.seed(1248)
n = 10000 # number of trials per experiment
p = .3 # probability of success of any trial
experiments <- 100 # number of experiments
# Define function to simulate 10k trials per experiment for 100 experiments
Bernoulli <- function() {
record <- vector(mode = 'numeric', length = experiments)
# Replicate experiment
for(i in 1:experiments){
trials <- rbinom(n = n, size = 1, p = p)
# Log number of trials that are successes in each trial
success <- 0
for(trial in trials){
if(trial == 1){
success = success + 1
}
# Calculcate per-experiment success rate over 10k trials and capture in vector
}
record[i] <- success/length(trials)
}
return(record)
}
# Conduct 10 experiments and collect results in dataframe
r_vec <- Bernoulli()
r_df <- as.data.frame(r_vec)
head(r_df)
## r_vec
## 1 0.3003
## 2 0.3115
## 3 0.2930
## 4 0.3044
## 5 0.2986
## 6 0.3004
# Define histogram of experiment results
g <- ggplot(data = r_df) +
# Define chart axes and title
scale_y_continuous(name = "Count of experiments", breaks = c(5, 10, 15), limits = c(0, 15)) +
ggtitle("Histogram: 100 experiments of 10k Bernoulli trials each") +
xlab("Rate of success")
# Plot
g +
# As histogram
geom_histogram(aes(x = r_vec), binwidth = .001) +
# With visual theme
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), legend.position = "none")

# Calculate lower and upper bounds of 95% confidence interval
lower = p - qnorm(.975) * sqrt((p) * (1 - p) / n)
upper = p + qnorm(.975) * sqrt((p) * (1 - p) / n)
# Assign marker values conditional on whether outside of CI (A) or within CI (B)
r_df$r_CI <-
case_when(r_df$r_vec < lower | r_df$r_vec > upper ~ "A",
r_df$r_vec >= lower & r_df$r_vec <= upper ~ "B")
# Determine how many of the 100 experiments fall within the confidence interval
r_test <- length(which(r_df >= lower & r_df <= upper))
r_test
## [1] 92
# Convert marker values to factor
r_df$r_CI <- as.factor(r_df$r_C)
# Vizualize proportion of experiments within 95% confidence interval
ggplot(data = r_df, fill = r_vec) +
# Define chart axes and title
scale_y_continuous(name = "Count of experiments", breaks = c(5, 10, 15), limits = c(0, 15)) +
ggtitle("Histogram: 100 experiments of 10k Bernoulli trials each") +
xlab("Rate of success") +
# Define histogram
geom_histogram(aes(x = r_vec, fill = r_CI), binwidth = .001) +
# Visualize CI with hue and reference x-intercerpt lines
scale_fill_manual(values = c("gray78", "grey28")) +
geom_vline(aes(xintercept = lower, color = "red", linetype = "dotted")) +
geom_vline(aes(xintercept = upper, color = "red", linetype = "dotted")) +
geom_text(aes(x = .3, label = "92 experiments within .95 CI", y = 15), color = "grey28", text = element_text(size = 8), hjust = .5) +
geom_text(aes(x = lower, label = "\nlower bound = .291", y = 10), color = "red", angle = 90, text = element_text(size = 8)) +
geom_text(aes(x = upper, label = "upper bound = .309\n", y = 10), color = "red", angle = 90, text = element_text(size = 8)) +
# Set visual theme
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5), legend.position = "none")
## Warning: Ignoring unknown parameters: text
## Warning: Ignoring unknown parameters: text
## Warning: Ignoring unknown parameters: text
