Exercise 11

Write a computer program to simulate 10,000 Bernoulli trials with probability .3 for success on each trial. Have the program compute the 95 percent confidence interval for the probability of success based on the proportion of successes. Repeat the experiment 100 times and see how many times the true value of .3 is included within the confidence limits.

set.seed(81282)  # for reproducibility

p <- 0.3  # probability of success
n <- 10000  # number of trials

successes <- rbinom(n, 1, p)  # perform the Bernoulli trials
prop_success <- sum(successes) / n  # calculate the proportion of successes
se <- sqrt(prop_success * (1 - prop_success) / n)  # calculate the standard error
z <- qnorm(0.975)  # 95% z-score

lower_ci <- prop_success - z * se  # calculate the lower bound of the confidence interval
upper_ci <- prop_success + z * se  # calculate the upper bound of the confidence interval

# print the results
cat("Proportion of successes:", prop_success, "\n")
## Proportion of successes: 0.2962
cat("95% Confidence Interval: (", lower_ci, ",", upper_ci, ")\n")
## 95% Confidence Interval: ( 0.2872512 , 0.3051488 )

Repeat Experiment

To repeat the experiment 100 times and check how many times the true value of 0.3 is included within the confidence limits, you can use a loop:

set.seed(81282)
experm <- vector()
count_of_experm <- vector()

p_true <- 0.3
n <- 10000
num_experiments <- 100

count <- 0  # keep track of how many times the true value is within the confidence limits

#for (i in 1:num_experiments) {
for (i in seq(1,num_experiments,1)){
  successes <- rbinom(n, 1, p_true)
  prop_success <- sum(successes) / n
  se <- sqrt(prop_success * (1 - prop_success) / n)
  z <- qnorm(0.975)
  lower_ci <- prop_success - z * se
  upper_ci <- prop_success + z * se
  if (lower_ci <= p_true && p_true <= upper_ci) {
    count <- count + 1
  }
  experm[i]<-prop_success
}

cat("True value included within the confidence limits", count, "times out of", num_experiments, "experiments.")
## True value included within the confidence limits 94 times out of 100 experiments.
results_df <-as.data.frame(experm)
ggplot(results_df,aes(x=experm))+
  geom_density()+
  geom_vline(aes(xintercept=lower_ci,color='green'))+
  geom_vline(aes(xintercept=upper_ci,color='green'))+
  labs(title = 'Bernoulli Simulation', subtitle = "10,000 Trials")+ 
  theme(plot.title = element_text(hjust = 0.5))+
  geom_text(aes(x=round(lower_ci,5), label=paste0("Lower CI Bound\n",round(lower_ci,5),collapse = ''), y=40), colour="purple", angle=90,check_overlap=T) +
  geom_text(aes(x=round(upper_ci,5), label=paste0("Upper CI Bound\n",round(upper_ci,5),collapse = ''), y=40), colour="purple", angle=90, check_overlap=T)

#x = TERM, y = `MEAN TEST SCORE`