# Confidence intervals for a binomial proportion: Wald vs. Wilson.
# "Success" is defined as mpg >= 25 (fuel-efficient car).
y <- as.integer(df$mpg >= 25)

# Count only observed outcomes: a missing mpg yields NA in y, which would
# otherwise inflate n and turn the sample proportion (and both CIs) into NA.
n_p <- sum(!is.na(y))
phat <- mean(y, na.rm = TRUE)

alpha <- 0.05
z <- qnorm(1 - alpha / 2)  # two-sided standard-normal critical value

# Wald CI: phat -/+ z * sqrt(phat * (1 - phat) / n)
wald <- phat + c(-1, 1) * z * sqrt(phat * (1 - phat) / n_p)

# Wilson CI: the centre (wilson_num) and half-width (wilson_rad) are built
# first, then both are rescaled by wilson_den = 1 + z^2 / n.
wilson_num <- phat + z^2 / (2 * n_p)
wilson_rad <- z * sqrt(phat * (1 - phat) / n_p + z^2 / (4 * n_p^2))
wilson_den <- 1 + z^2 / n_p
wilson <- (wilson_num + c(-1, 1) * wilson_rad) / wilson_den

# Summary: sample size, point estimate, and both intervals as c(lower, upper).
list(n = n_p, phat = phat, Wald = wald, Wilson = wilson)
## $n
## [1] 32
##
## $phat
## [1] 0.1875
##
## $Wald
## [1] 0.05226615 0.32273385
##
## $Wilson
## [1] 0.08889545 0.35309155
The Wilson interval is often slightly narrower and better centered when proportions are near 0 or 1.