download.file("http://www.openintro.org/stat/data/atheism.RData", destfile = "atheism.RData")
load("atheism.RData")
us12 <- subset(atheism, nationality == "United States" & year == "2012")
plot(us12$response)
table(us12$response)
##
## atheist non-atheist
## 50 952
50/(952+50)
## [1] 0.0499002
(50+2)/ (952+50+4) # Wilson-Adjusted Proportion
## [1] 0.05168986
inference(us12$response, est = "proportion", type = "ci", method = "theoretical", success = "atheist")
## Warning: package 'BHH2' was built under R version 4.0.4
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0499 ; n = 1002
## Check conditions: number of successes = 50 ; number of failures = 952
## Standard error = 0.0069
## 95 % Confidence interval = ( 0.0364 , 0.0634 )
0.0499 - 0.0364
## [1] 0.0135
1.96*(0.0069)
## [1] 0.013524
afg12 <- subset(atheism, nationality == "Afghanistan" & year == "2012")
sweden12 <- subset(atheism, nationality == "Sweden" & year == "2012")
inference(sweden12$response, est = "proportion", type = "ci", method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0808 ; n = 495
## Check conditions: number of successes = 40 ; number of failures = 455
## Standard error = 0.0122
## 95 % Confidence interval = ( 0.0568 , 0.1048 )
1.96*(0.0122)
## [1] 0.023912
neth12 <- subset(atheism, nationality == "Netherlands" & year == "2012")
inference(neth12$response, est = "proportion", type = "ci", method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.1395 ; n = 509
## Check conditions: number of successes = 71 ; number of failures = 438
## Standard error = 0.0154
## 95 % Confidence interval = ( 0.1094 , 0.1696 )
1.96*0.0154
## [1] 0.030184
n <- 1000
p <- seq(0, 1, 0.01)
me <- 2 * sqrt (p* (1-p) / n)
plot(me ~ p, ylab = "Margin of Error", xlab = "Population Proportion")
p <- 0.1
n <- 1040
p_hats <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1-p))
p_hats[i] <- sum(samp == "atheist")/n
}
hist(p_hats, main = "p = 0.1, n = 1040", xlim = c(0, 0.18))
Hint: Remember that R has functions such as mean to calculate summary statistics.
mean(p_hats)
## [1] 0.09969
min(p_hats)
## [1] 0.07019231
max(p_hats)
## [1] 0.1298077
max(p_hats) - min(p_hats)
## [1] 0.05961538
p <- 0.1
n1 <- 400
p_hats1 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n1, replace = TRUE, prob = c(p, 1-p))
p_hats1[i] <- sum(samp == "atheist")/n1
}
hist(p_hats1, main = "p = 0.1, n = 400", xlim = c(0, 0.18))
mean(p_hats1)
## [1] 0.099759
min(p_hats1)
## [1] 0.0525
max(p_hats1)
## [1] 0.155
max(p_hats1) - min(p_hats1)
## [1] 0.1025
p2 <- 0.02
n <- 1040
p_hats2 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p2, 1-p2))
p_hats2[i] <- sum(samp == "atheist")/n
}
hist(p_hats2, main = "p = 0.02, n = 1040", xlim = c(0, 0.18))
mean(p_hats2)
## [1] 0.01995423
min(p_hats2)
## [1] 0.005769231
max(p_hats2)
## [1] 0.03942308
max(p_hats2) - min(p_hats2)
## [1] 0.03365385
p2 <- 0.02
n1 <- 400
p_hats3 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n1, replace = TRUE, prob = c(p2, 1-p2))
p_hats3[i] <- sum(samp == "atheist")/n1
}
hist(p_hats3, main = "p = 0.1, n = 400", xlim = c(0, 0.18))
mean(p_hats3)
## [1] 0.0198785
min(p_hats3) - max(p_hats3)
## [1] -0.0475
par(mfrow = c(2,2))
p <- 0.1
n <- 1040
p2 <- 0.02
n1 <- 400
p_hats <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1-p))
p_hats[i] <- sum(samp == "atheist")/n
}
hist(p_hats, main = "p = 0.1, n = 1040", xlim = c(0, 0.18))
p_hats1 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n1, replace = TRUE, prob = c(p, 1-p))
p_hats1[i] <- sum(samp == "atheist")/n1
}
hist(p_hats1, main = "p = 0.1, n = 400", xlim = c(0, 0.18))
p_hats2 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p2, 1-p2))
p_hats2[i] <- sum(samp == "atheist")/n
}
hist(p_hats2, main = "p = 0.02, n = 1040", xlim = c(0, 0.18))
p_hats3 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n1, replace = TRUE, prob = c(p2, 1-p2))
p_hats3[i] <- sum(samp == "atheist")/n1
}
hist(p_hats3, main = "p = 0.1, n = 400", xlim = c(0, 0.18))
###Hint: Create a new data set for respondents from Spain. Form confidence intervals for the true proportion of athiests in both years, and determine whether they overlap.
par(mfrow = c(1,1))
p4 <- 0.09
n4 <- 1146
p_hats4 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n4, replace = TRUE, prob = c(p4, 1-p4))
p_hats4[i] <- sum(samp == "atheist")/n4
}
hist(p_hats4, main = "Spain, 2012, p = 0.09, n = 1146", xlim = c(0, 0.18))
spain12 <- subset(atheism, nationality == "Spain" & year == "2012")
spain05 <- subset(atheism, nationality == "Spain" & year == "2005")
inference(spain12$response, est = "proportion", type = "ci", method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.09 ; n = 1145
## Check conditions: number of successes = 103 ; number of failures = 1042
## Standard error = 0.0085
## 95 % Confidence interval = ( 0.0734 , 0.1065 )
inference(spain05$response, est = "proportion", type = "ci", method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.1003 ; n = 1146
## Check conditions: number of successes = 115 ; number of failures = 1031
## Standard error = 0.0089
## 95 % Confidence interval = ( 0.083 , 0.1177 )
###b. Is there convincing evidence that the United States has seen a change in its atheism index between 2005 and 2012?
us05 <- subset(atheism, nationality == "United States" & year == "2005")
us12 <- subset(atheism, nationality == "United States" & year == "2012")
inference(us05$response, est = "proportion", type = "ci", method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.01 ; n = 1002
## Check conditions: number of successes = 10 ; number of failures = 992
## Standard error = 0.0031
## 95 % Confidence interval = ( 0.0038 , 0.0161 )
inference(us12$response, est = "proportion", type = "ci", method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0499 ; n = 1002
## Check conditions: number of successes = 50 ; number of failures = 952
## Standard error = 0.0069
## 95 % Confidence interval = ( 0.0364 , 0.0634 )
0.25 / (0.01/1.96)^2
## [1] 9604