# Download the atheism workspace file and restore its objects into the session.
# mode = "wb" requests a binary transfer explicitly: .RData is a binary format,
# and a text-mode download can corrupt it on Windows.
download.file(
  "http://www.openintro.org/stat/data/atheism.RData",
  destfile = "atheism.RData",
  mode = "wb"
)
load("atheism.RData")
# Preview the first rows of the `atheism` object restored by load().
head(atheism)
## nationality response year
## 1 Afghanistan non-atheist 2012
## 2 Afghanistan non-atheist 2012
## 3 Afghanistan non-atheist 2012
## 4 Afghanistan non-atheist 2012
## 5 Afghanistan non-atheist 2012
## 6 Afghanistan non-atheist 2012
# Keep only the 2012 rows for the United States.
us12 <- subset(atheism, nationality == "United States" & year == "2012")
# Count the responses in each category.
summary(us12$response)
## atheist non-atheist
## 50 952
####total responses = 1002
# Percentage of "atheist" responses, computed from the subset itself rather
# than by retyping the counts printed above, so it cannot drift from the data.
mean(us12$response == "atheist") * 100
## [1] 4.99002
## Inference on proportions
## Exercise 5. Write out the conditions for inference to construct a 95% confidence interval for the proportion of atheists in the United States in 2012. Are you confident all conditions are met?
# Confidence interval for the proportion of "atheist" responses in us12.
# NOTE(review): inference() is not defined in the visible source; presumably it
# is restored by load("atheism.RData") -- confirm.
inference(us12$response, est = "proportion", type = "ci", method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0499 ; n = 1002
## Check conditions: number of successes = 50 ; number of failures = 952
## Standard error = 0.0069
## 95 % Confidence interval = ( 0.0364 , 0.0634 )
# Margin of error = 1.96 * SE. The standard error is computed from the sample
# instead of reusing the rounded 0.0069 printed above, so no precision is lost
# and the value follows the data if the subset changes.
us12_p_hat <- mean(us12$response == "atheist")
us12_se <- sqrt(us12_p_hat * (1 - us12_p_hat) / length(us12$response))
`margin of error` <- 1.96 * us12_se
`margin of error`
## [1] 0.01348211
# Keep only the 2012 rows for Belgium and count the responses per category.
Bg12 <- subset(atheism, nationality == "Belgium" & year == "2012")
summary(Bg12$response)
## atheist non-atheist
## 42 485
# Confidence interval for the proportion of "atheist" responses in Bg12.
# NOTE(review): inference() is not defined in the visible source; presumably it
# is restored by load("atheism.RData") -- confirm.
inference(Bg12$response, est = "proportion", type = "ci", method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0797 ; n = 527
## Check conditions: number of successes = 42 ; number of failures = 485
## Standard error = 0.0118
## 95 % Confidence interval = ( 0.0566 , 0.1028 )
# Margin of error = 1.96 * SE. The standard error is computed from the sample
# instead of reusing the rounded 0.0118 printed above, so no precision is lost
# and the value follows the data if the subset changes.
Bg12_p_hat <- mean(Bg12$response == "atheist")
Bg12_se <- sqrt(Bg12_p_hat * (1 - Bg12_p_hat) / length(Bg12$response))
`margin of error` <- 1.96 * Bg12_se
`margin of error`
## [1] 0.02312255
# Keep only the 2012 rows for Hong Kong and count the responses per category.
HK12 <- subset(atheism, nationality == "Hong Kong" & year == "2012")
summary(HK12$response)
## atheist non-atheist
## 45 455
# Confidence interval for the proportion of "atheist" responses in HK12.
# NOTE(review): inference() is not defined in the visible source; presumably it
# is restored by load("atheism.RData") -- confirm.
inference(HK12$response, est = "proportion", type = "ci", method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.09 ; n = 500
## Check conditions: number of successes = 45 ; number of failures = 455
## Standard error = 0.0128
## 95 % Confidence interval = ( 0.0649 , 0.1151 )
# Margin of error = 1.96 * SE. The standard error is computed from the sample
# instead of reusing the rounded 0.0128 printed above, so no precision is lost
# and the value follows the data if the subset changes.
HK12_p_hat <- mean(HK12$response == "atheist")
HK12_se <- sqrt(HK12_p_hat * (1 - HK12_p_hat) / length(HK12$response))
`margin of error` <- 1.96 * HK12_se
`margin of error`
## [1] 0.02508494
##How does the proportion affect the margin of error?
# Margin of error (2 * standard error) over a grid of proportions from 0 to 1
# in steps of 0.01, for a fixed sample size of 1000.
n <- 1000
p <- seq(from = 0, to = 1, by = 0.01)
std_error <- sqrt(p * (1 - p) / n)
me <- 2 * std_error
# Plot the margin of error (y) against the proportion (x).
plot(
  x = p,
  y = me,
  xlab = "Population Proportion",
  ylab = "Margin of Error"
)
##Success-failure condition
# Simulate 5000 samples of size n from a two-outcome population in which
# "atheist" has probability p, recording the sample proportion of each one.
p <- 0.1
n <- 1040
p_hats <- vapply(
  seq_len(5000),
  function(rep_id) {
    # One simulated sample of n responses.
    draw <- sample(
      c("atheist", "non_atheist"),
      size = n,
      replace = TRUE,
      prob = c(p, 1 - p)
    )
    sum(draw == "atheist") / n
  },
  numeric(1)
)
# Distribution of the simulated sample proportions.
hist(p_hats, main = "p = 0.1, n = 1040", xlim = c(0, 0.18))
# The summary below comes from one run; no seed is set in the visible script,
# so the exact values differ each time this is executed.
summary(p_hats)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.07019 0.09327 0.09904 0.09969 0.10577 0.12981
# Same simulation with p = 0.1 and a smaller sample size of 400:
# 5000 simulated samples, one sample proportion stored per sample.
p <- 0.1
n <- 400
p_400.1 <- vapply(
  seq_len(5000),
  function(rep_id) {
    # One simulated sample of n responses.
    draw <- sample(
      c("atheist", "non_atheist"),
      size = n,
      replace = TRUE,
      prob = c(p, 1 - p)
    )
    sum(draw == "atheist") / n
  },
  numeric(1)
)
# Same simulation with p = 0.2 and a sample size of 1040:
# 5000 simulated samples, one sample proportion stored per sample.
p <- 0.2
n <- 1040
p_1040.2 <- vapply(
  seq_len(5000),
  function(rep_id) {
    # One simulated sample of n responses.
    draw <- sample(
      c("atheist", "non_atheist"),
      size = n,
      replace = TRUE,
      prob = c(p, 1 - p)
    )
    sum(draw == "atheist") / n
  },
  numeric(1)
)
# Same simulation with p = 0.2 and a sample size of 400:
# 5000 simulated samples, one sample proportion stored per sample.
p <- 0.2
n <- 400
p_400.2 <- vapply(
  seq_len(5000),
  function(rep_id) {
    # One simulated sample of n responses.
    draw <- sample(
      c("atheist", "non_atheist"),
      size = n,
      replace = TRUE,
      prob = c(p, 1 - p)
    )
    sum(draw == "atheist") / n
  },
  numeric(1)
)
# Draw the four simulated sampling distributions in a 2 x 2 grid.
# par() returns the previous settings; they are saved and restored afterwards
# so the grid layout does not leak into plots drawn later in the session.
old_par <- par(mfrow = c(2, 2))
hist(p_hats, main = "p = 0.1, n = 1040", xlim = c(0, 0.18))
hist(p_400.1, main = "p = 0.1, n = 400", xlim = c(0.05, 0.2))
hist(p_1040.2, main = "p = 0.2, n = 1040", xlim = c(0.15, 0.3))
hist(p_400.2, main = "p = 0.2, n = 400", xlim = c(0.1, 0.3))
par(old_par)
# Success-failure check: expected successes (n * p) and expected failures
# (n * (1 - p)) for a given proportion and sample size.
# For n = 1040 the proportion is 0.1, matching the scenario simulated earlier
# in this script ("p = 0.1, n = 1040"); the previous value 0.01 was a typo.
n_success_1040 <- 0.1 * 1040
n_success_1040
## [1] 104
n_failure_1040 <- (1 - 0.1) * 1040
n_failure_1040
## [1] 936
# NOTE(review): the n = 400 simulations in this script use p = 0.1 and p = 0.2,
# while this check uses p = 0.02 -- confirm which proportion is intended.
n_success_400 <- 0.02 * 400
n_success_400
## [1] 8
n_failure_400 <- (1 - 0.02) * 400
n_failure_400
## [1] 392