download.file("http://www.openintro.org/stat/data/atheism.RData", destfile = "atheism.RData")
load("atheism.RData")
head(atheism)
## nationality response year
## 1 Afghanistan non-atheist 2012
## 2 Afghanistan non-atheist 2012
## 3 Afghanistan non-atheist 2012
## 4 Afghanistan non-atheist 2012
## 5 Afghanistan non-atheist 2012
## 6 Afghanistan non-atheist 2012
us12 <- subset(atheism, nationality == "United States" & year == "2012")
us12$nationality <- as.factor(as.character(us12$nationality))
prop.table(table(us12$nationality,us12$response))
##
## atheist non-atheist
## United States 0.0499002 0.9500998
inference(us12$response, est="proportion", type="ci", method="theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0499 ; n = 1002
## Check conditions: number of successes = 50 ; number of failures = 952
## Standard error = 0.0069
## 95 % Confidence interval = ( 0.0364 , 0.0634 )
ch12 <- subset(atheism, nationality == "China" & year == "2012")
ch12$nationality <- as.factor(as.character(ch12$nationality))
prop.table(table(ch12$nationality,ch12$response))
##
## atheist non-atheist
## China 0.47 0.53
inference(ch12$response, est="proportion", type="ci", method="theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.47 ; n = 500
## Check conditions: number of successes = 235 ; number of failures = 265
## Standard error = 0.0223
## 95 % Confidence interval = ( 0.4263 , 0.5137 )
nig12 <- subset(atheism, nationality == "Nigeria" & year == "2012")
nig12$nationality <- as.factor(as.character(nig12$nationality))
prop.table(table(nig12$nationality,nig12$response))
##
## atheist non-atheist
## Nigeria 0.009532888 0.990467112
inference(nig12$response, est="proportion", type="ci", method="theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0095 ; n = 1049
## Check conditions: number of successes = 10 ; number of failures = 1039
## Standard error = 0.003
## 95 % Confidence interval = ( 0.0037 , 0.0154 )
n <- 1000
p <- seq(0,1,.01)
me <- 2 * sqrt(p * (1-p)/n)
plot(me~p,ylab="Margin of Error",xlab="Population Proportion")
p <- 0.1
n <- 1040
p_hats <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1-p))
p_hats[i] <- sum(samp == "atheist")/n
}
hist(p_hats, main = "p = 0.1, n = 1040", xlim = c(0, 0.18))
summary(p_hats)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.07019 0.09327 0.09904 0.09969 0.10577 0.12981
p <- 0.1
n <- 1040
p_hats <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1-p))
p_hats[i] <- sum(samp == "atheist")/n
}
p <- 0.1
n <- 400
p_hats1 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1-p))
p_hats1[i] <- sum(samp == "atheist")/n
}
p <- 0.02
n <- 1040
p_hats2 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1-p))
p_hats2[i] <- sum(samp == "atheist")/n
}
p <- 0.02
n <- 400
p_hats3 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1-p))
p_hats3[i] <- sum(samp == "atheist")/n
}
par(mfrow=c(2,2))
hist(p_hats, main = "p = 0.1, n = 1040", xlim = c(0, 0.18))
hist(p_hats1, main = "p = 0.1, n = 400", xlim = c(0, 0.18))
hist(p_hats2, main = "p = 0.02, n = 1040", xlim = c(0, 0.18))
hist(p_hats3, main = "p = 0.02, n = 400", xlim = c(0, 0.18))