## Fetch the atheism survey workspace from OpenIntro and load its objects into
## the session (presumably this also supplies inference(), which is called
## later but not defined in this script -- confirm)
download.file(
  url = "http://www.openintro.org/stat/data/atheism.RData",
  destfile = "atheism.RData"
)
load(file = "atheism.RData")
## Keep only the 2012 responses recorded for the United States
us12 <- subset(atheism, subset = year == "2012" & nationality == "United States")
## Of those, the rows whose response is "atheist"
atheism1 <- subset(us12, subset = response == "atheist")
## Cross-tabulate every column of us12; the only non-zero row is the
## United States one, and its two counts add up to 1002 respondents
table(us12)
## , , year = 2012
##
## response
## nationality atheist non-atheist
## Afghanistan 0 0
## Argentina 0 0
## Armenia 0 0
## Australia 0 0
## Austria 0 0
## Azerbaijan 0 0
## Belgium 0 0
## Bosnia and Herzegovina 0 0
## Brazil 0 0
## Bulgaria 0 0
## Cameroon 0 0
## Canada 0 0
## China 0 0
## Colombia 0 0
## Czech Republic 0 0
## Ecuador 0 0
## Fiji 0 0
## Finland 0 0
## France 0 0
## Georgia 0 0
## Germany 0 0
## Ghana 0 0
## Hong Kong 0 0
## Iceland 0 0
## India 0 0
## Iraq 0 0
## Ireland 0 0
## Italy 0 0
## Japan 0 0
## Kenya 0 0
## Korea, Rep (South) 0 0
## Lebanon 0 0
## Lithuania 0 0
## Macedonia 0 0
## Malaysia 0 0
## Moldova 0 0
## Netherlands 0 0
## Nigeria 0 0
## Pakistan 0 0
## Palestinian territories (West Bank and Gaza) 0 0
## Peru 0 0
## Poland 0 0
## Romania 0 0
## Russian Federation 0 0
## Saudi Arabia 0 0
## Serbia 0 0
## South Africa 0 0
## South Sudan 0 0
## Spain 0 0
## Sweden 0 0
## Switzerland 0 0
## Tunisia 0 0
## Turkey 0 0
## Ukraine 0 0
## United States 50 952
## Uzbekistan 0 0
## Vietnam 0 0
## Sample proportion of atheists among the 2012 US respondents, computed from
## the two subsets (50 of 1002 rows) rather than retyping the counts by hand
nrow(atheism1) / nrow(us12)
## [1] 0.0499002
## Theoretical confidence interval for the proportion of 2012 US respondents
## who answered "atheist"
inference(us12$response, success = "atheist", est = "proportion",
  type = "ci", method = "theoretical")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0499 ; n = 1002
## Check conditions: number of successes = 50 ; number of failures = 952
## Standard error = 0.0069
## 95 % Confidence interval = ( 0.0364 , 0.0634 )
## Margin of error: the standard error reported by inference() (0.0069)
## multiplied by 1.96
0.0069 * 1.96
## [1] 0.013524
## Exercise: Using the inference function, calculate confidence intervals for the proportion of atheists in 2012 in two other countries of your choice, and report the associated margins of error. Be sure to note whether the conditions for inference are met. It may be helpful to create new data sets for each of the two countries first, and then use these data sets in the inference function to construct the confidence intervals.
## Same steps as for the United States, now for Japan: subset the 2012
## responses, then build the theoretical confidence interval for "atheist"
jpn2012 <- subset(atheism, subset = year == "2012" & nationality == "Japan")
inference(jpn2012$response, success = "atheist", est = "proportion",
  type = "ci", method = "theoretical")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.3069 ; n = 1212
## Check conditions: number of successes = 372 ; number of failures = 840
## Standard error = 0.0132
## 95 % Confidence interval = ( 0.281 , 0.3329 )
## Second comparison country, Canada: subset the 2012 responses, then build
## the theoretical confidence interval for "atheist"
cnd2012 <- subset(atheism, subset = year == "2012" & nationality == "Canada")
inference(cnd2012$response, success = "atheist", est = "proportion",
  type = "ci", method = "theoretical")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0898 ; n = 1002
## Check conditions: number of successes = 90 ; number of failures = 912
## Standard error = 0.009
## 95 % Confidence interval = ( 0.0721 , 0.1075 )
## How the margin of error changes with the population proportion for a
## fixed sample size: evaluate p on a grid from 0 to 1 in steps of 0.01 and
## take the margin of error as 2 standard errors, 2 * sqrt(p * (1 - p) / n)
n <- 1000
p <- seq(from = 0, to = 1, by = 0.01)
me <- 2 * sqrt(p * (1 - p) / n)
plot(p, me, xlab = "Population Proportion", ylab = "Margin of Error")
## Hint: Remember that R has functions such as mean to calculate summary statistics.
## Simulate the sampling distribution of the sample proportion: draw 5000
## samples of n responses, each "atheist" with probability p, and record the
## share of "atheist" answers in every sample
p <- 0.1
n <- 1040
p_hats <- numeric(5000)
for (i in seq_len(5000)) {
  samp <- sample(
    x = c("atheist", "non_atheist"),
    size = n,
    replace = TRUE,
    prob = c(p, 1 - p)
  )
  p_hats[i] <- sum(samp == "atheist") / n
}
## Histogram and five-number summary (plus mean) of the simulated proportions
hist(p_hats, xlim = c(0, 0.18), main = "p = 0.1, n = 1040")
summary(p_hats)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.07019 0.09327 0.09904 0.09969 0.10577 0.12981
## Lay the following histograms out on a 2 x 2 grid
par(mfrow = c(2, 2))

## Simulate `reps` sample proportions of "atheist" answers.
##   p    : probability that a single response is "atheist"
##   n    : number of responses drawn in each simulated sample
##   reps : number of simulated samples (default 5000)
## Returns a numeric vector of length `reps` holding one proportion per sample.
simulate_p_hats <- function(p, n, reps = 5000) {
  vapply(
    seq_len(reps),
    function(i) {
      samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1 - p))
      sum(samp == "atheist") / n
    },
    numeric(1)
  )
}

## Each scenario passes its own p and n to the simulation, and the histogram
## title is built from those same values so the label cannot drift away from
## the parameters that were actually simulated.

## Scenario: p = 0.1, n = 400
p2 <- 0.1
n2 <- 400
phat2 <- simulate_p_hats(p2, n2)
hist(phat2, main = paste0("p = ", p2, ", n = ", n2), xlim = c(0, 0.18))

## Scenario: p = 0.02, n = 1040
p3 <- 0.02
n3 <- 1040
phat3 <- simulate_p_hats(p3, n3)
hist(phat3, main = paste0("p = ", p3, ", n = ", n3), xlim = c(0, 0.18))

## Scenario: p = 0.02, n = 400
p4 <- 0.02
n4 <- 400
phat4 <- simulate_p_hats(p4, n4)
hist(phat4, main = paste0("p = ", p4, ", n = ", n4), xlim = c(0, 0.18))