These percentages are sample statistics derived from the sample.
To generalize the findings we must assume the sample is random and independent and that each religion has at least 10 respondents (to test difference in proportion) in each country.
load("C:/Users/sogde/Downloads/atheism.RData")
Each row corresponds to a survey respondent.
# Restrict the survey data to United States respondents from 2012.
us12 <- subset(
  atheism,
  nationality == "United States" & year == "2012"
)
# Sample proportion: respondents answering "atheist" over all rows in us12.
sum(us12$response == "atheist", na.rm = TRUE) / nrow(us12)
## [1] 0.0499002
we get a result of 5%.
The conditions for inference are that the sample contains at least 10 successes (atheists) and at least 10 failures (non-atheists), and that the observations are random and independent. These conditions are met by the survey methodology and by the counts below:
# Number of "atheist" responses in the 2012 US sample (the success count).
sum(us12$response == "atheist", na.rm = TRUE)
## [1] 50
# Theoretical confidence interval for the proportion of "atheist" responses;
# the printed summary below reports it at the 95% level.
inference(
  us12$response,
  est = "proportion",
  type = "ci",
  method = "theoretical",
  success = "atheist"
)
## Warning: package 'lmPerm' was built under R version 3.3.2
## Warning: package 'BHH2' was built under R version 3.3.2
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0499 ; n = 1002
## Check conditions: number of successes = 50 ; number of failures = 952
## Standard error = 0.0069
## 95 % Confidence interval = ( 0.0364 , 0.0634 )
Margin of error
# Standard error copied from the inference() summary for the 2012 US sample.
se <- 0.0069
# Critical value for a 95% confidence interval (qnorm(0.975) rounded to two
# decimals). It is reused for the Canada and France margins below.
zstar <- 1.96
# Margin of error = critical value * standard error.
moe <- zstar * se
moe
## [1] 0.013524
Canada
ca12 <- subset(atheism, nationality == "Canada" & year == "2012")
# Success-failure condition: at least 10 atheists AND at least 10 non-atheists.
n_atheist_ca <- sum(ca12$response == "atheist", na.rm = TRUE)
n_atheist_ca >= 10 && nrow(ca12) - n_atheist_ca >= 10
## [1] TRUE
##Conditions met
inference(ca12$response, est = "proportion", type = "ci", method = "theoretical",
          success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0898 ; n = 1002
## Check conditions: number of successes = 90 ; number of failures = 912
## Standard error = 0.009
## 95 % Confidence interval = ( 0.0721 , 0.1075 )
##Does not contain zero
# Standard error copied from the inference() summary for Canada.
seca <- 0.009
moeca <- seca * zstar
moeca
## [1] 0.01764
France
fr12 <- subset(atheism, nationality == "France" & year == "2012")
# Success-failure condition: at least 10 atheists AND at least 10 non-atheists.
n_atheist_fr <- sum(fr12$response == "atheist", na.rm = TRUE)
n_atheist_fr >= 10 && nrow(fr12) - n_atheist_fr >= 10
## [1] TRUE
##Conditions met
inference(fr12$response, est = "proportion", type = "ci", method = "theoretical",
          success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.2873 ; n = 1688
## Check conditions: number of successes = 485 ; number of failures = 1203
## Standard error = 0.011
## 95 % Confidence interval = ( 0.2657 , 0.3089 )
##Does not contain zero
# Standard error copied from the inference() summary for France.
sefr <- 0.011
moefr <- sefr * zstar
moefr
## [1] 0.02156
# Margin of error as a function of the population proportion for a fixed
# sample size of 1000. The factor 2 stands in for the 95% critical value.
n <- 1000
p <- seq(from = 0, to = 1, by = 0.01)
me <- 2 * sqrt(p * (1 - p) / n)
plot(
  x = p,
  y = me,
  xlab = "Population Proportion",
  ylab = "Margin of Error"
)
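As a function of the proportion, the margin of error traces a semi-elliptical (half-circle-like) arc (locally it would be parabolic around 0.5 in a Taylor series), with its theoretical maximum where dme/dp = 0, at p = 0.5. The location of this maximum is independent of n; only the height of the curve changes with n, scaling as 1/sqrt(n).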
# Simulated sampling distribution of the sample proportion for p = 0.1 and
# n = 1040. No seed is set in this section, so the printed values below are
# from one particular run.
p <- 0.1
n <- 1040
# Each of the 5000 replicates draws n labelled responses and records the
# fraction that came out as "atheist".
p_hats <- vapply(
  seq_len(5000),
  function(rep_id) {
    draws <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1 - p))
    sum(draws == "atheist") / n
  },
  numeric(1)
)
hist(p_hats, main = "p = 0.1, n = 1040", xlim = c(0, 0.18))
summary(p_hats)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.07019 0.09327 0.10000 0.09998 0.10580 0.13170
# Center of the simulated distribution
mean(p_hats)
## [1] 0.09997692
# Spread of the simulated distribution
sd(p_hats)
## [1] 0.009275774
# Simulated sampling distribution of the sample proportion for p = 0.1 and
# n = 400. No seed is set in this section, so the printed values below are
# from one particular run.
p <- 0.1
n <- 400
# Each of the 5000 replicates draws n labelled responses and records the
# fraction that came out as "atheist".
p_hats1 <- vapply(
  seq_len(5000),
  function(rep_id) {
    draws <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1 - p))
    sum(draws == "atheist") / n
  },
  numeric(1)
)
summary(p_hats1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0500 0.0900 0.1000 0.1002 0.1100 0.1575
# Center of the simulated distribution
mean(p_hats1)
## [1] 0.100152
# Spread of the simulated distribution
sd(p_hats1)
## [1] 0.01483883
# Simulated sampling distribution of the sample proportion for p = 0.02 and
# n = 1040. No seed is set in this section, so the printed values below are
# from one particular run.
p <- 0.02
n <- 1040
# Each of the 5000 replicates draws n labelled responses and records the
# fraction that came out as "atheist".
p_hats2 <- vapply(
  seq_len(5000),
  function(rep_id) {
    draws <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1 - p))
    sum(draws == "atheist") / n
  },
  numeric(1)
)
summary(p_hats2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.005769 0.017310 0.020190 0.020010 0.023080 0.036540
# Center of the simulated distribution
mean(p_hats2)
## [1] 0.02001462
# Spread of the simulated distribution
sd(p_hats2)
## [1] 0.004303382
# Simulated sampling distribution of the sample proportion for p = 0.02 and
# n = 400. No seed is set in this section, so the printed values below are
# from one particular run.
p <- 0.02
n <- 400
# Each of the 5000 replicates draws n labelled responses and records the
# fraction that came out as "atheist".
p_hats3 <- vapply(
  seq_len(5000),
  function(rep_id) {
    draws <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1 - p))
    sum(draws == "atheist") / n
  },
  numeric(1)
)
summary(p_hats3)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00250 0.01500 0.02000 0.02009 0.02500 0.05000
# Center of the simulated distribution
mean(p_hats3)
## [1] 0.0200945
# Spread of the simulated distribution
sd(p_hats3)
## [1] 0.007119161
# Draw the four simulated sampling distributions on a 2 x 2 grid. All panels
# share one x-axis range so their centers and spreads can be compared by eye.
par(mfrow = c(2, 2))
shared_xlim <- c(0, 0.18)
hist(p_hats, xlim = shared_xlim, main = "p = 0.1, n = 1040")
hist(p_hats1, xlim = shared_xlim, main = "p = 0.1, n = 400")
hist(p_hats2, xlim = shared_xlim, main = "p = 0.02, n = 1040")
hist(p_hats3, xlim = shared_xlim, main = "p = 0.02, n = 400")
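Based on this, increasing n decreases the spread, while changing p shifts the center (and, for a fixed n, also changes the spread: the distributions with p = 0.02 are narrower than those with p = 0.1).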
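It is sensible in the first case but probably not in the second. The observations within each sample are independent, but the expected numbers of successes are 0.1 x 1040 ~ 104 and 0.02 x 400 = 8 people, so the minimum of 10 successes required for the normal approximation is not met for the second group.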
# Spanish respondents, split by survey year.
spain <- subset(atheism, nationality == "Spain")
sp12 <- subset(spain, year == "2012")
sp05 <- subset(spain, year == "2005")
# Theoretical confidence interval for the proportion of atheists in 2012.
inference(
  sp12$response,
  est = "proportion",
  type = "ci",
  method = "theoretical",
  success = "atheist"
)
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.09 ; n = 1145
## Check conditions: number of successes = 103 ; number of failures = 1042
## Standard error = 0.0085
## 95 % Confidence interval = ( 0.0734 , 0.1065 )
# Same interval for the 2005 sample, for comparison with 2012.
inference(
  sp05$response,
  est = "proportion",
  type = "ci",
  method = "theoretical",
  success = "atheist"
)
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.1003 ; n = 1146
## Check conditions: number of successes = 115 ; number of failures = 1031
## Standard error = 0.0089
## 95 % Confidence interval = ( 0.083 , 0.1177 )
The two 95% confidence intervals overlap, so there is no convincing evidence that the proportion of atheists in Spain changed between 2005 and 2012.
# United States respondents, split by survey year.
usa <- subset(atheism, nationality == "United States")
us12 <- subset(usa, year == "2012")
us05 <- subset(usa, year == "2005")
# Theoretical confidence interval for the proportion of atheists in 2012.
inference(
  us12$response,
  est = "proportion",
  type = "ci",
  method = "theoretical",
  success = "atheist"
)
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0499 ; n = 1002
## Check conditions: number of successes = 50 ; number of failures = 952
## Standard error = 0.0069
## 95 % Confidence interval = ( 0.0364 , 0.0634 )
# Same interval for the 2005 sample, for comparison with 2012.
inference(
  us05$response,
  est = "proportion",
  type = "ci",
  method = "theoretical",
  success = "atheist"
)
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.01 ; n = 1002
## Check conditions: number of successes = 10 ; number of failures = 992
## Standard error = 0.0031
## 95 % Confidence interval = ( 0.0038 , 0.0161 )
These 95% confidence intervals do not overlap, so there is convincing evidence that the proportion of atheists in the United States changed between 2005 and 2012.
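At a 5% significance level, we would expect to detect a change by chance alone (a Type I error) in about 0.05*39 countries ~ 2 countries, even if no country's atheism index had actually changed.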
To ensure a moe no greater than 1% we have to assume the proportion ends up at the value that maximizes moe, ie p=0.5. zstar= 1.96 so, solving for n in the moe equation:
# Sample size needed for a 95% margin of error of at most 1%.
# Solving moe = zstar * sqrt(p * (1 - p) / n) for n gives
#   n = zstar^2 * p * (1 - p) / moe^2
moe <- 0.01
zstar <- 1.96
# p = 0.5 maximizes p * (1 - p), so it yields the most conservative (largest) n.
p <- 0.5
n <- zstar^2 * p * (1 - p) / moe^2
n
## [1] 9604
n=9604