download.file("http://www.openintro.org/stat/data/atheism.RData", destfile = "atheism.RData")
load("atheism.RData")
names(atheism)
## [1] "nationality" "response" "year"
head(atheism)
## nationality response year
## 1 Afghanistan non-atheist 2012
## 2 Afghanistan non-atheist 2012
## 3 Afghanistan non-atheist 2012
## 4 Afghanistan non-atheist 2012
## 5 Afghanistan non-atheist 2012
## 6 Afghanistan non-atheist 2012
us12 <- subset(atheism, nationality == "United States" & year == "2012")
us12ath <- subset(atheism, nationality == "United States" & year == "2012" & response == "atheist")
nrow(us12ath)/nrow(us12)
## [1] 0.0499002
inference(us12$response, est = "proportion", type = "ci", method = "theoretical",
success = "atheist")
## Warning: package 'BHH2' was built under R version 3.6.3
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0499 ; n = 1002
## Check conditions: number of successes = 50 ; number of failures = 952
## Standard error = 0.0069
## 95 % Confidence interval = ( 0.0364 , 0.0634 )
0.0069*1.96
## [1] 0.013524
Brazil12 <- subset(atheism, nationality == "Brazil" & year == "2012")
inference(Brazil12$response, est = "proportion", type = "ci", method = "theoretical",
success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.01 ; n = 2002
## Check conditions: number of successes = 20 ; number of failures = 1982
## Standard error = 0.0022
## 95 % Confidence interval = ( 0.0056 , 0.0143 )
SE = 0.0022
Z_score = 1.96
ME = SE * Z_score
ME = 0.0022 * 1.96
0.0022 * 1.96
## [1] 0.004312
czr12 <- subset(atheism, nationality == "Czech Republic" & year == "2012")
inference (czr12$response, est = "proportion", type = "ci", method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.3 ; n = 1000
## Check conditions: number of successes = 300 ; number of failures = 700
## Standard error = 0.0145
## 95 % Confidence interval = ( 0.2716 , 0.3284 )
SE = 0.0145
Z_score = 1.96
ME = SE * Z_score
ME = 0.0145 * 1.96
0.0145 * 1.96
## [1] 0.02842
n <- 1000
p <- seq(0, 1, 0.01)
me <- 2 * sqrt(p * (1 - p)/n)
plot(me ~ p, ylab = "Margin of Error", xlab = "Population Proportion")
p <- 0.1
n <- 1040
p_hats <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1-p))
p_hats[i] <- sum(samp == "atheist")/n
}
hist(p_hats, main = "p = 0.1, n = 1040", xlim = c(0, 0.18))
summary(p_hats)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.07019 0.09327 0.09904 0.09969 0.10577 0.12981
sd(p_hats)
## [1] 0.009287382
boxplot(p_hats)
hist(p_hats, main = "p = 0.1, n = 1040", xlim = c(0, 0.18), probability = TRUE)
abline(v = mean(p_hats),col = "royalblue",lwd = 2)
abline(v = median(p_hats),
col = "red",
lwd = 2)
legend(x = "topright", # location of legend within plot area
c("Mean (0.0999)", "Median (0.1000)"),
col = c("royalblue", "red"),
lwd = c(2, 2))
m <- mean(p_hats)
std <- sd(p_hats)
curve(dnorm(x, mean=m, sd=std),
col="darkblue", lwd=2, add=TRUE, yaxt="n")
#to make 2 columns and 2 rows of histogram
par(mfrow = c(2, 2))
#first histogram
hist(p_hats, main = "p = 0.1, n = 1040", xlim = c(0, 0.18))
#second simulation
p <- 0.1
n <- 400
p_hats2 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1-p))
p_hats2[i] <- sum(samp == "atheist")/n
}
#second histogram
hist(p_hats2, main = "p = 0.1, n = 400", xlim = c(0, 0.18))
#third simulation
p <- 0.02
n <- 1040
p_hats3 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1-p))
p_hats3[i] <- sum(samp == "atheist")/n
}
#third histogram
hist(p_hats3, main = "p = 0.02, n = 1040", xlim = c(0, 0.18))
#fourth simulation
p <- 0.02
n <- 400
p_hats4 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1-p))
p_hats4[i] <- sum(samp == "atheist")/n
}
#fourth histogram
hist(p_hats4, main = "p = 0.02, n = 400", xlim = c(0, 0.18))
summary(p_hats)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.07019 0.09327 0.09904 0.09969 0.10577 0.12981
summary(p_hats2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.05250 0.09000 0.10000 0.09976 0.11000 0.15500
summary(p_hats3)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.005769 0.017308 0.020192 0.019954 0.023077 0.039423
summary(p_hats4)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.01500 0.02000 0.01988 0.02500 0.04750
#to make 2 columns and 2 row of histogram
par(mfrow = c(2, 2))
#fifth simulation
p <- 0.2
n <- 400
p_hats5 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1-p))
p_hats5[i] <- sum(samp == "atheist")/n
}
#fifth histogram
hist(p_hats5, main = "p = 0.2, n = 400", xlim = c(0, 0.18))
hist(p_hats5, main = "p = 0.2, n = 400", xlim = c(0, 0.30))
#sixth simulation
p <- 0.2
n <- 1040
p_hats6 <- rep(0, 5000)
for(i in 1:5000){
samp <- sample(c("atheist", "non_atheist"), n, replace = TRUE, prob = c(p, 1-p))
p_hats6[i] <- sum(samp == "atheist")/n
}
#sixth histogram
hist(p_hats6, main = "p = 0.2, n = 1040", xlim = c(0, 0.18))
hist(p_hats6, main = "p = 0.2, n = 400", xlim = c(0, 0.30))
par(mfrow = c(1,1))
0.02 * 400
## [1] 8
400 * (1 - 0.02)
## [1] 392
par(mfrow = c(1, 2))
spain05 <- subset(atheism,nationality == "Spain" & year == "2005")
spain05atheists <- subset(spain05,response == "atheist")
spain12 <- subset(atheism,nationality == "Spain" & year == "2012")
spain12atheists <- subset(spain12, response == "atheist")
inference(spain05$response, est = "proportion",type = "ci",method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.1003 ; n = 1146
## Check conditions: number of successes = 115 ; number of failures = 1031
## Standard error = 0.0089
## 95 % Confidence interval = ( 0.083 , 0.1177 )
inference(spain12$response, est = "proportion",type = "ci",method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.09 ; n = 1145
## Check conditions: number of successes = 103 ; number of failures = 1042
## Standard error = 0.0085
## 95 % Confidence interval = ( 0.0734 , 0.1065 )
ME = SE * Z_score
ME = 0.0089 * 1.96
0.0089 * 1.96
## [1] 0.017444
ME = SE * Z_score
ME = 0.0085 * 1.96
0.0085 * 1.96
## [1] 0.01666
p_spn05 = 0.1003
n_spn05 = 1146
p_spn12 = 0.09
n_spn12 = 1145
PE_spn = p_spn12 - p_spn05
SE_spn = sqrt((p_spn05*(1-p_spn05)/n_spn05)+(p_spn12*(1-p_spn12)/n_spn12))
SE_spn
## [1] 0.01225854
#Control interval for difference between proportion in 2005 and 2012
PE_spn + (1.96*SE_spn)
## [1] 0.01372674
PE_spn - (1.96*SE_spn)
## [1] -0.03432674
usa05 <- subset(atheism, nationality == "United States" & year == "2005")
usa12 <- subset(atheism, nationality == "United States" & year == "2012")
inference(usa05$response, est = "proportion", type = "ci", method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.01 ; n = 1002
## Check conditions: number of successes = 10 ; number of failures = 992
## Standard error = 0.0031
## 95 % Confidence interval = ( 0.0038 , 0.0161 )
inference(usa12$response, est = "proportion", type = "ci", method = "theoretical", success = "atheist")
## Single proportion -- success: atheist
## Summary statistics:
## p_hat = 0.0499 ; n = 1002
## Check conditions: number of successes = 50 ; number of failures = 952
## Standard error = 0.0069
## 95 % Confidence interval = ( 0.0364 , 0.0634 )
ME = SE * Z_score
ME = 0.0031 * 1.96
0.0031 * 1.96
## [1] 0.006076
ME = SE * Z_score
ME = 0.0069 * 1.96
0.0069 * 1.96
## [1] 0.013524
p_usa05 = 0.01
n_usa05 = 1002
p_usa12 = 0.05
n_usa12 = 1002
PE_usa = p_usa12 - p_usa05
SE_usa = sqrt(((p_usa05*(1-p_usa05))/n_usa05)+((p_usa12*(1-p_usa12))/n_usa12))
SE_usa
## [1] 0.007568714
#Control interval for difference between proportion in 2005 and 2012
PE_usa + (1.96*SE_usa)
## [1] 0.05483468
PE_usa - (1.96*SE_usa)
## [1] 0.02516532
#ME = 1.96 * SE #SE = sqrt((p(1-p))/n) #ME = z* sqrt( p(p-1)/n )
#ME / z* = sqrt( p(p-1)/n )
#(ME / z*)^2 = p(p-1)/n
#(ME / z*)^2 / (p(p-1)) = 1/n
p(p-1)
#n = ———– #(ME / z)^2 p(p-1) #n = ———– # (ME / z)^2
0.5(1-0.5)
#n = ———– (0.01 / 1.96)^2
0.5*(1-0.5)
## [1] 0.25
(0.01 / 1.96)^2
## [1] 2.603082e-05
0.25/2.603082e-05
## [1] 9604