#Open Intro
#1
# Download the OpenIntro atheism workspace and load it into the session.
# One path variable is used for both steps: the script previously saved the
# file as "atheism.RData" but loaded "atheism.Rdata", which fails on
# case-sensitive file systems.
atheism_file <- "atheism.RData"
download.file(
  "http://www.openintro.org/stat/data/atheism.RData",
  destfile = atheism_file,
  mode = "wb"  # .RData is a binary format; avoid a text-mode transfer
)
load(atheism_file)

# Rows for Spain in each of the two survey years.
spain05 <- subset(atheism, nationality == "Spain" & year == "2005")
spain12 <- subset(atheism, nationality == "Spain" & year == "2012")

# Theoretical confidence interval for the proportion of "atheist" responses
# among the Spain 2005 rows.
inference(spain05$response, est = "proportion", success = "atheist",
          type = "ci", method = "theoretical")
## Warning: package 'BHH2' was built under R version 3.6.3
## Single proportion -- success: atheist
## Summary statistics:

## p_hat = 0.1003 ; n = 1146
## Check conditions: number of successes = 115 ; number of failures = 1031
## Standard error = 0.0089
## 95 % Confidence interval = ( 0.083 , 0.1177 )
# Theoretical confidence interval for the proportion of "atheist" responses
# among the Spain 2012 rows.
inference(
  spain12$response,
  type = "ci",
  method = "theoretical",
  est = "proportion",
  success = "atheist"
)
## Single proportion -- success: atheist
## Summary statistics:

## p_hat = 0.09 ; n = 1145
## Check conditions: number of successes = 103 ; number of failures = 1042
## Standard error = 0.0085
## 95 % Confidence interval = ( 0.0734 , 0.1065 )
#a
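#H0: p_05 = p_12 (the population proportion of atheists in Spain is the same in 2005 and 2012)
#HA: p_05 != p_12
# We fail to reject the null because the 2005 interval (0.083, 0.1177) and the 2012 interval (0.0734, 0.1065) overlap, so there is no convincing evidence of a change.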
# United States rows, split by survey year.
us05 <- subset(atheism, year == "2005" & nationality == "United States")
us12 <- subset(atheism, year == "2012" & nationality == "United States")

# Theoretical confidence interval for the proportion of "atheist" responses
# among the United States 2005 rows.
inference(
  us05$response,
  type = "ci",
  method = "theoretical",
  est = "proportion",
  success = "atheist"
)
## Single proportion -- success: atheist
## Summary statistics:

## p_hat = 0.01 ; n = 1002
## Check conditions: number of successes = 10 ; number of failures = 992
## Standard error = 0.0031
## 95 % Confidence interval = ( 0.0038 , 0.0161 )
# Theoretical confidence interval for the proportion of "atheist" responses
# among the United States 2012 rows.
inference(
  us12$response,
  type = "ci",
  method = "theoretical",
  est = "proportion",
  success = "atheist"
)
## Single proportion -- success: atheist
## Summary statistics:

## p_hat = 0.0499 ; n = 1002
## Check conditions: number of successes = 50 ; number of failures = 952
## Standard error = 0.0069
## 95 % Confidence interval = ( 0.0364 , 0.0634 )
#b.
#H0: p_05 = p_12 (the population proportion of atheists in the United States is the same in 2005 and 2012)
#HA: p_05 != p_12
#We reject the null because the 2012 interval (0.0364, 0.0634) does not overlap the 2005 interval (0.0038, 0.0161).
#2
# Number of countries (39) times the significance level (0.05).
39 * 0.05
## [1] 1.95
#alpha is 0.05; multiplied by the number of countries (39) this gives 1.95, i.e. about 2 countries.
#3
# The z-score for a 95% confidence level is 1.96; we then solve for n, the number of people to sample.
# Setting p = 0.5 (the most conservative choice) in MoE = z * sqrt(p * q / n) and solving for n gives
# n = z^2 * p * q / MoE^2 = 1.96^2 * 0.25 / 0.01^2 for a margin of error of 0.01.
# The answer for n is 9604 people.
# Milestone
#1
# Read the Pokemon data set (Pokemon.csv is looked up in the working directory).
poke <- read.csv("Pokemon.csv")
# Sample size for the one-proportion test.
n <- 40
# Confidence level: 95%
# H0: p = 0.5  (half of all pokemon are legendary)
# HA: p < 0.5  (fewer than half of all pokemon are legendary)
# Random draw of n values from the Legendary column.
samp <- sample(poke$Legendary, size = n)
#samp_mean <- mean(samp)
#phat <- samp_mean/n
#independent
#n*phat
#n*(1-phat)
#failed number of successes
#inference(y=as.factor(samp), est = "proportion", success = "TRUE", type = "ht", alternative = "less", method = "theoretical", null=0.5)
#We reject the null hypothesis because the p-value is less than alpha (0.05); at the 5% significance level there is evidence that fewer than half of pokemon are legendary.
#2
#CL = 95%
#H0: pw = pn #proportion of water pokemon= proportion of normal pokemon
#HA: pw != pn
# Sample size for the Water vs. Normal comparison.
n <- 40
# Random draw of n primary types; observations are treated as independent.
sampPoke <- sample(poke$Type.1, size = n)

# Sample proportion of Water types, followed by the success and failure counts.
pw <- sum(sampPoke == "Water") / n
n * pw
## [1] 6
n * (1 - pw)  # check failed on the number of successes
## [1] 34

# Sample proportion of Normal types, followed by the success and failure counts.
pn <- sum(sampPoke == "Normal") / n
n * pn
## [1] 7
n * (1 - pn)  # check failed on the number of successes
## [1] 33
# Two-proportion test of H0: pw = pn on the sample drawn above.
#
# The earlier version built `a` as
#   table(poke$Type.1 == "Water", poke$Type.1 == "Normal")
# over the whole data set. Those two indicators are mutually exclusive, so the
# table had an empty cell and prop.test() compared the share of non-Normal
# pokemon among non-Water pokemon with the share among Water pokemon
# (0.8576 vs. 1.0000) rather than pw with pn.
#
# prop.test() accepts a two-column matrix of successes and failures with one
# row per group, so `a` is now built that way from the sample counts.
water_count <- sum(sampPoke == "Water")
normal_count <- sum(sampPoke == "Normal")
a <- rbind(
  Water = c(success = water_count, failure = n - water_count),
  Normal = c(success = normal_count, failure = n - normal_count)
)
a
prop.test(a, correct = FALSE)
# For the sample recorded above (6 Water and 7 Normal out of n = 40) the sample
# proportions are 0.150 and 0.175, giving X-squared of about 0.092 (df = 1) and
# a p-value of about 0.76. The sample is random, so a fresh run will give
# different numbers.
# NOTE(review): both counts come from the same sample, so the two groups are
# not independent, and each group has fewer than 10 successes; treat the
# result as approximate.
# We fail to reject the null hypothesis because the p-value is greater than
# alpha (0.05): at the 5% significance level there is no convincing evidence
# that the proportion of water pokemon differs from the proportion of normal
# pokemon.