# Open Intro
#1
# Build 50 approximate 95% confidence intervals for the mean of
# ames$Gr.Liv.Area from random samples of size 60, plot them, and compute the
# share of intervals that contain the true population mean.

# Fetch the workspace only when it is not already on disk; mode = "wb" keeps
# the binary .RData file intact on every platform.
if (!file.exists("ames.RData")) {
  download.file("http://www.openintro.org/stat/data/ames.RData",
                destfile = "ames.RData", mode = "wb")
}
load("ames.RData")
population <- ames$Gr.Liv.Area
set.seed(4)
n_samples <- 50
n <- 60
# Each column of `samples` is one random sample of size n from the population.
samples <- replicate(n_samples, sample(population, n))
samp_mean <- apply(samples, 2, mean) # one sample mean per sample
samp_sd <- apply(samples, 2, sd)     # one sample sd per sample
# 1.96 is the standard normal critical value for a 95% confidence level.
lower_vector <- samp_mean - 1.96 * samp_sd / sqrt(n)
upper_vector <- samp_mean + 1.96 * samp_sd / sqrt(n)
# NOTE(review): plot_ci() is not defined in this file; presumably it is
# provided by the loaded ames.RData workspace -- confirm.
plot_ci(lower_vector, upper_vector, mean(population))

# Proportion of intervals that contain the true population mean, computed from
# the intervals themselves rather than counted by hand from the plot.
pop_mean <- mean(population)
covered <- lower_vector <= pop_mean & upper_vector >= pop_mean
include <- mean(covered)
include
## [1] 0.98
# In the recorded run, one of the 50 intervals (the one highlighted in the
# plot) does not include the true population mean, so 0.98 is the proportion
# of intervals that capture it -- not the population mean itself. This is
# slightly above the 95% confidence level; with only 50 samples the observed
# proportion is expected to vary around 95% because each random sample
# estimates a little higher or lower than the true value.
#2
# Critical value for an 89% confidence level: the two tails hold 11% in total,
# so the upper quantile is taken at 1 - 0.11 / 2. Computed once and reused.
z_89 <- qnorm(1 - .11 / 2)
z_89
## [1] 1.598193
# The z score for a confidence level of 89% is 1.598193.
#3
# Rebuild the intervals from the same sample means and sds, now at 89%.
lower_vector <- samp_mean - z_89 * samp_sd / sqrt(n)
upper_vector <- samp_mean + z_89 * samp_sd / sqrt(n)
plot_ci(lower_vector, upper_vector, mean(population))

# Proportion of intervals that contain the true population mean, computed from
# the intervals instead of hard-coding the count read off the plot.
mean(lower_vector <= mean(population) & upper_vector >= mean(population))
## [1] 0.92
# In the recorded run, 92% of the intervals include the true population mean
# at the 89% confidence level.
#Milestone Data
#1
# 95% confidence interval for the proportion of Legendary Pokemon.
poke <- read.csv("Pokemon.csv")
table(poke$Legendary)
##
## False True
## 735 65
n <- 40
# CL = 95%
# Legendary is read in as text ("True"/"False"), not as a logical, so the
# original comparison `poke$Legendary == TRUE` matched no rows and produced
# p_hat = 0. Convert the column to logical explicitly before averaging;
# mean() of a logical vector is the proportion of TRUE values, which also
# removes the hard-coded row count of 800.
is_legendary <- as.logical(as.character(poke$Legendary))
p_hat <- mean(is_legendary)
# Success-failure check for the normal approximation.
n * p_hat
## [1] 3.25
n * (1 - p_hat)
## [1] 36.75
ci <- p_hat + c(-1, 1) * 1.96 * sqrt((p_hat * (1 - p_hat)) / n)
ci
# With the counts tabulated above (65 of 800), p_hat = 0.08125 and the interval
# is roughly -0.003 to 0.166. Because n * p_hat = 3.25 is below 10, the
# success-failure condition is not met and the normal approximation is
# unreliable here (the lower bound falls below 0). Read cautiously: we are
# about 95% confident that the true proportion of Legendary Pokemon is between
# 0 and about 0.17.
# NOTE(review): p_hat is computed from every row of the data set while the
# standard error uses n = 40. If a random sample of 40 was intended, draw it
# first and compute p_hat from that sample -- confirm against the assignment.
#2
# 95% confidence interval for the mean Attack, from one random sample of 40.
population <- poke[["Attack"]]
n <- 40
# CL = 95%
samp <- sample(population, size = n)
# Normal Q-Q plot with reference line, to eyeball the normality of the sample.
qqnorm(samp)
qqline(samp)

x_bar <- mean(samp)
sigma <- sd(samp) # sample standard deviation, used as the estimate of sigma
# Interval endpoints: sample mean minus / plus 1.96 standard errors.
conInt <- c(x_bar - 1.96 * sigma / sqrt(n), x_bar + 1.96 * sigma / sqrt(n))
conInt
## [1] 74.81796 90.68204
# We are 95% confident that the true population mean of Pokemon Attack is
# between 74.82 and 90.68.