#Openintro
# Fetch the OpenIntro Ames housing data only when no local copy exists, so
# re-running the script does not hit the network every time.
# mode = "wb" writes the .RData file as binary so it stays loadable on
# platforms that distinguish text and binary transfers.
ames_file <- "ames.RData"
if (!file.exists(ames_file)) {
  download.file("http://www.openintro.org/stat/data/ames.RData",
                destfile = ames_file, mode = "wb")
}
# load() restores the objects stored in the file into the workspace
# (the code below reads a data frame called `ames`).
load(ames_file)
#1
# Pull the living-area and sale-price columns out of the Ames data frame.
area <- ames[["Gr.Liv.Area"]]
price <- ames[["SalePrice"]]
# Draw one reproducible random sample of 50 sale prices (without
# replacement) and show its mean.
set.seed(50)
sampleP <- sample(price, size = 50)
mean(sampleP)
## [1] 171767.1
#the best point estimate of the population mean is the sample mean, 171767
#2
# Sampling distribution for n = 50: compute 5000 sample means, each from a
# fresh random sample of 50 sale prices. The seed makes the run repeatable.
set.seed(5000)
sample_means50 <- vapply(
  seq_len(5000),
  function(draw) mean(sample(price, size = 50)),
  numeric(1)
)
# Plot the distribution of the sample means, then show its center.
hist(sample_means50)

mean(sample_means50)
## [1] 180598.2
#the sampling distribution is symmetric.
#guess for the mean of the population: around 180000
#estimated mean of the population (mean of the sample means) = 180598
#3
# Sampling distribution for n = 150: compute 5000 sample means, each from a
# fresh random sample of 150 sale prices. The seed makes the run repeatable.
set.seed(5000)
sample_means150 <- vapply(
  seq_len(5000),
  function(draw) mean(sample(price, size = 150)),
  numeric(1)
)
# Plot the distribution of the sample means, then show its center.
hist(sample_means150)

mean(sample_means150)
## [1] 180687.5
# it looks about the same as the one before
#guess = 180000
#estimated mean of the population (mean of the sample means) = 180687
#4
# Draw both sampling distributions on one shared x-axis so their spreads
# can be compared directly. The limits must span BOTH vectors: taking them
# from sample_means150 alone would cut off any sample_means50 values that
# fall outside that range and hide the extra spread of the n = 50 histogram.
xlimits <- range(sample_means50, sample_means150)
hist(sample_means50, breaks = 20, xlim = xlimits)

hist(sample_means150, breaks = 20, xlim = xlimits)

#the sampling distribution with a sample size of 150 has a smaller spread, and we would prefer the one with the smaller spread.
#Milestone
# Read the Pokemon data set from the working directory.
poke <- read.csv("Pokemon.csv")
#1
# 100 sample means of Speed, each from 5 values drawn with replacement.
set.seed(100)
Speed_mean_5 <- vapply(
  seq_len(100),
  function(draw) mean(sample(poke$Speed, size = 5, replace = TRUE)),
  numeric(1)
)
# Plot the distribution of the sample means, then show its center.
hist(Speed_mean_5)

mean(Speed_mean_5)
## [1] 68.712
#2 with n=25
# 100 sample means of Speed, each from 25 values drawn with replacement.
set.seed(100)
Speed_mean_25 <- vapply(
  seq_len(100),
  function(draw) mean(sample(poke$Speed, size = 25, replace = TRUE)),
  numeric(1)
)
# Plot the distribution of the sample means, then show its center.
hist(Speed_mean_25)

mean(Speed_mean_25)
## [1] 68.7196
#3 n=100
# 100 sample means of Speed, each from 100 values drawn with replacement.
set.seed(100)
Speed_mean_100 <- vapply(
  seq_len(100),
  function(draw) mean(sample(poke$Speed, size = 100, replace = TRUE)),
  numeric(1)
)
# Plot the distribution of the sample means, then show its center.
hist(Speed_mean_100)

mean(Speed_mean_100)
## [1] 68.6022
#the centers of all the distributions are about the same, but a larger n makes the distribution less spread out.
#4 n=5
# 100 sample standard deviations of Speed, each from 5 values drawn with
# replacement.
set.seed(100)
Speed_SD_5 <- vapply(
  seq_len(100),
  function(draw) sd(sample(poke$Speed, size = 5, replace = TRUE)),
  numeric(1)
)
# Plot the distribution of the sample SDs, then show its center.
hist(Speed_SD_5)

mean(Speed_SD_5)
## [1] 28.17653
#5 n=25
# 100 sample standard deviations of Speed, each from 25 values drawn with
# replacement.
set.seed(100)
Speed_SD_25 <- vapply(
  seq_len(100),
  function(draw) sd(sample(poke$Speed, size = 25, replace = TRUE)),
  numeric(1)
)
# Plot the distribution of the sample SDs, then show its center.
hist(Speed_SD_25)

mean(Speed_SD_25)
## [1] 29.13027
#n=100
# 100 sample standard deviations of Speed, each from 100 values drawn with
# replacement.
set.seed(100)
Speed_SDs100 <- vapply(
  seq_len(100),
  function(draw) sd(sample(poke$Speed, size = 100, replace = TRUE)),
  numeric(1)
)
# Plot the distribution of the sample SDs, then show its center.
hist(Speed_SDs100)

mean(Speed_SDs100)
## [1] 29.0316
#6
# there is a difference in where their centers are located; with a smaller n we get a wider spread.
#Documentation: I received help from C2C Sermrsripong and C2C Khan on all labs; we worked through them together as we are in the same milestone group. Whenever I got stuck on a question, I asked for their help.