# lab5zzzz.R

#Openintro
# Fetch the OpenIntro Ames housing data only when no local copy exists, so
# re-running the script works offline and does not re-download the file.
# mode = "wb" requests a binary transfer, which an .RData file requires.
if (!file.exists("ames.RData")) {
  download.file("http://www.openintro.org/stat/data/ames.RData",
                destfile = "ames.RData", mode = "wb")
}
# load() restores the saved objects into the workspace; the code that
# follows expects this to provide the `ames` data frame.
load("ames.RData")
#1
# Extract the Gr.Liv.Area and SalePrice columns of `ames` as plain vectors.
area <- ames[["Gr.Liv.Area"]]
price <- ames[["SalePrice"]]

# Draw one reproducible random sample of 50 sale prices (without
# replacement) and report its mean as the point estimate.
set.seed(50)
sampleP <- sample(price, size = 50)
mean(sampleP)

## [1] 171767.1

#best point estimate of population mean is 171767

#2
# Simulate the sampling distribution of the mean for n = 50: draw 5000
# samples of 50 prices each (seeded for reproducibility) and keep every
# sample's mean. The draws happen in the same order as an explicit loop.
set.seed(5000)
sample_means50 <- vapply(
  seq_len(5000),
  function(draw) mean(sample(price, 50)),
  numeric(1)
)

# Shape of the simulated sampling distribution
hist(sample_means50)

# Center of the simulated sampling distribution
mean(sample_means50)

## [1] 180598.2

#the sampling distribution is symmetric.
#guess for the mean of population around 180000
#Mean of population = 180598

#3
# Same simulation with a larger sample size: 5000 samples of 150 prices
# each, keeping the mean of every sample. The seed is reset first so the
# run is reproducible.
set.seed(5000)
sample_means150 <- vapply(
  seq_len(5000),
  function(draw) mean(sample(price, 150)),
  numeric(1)
)

# Shape of the simulated sampling distribution
hist(sample_means150)

# Center of the simulated sampling distribution
mean(sample_means150)

## [1] 180687.5

# it looks about the same as the one before
#guess =180000
#mean of population =180687

#4
# Plot both sampling distributions on one shared x-axis so their spreads
# can be compared directly. The limits must span BOTH vectors: taking them
# from sample_means150 alone (the narrower distribution) would cut off the
# tails of the n = 50 histogram.
xlimits <- range(sample_means50, sample_means150)
hist(sample_means50, breaks = 20, xlim = xlimits)

hist(sample_means150, breaks = 20, xlim = xlimits)

#the one with a sample size of 150 has a smaller spread, and we would prefer the one with the smaller spread.

#Milestone
# Read the Pokemon data from the working directory.
poke <- read.csv("Pokemon.csv")
#1
# Sampling distribution of the mean Speed for n = 5: 100 samples drawn
# with replacement, keeping the mean of each one.
set.seed(100)
Speed_mean_5 <- vapply(
  seq_len(100),
  function(draw) mean(sample(poke$Speed, 5, replace = TRUE)),
  numeric(1)
)
hist(Speed_mean_5)

mean(Speed_mean_5)

## [1] 68.712

#2 with n=25
# Sampling distribution of the mean Speed for n = 25: 100 samples drawn
# with replacement, keeping the mean of each one.
set.seed(100)
Speed_mean_25 <- vapply(
  seq_len(100),
  function(draw) mean(sample(poke$Speed, 25, replace = TRUE)),
  numeric(1)
)
hist(Speed_mean_25)

mean(Speed_mean_25)

## [1] 68.7196

#3 n=100
# Sampling distribution of the mean Speed for n = 100: 100 samples drawn
# with replacement, keeping the mean of each one.
set.seed(100)
Speed_mean_100 <- vapply(
  seq_len(100),
  function(draw) mean(sample(poke$Speed, 100, replace = TRUE)),
  numeric(1)
)
hist(Speed_mean_100)

mean(Speed_mean_100)

## [1] 68.6022

#the centers of all distributions are about the same, but a larger n makes them less spread out.

#4 n=5
# Sampling distribution of the standard deviation of Speed for n = 5:
# 100 samples drawn with replacement, keeping sd() of each one.
set.seed(100)
Speed_SD_5 <- vapply(
  seq_len(100),
  function(draw) sd(sample(poke$Speed, 5, replace = TRUE)),
  numeric(1)
)
hist(Speed_SD_5)

mean(Speed_SD_5)

## [1] 28.17653

#5 n=25
# Sampling distribution of the standard deviation of Speed for n = 25:
# 100 samples drawn with replacement, keeping sd() of each one.
set.seed(100)
Speed_SD_25 <- vapply(
  seq_len(100),
  function(draw) sd(sample(poke$Speed, 25, replace = TRUE)),
  numeric(1)
)
hist(Speed_SD_25)

mean(Speed_SD_25)

## [1] 29.13027

#n=100
# Sampling distribution of the standard deviation of Speed for n = 100:
# 100 samples drawn with replacement, keeping sd() of each one.
set.seed(100)
Speed_SDs100 <- vapply(
  seq_len(100),
  function(draw) sd(sample(poke$Speed, 100, replace = TRUE)),
  numeric(1)
)
hist(Speed_SDs100)

mean(Speed_SDs100)

## [1] 29.0316

#6
# there is a difference in where their centers are located; with a smaller n we get a wider spread.

#Documentation: I received help from C2C Sermrsripong and C2C Khan on all labs; we worked through them together, as we are in the same milestone group. Whenever I got stuck on a question, I asked for their help.

# lab5zzzz.R

# C21Zhivko.Kolevski.m

# 2020-04-05