download.file("http://www.openintro.org/stat/data/ames.RData", destfile = "ames.RData")
load("ames.RData")
area <- ames$Gr.Liv.Area
price <- ames$SalePrice
summary(area)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 334 1126 1442 1500 1743 5642
hist(area)
### Answer: Residential homes that were sold in Ames, Iowa between the years 2006 and 2010 had a mean of 1500 square feet in their above ground living areas. The distribution of areas is fairly bell-shaped, but is somewhat right skewed.
samp1 <- sample(area, 50)
summary(samp1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 759 1040 1282 1377 1731 3078
hist(samp1)
samp2 <- sample(area, 50)
summary(samp2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 672 1057 1388 1409 1668 2574
hist(samp2)
## Exercise 4: How many elements are there in sample_means50? Describe the sampling distribution, and be sure to specifically note its center. Would you expect the distribution to change if we instead collected 50,000 sample means?
sample_means_50 <- rep(NA, 5000)
for(i in 1:5000){
samp <- sample(area, 50)
sample_means_50[i] <- mean(samp)
}
hist(sample_means_50, breaks = 25)
summary(sample_means_50)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1274 1449 1498 1499 1546 1777
sample_means_small <- rep(NA, 100)
for(i in 1:100){
samp <- sample(area, 50)
sample_means_small[i] <- mean(samp)
print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
## [1] 32
## [1] 33
## [1] 34
## [1] 35
## [1] 36
## [1] 37
## [1] 38
## [1] 39
## [1] 40
## [1] 41
## [1] 42
## [1] 43
## [1] 44
## [1] 45
## [1] 46
## [1] 47
## [1] 48
## [1] 49
## [1] 50
## [1] 51
## [1] 52
## [1] 53
## [1] 54
## [1] 55
## [1] 56
## [1] 57
## [1] 58
## [1] 59
## [1] 60
## [1] 61
## [1] 62
## [1] 63
## [1] 64
## [1] 65
## [1] 66
## [1] 67
## [1] 68
## [1] 69
## [1] 70
## [1] 71
## [1] 72
## [1] 73
## [1] 74
## [1] 75
## [1] 76
## [1] 77
## [1] 78
## [1] 79
## [1] 80
## [1] 81
## [1] 82
## [1] 83
## [1] 84
## [1] 85
## [1] 86
## [1] 87
## [1] 88
## [1] 89
## [1] 90
## [1] 91
## [1] 92
## [1] 93
## [1] 94
## [1] 95
## [1] 96
## [1] 97
## [1] 98
## [1] 99
## [1] 100
hist(sample_means_50)
sample_means10 <- rep(NA, 5000)
sample_means100 <- rep(NA, 5000)
for(i in 1:5000){
samp <- sample(area, 10)
sample_means10[i] <- mean(samp)
samp <- sample(area, 100)
sample_means100[i] <- mean(samp)
}
par(mfrow = c(3, 1))
xlimits <- range(sample_means10)
hist(sample_means10, breaks = 20, xlim = xlimits)
hist(sample_means_50, breaks = 20, xlim = xlimits)
hist(sample_means100, breaks = 20, xlim = xlimits)
## On Your Own 1: Take a random sample of size 50 from price. Using this sample, what is your best point estimate of the population mean?
samp3 <- sample(price, 50)
summary(samp3)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 60000 119475 166300 166213 209500 309000
hist(samp3)
sample_means50 <- rep(NA, 5000)
for(i in 1:5000){
samp <- sample(price, 50)
sample_means50[i] <- mean(samp)
}
hist(sample_means50)
summary(price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12789 129500 160000 180796 213500 755000
sample_means150 <- rep(NA, 5000)
for(i in 1:5000){
samp <- sample(price, 150)
sample_means150[i] <- mean(samp)
}
hist(sample_means150)
summary(price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12789 129500 160000 180796 213500 755000