# Load the Ames housing workspace; the `ames` object used below is expected
# to come from this file.
load("more/ames.RData")

# Pull the two columns studied in this lab out as plain vectors.
area <- ames[["Gr.Liv.Area"]]
price <- ames[["SalePrice"]]

# Numeric and graphical summary of the full population of living areas.
summary(area)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 334 1126 1442 1500 1743 5642
hist(area)
This population data is unimodal and skewed to the right.
# Draw one random sample of 50 living areas (without replacement) and
# summarise it the same way as the population.
samp1 <- sample(x = area, size = 50, replace = FALSE)
summary(object = samp1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 848 1187 1429 1460 1616 2522
hist(samp1)
The distribution of the sample varies from draw to draw: sometimes it is unimodal, sometimes bimodal. It is consistently right-skewed, however, and it sometimes comes pretty close to the population distribution.
# A second, independent sample of 50 for comparison with samp1.
samp2 <- sample(x = area, size = 50, replace = FALSE)
hist(samp2)
mean(samp1)
## [1] 1459.9
mean(samp2)
## [1] 1532.3

# A much larger sample (1000 observations) to see how the estimate behaves
# as the sample size grows.
samp3 <- sample(x = area, size = 1000, replace = FALSE)
hist(samp3)
mean(samp3)
## [1] 1511.559

# Population mean, for reference against the three sample means above.
mean(area)
## [1] 1499.69
The means of sample 1 and sample 2 are pretty close, but sometimes one is greater than the other because each sample is drawn at random. I think the sample of size 1,000 would provide a more accurate estimate of the population mean, but it may also vary because it is still a random selection from the population.
# Sampling distribution of the mean: take 5000 independent samples of size 50
# from `area` and keep only each sample's mean.
sample_means50 <- vapply(
  seq_len(5000),
  function(draw) mean(sample(area, size = 50)),
  numeric(1)
)
hist(sample_means50)
length(sample_means50)
## [1] 5000
summary(sample_means50)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1291 1452 1496 1499 1544 1885
# Same simulation with ten times as many repetitions: 50000 sample means,
# each still computed from a sample of size 50.
sample_means50_a <- vapply(
  seq_len(50000),
  function(draw) mean(sample(area, size = 50)),
  numeric(1)
)
hist(sample_means50_a)
length(sample_means50_a)
## [1] 50000
summary(sample_means50_a)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1239 1452 1498 1500 1546 1864
sample_means50 contains 5,000 elements, one mean for each sample of size 50. The center of the distribution is close to 1500, and I do not expect the distribution to change noticeably if we collected 50,000 sample means instead.
# A small version of the simulation: only 100 sample means (samples of
# size 50), few enough to print and inspect directly.
sample_means_small <- vapply(
  seq_len(100),
  function(draw) mean(sample(area, size = 50)),
  numeric(1)
)
sample_means_small
## [1] 1508.94 1676.46 1434.06 1692.28 1506.04 1425.04 1483.60 1333.28
## [9] 1445.00 1397.16 1472.88 1549.58 1440.58 1507.10 1474.96 1579.32
## [17] 1484.16 1586.44 1606.06 1476.98 1501.82 1553.12 1545.10 1272.40
## [25] 1637.56 1481.98 1510.40 1457.36 1450.46 1460.62 1500.20 1497.20
## [33] 1480.46 1503.52 1535.56 1521.98 1583.00 1354.18 1417.96 1454.10
## [41] 1442.80 1464.92 1606.00 1463.42 1660.50 1496.38 1562.28 1493.98
## [49] 1514.78 1548.34 1434.18 1482.94 1681.24 1422.42 1507.52 1490.34
## [57] 1395.72 1624.94 1486.02 1326.86 1481.58 1457.46 1520.78 1469.44
## [65] 1377.20 1384.80 1532.08 1512.82 1449.24 1628.68 1485.06 1493.12
## [73] 1358.94 1502.62 1542.14 1463.78 1383.26 1572.48 1495.26 1477.94
## [81] 1399.20 1506.10 1520.58 1405.72 1479.60 1543.74 1464.54 1482.98
## [89] 1478.32 1498.26 1481.70 1529.84 1560.94 1559.82 1487.08 1491.48
## [97] 1390.48 1521.62 1445.10 1519.82
There are 100 elements in sample_means_small. Each element is the mean of one random sample of 50 values drawn from area, one for each of the 100 loop iterations.
# Re-plot the size-50 sampling distribution before building the others.
hist(sample_means50)

# Build sampling distributions for samples of size 10 and size 100.
# Both draws happen inside one loop, size 10 first, so the order in which
# random numbers are consumed is: 10, 100, 10, 100, ...
sample_means10 <- numeric(5000)
sample_means100 <- numeric(5000)
for (draw in seq_len(5000)) {
  sample_means10[draw] <- mean(sample(area, size = 10))
  sample_means100[draw] <- mean(sample(area, size = 100))
}
# Stack the three sampling distributions (n = 10, 50, 100) in one column so
# their spreads can be compared directly.
# par() returns the previous settings; keep them so the 3-row layout does not
# leak into every plot drawn later in the script.
old_par <- par(mfrow = c(3, 1))

# Use one common x-axis, taken from sample_means10, for all three panels.
xlimits <- range(sample_means10)
hist(sample_means10, breaks = 20, xlim = xlimits)
hist(sample_means50, breaks = 20, xlim = xlimits)
hist(sample_means100, breaks = 20, xlim = xlimits)

# Restore the layout that was active before this comparison.
par(old_par)
As the sample size increases, the center stays close to the population mean and the spread gets narrower.
# One random sample of 50 sale prices; its mean is a point estimate of the
# population mean price. (Despite its name, this object holds the raw
# sample, not a vector of means.)
sample_meansprice50 <- sample(x = price, size = 50, replace = FALSE)
mean(sample_meansprice50)
## [1] 179060.5

# Sampling distribution of the mean price: 5000 samples of size 50.
# This assigns to sample_means50, overwriting any earlier object of that name.
sample_means50 <- vapply(
  seq_len(5000),
  function(draw) mean(sample(price, size = 50)),
  numeric(1)
)
hist(sample_means50, breaks = 25)
summary(sample_means50)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 146700 172900 180300 180700 187800 226400
mean(sample_means50)
## [1] 180693.8

# Population mean price, for comparison with the centre of the distribution.
mean(price)
## [1] 180796.1
# Sampling distribution of the mean sale price for samples of size 150:
# 5000 random samples from `price`, keeping each sample's mean.
# Each draw uses 150 observations so the simulation matches the vector's name
# and differs from the size-50 simulation of sale prices.
sample_means150 <- numeric(5000)
for (i in seq_along(sample_means150)) {
  samp <- sample(price, 150)
  sample_means150[i] <- mean(samp)
}
hist(sample_means150, breaks = 25)
summary(sample_means150)
# NOTE(review): the summary and mean recorded below were produced by a run
# that drew only 50 observations per sample; re-run to refresh them. With
# samples of size 150 the spread should be visibly narrower.
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 147100 173200 180300 180800 188000 239300
mean(sample_means150)
## [1] 180797.9

# Population mean price, for comparison.
mean(price)
## [1] 180796.1