# Fetch the Ames housing workspace and load its objects into the session.
# An .RData file is binary, so the transfer mode is set to "wb" explicitly
# rather than relying on download.file()'s platform-dependent default
# (a text-mode transfer can corrupt binary files on Windows).
download.file(
  "http://www.openintro.org/stat/data/ames.RData",
  destfile = "ames.RData",
  mode = "wb"
)
# Restore the saved objects into the workspace; the code that follows
# expects an object named `ames` to exist after this call.
load("ames.RData")
Take a random sample of size n = 60:
# Use the Gr.Liv.Area column of `ames` as the population of house sizes.
population <- ames[["Gr.Liv.Area"]]

# Draw one random sample of 60 values. No seed is set, so the printed
# results shown below come from one particular run.
samp <- sample(population, size = 60)
hist(samp, xlab = "House size", main = "Histogram of House Sizes in Ames, Iowa")

# Centre of the sample: median, then mean (the mean is kept for the interval).
median(samp)
## [1] 1456
sample_mean <- mean(samp)
sample_mean
## [1] 1519.467

# Interval: sample mean -/+ 1.96 standard errors, where the standard error
# is the sample standard deviation over the square root of the sample size.
se <- sd(samp) / sqrt(length(samp))
lower <- sample_mean - 1.96 * se
upper <- sample_mean + 1.96 * se
c(lower, upper)
## [1] 1405.916 1633.017

# Population mean, for comparison with the interval above.
mean(population)
## [1] 1499.69
Start by creating empty vectors:
# Size of each sample drawn from the population.
n <- 60

# Placeholders for 50 sample means and 50 sample standard deviations;
# every slot starts as NA until a value is stored in it.
samp_mean <- samp_sd <- rep(NA, times = 50)
Now for the loop, which draws 50 random samples of size n and records the mean and standard deviation of each:
# Repeat 50 times: draw a fresh sample of size n from the population and
# store its mean and standard deviation in slot i of the result vectors.
# `samp` is overwritten on every pass, so afterwards it holds the last draw.
for (i in seq_len(50)) {
  samp <- sample(population, size = n)
  samp_mean[i] <- mean(samp)
  samp_sd[i] <- sd(samp)
}
Finally, construct the confidence intervals:
# Interval bounds for every recorded sample at once (vectorised):
# sample mean plus/minus 1.96 standard errors, with the standard error
# computed as samp_sd / sqrt(n).
upper_vector <- samp_mean + (1.96 * samp_sd) / sqrt(n)
lower_vector <- samp_mean - (1.96 * samp_sd) / sqrt(n)

# Print all lower bounds followed by all upper bounds.
c(lower_vector, upper_vector)
## [1] 1424.514 1351.310 1396.667 1367.645 1403.375 1373.508 1292.319 1346.705
## [9] 1450.274 1379.100 1299.914 1434.638 1549.134 1466.914 1366.673 1368.764
## [17] 1375.325 1325.794 1358.073 1393.510 1444.535 1431.244 1429.687 1345.474
## [25] 1405.105 1415.175 1375.313 1295.793 1301.863 1380.670 1347.793 1444.591
## [33] 1294.120 1355.745 1432.832 1335.027 1283.136 1328.442 1341.111 1489.160
## [41] 1437.870 1476.534 1421.674 1409.941 1320.915 1292.525 1363.032 1296.496
## [49] 1283.813 1449.983 1703.853 1601.223 1655.000 1686.155 1592.525 1663.225
## [57] 1501.048 1551.995 1692.859 1626.833 1519.753 1674.562 1833.732 1714.153
## [65] 1576.794 1632.770 1641.542 1524.639 1599.893 1659.223 1709.898 1695.423
## [73] 1785.313 1555.693 1654.295 1656.158 1641.354 1522.741 1536.737 1626.464
## [81] 1604.141 1707.109 1543.314 1597.955 1717.768 1549.507 1526.831 1561.891
## [89] 1541.856 1774.407 1696.930 1741.066 1863.493 1646.159 1555.318 1506.342
## [97] 1615.968 1545.304 1514.920 1699.917

# Draw the intervals against the population mean. plot_ci() is not defined
# in this script. NOTE(review): presumably it is loaded from ames.RData; confirm.
plot_ci(lower_vector, upper_vector, mean(population))
# Standard normal quantiles used as interval multipliers.
# 97.5th percentile: leaves 2.5% in each tail (two-sided 95% interval).
qnorm(p = 0.975, mean = 0, sd = 1)
## [1] 1.959964
# 95th percentile: leaves 5% in each tail (two-sided 90% interval).
qnorm(p = 0.95, mean = 0, sd = 1)
## [1] 1.644854
# Same construction as the 1.96-based intervals, but with the narrower
# multiplier 1.645 (the 95th percentile of the standard normal, i.e. a
# two-sided 90% interval): sample mean plus/minus 1.645 standard errors.
upper_vector2 <- samp_mean + (1.645 * samp_sd) / sqrt(n)
lower_vector2 <- samp_mean - (1.645 * samp_sd) / sqrt(n)

# Print all lower bounds followed by all upper bounds.
c(lower_vector2, upper_vector2)
## [1] 1446.961 1371.392 1417.426 1393.240 1418.575 1396.789 1309.092 1363.202
## [9] 1469.768 1399.007 1317.579 1453.918 1572.004 1486.781 1383.558 1389.978
## [17] 1396.717 1341.773 1377.505 1414.862 1465.859 1452.473 1458.264 1362.367
## [25] 1425.129 1434.540 1396.691 1314.030 1320.737 1400.421 1368.392 1465.687
## [33] 1314.144 1375.208 1455.728 1352.262 1302.719 1347.201 1357.242 1512.082
## [41] 1458.687 1497.791 1457.177 1428.923 1339.751 1309.706 1383.358 1316.490
## [49] 1302.384 1470.067 1681.406 1581.141 1634.241 1660.560 1577.325 1639.944
## [57] 1484.275 1535.498 1673.366 1606.926 1502.087 1655.282 1810.863 1694.286
## [65] 1559.909 1611.555 1620.149 1508.661 1580.461 1637.871 1688.574 1674.194
## [73] 1756.736 1538.800 1634.271 1636.794 1619.975 1504.504 1517.863 1606.712
## [81] 1583.541 1686.013 1523.289 1578.492 1694.872 1532.272 1507.248 1543.132
## [89] 1525.725 1751.485 1676.113 1719.809 1827.989 1627.177 1536.482 1489.160
## [97] 1595.642 1525.310 1496.349 1679.833

# Draw these intervals against the population mean. plot_ci() is not defined
# in this script. NOTE(review): presumably it is loaded from ames.RData; confirm.
plot_ci(lower_vector2, upper_vector2, mean(population))