library(haven)
library(xts)
# Load the data set for Part 1 (file col_us_2016.csv). The code below reads
# its X1BR_apt, Utilities and Beer columns.
# NOTE(review): the path is absolute and user-specific, so this only runs on
# the author's machine -- consider a project-relative path.
col_data <- read.csv("/Users/iancopeland/Documents/R/stats_1/datasets/col_us_2016.csv")
Homework 3
Part 1
1.1 1-bedroom apartments
# 1-bedroom rent: z-scores for three cities, standardized against the mean and
# standard deviation of the X1BR_apt column.
# The spread is stored as `sd_apt`, not `sd`, so the name of the base function
# sd() is not masked by a numeric variable.
m_apt <- mean(col_data$X1BR_apt)
sd_apt <- sd(col_data$X1BR_apt)
(1475.00 - m_apt) / sd_apt  # Charleston:   0.6213872
(806.25 - m_apt) / sd_apt   # Chattanooga: -0.7378202
(894.30 - m_apt) / sd_apt   # San Antonio: -0.5588621
| City | z-score | Summary |
|---|---|---|
| Charleston | 0.62 | Charleston’s mean cost of rent for a 1 bedroom apartment is more expensive than 73.2% of US cities. |
| Chattanooga | -0.74 | Chattanooga’s mean cost of rent for a 1 bedroom apartment is cheaper than 77% of US cities. |
| San Antonio | -0.56 | San Antonio’s mean cost of rent for a 1 bedroom apartment is cheaper than 71.2% of US cities. |
1.2 Utility cost
# Utilities: z-scores for the same three cities, standardized against the
# mean and standard deviation of the Utilities column.
m1 <- mean(col_data[["Utilities"]])
sd1 <- sd(col_data[["Utilities"]])
(189.06 - m1) / sd1  # Charleston:   0.9358781
(122.11 - m1) / sd1  # Chattanooga: -0.8163078
(135.46 - m1) / sd1  # San Antonio: -0.4669175
| City | z-score | Summary |
|---|---|---|
| Charleston | 0.94 | Charleston’s mean cost of utilities is more expensive than 82.6% of US cities. |
| Chattanooga | -0.82 | Chattanooga’s mean cost of utilities is cheaper than 79.3% of US cities. |
| San Antonio | -0.47 | San Antonio’s mean cost of utilities is cheaper than 68% of US cities. |
1.3 Beer
# Beer: z-scores for the same three cities, standardized against the mean and
# standard deviation of the Beer column.
m2 <- mean(col_data[["Beer"]])
sd2 <- sd(col_data[["Beer"]])
(1.65 - m2) / sd2  # Charleston:  -0.5420667
(2.67 - m2) / sd2  # Chattanooga:  1.328618
(1.98 - m2) / sd2  # San Antonio:  0.06315483
| City | z-score | Summary |
|---|---|---|
| Charleston | -0.54 | Charleston’s beer prices are on average cheaper than 70.5% of US cities. |
| Chattanooga | 1.33 | Chattanooga’s beer prices are on average more expensive than 90.8% of US cities. |
| San Antonio | 0.06 | San Antonio’s beer prices are on average more expensive than 52.3% of US cities. |
Part 2
# Load the data set for Part 2 (file grad_rates_sav.sav) with read_sav().
# The code below reads its gradrate column.
# NOTE(review): the path is absolute and user-specific, so this only runs on
# the author's machine -- consider a project-relative path.
grad_data <- read_sav("/Users/iancopeland/Documents/R/stats_1/datasets/grad_rates_sav.sav")
2.1 Mean graduation rate
# Mean and standard deviation of the gradrate column, used below as the
# population parameters.
# Note: sd() divides by n - 1 (the sample standard deviation).
m_x <- mean(grad_data$gradrate)
sd_x <- sd(grad_data$gradrate)
print(m_x)   #> [1] 48.91702
print(sd_x)  #> [1] 19.94411
2.2 20 samples
# Simulate n = 20 samples of size 30 and keep the mean of each one.
# NOTE(review): each sample is drawn with rnorm() from a normal model using the
# rounded parameters 48.9 / 19.9, not resampled from grad_data$gradrate --
# confirm that this is what the assignment asks for.
set.seed(0)  # fixed seed so the printed means below are reproducible
n <- 20
# One mean per sample, so the result has exactly n elements. Building the
# vector with rep(grad_data$gradrate, n) instead would leave the raw
# graduation rates behind the first n entries and distort any later summary
# or plot of sample_means.
sample_means <- vapply(
  seq_len(n),
  function(i) mean(rnorm(30, mean = 48.9, sd = 19.9)),
  numeric(1)
)
print(sample_means)
#>  [1] 49.33682 48.38715 48.70933 49.53925 43.03658 50.92297 52.27180 50.59589
#>  [9] 48.55560 50.58571 48.56179 50.28900 48.77118 51.61966 44.80247 47.64498
#> [17] 50.05546 46.45337 49.14148 42.94104
2.3 Sample means
| Sample Number | Mean |
|---|---|
| 1 | 49.34 |
| 2 | 48.39 |
| 3 | 48.71 |
| 4 | 49.54 |
| 5 | 43.04 |
| 6 | 50.92 |
| 7 | 52.27 |
| 8 | 50.6 |
| 9 | 48.56 |
| 10 | 50.59 |
| 11 | 48.56 |
| 12 | 50.29 |
| 13 | 48.77 |
| 14 | 51.62 |
| 15 | 44.8 |
| 16 | 47.64 |
| 17 | 50.06 |
| 18 | 46.45 |
| 19 | 49.14 |
| 20 | 42.94 |
# Mean of the n sample means. The values are taken straight from sample_means
# rather than retyped by hand, so all n of them are included at full precision.
# Only the first n entries are used, in case sample_means holds more than n
# elements.
means <- sample_means[seq_len(n)]
m_sample <- mean(means)
print(m_sample)  # approx. 48.61 for the 20 means printed in section 2.2
- This calculated sample mean is very close to our population mean of 48.92.
# Histogram of sample_means with no title and a steel-blue fill.
hist(
  sample_means,
  main = "",
  xlab = "Sample Means",
  col = "steelblue"
)
- This histogram shows a normal distribution among the means of our sampling distribution. We can conclude that our sample is representative of the population.