—- Instructions —-
—- Part 1. Open the R file “Lab7_Assignment.R” and answer the questions below
HTS
library(data.table)
## Warning: package 'data.table' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
library(foreign)
## Warning: package 'foreign' was built under R version 4.4.2
# Read the household survey SPSS file as a data frame and convert it to a
# data.table. `TRUE` is spelled out because `T` is an ordinary variable that
# can be reassigned.
hts <- data.table(
  read.spss("datasets/HTS.household.10regions.sav", to.data.frame = TRUE)
)
## re-encoding from CP1252
2.1 Run a T-Test to show whether mean household income is statistically different between households living in single-family residences and those that are not (use the whole sample). Produce two PDFs: one with a histogram plot, and another with the simulated hypothesis-testing plot showing where the T-statistic falls. Provide a short interpretation of your results.
# Welch two-sample t-test (variances not assumed equal) comparing household
# income between the two levels of `sf`, using the full sample.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
income <- t.test(hhincome ~ sf, data = hts, var.equal = FALSE)
income
##
## Welch Two Sample t-test
##
## data: hhincome by sf
## t = -36.234, df = 4353.4, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group other and group single family detached is not equal to 0
## 95 percent confidence interval:
## -30.19913 -27.09895
## sample estimates:
## mean in group other mean in group single family detached
## 46.73311 75.38215
# Histogram of household income with one panel per `sf` group, written to a
# PDF file in the working directory.
income_histogram <- ggplot(data = hts, mapping = aes(x = hhincome)) +
  geom_histogram() +
  facet_wrap(~sf)
ggsave(filename = "hhincome_histogram.pdf", plot = income_histogram)
## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 857 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Two-sided 5% critical values of the t distribution with the degrees of
# freedom reported by the test.
upper975 <- qt(p = .975, df = income$parameter)
lower025 <- qt(p = .025, df = income$parameter)

# Draw the t density, a baseline at zero, the observed t statistic (red
# point on the baseline), and the two critical values (red vertical lines).
draw_t_test_plot <- function() {
  curve(dt(x, df = income$parameter), from = -40, to = 40)
  abline(h = 0, col = "blue")
  points(x = income$statistic, y = 0, col = "red")
  abline(v = upper975, col = "red")
  abline(v = lower025, col = "red")
}

# Show the plot on the active graphics device, as before.
draw_t_test_plot()

# The exercise asks for this plot as a PDF as well, so draw it a second time
# into a PDF device and close that device so the file is written to disk.
pdf("hhincome_ttest_plot.pdf")
draw_t_test_plot()
invisible(dev.off())
# The Welch t-test shows a statistically significant difference in mean household income between households in single-family detached residences and households in other housing types (t = -36.23, p-value < 2.2e-16). Because the p-value is less than .05, we reject the null hypothesis of equal means. The negative t statistic reflects that the mean of the "other" group (46.73) is lower than the mean of the single-family detached group (75.38), so we conclude that single-family households tend to have significantly higher incomes than those in other types of housing.
(jobpop) over and under the city median (of the jobpop variable, of course)
sa_hts<-hts[region%in%c("San Antonio, TX")]
# Median of `jobpop` across the San Antonio households; missing values are
# ignored.
median_jobpop <- median(sa_hts$jobpop, na.rm = TRUE)

# Split households at the median using data.table's by-reference assignment.
# Values exactly equal to the median fall in the "Below Median" group, and
# households with a missing `jobpop` get NA.
sa_hts[, jobpop_group := ifelse(jobpop > median_jobpop, "Above Median", "Below Median")]

# Welch two-sample t-test (variances not assumed equal) of lnvmt between the
# two groups. `FALSE` is spelled out because `F` is a reassignable variable.
lnvmt_ttest <- t.test(lnvmt ~ jobpop_group, data = sa_hts, var.equal = FALSE)
lnvmt_ttest
##
## Welch Two Sample t-test
##
## data: lnvmt by jobpop_group
## t = -2.2052, df = 1512.2, p-value = 0.02759
## alternative hypothesis: true difference in means between group Above Median and group Below Median is not equal to 0
## 95 percent confidence interval:
## -0.21709014 -0.01269755
## sample estimates:
## mean in group Above Median mean in group Below Median
## 2.989259 3.104152
# The t-test gives a p-value of .02759, which is less than .05, so we reject the null hypothesis of equal means. The negative t statistic (t = -2.2052) reflects that the mean of the Above Median group (2.989) is lower than the mean of the Below Median group (3.104), so we conclude that households in neighborhoods with a jobpop index below the city median tend to have significantly higher household vehicle miles traveled (lnvmt).
# Histogram of lnvmt for the San Antonio households, used as a visual check
# of the shape of its distribution.
normality_histogram <- ggplot(data = sa_hts, mapping = aes(x = lnvmt)) +
  geom_histogram()
normality_histogram
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 43 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Box plot of lnvmt within each income category, used to spot outliers.
outliersplot <- ggplot(data = sa_hts, mapping = aes(x = income_cat, y = lnvmt)) +
  geom_boxplot()
outliersplot
## Warning: Removed 43 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Write the same plot to a PDF file in the working directory.
ggsave(filename = "outliersplot.pdf", plot = outliersplot)
## Saving 7 x 5 in image
## Warning: Removed 43 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Draw a base-graphics box plot of lnvmt by income category and keep the
# statistics it returns; the `out` component holds the values drawn as
# outlier points.
sa_hts_bp <- boxplot(sa_hts$lnvmt ~ sa_hts$income_cat)
outliers <- sa_hts_bp[["out"]]
# List the households whose lnvmt equals one of those outlier values.
# NOTE(review): matching is by value only, so the income category an outlier
# was flagged in is not taken into account -- confirm this is intended.
sa_hts[lnvmt %in% outliers]
## rhhnum region geoid10 anyvmt
## <num> <fctr> <char> <num>
## 1: 150000034 San Antonio, TX 1
## 2: 150000075 San Antonio, TX 1
## 3: 150000094 San Antonio, TX 1
## 4: 150000113 San Antonio, TX 1
## 5: 150000123 San Antonio, TX 1
## 6: 150000162 San Antonio, TX 1
## 7: 150000164 San Antonio, TX 1
## 8: 150000201 San Antonio, TX 1
## 9: 150000205 San Antonio, TX 1
## 10: 150000242 San Antonio, TX 1
## 11: 150000359 San Antonio, TX 1
## 12: 150000400 San Antonio, TX 1
## 13: 150000488 San Antonio, TX 1
## 14: 150000529 San Antonio, TX 1
## 15: 150000585 San Antonio, TX 1
## 16: 150000656 San Antonio, TX 1
## 17: 150000669 San Antonio, TX 1
## 18: 150000728 San Antonio, TX 1
## 19: 150000740 San Antonio, TX 1
## 20: 150000759 San Antonio, TX 1
## 21: 150000766 San Antonio, TX 1
## 22: 150000768 San Antonio, TX 1
## 23: 150000828 San Antonio, TX 1
## 24: 150001040 San Antonio, TX 1
## 25: 150001060 San Antonio, TX 1
## 26: 150001072 San Antonio, TX 1
## 27: 150001122 San Antonio, TX 1
## 28: 150001169 San Antonio, TX 1
## 29: 150001265 San Antonio, TX 1
## 30: 150001374 San Antonio, TX 1
## 31: 150001387 San Antonio, TX 1
## 32: 155000211 San Antonio, TX 1
## 33: 155000223 San Antonio, TX 1
## 34: 155000323 San Antonio, TX 1
## 35: 155000334 San Antonio, TX 1
## rhhnum region geoid10 anyvmt
## lnvmt autotrips anywalk walktrips anybike biketrips anytransit
## <num> <num> <num> <num> <num> <num> <num>
## 1: -1.617320225 2 0 NA 0 NA 0
## 2: 0.110661824 2 1 2 0 NA 0
## 3: -0.550564483 2 0 NA 0 NA 0
## 4: 1.356620908 10 0 NA 0 NA 0
## 5: 0.547420044 2 0 NA 0 NA 0
## 6: 0.971593758 2 0 NA 0 NA 0
## 7: 0.973079594 2 0 NA 0 NA 0
## 8: 0.962427632 2 0 NA 0 NA 0
## 9: -0.001480761 2 0 NA 0 NA 0
## 10: -2.155615546 2 0 NA 0 NA 0
## 11: -0.415248432 2 0 NA 0 NA 0
## 12: -0.500980007 2 0 NA 0 NA 0
## 13: -0.057901070 2 1 2 0 NA 0
## 14: -0.652877789 2 0 NA 0 NA 0
## 15: 0.934789108 3 0 NA 0 NA 0
## 16: 0.630968751 3 0 NA 0 NA 0
## 17: 5.654327814 12 0 NA 0 NA 0
## 18: 0.803340881 2 0 NA 0 NA 0
## 19: 0.786823621 2 0 NA 0 NA 0
## 20: -0.517029239 2 1 2 0 NA 0
## 21: -0.815960978 4 1 20 0 NA 0
## 22: 0.087722985 2 0 NA 0 NA 0
## 23: 0.820388367 3 0 NA 0 NA 0
## 24: 0.810984705 3 0 NA 0 NA 0
## 25: 0.673487713 6 0 NA 0 NA 0
## 26: -0.475510803 2 1 4 0 NA 1
## 27: 0.471652396 2 0 NA 0 NA 0
## 28: 0.761051704 2 0 NA 0 NA 0
## 29: 5.566984726 61 0 NA 0 NA 0
## 30: -1.098307547 4 0 NA 0 NA 0
## 31: 5.469960773 24 0 NA 0 NA 0
## 32: 0.816963848 4 0 NA 0 NA 0
## 33: 1.019976182 2 0 NA 0 NA 0
## 34: -1.613511253 2 0 NA 0 NA 0
## 35: 0.788930682 2 0 NA 0 NA 0
## lnvmt autotrips anywalk walktrips anybike biketrips anytransit
## transittrips veh hhsize hhworker htype
## <num> <num> <num> <num> <fctr>
## 1: NA 1 2 0 single family detached
## 2: NA 0 1 0 single family detached
## 3: NA 0 1 0 single family attached
## 4: NA 2 4 0 single family detached
## 5: NA 1 1 0 single family detached
## 6: NA 1 2 2 single family detached
## 7: NA 2 2 0 multi-family
## 8: NA 1 1 0 single family detached
## 9: NA 1 1 1 multi-family
## 10: NA 1 1 0 multi-family
## 11: NA 1 2 0 single family detached
## 12: NA 1 1 0 single family detached
## 13: NA 1 2 1 single family detached
## 14: NA 1 1 0 single family detached
## 15: NA 1 2 0 single family detached
## 16: NA 2 2 1 single family detached
## 17: NA 2 2 0 multi-family
## 18: NA 1 1 0 multi-family
## 19: NA 1 2 1 single family detached
## 20: NA 1 1 1 single family detached
## 21: NA 0 10 0 single family attached
## 22: NA 1 1 0 single family detached
## 23: NA 2 1 0 single family detached
## 24: NA 1 1 0 multi-family
## 25: NA 2 3 3 single family detached
## 26: 2 1 4 2 single family detached
## 27: NA 3 3 2 single family detached
## 28: NA 1 1 0 single family detached
## 29: NA 3 8 3 single family detached
## 30: NA 2 3 0 single family detached
## 31: NA 2 4 2 single family detached
## 32: NA 1 2 0 single family detached
## 33: NA 2 2 0 single family detached
## 34: NA 1 1 0 single family attached
## 35: NA 1 1 0 single family detached
## transittrips veh hhsize hhworker htype
## sf hhincome lnhhincome income_cat actden
## <fctr> <num> <num> <fctr> <num>
## 1: single family detached 36.093344 3.586108 middle (35K-75K) 9.2397270
## 2: single family detached 49.975400 3.911531 middle (35K-75K) 8.6468920
## 3: other 5.552822 1.714306 low (<35K) 3.0320845
## 4: single family detached 91.621560 4.517667 high (>75K) 8.1877985
## 5: single family detached 36.093344 3.586108 middle (35K-75K) 4.8896856
## 6: single family detached 74.963090 4.316996 middle (35K-75K) 14.3997310
## 7: other 91.621560 4.517667 high (>75K) 9.0961010
## 8: single family detached 49.975400 3.911531 middle (35K-75K) 9.9988560
## 9: other 36.093344 3.586108 middle (35K-75K) 5.9810605
## 10: other 24.987700 3.218384 low (<35K) 11.0650290
## 11: single family detached 74.963090 4.316996 middle (35K-75K) 8.8165430
## 12: single family detached 30.540521 3.419054 low (<35K) 8.8444410
## 13: single family detached 41.646164 3.729209 middle (35K-75K) 4.4840920
## 14: single family detached 5.552822 1.714306 low (<35K) 10.0952270
## 15: single family detached 41.646164 3.729209 middle (35K-75K) 7.7122264
## 16: single family detached 36.093344 3.586108 middle (35K-75K) 7.9214687
## 17: other 194.348770 5.269654 high (>75K) 9.6090740
## 18: other 74.963090 4.316996 middle (35K-75K) 7.3696694
## 19: single family detached 61.081043 4.112202 middle (35K-75K) 3.4951300
## 20: single family detached 24.987700 3.218384 low (<35K) 10.9680110
## 21: other 5.552822 1.714306 low (<35K) 12.0877430
## 22: single family detached 49.975400 3.911531 middle (35K-75K) 10.4318230
## 23: single family detached 49.975400 3.911531 middle (35K-75K) 7.5484366
## 24: other 91.621560 4.517667 high (>75K) 7.0588540
## 25: single family detached 61.081043 4.112202 middle (35K-75K) 0.1214534
## 26: single family detached 36.093344 3.586108 middle (35K-75K) 11.4529560
## 27: single family detached 49.975400 3.911531 middle (35K-75K) 8.0802355
## 28: single family detached 91.621560 4.517667 high (>75K) 5.4094615
## 29: single family detached 124.938490 4.827822 high (>75K) 5.6574220
## 30: single family detached 49.975400 3.911531 middle (35K-75K) 6.4791360
## 31: single family detached 49.975400 3.911531 middle (35K-75K) 0.9654419
## 32: single family detached 41.646164 3.729209 middle (35K-75K) 8.8580770
## 33: single family detached 91.621560 4.517667 high (>75K) 5.6731050
## 34: other 49.975400 3.911531 middle (35K-75K) 5.8626366
## 35: single family detached 49.975400 3.911531 middle (35K-75K) 7.3114360
## sf hhincome lnhhincome income_cat actden
## jobpop entropy intden pct4way stopden emp10a emp20a
## <num> <num> <num> <num> <num> <num> <num>
## 1: 0.4185260 0.16244244 209.82292 69.90291 65.187706 13.86737109 57.308064
## 2: 0.7139923 0.29639672 195.75620 62.19512 62.069035 14.15221104 52.264787
## 3: 0.9751881 0.46053908 51.83287 17.64706 36.587906 3.86342048 34.935372
## 4: 0.9562107 0.48856271 163.87358 17.54386 132.248860 17.52706908 54.715649
## 5: 0.9392486 0.07615311 137.93326 50.00000 18.391100 10.98739150 47.230055
## 6: 0.3155761 0.52129104 115.73794 16.00000 4.629518 14.52770440 64.147319
## 7: 0.2757024 0.59759781 116.43728 26.08696 65.812380 11.83869142 60.151262
## 8: 0.5662457 0.70665044 167.30470 18.18182 106.466630 20.08555106 55.436170
## 9: 0.3022279 0.38942315 106.07134 25.00000 11.785705 10.62143408 58.532009
## 10: 0.9296499 0.60263764 233.90045 73.78641 86.293370 15.17862561 60.224454
## 11: 0.9929870 0.51493784 164.66924 21.31148 91.782850 8.67411957 47.455079
## 12: 0.5860302 0.56954311 111.70435 34.28571 82.980380 18.02256675 60.190521
## 13: 0.9506992 0.05914424 124.10874 37.77778 13.789861 12.05442672 59.864812
## 14: 0.6627539 0.37209211 252.25702 60.00000 73.383860 14.15221104 52.264787
## 15: 0.9333875 0.42519550 172.45209 43.05556 167.661760 17.60682427 62.487322
## 16: 0.9140198 0.54222122 135.42618 46.15385 45.836550 6.60915377 33.560958
## 17: 0.2794879 0.57826638 98.04865 23.68421 25.802277 11.83869142 60.151262
## 18: 0.9042735 0.59814906 126.50892 18.18182 80.505680 11.34368912 43.979908
## 19: 0.6501693 0.40846235 120.17170 31.11111 0.000000 4.82395033 27.595294
## 20: 0.4640113 0.36697353 254.67436 58.82353 59.923378 13.86737109 57.308064
## 21: 0.8113150 0.86108506 311.18470 62.04380 95.399690 15.83487209 59.136737
## 22: 0.7780097 0.37126910 231.49753 60.67416 33.814247 16.46263457 62.034178
## 23: 0.3808891 0.31534339 139.31204 14.28571 71.077570 13.74823368 60.723171
## 24: 0.4314074 0.50996051 139.85750 39.06250 74.299290 11.93268860 60.676606
## 25: 0.0000000 0.00000000 60.98489 0.00000 0.000000 0.09362565 1.253296
## 26: 0.8972445 0.53395276 236.78170 80.58252 68.965540 14.95124902 58.091250
## 27: 0.1477215 0.43741136 149.86797 47.72727 146.461880 17.99073899 62.373386
## 28: 0.8524599 0.43011995 129.08925 25.92593 19.124332 5.11262943 33.805920
## 29: 0.2674744 0.07052753 169.45107 10.52632 0.000000 3.81388310 33.332466
## 30: 0.9604064 0.52177640 141.30476 12.00000 45.217525 3.04568214 26.317106
## 31: 0.7904070 0.00000000 58.45176 18.18182 0.000000 0.58020660 6.606925
## 32: 0.4953511 0.38403628 144.59960 43.13725 39.694008 9.32590768 59.651306
## 33: 0.6559231 0.28286766 132.42319 23.80952 0.000000 11.66531058 47.301018
## 34: 0.8542333 0.38653741 112.23320 17.07317 10.949581 4.02615078 30.948851
## 35: 0.6401025 0.63938971 102.99034 54.05405 11.134090 3.65957415 31.873095
## jobpop entropy intden pct4way stopden emp10a emp20a
## emp30a emp30t intptlat10 intptlon10 jobpop_group
## <num> <num> <num> <num> <char>
## 1: 83.01140 61.899065 NA NA Below Median
## 2: 82.79096 61.309323 NA NA Above Median
## 3: 67.87067 42.216129 NA NA Above Median
## 4: 86.74590 60.671653 NA NA Above Median
## 5: 80.05266 49.565495 NA NA Above Median
## 6: 89.43900 58.747992 NA NA Below Median
## 7: 86.38886 45.255248 NA NA Below Median
## 8: 86.01300 60.948567 NA NA Below Median
## 9: 83.42281 50.670922 NA NA Below Median
## 10: 85.87503 66.208817 NA NA Above Median
## 11: 81.97929 56.125112 NA NA Above Median
## 12: 88.17728 61.762466 NA NA Below Median
## 13: 83.44027 48.867885 NA NA Above Median
## 14: 82.79096 61.309323 NA NA Above Median
## 15: 86.72918 63.452186 NA NA Above Median
## 16: 72.11665 48.604842 NA NA Above Median
## 17: 86.38886 45.255248 NA NA Below Median
## 18: 84.49009 25.250195 NA NA Above Median
## 19: 69.97539 0.000000 NA NA Above Median
## 20: 83.01140 61.899065 NA NA Below Median
## 21: 85.59019 62.466392 NA NA Above Median
## 22: 86.35518 57.009230 NA NA Above Median
## 23: 86.61166 55.180929 NA NA Below Median
## 24: 84.54458 57.327136 NA NA Below Median
## 25: 19.07548 0.000000 NA NA Below Median
## 26: 83.66579 66.097234 NA NA Above Median
## 27: 86.78776 62.041733 NA NA Below Median
## 28: 74.63203 2.848276 NA NA Above Median
## 29: 72.47047 33.151036 NA NA Below Median
## 30: 68.36939 3.703415 NA NA Above Median
## 31: 32.78149 0.000000 NA NA Above Median
## 32: 88.99626 60.086864 NA NA Below Median
## 33: 78.23687 31.201864 NA NA Above Median
## 34: 70.83189 5.107056 NA NA Above Median
## 35: 69.11641 33.188560 NA NA Below Median
## emp30a emp30t intptlat10 intptlon10 jobpop_group
# Keep only the households whose lnvmt is not among the flagged outlier
# values, then draw the box plots before and after the removal.
sa_hts2 <- sa_hts[!(lnvmt %in% outliers)]
boxplot(sa_hts$lnvmt ~ sa_hts$income_cat)
boxplot(sa_hts2$lnvmt ~ sa_hts2$income_cat)
# Box plot of lnvmt by income category after the outlier rows were removed.
no_outliersplot <- ggplot(data = sa_hts2, mapping = aes(x = income_cat, y = lnvmt)) +
  geom_boxplot()
no_outliersplot
## Warning: Removed 43 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Write the same plot to a PDF file in the working directory.
ggsave(filename = "no_outliersplot.pdf", plot = no_outliersplot)
## Saving 7 x 5 in image
## Warning: Removed 43 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Histogram of lnvmt after the outlier rows were removed, for comparison with
# the histogram of the unfiltered data.
normality_histogram2 <- ggplot(data = sa_hts2, mapping = aes(x = lnvmt)) +
  geom_histogram()
normality_histogram2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 43 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Write the same plot to a PDF file in the working directory.
ggsave(filename = "notquitenormalbutbetterthanbefore.pdf", plot = normality_histogram2)
## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 43 rows containing non-finite outside the scale range
## (`stat_bin()`).
# One-way analysis of variance of lnvmt across income categories, fitted on
# the data with outlier rows removed, followed by its summary table.
anova <- aov(formula = lnvmt ~ income_cat, data = sa_hts2)
summary(anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## income_cat 2 139.1 69.54 93 <2e-16 ***
## Residuals 1482 1108.1 0.75
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 43 observations deleted due to missingness
# Tukey honest-significant-difference comparisons for the fitted ANOVA, then
# a plot of the resulting intervals.
tukey <- TukeyHSD(x = anova)
# NOTE(review): the plot is drawn as a side effect; the value stored in
# `tukey_plot` is presumably not a reusable plot object -- confirm before
# relying on it.
tukey_plot <- plot(x = tukey)