hhdata <- read.csv("./HTS.household.10regions.csv")
autotrips
) vary by housing type (htype
).htype
and
autotrips
?#htype is nominal #autotrips is ratio
#kruskal-wallis test
#Null Hypothesis: There is no difference in the number of car trips across different housing types #Research Hypothesis: There is a difference in the number of car trips across different housinng types
autotrips
by htype
.boxplot(autotrips ~ htype, data = hhdata, main = "Car Trips by Housing Type",
xlab = "Housing Type", ylab = "Number of Car Trips", col = "beige")
outliers <- boxplot.stats(hhdata$autotrips)$out
outliers_data <- hhdata[hhdata$autotrips %in% outliers, ]
head(outliers_data, 3)
## rhhnum region geoid10 anyvmt lnvmt autotrips anywalk walktrips
## 54 60002753 6 530330099004 1 4.067426 28 0 NA
## 58 60002859 6 530530729051 1 5.457890 25 1 10
## 90 60003440 6 530330031002 1 3.851057 26 0 NA
## anybike biketrips anytransit transittrips veh hhsize hhworker htype sf
## 54 0 NA 0 NA 2 4 1 2 0
## 58 0 NA 0 NA 3 5 1 1 1
## 90 0 NA 0 NA 2 4 2 1 1
## hhincome lnhhincome income_cat actden jobpop entropy intden
## 54 62.80802 4.140083 2 3.835707 0.7055189 0.8375544 141.5637
## 58 NA NA NA 2.939009 0.8668553 NA 207.9743
## 90 85.64730 4.450238 3 8.397629 0.2500973 0.2323152 185.5913
## pct4way stopden emp10a emp20a emp30a emp30t intptlat10 intptlon10
## 54 18.00000 39.63782 8.645995 31.80893 54.71615 27.10904 47.58199 -122.3566
## 58 18.18182 0.00000 0.000000 0.00000 1.12863 11.21352 47.08706 -122.6128
## 90 75.00000 32.27674 4.183809 26.64404 47.89991 11.22604 47.68659 -122.3913
df_nonOut
. And also show the first 6 rows of
df_nonOut
df_nonOut <- hhdata[!hhdata$autotrips %in% outliers, ]
head(df_nonOut, 6)
## rhhnum region geoid10 anyvmt lnvmt autotrips anywalk walktrips
## 1 60002001 6 530330293031 0 NA 0 0 NA
## 2 60002006 6 530330260014 1 2.295805 4 0 NA
## 3 60002016 6 530330054003 1 2.958618 3 0 NA
## 4 60002033 6 530610519057 1 3.692084 8 0 NA
## 5 60002058 6 530530735002 1 4.097942 8 0 NA
## 6 60002062 6 530330080013 0 NA 0 1 2
## anybike biketrips anytransit transittrips veh hhsize hhworker htype sf
## 1 0 NA 0 NA 1 1 1 1 1
## 2 0 NA 0 NA 1 3 1 1 1
## 3 0 NA 0 NA 1 1 1 1 1
## 4 0 NA 0 NA 2 2 2 1 1
## 5 0 NA 0 NA 2 2 2 1 1
## 6 0 NA 1 1 0 1 0 3 0
## hhincome lnhhincome income_cat actden jobpop entropy intden
## 1 51.38838 3.939412 2 5.948663 0.9516889 0.4443010 119.90339
## 2 NA NA NA 4.385597 0.7495158 0.5320316 137.48098
## 3 119.90622 4.786710 3 21.237998 0.3004523 0.8288664 283.80054
## 4 97.06694 4.575401 3 4.015335 0.5849263 0.1515443 133.72835
## 5 17.12946 2.840800 1 1.229728 0.6809742 0.0000000 65.20049
## 6 28.54910 3.351625 1 54.002145 0.1702469 0.9126628 262.14678
## pct4way stopden emp10a emp20a emp30a emp30t intptlat10
## 1 21.42857 14.27421 1.2022030 13.50539 35.68429 11.445340 47.44295
## 2 37.77778 15.27566 1.5747126 23.78211 49.82386 16.383400 47.49113
## 3 54.34783 55.52619 18.6821191 32.83103 56.11268 29.870831 47.65027
## 4 11.53846 25.71699 1.9664792 11.78069 37.31162 5.481164 47.81160
## 5 30.76923 0.00000 0.0659269 7.02416 18.39332 9.520263 47.21924
## 6 65.68627 110.51286 22.8173893 38.12466 60.45881 32.293135 47.61507
## intptlon10
## 1 -122.1918
## 2 -122.2326
## 3 -122.3425
## 4 -122.2651
## 5 -122.2769
## 6 -122.3555
kruskal_test <- kruskal.test(autotrips ~ htype, data = df_nonOut)
kruskal_test
##
## Kruskal-Wallis rank sum test
##
## data: autotrips by htype
## Kruskal-Wallis chi-squared = 899.2, df = 3, p-value < 2.2e-16
#we reject the null because of the miniscule p-value
# Pairwise comparisons
pairwise <- pairwise.wilcox.test(df_nonOut$autotrips, df_nonOut$htype, p.adjust.method = "bonferroni")
pairwise
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df_nonOut$autotrips and df_nonOut$htype
##
## 0 1 2
## 1 3.3e-09 - -
## 2 1 < 2e-16 -
## 3 1.8e-05 < 2e-16 5.8e-13
##
## P value adjustment method: bonferroni
veh
) vary by household income grouped by three income
brackets (income_cat
).income_cat
and
veh
?#income_cat is ordinal (categorical rankings) #veh is ratio (continuous)
#one way anova
#Null Hypothesis: There is no difference in the number of vehicles owned across income brackets #Research Hypotheis: There is a difference in the number of vehicles owned across income brackets
veh
by income_cat
.boxplot(veh ~ income_cat, data = hhdata, main = "Vehicles by Income Bracket",
xlab = "Income Category", ylab = "Number of Vehicles", col = "lightblue")
outliers_veh <- boxplot.stats(hhdata$veh)$out
outliers_veh_data <- hhdata[hhdata$veh %in% outliers_veh, ]
head(outliers_veh_data, 4)
## rhhnum region geoid10 anyvmt lnvmt autotrips anywalk walktrips
## 9 60002070 6 530330322033 1 4.199029 4 0 NA
## 22 60002324 6 530330309023 1 3.056000 5 0 NA
## 23 60002325 6 530330226041 1 3.387408 5 0 NA
## 31 60002456 6 530610519131 1 3.345673 14 0 NA
## anybike biketrips anytransit transittrips veh hhsize hhworker htype sf
## 9 0 NA 0 NA 4 3 3 1 1
## 22 0 NA 0 NA 4 3 1 1 1
## 23 0 NA 0 NA 4 1 1 1 1
## 31 0 NA 0 NA 4 4 3 1 1
## hhincome lnhhincome income_cat actden jobpop entropy intden
## 9 171.29460 5.143385 3 1.513747 0.5659448 0.0000000 95.88457
## 22 39.96874 3.688098 2 4.248022 0.8815746 0.1794142 115.95127
## 23 119.90622 4.786710 3 5.150412 0.6404061 0.7302014 130.42673
## 31 85.64730 4.450238 3 3.259246 0.3142119 0.3492300 81.69724
## pct4way stopden emp10a emp20a emp30a emp30t intptlat10
## 9 6.896552 0.000000 0.1134464 7.245275 21.98141 12.890464 47.59547
## 22 9.375000 36.234772 0.9904327 6.314313 22.88700 6.061196 47.27176
## 23 24.000000 54.779227 6.9134319 16.930129 52.65299 19.275291 47.68069
## 31 15.789474 8.599709 2.4417306 12.692853 44.85289 7.470242 47.79871
## intptlon10
## 9 -122.0694
## 22 -122.2421
## 23 -122.1700
## 31 -122.2780
df_nonOut2
. And also show the first 6 rows of
df_nonOut2
df_nonOut2 <- hhdata[!hhdata$veh %in% outliers_veh, ]
head(df_nonOut2, 6)
## rhhnum region geoid10 anyvmt lnvmt autotrips anywalk walktrips
## 1 60002001 6 530330293031 0 NA 0 0 NA
## 2 60002006 6 530330260014 1 2.295805 4 0 NA
## 3 60002016 6 530330054003 1 2.958618 3 0 NA
## 4 60002033 6 530610519057 1 3.692084 8 0 NA
## 5 60002058 6 530530735002 1 4.097942 8 0 NA
## 6 60002062 6 530330080013 0 NA 0 1 2
## anybike biketrips anytransit transittrips veh hhsize hhworker htype sf
## 1 0 NA 0 NA 1 1 1 1 1
## 2 0 NA 0 NA 1 3 1 1 1
## 3 0 NA 0 NA 1 1 1 1 1
## 4 0 NA 0 NA 2 2 2 1 1
## 5 0 NA 0 NA 2 2 2 1 1
## 6 0 NA 1 1 0 1 0 3 0
## hhincome lnhhincome income_cat actden jobpop entropy intden
## 1 51.38838 3.939412 2 5.948663 0.9516889 0.4443010 119.90339
## 2 NA NA NA 4.385597 0.7495158 0.5320316 137.48098
## 3 119.90622 4.786710 3 21.237998 0.3004523 0.8288664 283.80054
## 4 97.06694 4.575401 3 4.015335 0.5849263 0.1515443 133.72835
## 5 17.12946 2.840800 1 1.229728 0.6809742 0.0000000 65.20049
## 6 28.54910 3.351625 1 54.002145 0.1702469 0.9126628 262.14678
## pct4way stopden emp10a emp20a emp30a emp30t intptlat10
## 1 21.42857 14.27421 1.2022030 13.50539 35.68429 11.445340 47.44295
## 2 37.77778 15.27566 1.5747126 23.78211 49.82386 16.383400 47.49113
## 3 54.34783 55.52619 18.6821191 32.83103 56.11268 29.870831 47.65027
## 4 11.53846 25.71699 1.9664792 11.78069 37.31162 5.481164 47.81160
## 5 30.76923 0.00000 0.0659269 7.02416 18.39332 9.520263 47.21924
## 6 65.68627 110.51286 22.8173893 38.12466 60.45881 32.293135 47.61507
## intptlon10
## 1 -122.1918
## 2 -122.2326
## 3 -122.3425
## 4 -122.2651
## 5 -122.2769
## 6 -122.3555
#use a oneway anova
anova_test <- aov(veh ~ income_cat, data = hhdata)
summary(anova_test)
## Df Sum Sq Mean Sq F value Pr(>F)
## income_cat 1 2395 2395.5 2698 <2e-16 ***
## Residuals 13353 11856 0.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 857 observations deleted due to missingness
#p_value is less than 0.05, reject the null
pairwise_veh <- pairwise.wilcox.test(df_nonOut2$veh, df_nonOut2$income_cat, p.adjust.method = "bonferroni")
pairwise_veh
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df_nonOut2$veh and df_nonOut2$income_cat
##
## 1 2
## 2 <2e-16 -
## 3 <2e-16 <2e-16
##
## P value adjustment method: bonferroni