Use IPUMS Data from GitHub
knitr::opts_chunk$set(echo = TRUE)
library(haven)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(broom)
# Read the IPUMS extract (Stata .dta) directly from its GitHub URL.
ipums <- read_dta(
  "https://github.com/coreysparks/data/blob/master/usa_00045.dta?raw=true"
)

# Print the column names of the extract.
names(ipums)
## [1] "year" "datanum" "serial" "hhwt" "statefip"
## [6] "met2013" "puma" "gq" "pernum" "perwt"
## [11] "famsize" "nchild" "nchlt5" "eldch" "nsibs"
## [16] "relate" "related" "sex" "age" "marst"
## [21] "birthyr" "fertyr" "race" "raced" "hispan"
## [26] "hispand" "bpl" "bpld" "citizen" "yrsusa1"
## [31] "language" "languaged" "speakeng" "educ" "educd"
## [36] "empstat" "empstatd" "labforce" "occ" "ind"
## [41] "inctot" "incwage" "poverty" "hwsei" "migrate1"
## [46] "migrate1d" "carpool" "trantime"
Filter trantime variable and recode missing values
# Build the analysis sample from the IPUMS extract:
#   1. add `mytrantime`, a copy of `trantime` with the 0 code recoded to NA
#      (`trantime` itself is left untouched);
#   2. keep rows with labforce == 2, age >= 18, and one of the listed
#      met2013 codes;
#   3. add `citymetro`, a readable name for each met2013 code.
ipums_new <- ipums %>%
  # Use `=` to name the new column. With `<-` inside mutate() the column is
  # named after the whole expression text rather than `mytrantime`.
  mutate(mytrantime = ifelse(trantime == 0, NA, trantime)) %>%
  filter(
    labforce == 2,
    met2013 %in% c(
      11100, 12420, 13140, 15180, 17780, 18580, 19100, 21340, 26420, 28660,
      29700, 31180, 32580, 33260, 36220, 41660, 41700, 46340, 47380, 48660
    ),
    age >= 18
  ) %>%
  # Refer to the column directly instead of `.$met2013`: the bare name is
  # resolved against the data being mutated, so it stays correct even if the
  # pipeline is later grouped or reordered.
  mutate(
    citymetro = case_when(
      met2013 == 11100 ~ "Amarillo",
      met2013 == 12420 ~ "Austin",
      met2013 == 13140 ~ "Beaumont",
      met2013 == 15180 ~ "Brownsville",
      met2013 == 17780 ~ "College Station",
      met2013 == 18580 ~ "Corpus Christi",
      met2013 == 19100 ~ "Dallas",
      met2013 == 21340 ~ "El Paso",
      met2013 == 26420 ~ "Houston",
      met2013 == 28660 ~ "Killeen",
      met2013 == 29700 ~ "Laredo",
      met2013 == 31180 ~ "Lubbock",
      met2013 == 32580 ~ "McAllen",
      met2013 == 33260 ~ "Midland",
      met2013 == 36220 ~ "Odessa",
      met2013 == 41660 ~ "San Angelo",
      met2013 == 41700 ~ "San Antonio",
      met2013 == 46340 ~ "Tyler",
      met2013 == 47380 ~ "Waco",
      met2013 == 48660 ~ "Wichita Falls"
    )
  )
Summary Statistics on trantime for Texas metro cities
# Per-metro mean, standard deviation and row count of `trantime`.
# NOTE(review): this summarises the raw `trantime`, which still contains the
# 0 codes that the preparation step recodes to NA in a separate column.
# Confirm whether zeros are meant to be included in these statistics.
ipums_new %>%
  group_by(citymetro) %>%
  summarise(
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    means = mean(trantime, na.rm = TRUE),
    sds = sd(trantime, na.rm = TRUE),
    n = n()
  )
## # A tibble: 19 x 4
## citymetro means sds n
## <chr> <dbl> <dbl> <int>
## 1 Amarillo 16.76692 21.78368 133
## 2 Austin 22.80106 18.89597 945
## 3 Beaumont 21.01325 22.32188 151
## 4 Brownsville 16.53846 11.43919 130
## 5 College Station 20.37624 24.73291 101
## 6 Corpus Christi 19.36161 23.53661 224
## 7 Dallas 24.97202 22.98984 3217
## 8 El Paso 21.04908 24.71307 326
## 9 Houston 26.46783 22.44292 2456
## 10 Laredo 19.34343 20.35459 99
## 11 Lubbock 15.99265 17.31345 136
## 12 McAllen 19.38806 22.01837 201
## 13 Midland 19.40816 16.94466 49
## 14 Odessa 17.26190 12.81778 42
## 15 San Angelo 25.04545 32.96507 44
## 16 San Antonio 24.30786 22.46521 903
## 17 Tyler 21.75758 27.18271 99
## 18 Waco 20.06716 18.82136 134
## 19 Wichita Falls 11.98649 10.24962 74
Create boxplots to summarize trantime for Texas metro cities
# Boxplot of `trantime` for each metro area in `citymetro`.
ipums_new %>%
  ggplot(aes(x = citymetro, y = trantime)) +
  geom_boxplot() +
  ggtitle(label = "Avg. Traveltime to Work in Texas Metro Cities") +
  # The axis label must be added to the plot with `+`; passed as ggtitle()'s
  # second argument it is taken as the subtitle instead. `trantime` is mapped
  # to y, so the "traveltime" label goes on the y axis.
  ylab("traveltime")

The F statistic from the ANOVA model is 8.5119, indicating that the mean trantime differs between the cities.
The city with the shortest average commute time is Wichita Falls, TX with an average of 12 minutes.
The city with the longest average commute time is Houston, TX with an average of 26.5 minutes.