Try some regression modeling
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.2
## ── Attaching packages ───────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.0 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.4.4
## Warning: package 'tidyr' was built under R version 3.4.4
## Warning: package 'readr' was built under R version 3.4.4
## Warning: package 'purrr' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'stringr' was built under R version 3.4.4
## Warning: package 'forcats' was built under R version 3.4.4
## ── Conflicts ──────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Evictions data
# Read the evictions CSV (cities.csv); the first row is the header and text
# columns are kept as character vectors rather than factors.
evic <- read.csv(
  file = "/Users/heatherkitada/Downloads/cities.csv",
  stringsAsFactors = FALSE,
  header = TRUE
)
# ONC data
# Read the one-night-count CSV (oneNightCount.csv); the first row is the
# header and text columns are kept as character vectors rather than factors.
oncDF <- read.csv(
  file = "/Users/heatherkitada/Downloads/oneNightCount.csv",
  stringsAsFactors = FALSE,
  header = TRUE
)
# Reshape the ONC table from wide to long: every column other than Location
# and YEAR is stacked into a (City, Count) pair.
oncG <- gather(oncDF, key = "City", value = "Count", -c(Location, YEAR))

# Show the distinct column labels that ended up as City values.
unique(oncG$City)
## [1] "SEATTLE" "KENT" "NORTH.END"
## [4] "EAST.SIDE" "SW.KING.CO" "WHITE.CNTR"
## [7] "FEDERAL.WAY" "RENTON" "NIGHT.OWL.BUSES"
## [10] "AUBURN" "VASHON.ISLAND" "TOTAL"
# Lookup table pairing each city name as spelled in the evictions data
# (`name`) with the matching label in the ONC data (`City`).
# stringsAsFactors = FALSE keeps both columns as character, so the later joins
# on `name` and `City` do not have to coerce factors (which raised warnings).
nameToONC <- data.frame(
  name = c(
    "Auburn", "Federal Way",
    "Kent", "Renton",
    "Seattle", "Vashon",
    "White Center"
  ),
  City = c(
    "AUBURN", "FEDERAL.WAY",
    "KENT", "RENTON",
    "SEATTLE", "VASHON.ISLAND",
    "WHITE.CNTR"
  ),
  stringsAsFactors = FALSE
)

# Keep only the eviction rows for cities that have an ONC counterpart. The
# city list is taken from the lookup table so it is maintained in one place.
evic_onc <- evic %>%
  filter(name %in% nameToONC$name)
# Combine the eviction data with the ONC counts, one row per city and year.
#
# The ONC table is matched on BOTH the city label and the year. Joining on
# City alone pairs every eviction-year row with every ONC count year, which
# duplicates rows and divides a count from one year by the population of
# another. Printed output captured from the earlier City-only join will not
# match the result of this version.
evicCity <- evic_onc %>%
  # attach the ONC-style city label to each eviction row
  left_join(nameToONC, by = "name") %>%
  # match counts for the same city in the same year
  left_join(oncG, by = c("City", "year" = "YEAR")) %>%
  mutate(
    # share of the city's population counted as homeless that year
    homeless_rate = Count / population,
    # the join keeps only `year`; retain a YEAR column for code that expects it
    YEAR = year
  ) %>%
  # keep the "TOTAL" location rows; this also drops city-years with no ONC match
  filter(Location == "TOTAL")
## Warning: Column `name` joining character vector and factor, coercing into
## character vector
## Joining, by = "City"
## Warning: Column `City` joining factor and character vector, coercing into
## character vector
# Preview the first six rows of the combined table.
head(evicCity, n = 6L)
## GEOID year name parent.location population poverty.rate
## 1 5303180 2000 Auburn Washington 40314 12.77
## 2 5303180 2000 Auburn Washington 40314 12.77
## 3 5303180 2000 Auburn Washington 40314 12.77
## 4 5303180 2000 Auburn Washington 40314 12.77
## 5 5303180 2000 Auburn Washington 40314 12.77
## 6 5303180 2000 Auburn Washington 40314 12.77
## renter.occupied.households pct.renter.occupied median.gross.rent
## 1 8427.06 45.8 639
## 2 8427.06 45.8 639
## 3 8427.06 45.8 639
## 4 8427.06 45.8 639
## 5 8427.06 45.8 639
## 6 8427.06 45.8 639
## median.household.income median.property.value rent.burden pct.white
## 1 39208 153400 26 79.92
## 2 39208 153400 26 79.92
## 3 39208 153400 26 79.92
## 4 39208 153400 26 79.92
## 5 39208 153400 26 79.92
## 6 39208 153400 26 79.92
## pct.af.am pct.hispanic pct.am.ind pct.asian pct.nh.pi pct.multiple
## 1 2.37 7.49 2.36 3.45 0.49 3.79
## 2 2.37 7.49 2.36 3.45 0.49 3.79
## 3 2.37 7.49 2.36 3.45 0.49 3.79
## 4 2.37 7.49 2.36 3.45 0.49 3.79
## 5 2.37 7.49 2.36 3.45 0.49 3.79
## 6 2.37 7.49 2.36 3.45 0.49 3.79
## pct.other eviction.filings evictions eviction.rate eviction.filing.rate
## 1 0.13 295.43 192.79 2.29 3.51
## 2 0.13 295.43 192.79 2.29 3.51
## 3 0.13 295.43 192.79 2.29 3.51
## 4 0.13 295.43 192.79 2.29 3.51
## 5 0.13 295.43 192.79 2.29 3.51
## 6 0.13 295.43 192.79 2.29 3.51
## low.flag imputed subbed City Location YEAR Count homeless_rate
## 1 1 0 0 AUBURN TOTAL 2016 110 0.002728581
## 2 1 0 0 AUBURN TOTAL 2015 132 0.003274297
## 3 1 0 0 AUBURN TOTAL 2014 97 0.002406112
## 4 1 0 0 AUBURN TOTAL 2013 57 0.001413901
## 5 1 0 0 AUBURN TOTAL 2012 44 0.001091432
## 6 1 0 0 AUBURN TOTAL 2011 45 0.001116238
### Now that we have combined the ONC and Eviction Lab data, we can play with it!
# We can start by filtering on a given year and then fitting regressions.
# Consider using homeless_rate instead of Count as the response.
# Year of eviction data to model.
this.year <- 2016

# Restrict the combined table to the rows for that year.
thisDat <- filter(evicCity, year == this.year)

# Linear model of the homeless rate on median household income and the
# eviction rate, followed by the coefficient table and fit statistics.
mod <- lm(homeless_rate ~ median.household.income + eviction.rate, data = thisDat)
summary(mod)
##
## Call:
## lm(formula = homeless_rate ~ median.household.income + eviction.rate,
## data = thisDat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.833e-03 -5.216e-04 1.819e-05 4.622e-04 1.933e-03
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.213e-03 9.844e-04 7.328 9.94e-10 ***
## median.household.income -5.822e-08 1.418e-08 -4.107 0.000132 ***
## eviction.rate -2.413e-03 2.928e-04 -8.243 3.09e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0008106 on 56 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.5497, Adjusted R-squared: 0.5336
## F-statistic: 34.18 on 2 and 56 DF, p-value: 1.986e-10