In this demo we will be looking at factors for housing insecurity and homelessness in Seattle, Washington.
library(tidyverse)
#install.packages("maps")
library(maps)
wa_counties <- map_data("county", "washington") %>%
select(lon = long, lat, group, id = subregion)
head(wa_counties)
## lon lat group id
## 1 -118.2356 46.73617 1 adams
## 2 -119.3700 46.74190 1 adams
## 3 -119.3700 46.74190 1 adams
## 4 -119.3757 46.90232 1 adams
## 5 -118.9804 46.90805 1 adams
## 6 -118.9804 47.25756 1 adams
Plotting with ggplot
ggplot(wa_counties, aes(lon, lat)) +
geom_point(size = .25, show.legend = FALSE) +
coord_quickmap()
ggplot(wa_counties, aes(lon, lat, group = group)) +
geom_polygon(fill = "white", colour = "grey50") +
coord_quickmap()
#install.packages("sf")
library(sf)
#install.packages("tigris")
library(tigris)
options(tigris_use_cache = TRUE)
# set year for boundaries
this.year = 2017 #the last census
wa_tracts <- tracts(state = 'WA', county = 'King',
cb = T, year = this.year)
head(wa_tracts)
# BASE PLOT
plot(wa_tracts)
# GGPLOT
ggplot(wa_tracts) +
geom_sf() +
coord_sf()
We mapped the ONC cities to census tracts, which was a challenging task. The Census uses AFFGEOID (formerly GEO_IDs) for their shapefiles and there uniquely define areas based on three things: State, County, and Census tract. However, there is not a level for city. We approximated to the best of our abilities by overlaying maps for the cities with Census tracts.
# IMPORT OUR DATA
oncT<-read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2019/main/data/oncTract2.csv",
header=TRUE,
stringsAsFactors = FALSE)%>%
mutate(AFFGEOID=GEO_ID)
oncT$City<-as.factor(oncT$City)
These tract codes were then joined with shapefiles from the Census’ Tigris package in R.
# GEO JOIN DATA
joinONC<-geo_join(wa_tracts, oncT,
by_sp="AFFGEOID", by_df="AFFGEOID")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# CREAT PLOT IN GGPLOT
ggplot(data = joinONC, aes(fill=City))+
geom_sf()+
#coord_sf(xlim=c(-122.6, -122))+ #zoom in on onc only
theme_bw()+
ggtitle("King County Cities Included in One Night Count Data")
We also wanted to explore the placement of meal programs in the Seattle area. This data came from the City of Seattle and is also publically hosted on Kaggle. Addresses for each location are provided, which we geo-coded into their latitudes and longitudes.
Note that there is a high concentration of meal programs in downtown Seattle but not many in other locations. This might suggest that this is where homeless people are located.
# IMPORT THE GEOCODED DATA
food<-read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2019/main/data/GEOCODE_FOOD%20-%20Sheet1.csv",
header=TRUE)
ggplot(data = joinONC)+
geom_sf(aes(fill=City))+
geom_point(data=food, aes(Long, Lat))+
theme_bw()
First you will need to get a API key: https://api.census.gov/data/key_signup.html
Once you have an API key load it into your R environment so that you can access the ACS data.
#install.packages("tidycensus")
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 3.6.2
# YOUR CODE SHOULD LOOK LIKE THIS
# census_api_key("INCLUDE YOUR API HERE")
## To install your API key for use in future sessions, run this function with `install = TRUE`.
Many many variables are included in the ACS. The ACS has 1 and 5 year estimates. Use the following code to see what variables are available:
# Set a year of interest
this.year = 2010
# This looks at the 5 year estimates
# You can also do "acs1"
vars <- load_variables(year = this.year,
dataset = "acs5",
cache = TRUE)
# There are 25070 possible variables
dim(vars)
## [1] 20926 3
Explore several possible explantory variables from the American Community Survey (ACS) including:
# MEDIAN HOME VALUE
waMedv <- get_acs(geography = "tract", year=this.year,
state = "WA", county = "King",
variables = "B25077_001E")
## Getting data from the 2006-2010 5-year ACS
head(waMedv)
## # A tibble: 6 x 5
## GEOID NAME variable estimate moe
## <chr> <chr> <chr> <dbl> <dbl>
## 1 53033000100 Census Tract 1, King County, Washington B25077_0… 497300 66258
## 2 53033000200 Census Tract 2, King County, Washington B25077_0… 358700 10609
## 3 53033000300 Census Tract 3, King County, Washington B25077_0… 346500 10807
## 4 53033000401 Census Tract 4.01, King County, Washingt… B25077_0… 342100 45503
## 5 53033000402 Census Tract 4.02, King County, Washingt… B25077_0… 387400 28878
## 6 53033000500 Census Tract 5, King County, Washington B25077_0… 596100 33208
Get census tract level estimates for King County:
## Names for variable types
# Gives five year estimates
waHouse <- get_acs(geography = "tract", year=this.year,
state = "WA", county = "King", geometry = TRUE,
variables = c(popululation = "B02001_001",
median.gross.rent = "B25064_001",
median.household.income = "B19013_001",
rent.burden = "B25071_001"))
## Getting data from the 2006-2010 5-year ACS
head(waHouse)
## Simple feature collection with 6 features and 5 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -122.3515 ymin: 47.76317 xmax: -122.3052 ymax: 47.77779
## CRS: 4269
## GEOID NAME
## 1 53033020300 Census Tract 203, King County, Washington
## 2 53033020300 Census Tract 203, King County, Washington
## 3 53033020300 Census Tract 203, King County, Washington
## 4 53033020300 Census Tract 203, King County, Washington
## 5 53033020401 Census Tract 204.01, King County, Washington
## 6 53033020401 Census Tract 204.01, King County, Washington
## variable estimate moe geometry
## 1 popululation 5642.0 355.0 MULTIPOLYGON (((-122.3404 4...
## 2 median.household.income 54773.0 6375.0 MULTIPOLYGON (((-122.3404 4...
## 3 median.gross.rent 1027.0 108.0 MULTIPOLYGON (((-122.3404 4...
## 4 rent.burden 29.2 3.6 MULTIPOLYGON (((-122.3404 4...
## 5 popululation 3297.0 213.0 MULTIPOLYGON (((-122.3243 4...
## 6 median.household.income 65353.0 8248.0 MULTIPOLYGON (((-122.3243 4...
Notice that we’re going to need to do a little data wrangling so that we have a tidydata format to spread the column named ‘variable’.
waTidy<-as.data.frame(waHouse)[,c(1,3:4)]%>%
spread(variable, estimate)
head(waTidy)
## GEOID median.gross.rent median.household.income popululation
## 1 53033000100 838 47518 5784
## 2 53033000200 906 54797 7682
## 3 53033000300 1101 61966 2548
## 4 53033000401 841 35095 5805
## 5 53033000402 951 45183 4662
## 6 53033000500 1521 110125 3474
## rent.burden
## 1 28.6
## 2 28.8
## 3 34.6
## 4 31.0
## 5 34.5
## 6 24.8
Suppose that we want to study median house value (which is B25077_001E).
### GET CENSUS DATA
### B25077_001E: MEDIAN HOME VALUE
waMedv <- get_acs(geography = "tract", year=this.year,
state = "WA", county = "King",
variables = "B25077_001E")%>%
mutate(AFFGEOID=paste0("1400000US", GEOID))
## Getting data from the 2006-2010 5-year ACS
In order to combine data from the US Census with our spatial data from trigis we will need to use a geo_join.
## USE GEO_JOIN TO COMBINE SPATIAL DATA AND OTHER DATA FRAMES
joinWA<-geo_join(wa_tracts, waMedv,
by_sp="AFFGEOID", by_df="AFFGEOID")
Then we will use the tmap package to plot the data with a green color gradient (with 7 levels).
#install.packages("tmap")
library(tmap)
## Warning: package 'tmap' was built under R version 3.6.2
## USE TMAP PACKAGE
tm_shape(joinWA, projection = 26916)+
tm_fill("estimate", style = "quantile", n=7, palette = "Greens")+
tm_legend(bg.color="white", bg.alpha=0.6)
# MEDIAN HOME VALUE
waMedvG <- get_acs(geography = "tract", year=this.year,
state = "WA", county = "King",
variables = "B25077_001E",
geometry = TRUE)
## Getting data from the 2006-2010 5-year ACS
head(waMedvG)
## Simple feature collection with 6 features and 5 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -122.3515 ymin: 47.71841 xmax: -122.1842 ymax: 47.77779
## CRS: 4269
## GEOID NAME variable estimate
## 1 53033020300 Census Tract 203, King County, Washington B25077_001 358100
## 2 53033020401 Census Tract 204.01, King County, Washington B25077_001 335400
## 3 53033020402 Census Tract 204.02, King County, Washington B25077_001 374900
## 4 53033021100 Census Tract 211, King County, Washington B25077_001 339500
## 5 53033021804 Census Tract 218.04, King County, Washington B25077_001 424100
## 6 53033022003 Census Tract 220.03, King County, Washington B25077_001 382300
## moe geometry
## 1 18863 MULTIPOLYGON (((-122.3404 4...
## 2 13782 MULTIPOLYGON (((-122.3243 4...
## 3 24841 MULTIPOLYGON (((-122.2852 4...
## 4 14556 MULTIPOLYGON (((-122.3237 4...
## 5 17403 MULTIPOLYGON (((-122.2061 4...
## 6 17607 MULTIPOLYGON (((-122.1876 4...
#install.packages("leaflet")
library(leaflet)
pal<-colorNumeric("Greens", domain=0:ceiling(max(waMedvG$estimate, na.rm=TRUE)))
popup<-paste("Tract: ", as.character(substring(waMedvG$GEOID, 6, 11)), "<br>",
"Median Home Value: ", as.character(waMedvG$estimate))
leaflet()%>%
addProviderTiles("CartoDB.Positron")%>%
addPolygons(data=waMedvG,
fillColor= ~pal(waMedvG$estimate),
fillOpacity = .7,
weight =.5,
smoothFactor = 0.2,
popup = popup)
## Warning: sf layer has inconsistent datum (+proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs ).
## Need '+proj=longlat +datum=WGS84'
qpal<-colorQuantile("viridis", domain=waMedvG$estimate,
n=5,na.color="#FFFFFF")
leaflet()%>%
addProviderTiles("CartoDB.Positron")%>%
addPolygons(data=waMedvG,
fillColor= ~qpal(waMedvG$estimate),
fillOpacity = 0.7,
color="grey",
opacity=.5,
weight = 0.4,
smoothFactor = 0.2,
popup = popup)%>%
addLegend("bottomright", pal=qpal, values=waMedvG$estimate,
opacity = .7,
title="Percentiles")
## Warning: sf layer has inconsistent datum (+proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs ).
## Need '+proj=longlat +datum=WGS84'