Motivating Example

In this demo we will be looking at factors for housing insecurity and homelessness in Seattle, Washington.

Step 1: Polygon Maps

library(tidyverse)

#install.packages("maps")
library(maps)

wa_counties <- map_data("county", "washington") %>% 
  select(lon = long, lat, group, id = subregion)

head(wa_counties)
##         lon      lat group    id
## 1 -118.2356 46.73617     1 adams
## 2 -119.3700 46.74190     1 adams
## 3 -119.3700 46.74190     1 adams
## 4 -119.3757 46.90232     1 adams
## 5 -118.9804 46.90805     1 adams
## 6 -118.9804 47.25756     1 adams

Plotting with ggplot

PART A: Points

ggplot(wa_counties, aes(lon, lat)) + 
  geom_point(size = .25, show.legend = FALSE) +
  coord_quickmap()

PART B: Polygons

ggplot(wa_counties, aes(lon, lat, group = group)) +
  geom_polygon(fill = "white", colour = "grey50") + 
  coord_quickmap()

Step 2: Simple Features Maps

#install.packages("sf")
library(sf)

#install.packages("tigris")
library(tigris)
options(tigris_use_cache = TRUE)

# set year for boundaries 
this.year = 2017 #the last census

wa_tracts <- tracts(state = 'WA', county = 'King', 
                    cb = T, year = this.year)

head(wa_tracts)

# BASE PLOT
plot(wa_tracts)

# GGPLOT
ggplot(wa_tracts) + 
  geom_sf() + 
  coord_sf()

Step 3: Layered Maps / Adding Other Geometries

PART A: Mapping One Night Count Areas

We mapped the ONC cities to census tracts, which was a challenging task. The Census uses AFFGEOID (formerly GEO_IDs) for their shapefiles and there uniquely define areas based on three things: State, County, and Census tract. However, there is not a level for city. We approximated to the best of our abilities by overlaying maps for the cities with Census tracts.

# IMPORT OUR DATA
oncT<-read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2019/main/data/oncTract2.csv",
               header=TRUE,
               stringsAsFactors = FALSE)%>%
  mutate(AFFGEOID=GEO_ID)

oncT$City<-as.factor(oncT$City)

These tract codes were then joined with shapefiles from the Census’ Tigris package in R.

# GEO JOIN DATA 
joinONC<-geo_join(wa_tracts, oncT, 
                  by_sp="AFFGEOID", by_df="AFFGEOID")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# CREAT PLOT IN GGPLOT
ggplot(data = joinONC, aes(fill=City))+
  geom_sf()+
  #coord_sf(xlim=c(-122.6, -122))+ #zoom in on onc only
  theme_bw()+
  ggtitle("King County Cities Included in One Night Count Data")

PART B: Mapping Meal Programs

We also wanted to explore the placement of meal programs in the Seattle area. This data came from the City of Seattle and is also publically hosted on Kaggle. Addresses for each location are provided, which we geo-coded into their latitudes and longitudes.

Note that there is a high concentration of meal programs in downtown Seattle but not many in other locations. This might suggest that this is where homeless people are located.

# IMPORT THE GEOCODED DATA
food<-read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2019/main/data/GEOCODE_FOOD%20-%20Sheet1.csv",
               header=TRUE)

ggplot(data = joinONC)+
  geom_sf(aes(fill=City))+
  geom_point(data=food, aes(Long, Lat))+
  theme_bw()

Step 4: Getting Data from the US Census Bureau

First you will need to get a API key: https://api.census.gov/data/key_signup.html

Once you have an API key load it into your R environment so that you can access the ACS data.

#install.packages("tidycensus")
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 3.6.2
# YOUR CODE SHOULD LOOK LIKE THIS
# census_api_key("INCLUDE YOUR API HERE")
## To install your API key for use in future sessions, run this function with `install = TRUE`.

Many many variables are included in the ACS. The ACS has 1 and 5 year estimates. Use the following code to see what variables are available:

# Set a year of interest
this.year = 2010

# This looks at the 5 year estimates
# You can also do "acs1"
vars <- load_variables(year = this.year,
                      dataset = "acs5",
                      cache = TRUE)

# There are 25070 possible variables 
dim(vars)
## [1] 20926     3

Explore several possible explantory variables from the American Community Survey (ACS) including:

PART A: Getting data for one variable

# MEDIAN HOME VALUE
waMedv <- get_acs(geography = "tract", year=this.year,
               state = "WA", county = "King",
               variables = "B25077_001E")
## Getting data from the 2006-2010 5-year ACS
head(waMedv)
## # A tibble: 6 x 5
##   GEOID       NAME                                      variable  estimate   moe
##   <chr>       <chr>                                     <chr>        <dbl> <dbl>
## 1 53033000100 Census Tract 1, King County, Washington   B25077_0…   497300 66258
## 2 53033000200 Census Tract 2, King County, Washington   B25077_0…   358700 10609
## 3 53033000300 Census Tract 3, King County, Washington   B25077_0…   346500 10807
## 4 53033000401 Census Tract 4.01, King County, Washingt… B25077_0…   342100 45503
## 5 53033000402 Census Tract 4.02, King County, Washingt… B25077_0…   387400 28878
## 6 53033000500 Census Tract 5, King County, Washington   B25077_0…   596100 33208

PART B: You can also pull multiple variables are a time

Get census tract level estimates for King County:

## Names for variable types
# Gives five year estimates
waHouse <- get_acs(geography = "tract", year=this.year,
                  state = "WA", county = "King", geometry = TRUE,
                  variables = c(popululation = "B02001_001",
                                median.gross.rent = "B25064_001",
                                median.household.income = "B19013_001",
                                rent.burden = "B25071_001"))
## Getting data from the 2006-2010 5-year ACS
head(waHouse)
## Simple feature collection with 6 features and 5 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -122.3515 ymin: 47.76317 xmax: -122.3052 ymax: 47.77779
## CRS:            4269
##         GEOID                                         NAME
## 1 53033020300    Census Tract 203, King County, Washington
## 2 53033020300    Census Tract 203, King County, Washington
## 3 53033020300    Census Tract 203, King County, Washington
## 4 53033020300    Census Tract 203, King County, Washington
## 5 53033020401 Census Tract 204.01, King County, Washington
## 6 53033020401 Census Tract 204.01, King County, Washington
##                  variable estimate    moe                       geometry
## 1            popululation   5642.0  355.0 MULTIPOLYGON (((-122.3404 4...
## 2 median.household.income  54773.0 6375.0 MULTIPOLYGON (((-122.3404 4...
## 3       median.gross.rent   1027.0  108.0 MULTIPOLYGON (((-122.3404 4...
## 4             rent.burden     29.2    3.6 MULTIPOLYGON (((-122.3404 4...
## 5            popululation   3297.0  213.0 MULTIPOLYGON (((-122.3243 4...
## 6 median.household.income  65353.0 8248.0 MULTIPOLYGON (((-122.3243 4...

Notice that we’re going to need to do a little data wrangling so that we have a tidydata format to spread the column named ‘variable’.

waTidy<-as.data.frame(waHouse)[,c(1,3:4)]%>%
  spread(variable, estimate)

head(waTidy)
##         GEOID median.gross.rent median.household.income popululation
## 1 53033000100               838                   47518         5784
## 2 53033000200               906                   54797         7682
## 3 53033000300              1101                   61966         2548
## 4 53033000401               841                   35095         5805
## 5 53033000402               951                   45183         4662
## 6 53033000500              1521                  110125         3474
##   rent.burden
## 1        28.6
## 2        28.8
## 3        34.6
## 4        31.0
## 5        34.5
## 6        24.8

Step 5: Geojoins and TMAPS

Suppose that we want to study median house value (which is B25077_001E).

### GET CENSUS DATA
### B25077_001E: MEDIAN HOME VALUE
waMedv <- get_acs(geography = "tract", year=this.year,
              state = "WA", county = "King",
              variables = "B25077_001E")%>%
  mutate(AFFGEOID=paste0("1400000US", GEOID))
## Getting data from the 2006-2010 5-year ACS

In order to combine data from the US Census with our spatial data from trigis we will need to use a geo_join.

## USE GEO_JOIN TO COMBINE SPATIAL DATA AND OTHER DATA FRAMES
joinWA<-geo_join(wa_tracts, waMedv, 
                 by_sp="AFFGEOID", by_df="AFFGEOID")

Then we will use the tmap package to plot the data with a green color gradient (with 7 levels).

#install.packages("tmap")
library(tmap)
## Warning: package 'tmap' was built under R version 3.6.2
## USE TMAP PACKAGE
tm_shape(joinWA, projection = 26916)+
  tm_fill("estimate", style = "quantile", n=7, palette = "Greens")+
  tm_legend(bg.color="white", bg.alpha=0.6)

Step 7: Interactive Maps with Leaflet

PART A: Pop-ups

# MEDIAN HOME VALUE
waMedvG <- get_acs(geography = "tract", year=this.year,
               state = "WA", county = "King",
               variables = "B25077_001E", 
               geometry = TRUE)
## Getting data from the 2006-2010 5-year ACS
head(waMedvG)
## Simple feature collection with 6 features and 5 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -122.3515 ymin: 47.71841 xmax: -122.1842 ymax: 47.77779
## CRS:            4269
##         GEOID                                         NAME   variable estimate
## 1 53033020300    Census Tract 203, King County, Washington B25077_001   358100
## 2 53033020401 Census Tract 204.01, King County, Washington B25077_001   335400
## 3 53033020402 Census Tract 204.02, King County, Washington B25077_001   374900
## 4 53033021100    Census Tract 211, King County, Washington B25077_001   339500
## 5 53033021804 Census Tract 218.04, King County, Washington B25077_001   424100
## 6 53033022003 Census Tract 220.03, King County, Washington B25077_001   382300
##     moe                       geometry
## 1 18863 MULTIPOLYGON (((-122.3404 4...
## 2 13782 MULTIPOLYGON (((-122.3243 4...
## 3 24841 MULTIPOLYGON (((-122.2852 4...
## 4 14556 MULTIPOLYGON (((-122.3237 4...
## 5 17403 MULTIPOLYGON (((-122.2061 4...
## 6 17607 MULTIPOLYGON (((-122.1876 4...
#install.packages("leaflet")
library(leaflet)

pal<-colorNumeric("Greens", domain=0:ceiling(max(waMedvG$estimate, na.rm=TRUE)))

popup<-paste("Tract: ", as.character(substring(waMedvG$GEOID, 6, 11)), "<br>",
             "Median Home Value: ", as.character(waMedvG$estimate))

leaflet()%>%
  addProviderTiles("CartoDB.Positron")%>%
  addPolygons(data=waMedvG,
              fillColor= ~pal(waMedvG$estimate),
              fillOpacity = .7,
              weight =.5,
              smoothFactor = 0.2,
              popup = popup)
## Warning: sf layer has inconsistent datum (+proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs ).
## Need '+proj=longlat +datum=WGS84'

PART B: Quantile Color Palette

qpal<-colorQuantile("viridis", domain=waMedvG$estimate,
                       n=5,na.color="#FFFFFF")

leaflet()%>%
  addProviderTiles("CartoDB.Positron")%>%
  addPolygons(data=waMedvG,
              fillColor= ~qpal(waMedvG$estimate),
              fillOpacity = 0.7,
                  color="grey",
                  opacity=.5,
                  weight = 0.4,
                  smoothFactor = 0.2,
                  popup = popup)%>%
      addLegend("bottomright", pal=qpal, values=waMedvG$estimate,
                opacity = .7,
                title="Percentiles")
## Warning: sf layer has inconsistent datum (+proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs ).
## Need '+proj=longlat +datum=WGS84'