library(gbm)
## Loaded gbm 2.1.8
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(usmap)
library(viridis)
## Loading required package: viridisLite
## 
## Attaching package: 'viridis'
## The following object is masked from 'package:maps':
## 
##     unemp
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1
library(tigris)
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(tidycensus)
## 
## Attaching package: 'tidycensus'
## The following object is masked from 'package:tigris':
## 
##     fips_codes
library(tmap)
## Registered S3 methods overwritten by 'stars':
##   method             from
##   st_bbox.SpatRaster sf  
##   st_crs.SpatRaster  sf
library(leaflet)
vote<-read.csv("https://raw.githubusercontent.com/kitadasmalley/DATA502/main/FALL2021/Data/voterTurnOut1618.csv",
              header=TRUE, 
              stringsAsFactors = FALSE)
states <- usmap::us_map()

head(states)
##         x        y order  hole piece group fips abbr    full
## 1 1091779 -1380695     1 FALSE     1  01.1   01   AL Alabama
## 2 1091268 -1376372     2 FALSE     1  01.1   01   AL Alabama
## 3 1091140 -1362998     3 FALSE     1  01.1   01   AL Alabama
## 4 1090940 -1343517     4 FALSE     1  01.1   01   AL Alabama
## 5 1090913 -1341006     5 FALSE     1  01.1   01   AL Alabama
## 6 1090796 -1334480     6 FALSE     1  01.1   01   AL Alabama

Joining

mapPropVote<-states%>%
  mutate(State=full)%>%
  left_join(vote)
## Joining, by = "State"
head(mapPropVote)
##         x        y order  hole piece group fips abbr    full   State YEAR Voted
## 1 1091779 -1380695     1 FALSE     1  01.1   01   AL Alabama Alabama 2016 Voted
## 2 1091779 -1380695     1 FALSE     1  01.1   01   AL Alabama Alabama 2018 Voted
## 3 1091268 -1376372     2 FALSE     1  01.1   01   AL Alabama Alabama 2016 Voted
## 4 1091268 -1376372     2 FALSE     1  01.1   01   AL Alabama Alabama 2018 Voted
## 5 1091140 -1362998     3 FALSE     1  01.1   01   AL Alabama Alabama 2016 Voted
## 6 1091140 -1362998     3 FALSE     1  01.1   01   AL Alabama Alabama 2018 Voted
##   nVote nWgtVote    n    nWgt sampPropVote wgtPropVote
## 1  1208  2095097 1691 2954157    0.7143702   0.7092028
## 2   976  1830305 1511 2856889    0.6459298   0.6406635
## 3  1208  2095097 1691 2954157    0.7143702   0.7092028
## 4   976  1830305 1511 2856889    0.6459298   0.6406635
## 5  1208  2095097 1691 2954157    0.7143702   0.7092028
## 6   976  1830305 1511 2856889    0.6459298   0.6406635
this.year <- 2016
plot <- mapPropVote %>%
  filter(YEAR == this.year)%>%
  ggplot(aes(text= State, x, y, group= group))+
  geom_polygon(aes(fill= wgtPropVote), color = "black")+
  theme_bw()+
  scale_fill_viridis(option= "magma", direction = -1)

ggplotly(plot)

Demo II

Step 1: Polygon Maps
wa_counties <- map_data("county", "washington") %>% 
  select(lon = long, lat, group, id = subregion)
head(wa_counties)
##         lon      lat group    id
## 1 -118.2356 46.73617     1 adams
## 2 -119.3700 46.74190     1 adams
## 3 -119.3700 46.74190     1 adams
## 4 -119.3757 46.90232     1 adams
## 5 -118.9804 46.90805     1 adams
## 6 -118.9804 47.25756     1 adams

PART A: Points

ggplot(wa_counties, aes(lon, lat)) + 
  geom_point(size = .25, show.legend = FALSE) +
  coord_quickmap()

PART B: Polygons

ggplot(wa_counties, aes(lon, lat, group= group))+
  geom_polygon(fill= "white", colour = "grey50")+
  coord_quickmap()

Step 2: Simple Features Maps

options(tigris_use_cache = TRUE)

# set year for boundaries 
this.year = 2017 #the last census

wa_tracts <- tracts(state = 'WA', county = 'King', 
                    cb = T, year = this.year)

head(wa_tracts)
## Simple feature collection with 6 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -122.3966 ymin: 47.63231 xmax: -122.2551 ymax: 47.73405
## Geodetic CRS:  NAD83
##    STATEFP COUNTYFP TRACTCE             AFFGEOID       GEOID  NAME LSAD   ALAND
## 20      53      033  000200 1400000US53033000200 53033000200     2   CT 3286278
## 21      53      033  000900 1400000US53033000900 53033000900     9   CT 1007015
## 22      53      033  002000 1400000US53033002000 53033002000    20   CT 1103320
## 23      53      033  005000 1400000US53033005000 53033005000    50   CT  757380
## 24      53      033  005801 1400000US53033005801 53033005801 58.01   CT 1814668
## 25      53      033  006800 1400000US53033006800 53033006800    68   CT  718104
##     AWATER                       geometry
## 20       0 MULTIPOLYGON (((-122.3236 4...
## 21 1851901 MULTIPOLYGON (((-122.285 47...
## 22   28680 MULTIPOLYGON (((-122.3177 4...
## 23       0 MULTIPOLYGON (((-122.3473 4...
## 24  264894 MULTIPOLYGON (((-122.3966 4...
## 25       0 MULTIPOLYGON (((-122.3623 4...
# BASE PLOT
plot(wa_tracts)

# GGPLOT
ggplot(wa_tracts) + 
  geom_sf() + 
  coord_sf()

Step 3: Layered Maps / Adding Other Geometries

PART A: Mapping One Night Count Areas

We mapped the ONC cities to census tracts, which was a challenging task. The Census uses AFFGEOID (formerly GEO_IDs) for their shapefiles and there uniquely define areas based on three things: State, County, and Census tract. However, there is not a level for city. We approximated to the best of our abilities by overlaying maps for the cities with Census tracts.

# IMPORT OUR DATA

oncT<-read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2019/main/data/oncTract2.csv",
               header=TRUE,
               stringsAsFactors = FALSE)%>%
  mutate(AFFGEOID=GEO_ID)

oncT$City<-as.factor(oncT$ï..City)


head(oncT)
##   ï..City      GEOID               GEO_ID             AFFGEOID    City
## 1 Seattle 5.3033e+10 1400000US53033000100 1400000US53033000100 Seattle
## 2 Seattle 5.3033e+10 1400000US53033000200 1400000US53033000200 Seattle
## 3 Seattle 5.3033e+10 1400000US53033000300 1400000US53033000300 Seattle
## 4 Seattle 5.3033e+10 1400000US53033000401 1400000US53033000401 Seattle
## 5 Seattle 5.3033e+10 1400000US53033000402 1400000US53033000402 Seattle
## 6 Seattle 5.3033e+10 1400000US53033000500 1400000US53033000500 Seattle

These tract codes were then joined with shapefiles from the Census’ Tigris package in R.

# GEO JOIN DATA 
joinONC<- geo_join(wa_tracts, oncT, 
                  by_sp="AFFGEOID", by_df="AFFGEOID")
## Warning: We recommend using the dplyr::*_join() family of functions instead.
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
ggplot(data = joinONC, aes(fill=City))+
  geom_sf()+
  theme_bw()+
  ggtitle("King County Cities Included in One Night Count Data")

PART B: Mapping Meal Programs

We also wanted to explore the placement of meal programs in the Seattle area. This data came from the City of Seattle and is also publically hosted on Kaggle. Addresses for each location are provided, which we geo-coded into their latitudes and longitudes.

Note that there is a high concentration of meal programs in downtown Seattle but not many in other locations. This might suggest that this is where homeless people are located.

# IMPORT THE GEOCODED DATA
food<-read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2019/main/data/GEOCODE_FOOD%20-%20Sheet1.csv",
               header=TRUE)

ggplot(data = joinONC)+
  geom_sf(aes(fill= City))+
  geom_point(data=food, aes(Long, Lat))+
  theme_bw()

Step 4: Getting Data from the US Census Bureau

First you will need to get a API key: https://api.census.gov/data/key_signup.html

Once you have an API key load it into your R environment so that you can access the ACS data.

#install.packages("tidycensus")

# Get your code here https://api.census.gov/data/key_signup.html
library(tidycensus)
# Set a year of interest
this.year = 2010

# This looks at the 5 year estimates
# You can also do "acs1"
vars <- load_variables(year = this.year,
                      dataset = "acs5",
                      cache = TRUE)

# There are 25070 possible variables 
dim(vars)
## [1] 20927     3

Explore several possible explantory variables from the American Community Survey (ACS) including:

B02001_001: Total B03002_003: White alone (Not Hispanic or Latino) B03002_004 Black or African American alone (Not Hispanic or Latino) B03002_012: Hispanic or Latino B03002_005: Native American alone (Not Hispanic or Latino) B03002_006: Asian alone (Not Hispanic or Latino) B03002_007: Native Hawaiian or Pacific Islander alone (Not Hispanic or Latino) B03002_009: Multiple Races (Not Hispanic or Latino) B03002_008: Other (Not Hispanic or Latino) B25064_001 MEDIAN GROSS RENT B25071_001: Rent Burden (MEDIAN GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME) B19013_001: MEDIAN HOUSEHOLD INCOME IN PAST 12 MONTHS B01002_001: Median age B25115_016: Renter Occupied - family B25115_027: Renter Occupied - nonfamily

PART A: Getting data for one variable

# MEDIAN HOME VALUE
## Specify year and state

waMedv <- get_acs(geography = "tract", year=this.year,
               state = "WA", county = "King",
               variables = "B25077_001E") ## this is the table that we want
## Getting data from the 2006-2010 5-year ACS
head(waMedv)
## # A tibble: 6 x 5
##   GEOID       NAME                                       variable estimate   moe
##   <chr>       <chr>                                      <chr>       <dbl> <dbl>
## 1 53033000100 Census Tract 1, King County, Washington    B25077_~   497300 66258
## 2 53033000200 Census Tract 2, King County, Washington    B25077_~   358700 10609
## 3 53033000300 Census Tract 3, King County, Washington    B25077_~   346500 10807
## 4 53033000401 Census Tract 4.01, King County, Washington B25077_~   342100 45503
## 5 53033000402 Census Tract 4.02, King County, Washington B25077_~   387400 28878
## 6 53033000500 Census Tract 5, King County, Washington    B25077_~   596100 33208

PART B: You can also pull multiple variables are a time

Get census tract level estimates for King County:

## Names for variable types
# Gives five year estimates
waHouse <- get_acs(geography = "tract", year=this.year,
                  state = "WA", county = "King", geometry = TRUE,
                  variables = c(popululation = "B02001_001",
                                median.gross.rent = "B25064_001",
                                median.household.income = "B19013_001",
                                rent.burden = "B25071_001"))
## Getting data from the 2006-2010 5-year ACS
head(waHouse)
## Simple feature collection with 6 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -122.3515 ymin: 47.76317 xmax: -122.3052 ymax: 47.77779
## Geodetic CRS:  NAD83
##         GEOID                                         NAME
## 1 53033020300    Census Tract 203, King County, Washington
## 2 53033020300    Census Tract 203, King County, Washington
## 3 53033020300    Census Tract 203, King County, Washington
## 4 53033020300    Census Tract 203, King County, Washington
## 5 53033020401 Census Tract 204.01, King County, Washington
## 6 53033020401 Census Tract 204.01, King County, Washington
##                  variable estimate    moe                       geometry
## 1            popululation   5642.0  355.0 MULTIPOLYGON (((-122.3404 4...
## 2 median.household.income  54773.0 6375.0 MULTIPOLYGON (((-122.3404 4...
## 3       median.gross.rent   1027.0  108.0 MULTIPOLYGON (((-122.3404 4...
## 4             rent.burden     29.2    3.6 MULTIPOLYGON (((-122.3404 4...
## 5            popululation   3297.0  213.0 MULTIPOLYGON (((-122.3243 4...
## 6 median.household.income  65353.0 8248.0 MULTIPOLYGON (((-122.3243 4...
Notice that we’re going to need to do a little data wrangling so that we have a tidydata format to spread the column named ‘variable’
waTidy<-as.data.frame(waHouse)[,c(1,3:4)]%>%
  spread(variable, estimate)

head(waTidy)
##         GEOID median.gross.rent median.household.income popululation
## 1 53033000100               838                   47518         5784
## 2 53033000200               906                   54797         7682
## 3 53033000300              1101                   61966         2548
## 4 53033000401               841                   35095         5805
## 5 53033000402               951                   45183         4662
## 6 53033000500              1521                  110125         3474
##   rent.burden
## 1        28.6
## 2        28.8
## 3        34.6
## 4        31.0
## 5        34.5
## 6        24.8

Step 5: Geojoins and TMAPS

Suppose that we want to study median house value (which is B25077_001E).

### GET CENSUS DATA
### B25077_001E: MEDIAN HOME VALUE

waMedv <- get_acs(geography = "tract", year=this.year,
              state = "WA", county = "King",
              variables = "B25077_001E")%>%
  mutate(AFFGEOID=paste0("1400000US", GEOID))
## Getting data from the 2006-2010 5-year ACS

In order to combine data from the US Census with our spatial data from trigis we will need to use a geo_join.

## USE GEO_JOIN TO COMBINE SPATIAL DATA AND OTHER DATA FRAMES
joinWA<-geo_join(wa_tracts, waMedv, 
                 by_sp="AFFGEOID", by_df="AFFGEOID")
## Warning: We recommend using the dplyr::*_join() family of functions instead.

Then we will use the tmap package to plot the data with a green color gradient (with 7 levels).

#install.packages("tmap")

## USE TMAP PACKAGE
tm_shape(joinWA, projection = 26916)+
  tm_fill("estimate", style = "quantile", n=7, palette = "Greens")+ ## good for skewed data
  tm_legend(bg.color="white", bg.alpha=0.6) # bg= background

Step 7: Interactive Maps with Leaflet

PART A: Pop-ups

# MEDIAN HOME VALUE
waMedvG <- get_acs(geography = "tract", year=this.year,
               state = "WA", county = "King",
               variables = "B25077_001E", 
               geometry = TRUE)
## Getting data from the 2006-2010 5-year ACS
head(waMedvG)
## Simple feature collection with 6 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -122.3515 ymin: 47.71841 xmax: -122.1842 ymax: 47.77779
## Geodetic CRS:  NAD83
##         GEOID                                         NAME   variable estimate
## 1 53033020300    Census Tract 203, King County, Washington B25077_001   358100
## 2 53033020401 Census Tract 204.01, King County, Washington B25077_001   335400
## 3 53033020402 Census Tract 204.02, King County, Washington B25077_001   374900
## 4 53033021100    Census Tract 211, King County, Washington B25077_001   339500
## 5 53033021804 Census Tract 218.04, King County, Washington B25077_001   424100
## 6 53033022003 Census Tract 220.03, King County, Washington B25077_001   382300
##     moe                       geometry
## 1 18863 MULTIPOLYGON (((-122.3404 4...
## 2 13782 MULTIPOLYGON (((-122.3243 4...
## 3 24841 MULTIPOLYGON (((-122.2852 4...
## 4 14556 MULTIPOLYGON (((-122.3237 4...
## 5 17403 MULTIPOLYGON (((-122.2061 4...
## 6 17607 MULTIPOLYGON (((-122.1876 4...
#install.packages("leaflet")

pal<- colorNumeric("Greens", domain=0:ceiling(max(waMedvG$estimate, na.rm=TRUE))) # First define the palet.

popup<-paste("Tract: ", as.character(substring(waMedvG$GEOID, 6, 11)), "<br>",
             "Median Home Value: ", as.character(waMedvG$estimate)) # Pop up which sensus tract we are looking at

leaflet()%>%
  addProviderTiles("CartoDB.Positron")%>%
  addPolygons(data=waMedvG, 
              fillColor= ~pal(waMedvG$estimate), #we're going to pass in the variable that we're using for our color palette, so this is Washington median values with geography dollar sign testament.
              fillOpacity = .7,
              weight =.5,
              smoothFactor = 0.2,
              popup = popup) # the pop up is going to have the name pop up and it's going to reference back to this texts that we put together.
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'

PART B: Quantile Color Palette

#create a color palette that is a quantel 

qpal<-colorQuantile("viridis", domain=waMedvG$estimate,
                       n=5,na.color="#FFFFFF") #color palette viridis

leaflet()%>%
  addProviderTiles("CartoDB.Positron")%>%
  addPolygons(data=waMedvG,
              fillColor= ~qpal(waMedvG$estimate),
              fillOpacity = 0.7,
                  color="grey",
                  opacity=.5,
                  weight = 0.4,
                  smoothFactor = 0.2,
                  popup = popup)%>%
      addLegend("bottomright", pal=qpal, values=waMedvG$estimate,
                opacity = .7,
                title="Percentiles")
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'