library(gbm)
## Loaded gbm 2.1.8
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(usmap)
library(viridis)
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
## The following object is masked from 'package:maps':
##
## unemp
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(tidycensus)
##
## Attaching package: 'tidycensus'
## The following object is masked from 'package:tigris':
##
## fips_codes
library(tmap)
## Registered S3 methods overwritten by 'stars':
## method from
## st_bbox.SpatRaster sf
## st_crs.SpatRaster sf
library(leaflet)
vote<-read.csv("https://raw.githubusercontent.com/kitadasmalley/DATA502/main/FALL2021/Data/voterTurnOut1618.csv",
header=TRUE,
stringsAsFactors = FALSE)
states <- usmap::us_map()
head(states)
## x y order hole piece group fips abbr full
## 1 1091779 -1380695 1 FALSE 1 01.1 01 AL Alabama
## 2 1091268 -1376372 2 FALSE 1 01.1 01 AL Alabama
## 3 1091140 -1362998 3 FALSE 1 01.1 01 AL Alabama
## 4 1090940 -1343517 4 FALSE 1 01.1 01 AL Alabama
## 5 1090913 -1341006 5 FALSE 1 01.1 01 AL Alabama
## 6 1090796 -1334480 6 FALSE 1 01.1 01 AL Alabama
mapPropVote<-states%>%
mutate(State=full)%>%
left_join(vote)
## Joining, by = "State"
head(mapPropVote)
## x y order hole piece group fips abbr full State YEAR Voted
## 1 1091779 -1380695 1 FALSE 1 01.1 01 AL Alabama Alabama 2016 Voted
## 2 1091779 -1380695 1 FALSE 1 01.1 01 AL Alabama Alabama 2018 Voted
## 3 1091268 -1376372 2 FALSE 1 01.1 01 AL Alabama Alabama 2016 Voted
## 4 1091268 -1376372 2 FALSE 1 01.1 01 AL Alabama Alabama 2018 Voted
## 5 1091140 -1362998 3 FALSE 1 01.1 01 AL Alabama Alabama 2016 Voted
## 6 1091140 -1362998 3 FALSE 1 01.1 01 AL Alabama Alabama 2018 Voted
## nVote nWgtVote n nWgt sampPropVote wgtPropVote
## 1 1208 2095097 1691 2954157 0.7143702 0.7092028
## 2 976 1830305 1511 2856889 0.6459298 0.6406635
## 3 1208 2095097 1691 2954157 0.7143702 0.7092028
## 4 976 1830305 1511 2856889 0.6459298 0.6406635
## 5 1208 2095097 1691 2954157 0.7143702 0.7092028
## 6 976 1830305 1511 2856889 0.6459298 0.6406635
this.year <- 2016
plot <- mapPropVote %>%
filter(YEAR == this.year)%>%
ggplot(aes(text= State, x, y, group= group))+
geom_polygon(aes(fill= wgtPropVote), color = "black")+
theme_bw()+
scale_fill_viridis(option= "magma", direction = -1)
ggplotly(plot)
wa_counties <- map_data("county", "washington") %>%
select(lon = long, lat, group, id = subregion)
head(wa_counties)
## lon lat group id
## 1 -118.2356 46.73617 1 adams
## 2 -119.3700 46.74190 1 adams
## 3 -119.3700 46.74190 1 adams
## 4 -119.3757 46.90232 1 adams
## 5 -118.9804 46.90805 1 adams
## 6 -118.9804 47.25756 1 adams
ggplot(wa_counties, aes(lon, lat)) +
geom_point(size = .25, show.legend = FALSE) +
coord_quickmap()
ggplot(wa_counties, aes(lon, lat, group= group))+
geom_polygon(fill= "white", colour = "grey50")+
coord_quickmap()
options(tigris_use_cache = TRUE)
# set year for boundaries
this.year = 2017 #the last census
wa_tracts <- tracts(state = 'WA', county = 'King',
cb = T, year = this.year)
head(wa_tracts)
## Simple feature collection with 6 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -122.3966 ymin: 47.63231 xmax: -122.2551 ymax: 47.73405
## Geodetic CRS: NAD83
## STATEFP COUNTYFP TRACTCE AFFGEOID GEOID NAME LSAD ALAND
## 20 53 033 000200 1400000US53033000200 53033000200 2 CT 3286278
## 21 53 033 000900 1400000US53033000900 53033000900 9 CT 1007015
## 22 53 033 002000 1400000US53033002000 53033002000 20 CT 1103320
## 23 53 033 005000 1400000US53033005000 53033005000 50 CT 757380
## 24 53 033 005801 1400000US53033005801 53033005801 58.01 CT 1814668
## 25 53 033 006800 1400000US53033006800 53033006800 68 CT 718104
## AWATER geometry
## 20 0 MULTIPOLYGON (((-122.3236 4...
## 21 1851901 MULTIPOLYGON (((-122.285 47...
## 22 28680 MULTIPOLYGON (((-122.3177 4...
## 23 0 MULTIPOLYGON (((-122.3473 4...
## 24 264894 MULTIPOLYGON (((-122.3966 4...
## 25 0 MULTIPOLYGON (((-122.3623 4...
# BASE PLOT
plot(wa_tracts)
# GGPLOT
ggplot(wa_tracts) +
geom_sf() +
coord_sf()
We mapped the ONC cities to census tracts, which was a challenging task. The Census uses AFFGEOID (formerly GEO_IDs) for their shapefiles and there uniquely define areas based on three things: State, County, and Census tract. However, there is not a level for city. We approximated to the best of our abilities by overlaying maps for the cities with Census tracts.
# IMPORT OUR DATA
oncT<-read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2019/main/data/oncTract2.csv",
header=TRUE,
stringsAsFactors = FALSE)%>%
mutate(AFFGEOID=GEO_ID)
oncT$City<-as.factor(oncT$ï..City)
head(oncT)
## ï..City GEOID GEO_ID AFFGEOID City
## 1 Seattle 5.3033e+10 1400000US53033000100 1400000US53033000100 Seattle
## 2 Seattle 5.3033e+10 1400000US53033000200 1400000US53033000200 Seattle
## 3 Seattle 5.3033e+10 1400000US53033000300 1400000US53033000300 Seattle
## 4 Seattle 5.3033e+10 1400000US53033000401 1400000US53033000401 Seattle
## 5 Seattle 5.3033e+10 1400000US53033000402 1400000US53033000402 Seattle
## 6 Seattle 5.3033e+10 1400000US53033000500 1400000US53033000500 Seattle
These tract codes were then joined with shapefiles from the Census’ Tigris package in R.
# GEO JOIN DATA
joinONC<- geo_join(wa_tracts, oncT,
by_sp="AFFGEOID", by_df="AFFGEOID")
## Warning: We recommend using the dplyr::*_join() family of functions instead.
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
ggplot(data = joinONC, aes(fill=City))+
geom_sf()+
theme_bw()+
ggtitle("King County Cities Included in One Night Count Data")
We also wanted to explore the placement of meal programs in the Seattle area. This data came from the City of Seattle and is also publically hosted on Kaggle. Addresses for each location are provided, which we geo-coded into their latitudes and longitudes.
Note that there is a high concentration of meal programs in downtown Seattle but not many in other locations. This might suggest that this is where homeless people are located.
# IMPORT THE GEOCODED DATA
food<-read.csv("https://raw.githubusercontent.com/kitadasmalley/fallChallenge2019/main/data/GEOCODE_FOOD%20-%20Sheet1.csv",
header=TRUE)
ggplot(data = joinONC)+
geom_sf(aes(fill= City))+
geom_point(data=food, aes(Long, Lat))+
theme_bw()
First you will need to get a API key: https://api.census.gov/data/key_signup.html
Once you have an API key load it into your R environment so that you can access the ACS data.
#install.packages("tidycensus")
# Get your code here https://api.census.gov/data/key_signup.html
library(tidycensus)
# Set a year of interest
this.year = 2010
# This looks at the 5 year estimates
# You can also do "acs1"
vars <- load_variables(year = this.year,
dataset = "acs5",
cache = TRUE)
# There are 25070 possible variables
dim(vars)
## [1] 20927 3
B02001_001: Total B03002_003: White alone (Not Hispanic or Latino) B03002_004 Black or African American alone (Not Hispanic or Latino) B03002_012: Hispanic or Latino B03002_005: Native American alone (Not Hispanic or Latino) B03002_006: Asian alone (Not Hispanic or Latino) B03002_007: Native Hawaiian or Pacific Islander alone (Not Hispanic or Latino) B03002_009: Multiple Races (Not Hispanic or Latino) B03002_008: Other (Not Hispanic or Latino) B25064_001 MEDIAN GROSS RENT B25071_001: Rent Burden (MEDIAN GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME) B19013_001: MEDIAN HOUSEHOLD INCOME IN PAST 12 MONTHS B01002_001: Median age B25115_016: Renter Occupied - family B25115_027: Renter Occupied - nonfamily
# MEDIAN HOME VALUE
## Specify year and state
waMedv <- get_acs(geography = "tract", year=this.year,
state = "WA", county = "King",
variables = "B25077_001E") ## this is the table that we want
## Getting data from the 2006-2010 5-year ACS
head(waMedv)
## # A tibble: 6 x 5
## GEOID NAME variable estimate moe
## <chr> <chr> <chr> <dbl> <dbl>
## 1 53033000100 Census Tract 1, King County, Washington B25077_~ 497300 66258
## 2 53033000200 Census Tract 2, King County, Washington B25077_~ 358700 10609
## 3 53033000300 Census Tract 3, King County, Washington B25077_~ 346500 10807
## 4 53033000401 Census Tract 4.01, King County, Washington B25077_~ 342100 45503
## 5 53033000402 Census Tract 4.02, King County, Washington B25077_~ 387400 28878
## 6 53033000500 Census Tract 5, King County, Washington B25077_~ 596100 33208
Get census tract level estimates for King County:
## Names for variable types
# Gives five year estimates
waHouse <- get_acs(geography = "tract", year=this.year,
state = "WA", county = "King", geometry = TRUE,
variables = c(popululation = "B02001_001",
median.gross.rent = "B25064_001",
median.household.income = "B19013_001",
rent.burden = "B25071_001"))
## Getting data from the 2006-2010 5-year ACS
head(waHouse)
## Simple feature collection with 6 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -122.3515 ymin: 47.76317 xmax: -122.3052 ymax: 47.77779
## Geodetic CRS: NAD83
## GEOID NAME
## 1 53033020300 Census Tract 203, King County, Washington
## 2 53033020300 Census Tract 203, King County, Washington
## 3 53033020300 Census Tract 203, King County, Washington
## 4 53033020300 Census Tract 203, King County, Washington
## 5 53033020401 Census Tract 204.01, King County, Washington
## 6 53033020401 Census Tract 204.01, King County, Washington
## variable estimate moe geometry
## 1 popululation 5642.0 355.0 MULTIPOLYGON (((-122.3404 4...
## 2 median.household.income 54773.0 6375.0 MULTIPOLYGON (((-122.3404 4...
## 3 median.gross.rent 1027.0 108.0 MULTIPOLYGON (((-122.3404 4...
## 4 rent.burden 29.2 3.6 MULTIPOLYGON (((-122.3404 4...
## 5 popululation 3297.0 213.0 MULTIPOLYGON (((-122.3243 4...
## 6 median.household.income 65353.0 8248.0 MULTIPOLYGON (((-122.3243 4...
waTidy<-as.data.frame(waHouse)[,c(1,3:4)]%>%
spread(variable, estimate)
head(waTidy)
## GEOID median.gross.rent median.household.income popululation
## 1 53033000100 838 47518 5784
## 2 53033000200 906 54797 7682
## 3 53033000300 1101 61966 2548
## 4 53033000401 841 35095 5805
## 5 53033000402 951 45183 4662
## 6 53033000500 1521 110125 3474
## rent.burden
## 1 28.6
## 2 28.8
## 3 34.6
## 4 31.0
## 5 34.5
## 6 24.8
Suppose that we want to study median house value (which is B25077_001E).
### GET CENSUS DATA
### B25077_001E: MEDIAN HOME VALUE
waMedv <- get_acs(geography = "tract", year=this.year,
state = "WA", county = "King",
variables = "B25077_001E")%>%
mutate(AFFGEOID=paste0("1400000US", GEOID))
## Getting data from the 2006-2010 5-year ACS
In order to combine data from the US Census with our spatial data from trigis we will need to use a geo_join.
## USE GEO_JOIN TO COMBINE SPATIAL DATA AND OTHER DATA FRAMES
joinWA<-geo_join(wa_tracts, waMedv,
by_sp="AFFGEOID", by_df="AFFGEOID")
## Warning: We recommend using the dplyr::*_join() family of functions instead.
Then we will use the tmap package to plot the data with a green color gradient (with 7 levels).
#install.packages("tmap")
## USE TMAP PACKAGE
tm_shape(joinWA, projection = 26916)+
tm_fill("estimate", style = "quantile", n=7, palette = "Greens")+ ## good for skewed data
tm_legend(bg.color="white", bg.alpha=0.6) # bg= background
# MEDIAN HOME VALUE
waMedvG <- get_acs(geography = "tract", year=this.year,
state = "WA", county = "King",
variables = "B25077_001E",
geometry = TRUE)
## Getting data from the 2006-2010 5-year ACS
head(waMedvG)
## Simple feature collection with 6 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -122.3515 ymin: 47.71841 xmax: -122.1842 ymax: 47.77779
## Geodetic CRS: NAD83
## GEOID NAME variable estimate
## 1 53033020300 Census Tract 203, King County, Washington B25077_001 358100
## 2 53033020401 Census Tract 204.01, King County, Washington B25077_001 335400
## 3 53033020402 Census Tract 204.02, King County, Washington B25077_001 374900
## 4 53033021100 Census Tract 211, King County, Washington B25077_001 339500
## 5 53033021804 Census Tract 218.04, King County, Washington B25077_001 424100
## 6 53033022003 Census Tract 220.03, King County, Washington B25077_001 382300
## moe geometry
## 1 18863 MULTIPOLYGON (((-122.3404 4...
## 2 13782 MULTIPOLYGON (((-122.3243 4...
## 3 24841 MULTIPOLYGON (((-122.2852 4...
## 4 14556 MULTIPOLYGON (((-122.3237 4...
## 5 17403 MULTIPOLYGON (((-122.2061 4...
## 6 17607 MULTIPOLYGON (((-122.1876 4...
#install.packages("leaflet")
pal<- colorNumeric("Greens", domain=0:ceiling(max(waMedvG$estimate, na.rm=TRUE))) # First define the palet.
popup<-paste("Tract: ", as.character(substring(waMedvG$GEOID, 6, 11)), "<br>",
"Median Home Value: ", as.character(waMedvG$estimate)) # Pop up which sensus tract we are looking at
leaflet()%>%
addProviderTiles("CartoDB.Positron")%>%
addPolygons(data=waMedvG,
fillColor= ~pal(waMedvG$estimate), #we're going to pass in the variable that we're using for our color palette, so this is Washington median values with geography dollar sign testament.
fillOpacity = .7,
weight =.5,
smoothFactor = 0.2,
popup = popup) # the pop up is going to have the name pop up and it's going to reference back to this texts that we put together.
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'
#create a color palette that is a quantel
qpal<-colorQuantile("viridis", domain=waMedvG$estimate,
n=5,na.color="#FFFFFF") #color palette viridis
leaflet()%>%
addProviderTiles("CartoDB.Positron")%>%
addPolygons(data=waMedvG,
fillColor= ~qpal(waMedvG$estimate),
fillOpacity = 0.7,
color="grey",
opacity=.5,
weight = 0.4,
smoothFactor = 0.2,
popup = popup)%>%
addLegend("bottomright", pal=qpal, values=waMedvG$estimate,
opacity = .7,
title="Percentiles")
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'