Measuring the jobs and homes imbalance requires two separate data sources: housing unit counts from the U.S. Census Bureau (pulled with tidycensus) and employment counts from the Bureau of Labor Statistics (pulled with blscrapeR). Together, these data let us compare the number of housing units in each locality to its overall number of jobs.
library(tidycensus)
library(tidyverse)
library(blscrapeR)
library(tigris)
knitr::opts_chunk$set(echo=TRUE, message=FALSE, warning=FALSE)
# Download total housing unit counts from the Population Estimates Program (PEP)
# and the 2020 Decennial Census P.L. 94-171 Redistricting Data.
va_hu_localities <- get_estimates(
geography = "county",
state = "VA",
variables = "HUEST",
year = 2019,
time_series = TRUE
)
va_hu_localities_2020 <- get_decennial(
geography = "county",
state = "VA",
variables = "H1_001N",
year = 2020
)
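# A quick look at what came back (optional). The PEP pull should contain one row
# per locality per DATE code and the 2020 pull one row per locality; the column
# names assume tidycensus's default tidy output.
head(va_hu_localities)
head(va_hu_localities_2020)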
# The following pulls all localities in Virginia so we can iterate through BLS series.
# The list_counties() function from the tigris package returns each locality along
# with its three-digit county FIPS code.
virginia <- list_counties("VA")
# The following adds a column of BLS series ids. The mutate() function creates a
# new column called 'jobsseries', and paste0() concatenates the pieces of each
# Local Area Unemployment Statistics (LAUS) series id without spaces.
virginia <- virginia %>% mutate(jobsseries = paste0("LAUCN51", county_code, "0000000005")) # measure code 05 = employment
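# As an illustrative spot check, the id built for Accomack County (county code "001")
# should read "LAUCN510010000000005": prefix LAUCN, state FIPS 51, county code 001,
# padding zeros, and the employment measure code 05.
virginia %>% filter(county_code == "001") %>% pull(jobsseries)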
# Only the series ids are needed to pull from the BLS API, so we create a list of
# series ids for all 136 Virginia localities.
jobs_list <- list(virginia$jobsseries)
# The blscrapeR query function requires a character vector rather than a list, so
# unlist() is used to simplify the list to a vector.
jobs_list <- unlist(jobs_list)
# The BLS API has a series query limit of 50, but there are 136 localities (series ids)
# to pull. So we need to split the vector into chunks using the split() function.
jobs_list <- split(jobs_list, ceiling(seq_along(jobs_list)/50))
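# Each element of jobs_list should now hold at most 50 series ids, which keeps
# every request within the BLS limit; lengths() gives a quick confirmation.
lengths(jobs_list)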
# Helper function that queries the BLS API for a chunk of series ids, using the
# API key stored in the BLS_KEY environment variable, and casts the date fields.
bls_jobs_pull <- function(jobs_list){
  bls_api(jobs_list,
          startyear = 2010, endyear = 2022, annualaverage = TRUE,
          registrationKey = Sys.getenv("BLS_KEY")) %>%
    dateCast()
}
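# As an optional spot check, a single chunk can be pulled before mapping over all
# of them (assumes a valid key is stored in the BLS_KEY environment variable):
# test_pull <- bls_jobs_pull(jobs_list[[1]])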
# Date codes in the PEP data frame need to be converted to usable dates, and then
# the P.L. 94-171 data frame needs matching columns added.
va_hu_localities <- va_hu_localities %>%
mutate(longdate =
case_when( # Add date values.
DATE == 1 ~ "4/1/2010",
DATE == 2 ~ "4/1/2010",
DATE == 3 ~ "7/1/2010",
DATE == 4 ~ "7/1/2011",
DATE == 5 ~ "7/1/2012",
DATE == 6 ~ "7/1/2013",
DATE == 7 ~ "7/1/2014",
DATE == 8 ~ "7/1/2015",
DATE == 9 ~ "7/1/2016",
DATE == 10 ~ "7/1/2017",
DATE == 11 ~ "7/1/2018",
DATE == 12 ~ "7/1/2019"
),
counttype =
case_when( # Add description for data value type.
DATE == 1 ~ "Census population",
DATE == 2 ~ "Estimates base",
TRUE ~ "Population estimate"
),
year =
case_when( # Add year values.
DATE == 1 ~ 2010,
DATE == 2 ~ 2010,
DATE == 3 ~ 2010,
DATE == 4 ~ 2011,
DATE == 5 ~ 2012,
DATE == 6 ~ 2013,
DATE == 7 ~ 2014,
DATE == 8 ~ 2015,
DATE == 9 ~ 2016,
DATE == 10 ~ 2017,
DATE == 11 ~ 2018,
DATE == 12 ~ 2019
)
)
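# Optional check: each DATE code should now map to exactly one combination of
# date, count type, and year.
va_hu_localities %>% distinct(DATE, longdate, counttype, year)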
# Add matching columns with appropriate data to the PL data frame.
va_hu_localities_2020 <- va_hu_localities_2020 %>%
mutate(DATE = 13,
longdate = "4/1/2020",
counttype = "Census population",
year = 2020)
# Then combine the two data frames.
va_hu <- rbind(va_hu_localities, va_hu_localities_2020)
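# Pull employment for every locality, keep only the annual average rows, and
# extract the five-digit state-plus-county FIPS code from characters 6-10 of each
# LAUS series id (e.g. "51001" from "LAUCN510010000000005").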
va_jobs <- map_dfr(jobs_list, bls_jobs_pull) %>%
filter(str_detect(periodName, "Annual")) %>%
mutate(fips = substr(seriesID, 6, 10))
# Join the housing unit and employment data frames by year and FIPS code.
va_jobs_hu <- va_jobs %>%
left_join(va_hu, by = c('fips' = 'GEOID', 'year' = 'year'))
va_jobs_hu <- va_jobs_hu %>%
rename(employment = value.x, hu = value.y)
# Write to csv file.
write_csv(va_jobs_hu, "data/va_jobs_housing_2010_2020.csv")
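# As a first look at the imbalance itself, a jobs-to-housing ratio can be computed
# for each locality and year. This is only a sketch and leaves the csv written
# above unchanged; the column names are those renamed earlier.
va_jobs_hu %>%
  mutate(jobs_per_hu = employment / hu) %>%
  arrange(desc(jobs_per_hu)) %>%
  head()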
Reserved for future Shiny app development.