1 Data Sources

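Measuring the jobs and homes imbalance requires two separate data sources: housing unit counts from the U.S. Census Bureau (the Population Estimates Program and the 2020 Decennial Census) and employment counts from the Bureau of Labor Statistics (BLS).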

Using these two data sources, we can measure the number of housing units and compare it to the overall number of jobs.

2 Setup

library(tidycensus)
library(tidyverse)
library(blscrapeR)
library(tigris)
# Chunk defaults for the rendered document: echo the code, hide messages and warnings.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE
)

3 Data Collection

# Total housing unit counts for Virginia at the county geography, from two Census
# Bureau products:
#   * Population Estimates Program (PEP): variable "HUEST", requested as a time
#     series with year = 2019.
#   * Decennial Census P.L. 94-171 Redistricting Data: variable "H1_001N" for 2020.

va_hu_localities <- get_estimates(
  variables = "HUEST",
  geography = "county",
  state = "VA",
  time_series = TRUE,
  year = 2019
)

va_hu_localities_2020 <- get_decennial(
  variables = "H1_001N",
  geography = "county",
  state = "VA",
  year = 2020
)

# Build one BLS series id per Virginia locality. list_counties() from the TIGRIS
# package supplies the localities together with their county FIPS codes; mutate()
# then adds a 'jobsseries' column in which paste0() concatenates the pieces of the
# series id without spaces.

virginia <- list_counties("VA") %>%
  mutate(
    jobsseries = paste0("LAUCN51", county_code, "0000000005") # employment
  )
                         
# Only the 'jobsseries' values are needed to pull from the BLS API, and blscrapeR
# requires them as a vector rather than a list, so the column is taken straight
# from the data frame.

jobs_list <- virginia$jobsseries

# The BLS API has a series query limit of 50, but there are 136 localities (series
# ids) to pull, so split() breaks the vector into consecutive chunks of at most 50.

jobs_list <- split(jobs_list, ceiling(seq_along(jobs_list) / 50))

# Pull 2010-2022 data, including annual averages, for one chunk of BLS series ids.
#
# Args:
#   jobs_list: character vector of BLS series ids for a single request (the BLS
#     API limits a query to 50 series).
#
# Returns:
#   The result of bls_api() after being passed through dateCast().
#
# Raises:
#   An error if the BLS_KEY environment variable is empty or unset.
bls_jobs_pull <- function(jobs_list) {
  api_key <- Sys.getenv("BLS_KEY")

  # Fail early with a clear message rather than sending a request with an empty
  # registration key.
  if (!nzchar(api_key)) {
    stop(
      "BLS_KEY environment variable is not set; a BLS API registration key is required.",
      call. = FALSE
    )
  }

  # The key is passed by name so it cannot be matched to the wrong argument.
  bls_api(
    jobs_list,
    startyear = 2010,
    endyear = 2022,
    registrationKey = api_key,
    annualaverage = TRUE
  ) %>%
    dateCast()
}

4 Data prep

# The PEP data frame identifies each observation by a DATE code rather than a usable
# date. Translate every code into a date string, a description of the count type,
# and a year, so the PL data frame can later be given matching columns.
# NOTE(review): the counttype labels say "population" although the variable pulled
# is housing units - confirm the wording is intended.

va_hu_localities <- va_hu_localities %>%
  mutate(
    # Date values: codes 1 and 2 share the April 1, 2010 date.
    longdate = case_when(
      DATE %in% c(1, 2) ~ "4/1/2010",
      DATE == 3 ~ "7/1/2010",
      DATE == 4 ~ "7/1/2011",
      DATE == 5 ~ "7/1/2012",
      DATE == 6 ~ "7/1/2013",
      DATE == 7 ~ "7/1/2014",
      DATE == 8 ~ "7/1/2015",
      DATE == 9 ~ "7/1/2016",
      DATE == 10 ~ "7/1/2017",
      DATE == 11 ~ "7/1/2018",
      DATE == 12 ~ "7/1/2019"
    ),
    # Description of the data value type; every code other than 1 and 2 is an estimate.
    counttype = case_when(
      DATE == 1 ~ "Census population",
      DATE == 2 ~ "Estimates base",
      TRUE ~ "Population estimate"
    ),
    # Year values: codes 1 through 3 all fall in 2010.
    year = case_when(
      DATE %in% c(1, 2, 3) ~ 2010,
      DATE == 4 ~ 2011,
      DATE == 5 ~ 2012,
      DATE == 6 ~ 2013,
      DATE == 7 ~ 2014,
      DATE == 8 ~ 2015,
      DATE == 9 ~ 2016,
      DATE == 10 ~ 2017,
      DATE == 11 ~ 2018,
      DATE == 12 ~ 2019
    )
  )

# Give the PL (2020 decennial) data frame the same extra columns as the PEP data
# frame: DATE code 13, the April 1, 2020 date, the count type, and the year.

va_hu_localities_2020 <- mutate(
  va_hu_localities_2020,
  DATE = 13,
  longdate = "4/1/2020",
  counttype = "Census population",
  year = 2020
)

# Stack the PEP rows and the 2020 rows into a single housing unit data frame.

va_hu <- rbind(va_hu_localities, va_hu_localities_2020)



# Query the BLS API once per chunk of series ids and stack the results into one
# data frame, keep only the rows whose period name contains "Annual", and take
# characters 6-10 of the series id ("51" plus the county code) as the FIPS code.

va_jobs <- jobs_list %>%
  map(bls_jobs_pull) %>%
  bind_rows() %>%
  filter(str_detect(periodName, "Annual")) %>%
  mutate(fips = substr(seriesID, 6, 10))

# Combine the housing unit and employment data frames based on year and FIPS, then
# give the two 'value' columns descriptive names: 'employment' from the jobs data
# and 'hu' from the housing unit data.
# NOTE(review): the housing data maps DATE codes 1-3 all to year 2010, so each 2010
# jobs row presumably matches several housing rows (one per counttype) - confirm
# this is intended.

va_jobs_hu <- va_jobs %>%
  left_join(va_hu, by = c("fips" = "GEOID", "year" = "year")) %>%
  rename(employment = value.x, hu = value.y)

5 Data export

# Write to csv file. The output directory is created first (a no-op when it already
# exists) because write_csv() fails if the target directory is missing.

dir.create("data", showWarnings = FALSE)
write_csv(va_jobs_hu, "data/va_jobs_housing_2010_2020.csv")

6 Dashboard development

Reserved for future Shiny app development.