Load packages

library(glue)
library(DBI)
library(coriverse)
library(dplyr)
library(plotly)

source('../scripts/utils-data.R')

Read in data

moodys_employ <- getMoodysInnovationEmployStatistics()
cbsa <- getCBSA()
tot_employment_by_county <- readr::read_csv('../data/moodys_employment_by_county.csv')
## Rows: 255348 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): geoid_co
## dbl (2): year, tot_county_employment
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Pull in CBSA designation and group by county

moodys_employ_cbsa<- moodys_employ %>%
  dplyr::filter(year > 2004) %>%
  dplyr::group_by(geoid_co, year) %>%
  dplyr::summarise(
    employment = sum(value, na.rm = TRUE)
  ) %>%
  dplyr::left_join(
    cbsa,
    by = c("geoid_co"="geoid")
  )
## `summarise()` has grouped output by 'geoid_co'. You can override using the `.groups` argument.

2005-2020 Summary Stats

moodys_tot_employment_2005_2020 <- tot_employment_by_county %>%
  dplyr::filter(year == 2005 | year == 2020) %>%
  tidyr::pivot_wider(names_from = year, values_from = tot_county_employment, names_prefix = "tot_county_employment_")

moodys_employ_cbsa_2005_2020 <- moodys_employ_cbsa %>%
  dplyr::filter(year == 2005 | year == 2020) %>%
  tidyr::pivot_wider(names_from = year, values_from = employment, names_prefix = "employment_")

tot_2005_employment <- sum(moodys_employ_cbsa_2005_2020$employment_2005, na.rm = TRUE)
tot_2020_employment <- sum(moodys_employ_cbsa_2005_2020$employment_2020, na.rm = TRUE)

moodys_employ_cbsa_2005_2020 <-
  dplyr::left_join(
    moodys_employ_cbsa_2005_2020,
    moodys_tot_employment_2005_2020,
    by = "geoid_co"
  )

moodys_employ_cbsa_2005_2020 <- moodys_employ_cbsa_2005_2020 %>%
  dplyr::mutate(
    county_share_2005 = employment_2005 / tot_2005_employment,
    county_share_2020 = employment_2020 / tot_2020_employment,
    share_of_county_tot_emp_2005 = employment_2005 / tot_county_employment_2005,
    share_of_county_tot_emp_2020 = employment_2020 / tot_county_employment_2020,
    county_employment_change = employment_2020 - employment_2005,
    county_employment_pct_change = (employment_2020 - employment_2005) / employment_2005,
    county_share_change = county_share_2020 - county_share_2005,
    county_share_pct_change = county_share_change / county_share_2005,
    share_of_county_tot_emp_change = share_of_county_tot_emp_2020 - share_of_county_tot_emp_2005,
    share_of_county_tot_emp_pct_change = share_of_county_tot_emp_change / share_of_county_tot_emp_2005
  )

moodys_employ_cbsa_2005_2020_stats <- moodys_employ_cbsa_2005_2020 %>%
  dplyr::group_by(metropolitan_designation) %>%
  dplyr::summarise(
    # County Means and SD
    mean_county_share_2005 = mean(county_share_2005, na.rm = TRUE),
    sd_county_share_2005 = sd(county_share_2005, na.rm = TRUE),
    mean_county_share_2020 = mean(county_share_2020, na.rm = TRUE),
    sd_county_share_2020 = sd(county_share_2020, na.rm = TRUE),
    mean_county_employment_change = mean(county_employment_change, na.rm = TRUE),
    sd_county_employment_change = sd(county_employment_change, na.rm = TRUE),
    mean_county_employment_pct_change = mean(county_employment_pct_change, na.rm = TRUE),
    sd_county_employment_pct_change = sd(county_employment_pct_change, na.rm = TRUE),
    mean_county_share_change = mean(county_share_change, na.rm = TRUE),
    sd_county_share_change = sd(county_share_change, na.rm = TRUE),
    mean_county_share_pct_change = mean(county_share_pct_change, na.rm = TRUE),
    sd_county_share_pct_change = sd(county_share_pct_change, na.rm = TRUE),
    mean_share_of_county_tot_emp_2005 = mean(share_of_county_tot_emp_2005, na.rm = TRUE),
    sd_share_of_county_tot_emp_2005 = sd(share_of_county_tot_emp_2005, na.rm = TRUE),
    mean_share_of_county_tot_emp_2020 = mean(share_of_county_tot_emp_2020, na.rm = TRUE),
    sd_share_of_county_tot_emp_2020 = sd(share_of_county_tot_emp_2020, na.rm = TRUE),
    mean_share_of_county_tot_emp_change = mean(share_of_county_tot_emp_change, na.rm = TRUE),
    sd_share_of_county_tot_emp_change = sd(share_of_county_tot_emp_change, na.rm = TRUE),
    mean_share_of_county_tot_emp_pct_change = mean(share_of_county_tot_emp_pct_change, na.rm = TRUE),
    sd_share_of_county_tot_emp_pct_change = sd(share_of_county_tot_emp_pct_change, na.rm = TRUE),
    # 2005 and 2020 statistics
    sum_employment_2020 = sum(employment_2020, na.rm = TRUE),
    sum_employment_2005 = sum(employment_2005, na.rm = TRUE),
    employment_share_2020 = sum_employment_2020 / sum(moodys_employ_cbsa_2005_2020$employment_2020, na.rm = TRUE),
    employment_share_2005 = sum_employment_2005 / sum(moodys_employ_cbsa_2005_2020$employment_2005, na.rm = TRUE),
    mean_employment_2020 = mean(employment_2020, na.rm = TRUE),
    mean_employment_2005 = mean(employment_2005, na.rm = TRUE),
    sd_employment_2020 = sd(employment_2020, na.rm = TRUE),
    sd_employment_2005 = sd(employment_2005, na.rm = TRUE),
    # Time comparisons
    employment_change = sum_employment_2020 - sum_employment_2005,
    employment_pct_change = (sum_employment_2020 - sum_employment_2005) / sum_employment_2005,
    employment_share_change = employment_share_2020 - employment_share_2005,
    employment_share_pct_change = employment_share_change / employment_share_2005
  )

knitr::kable(moodys_employ_cbsa_2005_2020_stats)
metropolitan_designation mean_county_share_2005 sd_county_share_2005 mean_county_share_2020 sd_county_share_2020 mean_county_employment_change sd_county_employment_change mean_county_employment_pct_change sd_county_employment_pct_change mean_county_share_change sd_county_share_change mean_county_share_pct_change sd_county_share_pct_change mean_share_of_county_tot_emp_2005 sd_share_of_county_tot_emp_2005 mean_share_of_county_tot_emp_2020 sd_share_of_county_tot_emp_2020 mean_share_of_county_tot_emp_change sd_share_of_county_tot_emp_change mean_share_of_county_tot_emp_pct_change sd_share_of_county_tot_emp_pct_change sum_employment_2020 sum_employment_2005 employment_share_2020 employment_share_2005 mean_employment_2020 mean_employment_2005 sd_employment_2020 sd_employment_2005 employment_change employment_pct_change employment_share_change employment_share_pct_change
Metro 0.0008067 0.0028724 0.0008196 0.0033401 0.4000770 4.5668652 0.5775614 14.2020947 0.0000130 0.0010355 0.4017137 12.6190140 0.0018497 0.0031880 0.0017712 0.0030116 -0.0000785 0.0011096 0.1175028 1.1564231 3639.9808 3183.0929 0.9360179 0.9212158 3.1873737 2.7872968 12.9890272 9.9249716 456.8879 0.1435358 0.0148021 0.0160681
Micro 0.0000689 0.0000935 0.0000574 0.0000831 -0.0148855 0.2264069 0.0884708 1.0334723 -0.0000115 0.0000626 -0.0328590 0.9182731 0.0013520 0.0015052 0.0013124 0.0014745 -0.0000396 0.0010566 0.0653785 0.6417925 146.0413 155.7764 0.0375544 0.0450831 0.2233047 0.2381902 0.3230467 0.3230471 -9.7351 -0.0624941 -0.0075287 -0.1669961
Non-CBSA 0.0000169 0.0000292 0.0000144 0.0000272 -0.0021529 0.0497070 Inf NaN -0.0000024 0.0000137 Inf NaN 0.0010794 0.0012764 0.0010643 0.0013734 -0.0000150 0.0007419 Inf NaN 72.4623 75.2438 0.0186336 0.0217762 0.0560854 0.0582382 0.1056082 0.1009619 -2.7815 -0.0369665 -0.0031426 -0.1443140
NA 0.0004969 0.0016180 0.0003248 0.0009148 -0.4539500 2.0451141 -0.0049010 0.2682095 -0.0001721 0.0007058 -0.1158228 0.2383127 0.0015841 0.0011750 0.0014343 0.0008161 -0.0001498 0.0005513 -0.0066944 0.2805044 30.3095 41.2043 0.0077941 0.0119249 1.2628958 1.7168458 3.5574353 5.5906361 -10.8948 -0.2644093 -0.0041308 -0.3464042
rmarkdown::paged_table(moodys_employ_cbsa_2005_2020_stats)

Employment data is in thousands, seasonally adjusted

county_share is the number of innovation jobs in a county divided by the number of innovation jobs in the US

share_of_county_tot_emp is the number of innovation jobs in a county divided by the total number of jobs in that county