Innovation Summary Stats

Load packages

library(glue)
library(DBI)
library(coriverse)
library(dplyr)
library(plotly)

source('../scripts/utils-data.R')

Read in data

moodys_employ <- getMoodysInnovationEmployStatistics()
cbsa <- getCBSA()
tot_employment_by_county <- readr::read_csv('../data/moodys_employment_by_county.csv')

## Rows: 255348 Columns: 3

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): geoid_co
## dbl (2): year, tot_county_employment

## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Pull in CBSA designation and group by county

moodys_employ_cbsa<- moodys_employ %>%
  dplyr::filter(year > 2004) %>%
  dplyr::group_by(geoid_co, year) %>%
  dplyr::summarise(
    employment = sum(value, na.rm = TRUE)
  ) %>%
  dplyr::left_join(
    cbsa,
    by = c("geoid_co"="geoid")
  )

## `summarise()` has grouped output by 'geoid_co'. You can override using the `.groups` argument.

2005-2020 Summary Stats

moodys_tot_employment_2005_2020 <- tot_employment_by_county %>%
  dplyr::filter(year == 2005 | year == 2020) %>%
  tidyr::pivot_wider(names_from = year, values_from = tot_county_employment, names_prefix = "tot_county_employment_")

moodys_employ_cbsa_2005_2020 <- moodys_employ_cbsa %>%
  dplyr::filter(year == 2005 | year == 2020) %>%
  tidyr::pivot_wider(names_from = year, values_from = employment, names_prefix = "employment_")

tot_2005_employment <- sum(moodys_employ_cbsa_2005_2020$employment_2005, na.rm = TRUE)
tot_2020_employment <- sum(moodys_employ_cbsa_2005_2020$employment_2020, na.rm = TRUE)

moodys_employ_cbsa_2005_2020 <-
  dplyr::left_join(
    moodys_employ_cbsa_2005_2020,
    moodys_tot_employment_2005_2020,
    by = "geoid_co"
  )

moodys_employ_cbsa_2005_2020 <- moodys_employ_cbsa_2005_2020 %>%
  dplyr::mutate(
    county_share_2005 = employment_2005 / tot_2005_employment,
    county_share_2020 = employment_2020 / tot_2020_employment,
    share_of_county_tot_emp_2005 = employment_2005 / tot_county_employment_2005,
    share_of_county_tot_emp_2020 = employment_2020 / tot_county_employment_2020,
    county_employment_change = employment_2020 - employment_2005,
    county_employment_pct_change = (employment_2020 - employment_2005) / employment_2005,
    county_share_change = county_share_2020 - county_share_2005,
    county_share_pct_change = county_share_change / county_share_2005,
    share_of_county_tot_emp_change = share_of_county_tot_emp_2020 - share_of_county_tot_emp_2005,
    share_of_county_tot_emp_pct_change = share_of_county_tot_emp_change / share_of_county_tot_emp_2005
  )

moodys_employ_cbsa_2005_2020_stats <- moodys_employ_cbsa_2005_2020 %>%
  dplyr::group_by(metropolitan_designation) %>%
  dplyr::summarise(
    # County Means and SD
    mean_county_share_2005 = mean(county_share_2005, na.rm = TRUE),
    sd_county_share_2005 = sd(county_share_2005, na.rm = TRUE),
    mean_county_share_2020 = mean(county_share_2020, na.rm = TRUE),
    sd_county_share_2020 = sd(county_share_2020, na.rm = TRUE),
    mean_county_employment_change = mean(county_employment_change, na.rm = TRUE),
    sd_county_employment_change = sd(county_employment_change, na.rm = TRUE),
    mean_county_employment_pct_change = mean(county_employment_pct_change, na.rm = TRUE),
    sd_county_employment_pct_change = sd(county_employment_pct_change, na.rm = TRUE),
    mean_county_share_change = mean(county_share_change, na.rm = TRUE),
    sd_county_share_change = sd(county_share_change, na.rm = TRUE),
    mean_county_share_pct_change = mean(county_share_pct_change, na.rm = TRUE),
    sd_county_share_pct_change = sd(county_share_pct_change, na.rm = TRUE),
    mean_share_of_county_tot_emp_2005 = mean(share_of_county_tot_emp_2005, na.rm = TRUE),
    sd_share_of_county_tot_emp_2005 = sd(share_of_county_tot_emp_2005, na.rm = TRUE),
    mean_share_of_county_tot_emp_2020 = mean(share_of_county_tot_emp_2020, na.rm = TRUE),
    sd_share_of_county_tot_emp_2020 = sd(share_of_county_tot_emp_2020, na.rm = TRUE),
    mean_share_of_county_tot_emp_change = mean(share_of_county_tot_emp_change, na.rm = TRUE),
    sd_share_of_county_tot_emp_change = sd(share_of_county_tot_emp_change, na.rm = TRUE),
    mean_share_of_county_tot_emp_pct_change = mean(share_of_county_tot_emp_pct_change, na.rm = TRUE),
    sd_share_of_county_tot_emp_pct_change = sd(share_of_county_tot_emp_pct_change, na.rm = TRUE),
    # 2005 and 2020 statistics
    sum_employment_2020 = sum(employment_2020, na.rm = TRUE),
    sum_employment_2005 = sum(employment_2005, na.rm = TRUE),
    employment_share_2020 = sum_employment_2020 / sum(moodys_employ_cbsa_2005_2020$employment_2020, na.rm = TRUE),
    employment_share_2005 = sum_employment_2005 / sum(moodys_employ_cbsa_2005_2020$employment_2005, na.rm = TRUE),
    mean_employment_2020 = mean(employment_2020, na.rm = TRUE),
    mean_employment_2005 = mean(employment_2005, na.rm = TRUE),
    sd_employment_2020 = sd(employment_2020, na.rm = TRUE),
    sd_employment_2005 = sd(employment_2005, na.rm = TRUE),
    # Time comparisons
    employment_change = sum_employment_2020 - sum_employment_2005,
    employment_pct_change = (sum_employment_2020 - sum_employment_2005) / sum_employment_2005,
    employment_share_change = employment_share_2020 - employment_share_2005,
    employment_share_pct_change = employment_share_change / employment_share_2005
  )

knitr::kable(moodys_employ_cbsa_2005_2020_stats)

metropolitan_designation	mean_county_share_2005	sd_county_share_2005	mean_county_share_2020	sd_county_share_2020	mean_county_employment_change	sd_county_employment_change	mean_county_employment_pct_change	sd_county_employment_pct_change	mean_county_share_change	sd_county_share_change	mean_county_share_pct_change	sd_county_share_pct_change	mean_share_of_county_tot_emp_2005	sd_share_of_county_tot_emp_2005	mean_share_of_county_tot_emp_2020	sd_share_of_county_tot_emp_2020	mean_share_of_county_tot_emp_change	sd_share_of_county_tot_emp_change	mean_share_of_county_tot_emp_pct_change	sd_share_of_county_tot_emp_pct_change	sum_employment_2020	sum_employment_2005	employment_share_2020	employment_share_2005	mean_employment_2020	mean_employment_2005	sd_employment_2020	sd_employment_2005	employment_change	employment_pct_change	employment_share_change	employment_share_pct_change
Metro	0.0008067	0.0028724	0.0008196	0.0033401	0.4000770	4.5668652	0.5775614	14.2020947	0.0000130	0.0010355	0.4017137	12.6190140	0.0018497	0.0031880	0.0017712	0.0030116	-0.0000785	0.0011096	0.1175028	1.1564231	3639.9808	3183.0929	0.9360179	0.9212158	3.1873737	2.7872968	12.9890272	9.9249716	456.8879	0.1435358	0.0148021	0.0160681
Micro	0.0000689	0.0000935	0.0000574	0.0000831	-0.0148855	0.2264069	0.0884708	1.0334723	-0.0000115	0.0000626	-0.0328590	0.9182731	0.0013520	0.0015052	0.0013124	0.0014745	-0.0000396	0.0010566	0.0653785	0.6417925	146.0413	155.7764	0.0375544	0.0450831	0.2233047	0.2381902	0.3230467	0.3230471	-9.7351	-0.0624941	-0.0075287	-0.1669961
Non-CBSA	0.0000169	0.0000292	0.0000144	0.0000272	-0.0021529	0.0497070	Inf	NaN	-0.0000024	0.0000137	Inf	NaN	0.0010794	0.0012764	0.0010643	0.0013734	-0.0000150	0.0007419	Inf	NaN	72.4623	75.2438	0.0186336	0.0217762	0.0560854	0.0582382	0.1056082	0.1009619	-2.7815	-0.0369665	-0.0031426	-0.1443140
NA	0.0004969	0.0016180	0.0003248	0.0009148	-0.4539500	2.0451141	-0.0049010	0.2682095	-0.0001721	0.0007058	-0.1158228	0.2383127	0.0015841	0.0011750	0.0014343	0.0008161	-0.0001498	0.0005513	-0.0066944	0.2805044	30.3095	41.2043	0.0077941	0.0119249	1.2628958	1.7168458	3.5574353	5.5906361	-10.8948	-0.2644093	-0.0041308	-0.3464042

rmarkdown::paged_table(moodys_employ_cbsa_2005_2020_stats)

Employment data is in thousands, seasonally adjusted

county_share is the number of innovation jobs in a county divided by the number of innovation jobs in the US

share_of_county_tot_emp is the number of innovation jobs in a county divided by the total number of jobs in that county