Census

3 is highest risk, 2 is moderate, 1 is low

table <- read_csv("census.csv")

## Rows: 4 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): avg_eal_tercile
## dbl (18): asian, avghhsize, bacplus, black, child, density, hispanic, medgro...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

table2 <- read_csv("industry.csv")

## Rows: 4 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): avg_eal_tercile
## dbl (14): FIRE, agriculutre_forestry_etc, armed_forces, arts_ent_rec_etc, co...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

kable(table,caption="Census Variables by Risk Level")

Census Variables by Risk Level
avg_eal_tercile	asian	avghhsize	bacplus	black	child	density	hispanic	medgrossrents	medhhinc	medianhhval	other	owner_occupied	poverty	senior	total	unemp	vacant	white
1	0.0315712	2.492941	0.3567584	0.1567143	0.2055315	345.1562	0.1144124	1042.8824	68181.35	219141.2	0.0381176	0.6606767	0.1227416	0.1556827	1494376.3	0.0503188	0.0879593	0.6591846
2	0.0374453	2.589091	0.3574478	0.1343262	0.2111732	604.2491	0.1572416	1089.0303	71106.09	240212.1	0.0427232	0.6623240	0.1234514	0.1502595	2060857.9	0.0543446	0.0858594	0.6282638
3	0.0889935	2.749091	0.3387351	0.0946081	0.2056592	828.4572	0.2888781	1412.9091	75917.24	387818.2	0.0494946	0.6279769	0.1258272	0.1651189	3149811.3	0.0588635	0.1053852	0.4780258
Outside	0.0150771	2.527414	0.2405747	0.0833987	0.2033337	115.2304	0.1335524	854.9571	56550.76	176321.7	0.0440430	0.6892702	0.1554496	0.1803648	109954.8	0.0553571	0.1511438	0.7239288

kable(table2,caption="Census Industry by Risk Level")

Census Industry by Risk Level
avg_eal_tercile	FIRE	agriculutre_forestry_etc	armed_forces	arts_ent_rec_etc	construction	education_healthcare_etc	information	manufacturing	other	professional_science_managment_etc	public_admin	retail	transportation_warehouse_utilities	wholesale
1	0.0722513	0.0089891	0.0099165	0.0835972	0.0632696	0.2451436	0.0180693	0.0957320	0.0461523	0.1176257	0.0523191	0.1067635	0.0549619	0.0252088
2	0.0703715	0.0083459	0.0074881	0.0846070	0.0643641	0.2331948	0.0176900	0.1133960	0.0466104	0.1192654	0.0454758	0.1095589	0.0535857	0.0260463
3	0.0670700	0.0173628	0.0086841	0.1020863	0.0728896	0.2182436	0.0198586	0.0764031	0.0474106	0.1310278	0.0458772	0.1119322	0.0562385	0.0249155
Outside	0.0444278	0.0391243	0.0089660	0.0892926	0.0697175	0.2388585	0.0121106	0.1328069	0.0469522	0.0739920	0.0527425	0.1165147	0.0529105	0.0215839

##T-tests 3 is highest risk, 2 is moderate, 1 is low

library(rstatix)

## 
## Attaching package: 'rstatix'

## The following object is masked from 'package:stats':
## 
##     filter

table <- read_csv("census_cbsas.csv")

## Rows: 939 Columns: 19

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): avg_eal_tercile
## dbl (18): asian, avghhsize, black, hispanic, medgrossrents, medhhinc, median...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

names <- names(table)[2:19]
table_tidy <- table %>% pivot_longer(!avg_eal_tercile)
for (i in names) {
  print(i)
  x <- table_tidy %>%
    filter(name==i) %>%
    mutate(avg_eal_tercile=as.character(avg_eal_tercile)) %>%
    select(-name)
  pairwise_result <- x %>% pairwise_t_test(value~avg_eal_tercile, p.adjust.method = "bonferroni") %>%
    mutate(variable=i) %>%
    select(variable, everything()) %>%
    select(-.y.)
  print(pairwise_result)
}

## [1] "asian"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 asian    1      2          34    33 3.9 e- 1 ns       1   e+ 0 ns          
## 2 asian    1      3          34    33 1.55e-16 ****     9.31e-16 ****        
## 3 asian    2      3          33    33 1.57e-13 ****     9.45e-13 ****        
## 4 asian    1      Outside    34   839 7.73e- 4 ***      4.64e- 3 **          
## 5 asian    2      Outside    33   839 7.32e- 6 ****     4.39e- 5 ****        
## 6 asian    3      Outside    33   839 3.22e-45 ****     1.93e-44 ****        
## [1] "avghhsize"
## # A tibble: 6 × 9
##   variable  group1 group2     n1    n2           p p.signif   p.adj p.adj.signif
##   <chr>     <chr>  <chr>   <int> <int>       <dbl> <chr>      <dbl> <chr>       
## 1 avghhsize 1      2          34    33     7.5 e-2 ns       4.5 e-1 ns          
## 2 avghhsize 1      3          34    33     2.37e-6 ****     1.42e-5 ****        
## 3 avghhsize 2      3          33    33     3.32e-3 **       1.99e-2 *           
## 4 avghhsize 1      Outside    34   839     3.72e-1 ns       1   e+0 ns          
## 5 avghhsize 2      Outside    33   839     1.16e-1 ns       6.94e-1 ns          
## 6 avghhsize 3      Outside    33   839     2.03e-8 ****     1.22e-7 ****        
## [1] "black"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif   p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>      <dbl> <chr>       
## 1 black    1      2          34    33 0.462    ns       1       ns          
## 2 black    1      3          34    33 0.0416   *        0.25    ns          
## 3 black    2      3          33    33 0.196    ns       1       ns          
## 4 black    1      Outside    34   839 0.000798 ***      0.00479 **          
## 5 black    2      Outside    33   839 0.0214   *        0.129   ns          
## 6 black    3      Outside    33   839 0.612    ns       1       ns          
## [1] "hispanic"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2         p p.signif     p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>     <dbl> <chr>        <dbl> <chr>       
## 1 hispanic 1      2          34    33 0.354     ns       1         ns          
## 2 hispanic 1      3          34    33 0.00017   ***      0.00102   **          
## 3 hispanic 2      3          33    33 0.00479   **       0.0287    *           
## 4 hispanic 1      Outside    34   839 0.563     ns       1         ns          
## 5 hispanic 2      Outside    33   839 0.48      ns       1         ns          
## 6 hispanic 3      Outside    33   839 0.0000042 ****     0.0000252 ****        
## [1] "medgrossrents"
## # A tibble: 6 × 9
##   variable     group1 group2    n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>        <chr>  <chr>  <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 medgrossren… 1      2         34    33 3.75e- 1 ns       1   e+ 0 ns          
## 2 medgrossren… 1      3         34    33 2.24e-12 ****     1.34e-11 ****        
## 3 medgrossren… 2      3         33    33 9.51e-10 ****     5.71e- 9 ****        
## 4 medgrossren… 1      Outsi…    34   839 5.40e- 7 ****     3.24e- 6 ****        
## 5 medgrossren… 2      Outsi…    33   839 8.64e-10 ****     5.18e- 9 ****        
## 6 medgrossren… 3      Outsi…    33   839 1.57e-44 ****     9.40e-44 ****        
## [1] "medhhinc"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 medhhinc 1      2          34    33 3.35e- 1 ns       1   e+ 0 ns          
## 2 medhhinc 1      3          34    33 1.09e- 2 *        6.54e- 2 ns          
## 3 medhhinc 2      3          33    33 1.16e- 1 ns       6.94e- 1 ns          
## 4 medhhinc 1      Outside    34   839 1.06e- 7 ****     6.39e- 7 ****        
## 5 medhhinc 2      Outside    33   839 6.5 e-11 ****     3.90e-10 ****        
## 6 medhhinc 3      Outside    33   839 6.88e-18 ****     4.13e-17 ****        
## [1] "medianhhval"
## # A tibble: 6 × 9
##   variable    group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>       <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 medianhhval 1      2          34    33 4.07e- 1 ns       1   e+ 0 ns          
## 2 medianhhval 1      3          34    33 5.51e-11 ****     3.31e-10 ****        
## 3 medianhhval 2      3          33    33 1.12e- 8 ****     6.74e- 8 ****        
## 4 medianhhval 1      Outside    34   839 1.89e- 2 *        1.13e- 1 ns          
## 5 medianhhval 2      Outside    33   839 5.64e- 4 ***      3.38e- 3 **          
## 6 medianhhval 3      Outside    33   839 1.61e-28 ****     9.68e-28 ****        
## [1] "owner_occupied"
## # A tibble: 6 × 9
##   variable       group1 group2    n1    n2       p p.signif   p.adj p.adj.signif
##   <chr>          <chr>  <chr>  <int> <int>   <dbl> <chr>      <dbl> <chr>       
## 1 owner_occupied 1      2         34    33 9.14e-1 ns       1   e+0 ns          
## 2 owner_occupied 1      3         34    33 3.16e-2 *        1.89e-1 ns          
## 3 owner_occupied 2      3         33    33 2.5 e-2 *        1.5 e-1 ns          
## 4 owner_occupied 1      Outsi…    34   839 8.68e-3 **       5.21e-2 ns          
## 5 owner_occupied 2      Outsi…    33   839 1.48e-2 *        8.85e-2 ns          
## 6 owner_occupied 3      Outsi…    33   839 3.58e-8 ****     2.15e-7 ****        
## [1] "poverty"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2       p p.signif  p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>   <dbl> <chr>     <dbl> <chr>       
## 1 poverty  1      2          34    33 0.963   ns       1      ns          
## 2 poverty  1      3          34    33 0.839   ns       1      ns          
## 3 poverty  2      3          33    33 0.877   ns       1      ns          
## 4 poverty  1      Outside    34   839 0.00275 **       0.0165 *           
## 5 poverty  2      Outside    33   839 0.00388 **       0.0233 *           
## 6 poverty  3      Outside    33   839 0.00749 **       0.0449 *           
## [1] "total"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 total    1      2          34    33 8.36e- 3 **       5.01e- 2 ns          
## 2 total    1      3          34    33 2.91e-14 ****     1.74e-13 ****        
## 3 total    2      3          33    33 5.50e- 7 ****     3.30e- 6 ****        
## 4 total    1      Outside    34   839 1.02e-18 ****     6.14e-18 ****        
## 5 total    2      Outside    33   839 2.02e-33 ****     1.21e-32 ****        
## 6 total    3      Outside    33   839 1.69e-71 ****     1.01e-70 ****        
## [1] "unemp"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2     p p.signif p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int> <dbl> <chr>    <dbl> <chr>       
## 1 unemp    1      2          34    33 0.444 ns       1     ns          
## 2 unemp    1      3          34    33 0.104 ns       0.626 ns          
## 3 unemp    2      3          33    33 0.394 ns       1     ns          
## 4 unemp    1      Outside    34   839 0.181 ns       1     ns          
## 5 unemp    2      Outside    33   839 0.791 ns       1     ns          
## 6 unemp    3      Outside    33   839 0.358 ns       1     ns          
## [1] "vacant"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2           p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>       <dbl> <chr>       <dbl> <chr>       
## 1 vacant   1      2          34    33 0.907       ns        1   e+0 ns          
## 2 vacant   1      3          34    33 0.333       ns        1   e+0 ns          
## 3 vacant   2      3          33    33 0.282       ns        1   e+0 ns          
## 4 vacant   1      Outside    34   839 0.00000109  ****      6.56e-6 ****        
## 5 vacant   2      Outside    33   839 0.000000693 ****      4.16e-6 ****        
## 6 vacant   3      Outside    33   839 0.000482    ***       2.89e-3 **          
## [1] "white"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 white    1      2          34    33 5.38e- 1 ns       1   e+ 0 ns          
## 2 white    1      3          34    33 3.22e- 4 ***      1.93e- 3 **          
## 3 white    2      3          33    33 3.04e- 3 **       1.82e- 2 *           
## 4 white    1      Outside    34   839 7.18e- 2 ns       4.31e- 1 ns          
## 5 white    2      Outside    33   839 8.8 e- 3 **       5.28e- 2 ns          
## 6 white    3      Outside    33   839 2.63e-11 ****     1.58e-10 ****        
## [1] "other"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2     p p.signif p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int> <dbl> <chr>    <dbl> <chr>       
## 1 other    1      2          34    33 0.721 ns           1 ns          
## 2 other    1      3          34    33 0.377 ns           1 ns          
## 3 other    2      3          33    33 0.602 ns           1 ns          
## 4 other    1      Outside    34   839 0.52  ns           1 ns          
## 5 other    2      Outside    33   839 0.888 ns           1 ns          
## 6 other    3      Outside    33   839 0.56  ns           1 ns          
## [1] "density"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 density  1      2          34    33 9.40e- 8 ****     5.64e- 7 ****        
## 2 density  1      3          34    33 1.4 e-22 ****     8.39e-22 ****        
## 3 density  2      3          33    33 4.34e- 6 ****     2.61e- 5 ****        
## 4 density  1      Outside    34   839 4.39e-11 ****     2.63e-10 ****        
## 5 density  2      Outside    33   839 1.74e-40 ****     1.05e-39 ****        
## 6 density  3      Outside    33   839 9.06e-77 ****     5.43e-76 ****        
## [1] "child"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2     p p.signif p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int> <dbl> <chr>    <dbl> <chr>       
## 1 child    1      2          34    33 0.433 ns       1     ns          
## 2 child    1      3          34    33 0.986 ns       1     ns          
## 3 child    2      3          33    33 0.447 ns       1     ns          
## 4 child    1      Outside    34   839 0.67  ns       1     ns          
## 5 child    2      Outside    33   839 0.134 ns       0.804 ns          
## 6 child    3      Outside    33   839 0.657 ns       1     ns          
## [1] "senior"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2         p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>     <dbl> <chr>       <dbl> <chr>       
## 1 senior   1      2          34    33 0.584     ns       1        ns          
## 2 senior   1      3          34    33 0.34      ns       1        ns          
## 3 senior   2      3          33    33 0.136     ns       0.817    ns          
## 4 senior   1      Outside    34   839 0.000513  ***      0.00308  **          
## 5 senior   2      Outside    33   839 0.0000303 ****     0.000182 ***         
## 6 senior   3      Outside    33   839 0.034     *        0.204    ns          
## [1] "bacplus"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 bacplus  1      2          34    33 9.74e- 1 ns       1   e+ 0 ns          
## 2 bacplus  1      3          34    33 3.89e- 1 ns       1   e+ 0 ns          
## 3 bacplus  2      3          33    33 3.74e- 1 ns       1   e+ 0 ns          
## 4 bacplus  1      Outside    34   839 2.09e-14 ****     1.25e-13 ****        
## 5 bacplus  2      Outside    33   839 3.39e-14 ****     2.03e-13 ****        
## 6 bacplus  3      Outside    33   839 1.58e-10 ****     9.5 e-10 ****

#Line Graphs

data <- read_csv("all_cbsa_summary.csv") %>%
  filter(!is.na(avg_eal))

## Rows: 9400 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile, NAME
## dbl (8): archive_version_year, CBSAFP, rows, with_parent_number, mean_employ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

data$avg_eal_tercile <- factor(data$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
plot1 <- data %>% ggplot()+
  geom_line(aes(archive_version_year,rows,color=NAME),alpha=0.5)+
  facet_wrap(~avg_eal_tercile,ncol =3)+
  ggtitle("Number of Businesses")
ggsave("figures/number_of_businesses.png")

## Saving 7 x 5 in image

plot2 <- data %>% ggplot()+
  geom_line(aes(archive_version_year,mean_year_established,color=NAME),alpha=0.5)+
  facet_wrap(~avg_eal_tercile,ncol =3)+
  ggtitle("Mean year established")
ggsave("figures/mean_year_established.png")

## Saving 7 x 5 in image

## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).

ggplotly(plot1) %>% layout(showlegend = FALSE)

ggplotly(plot2) %>% layout(showlegend = FALSE)

data <- read_csv("cbsa_births_deaths_exits_entrys.csv")

## Rows: 1000 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): NAME, avg_eal_tercile
## dbl (8): archive_version_year, CBSAFP, entry, exit, births, deaths, avg_eal,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

data$avg_eal_tercile <- factor(data$avg_eal_tercile, levels = c("High", "Moderate", "Low"))

plot1 <- data %>% ggplot()+
  geom_line(aes(archive_version_year,births,color=NAME,group=NAME),alpha=0.5)+
  facet_wrap(~avg_eal_tercile,ncol =3)+
  ggtitle("Births")
ggsave("figures/births.png")

## Saving 7 x 5 in image

plot2 <- data %>% ggplot()+
  geom_line(aes(archive_version_year,deaths,color=NAME,group=NAME),alpha=0.5)+
  facet_wrap(~avg_eal_tercile,ncol =3)+
  ggtitle("Deaths")
ggsave("figures/deaths.png")

## Saving 7 x 5 in image

## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_line()`).

plot3 <- data %>%
  ggplot()+
  geom_line(aes(archive_version_year,entry,color=NAME,group=NAME),alpha=0.5)+
  facet_wrap(~avg_eal_tercile,ncol =3)+
  ggtitle("Entries")
ggsave("figures/entries.png")

## Saving 7 x 5 in image

plot4 <- data %>% ggplot()+
  geom_line(aes(archive_version_year,exit,color=NAME,group=NAME),alpha=0.5)+
  facet_wrap(~avg_eal_tercile,ncol =)+
  ggtitle("Exits")
ggsave("figures/exits.png")

## Saving 7 x 5 in image

## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_line()`).

ggplotly(plot1) %>% layout(showlegend = FALSE)

ggplotly(plot2) %>% layout(showlegend = FALSE)

ggplotly(plot3) %>% layout(showlegend = FALSE)

ggplotly(plot4) %>% layout(showlegend = FALSE)

all <- read_csv("critical_pop_all.csv")
all$avg_eal_tercile <- factor(all$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
all$pop_tercile <- factor(all$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

births <- read_csv("critical_pop_births.csv") %>%
  filter(archive_version_year != 2012)
births$avg_eal_tercile <- factor(births$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
births$pop_tercile <- factor(births$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

entries <- read_csv("critical_pop_entries.csv")%>%
  filter(archive_version_year != 2012)
entries$avg_eal_tercile <- factor(entries$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
entries$pop_tercile <- factor(entries$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

deaths <- read_csv("critical_pop_deaths.csv")%>%
  filter(archive_version_year != 2022)
deaths$avg_eal_tercile <- factor(deaths$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
deaths$pop_tercile <- factor(deaths$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

exits <- read_csv("critical_pop_exits.csv")%>%
  filter(archive_version_year != 2022)
exits$avg_eal_tercile <- factor(exits$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
exits$pop_tercile <- factor(exits$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))


plot1 <- all %>%
  ggplot()+
  geom_line(aes(archive_version_year,row_count,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by population tercile, all")
ggsave("figures/critical_by_pop.png")

plot2 <- births %>%ggplot()+
  geom_line(aes(archive_version_year,births,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, births")
ggsave("figures/naics_by_tercile_births.png")


plot3 <- deaths %>% ggplot()+
  geom_line(aes(archive_version_year,deaths,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, deaths")
ggsave("figures/naics_by_tercile_deaths.png")

plot4 <- entries %>% ggplot()+
  geom_line(aes(archive_version_year,entries,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, entries")
ggsave("figures/naics_by_tercile_entries.png")

plot5 <- exits %>% ggplot()+
  geom_line(aes(archive_version_year,exits,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, exits")
ggsave("figures/naics_by_tercile_exits.png")

plot1

plot2

plot3

plot4

plot5

Line Employees

all <- read_csv("critical_pop_all_employees.csv")
all$avg_eal_tercile <- factor(all$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
all$pop_tercile <- factor(all$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

births <- read_csv("critical_pop_births_employees.csv") %>%
  filter(archive_version_year != 2012)
births$avg_eal_tercile <- factor(births$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
births$pop_tercile <- factor(births$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

entries <- read_csv("critical_pop_entries_employees.csv")%>%
  filter(archive_version_year != 2012)
entries$avg_eal_tercile <- factor(entries$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
entries$pop_tercile <- factor(entries$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

deaths <- read_csv("critical_pop_deaths_employees.csv")%>%
  filter(archive_version_year != 2022)
deaths$avg_eal_tercile <- factor(deaths$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
deaths$pop_tercile <- factor(deaths$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

exits <- read_csv("critical_pop_exits_employees.csv")%>%
  filter(archive_version_year != 2022)
exits$avg_eal_tercile <- factor(exits$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
exits$pop_tercile <- factor(exits$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))


plot1 <- all %>%
  ggplot()+
  geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by population tercile, all - employees")
ggsave("figures/critical_by_pop_emp.png")

plot2 <- births %>%ggplot()+
  geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, births, births - employees")
ggsave("figures/naics_by_tercile_births_emp.png")


plot3 <- deaths %>% ggplot()+
  geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, deaths - employees")
ggsave("figures/naics_by_tercile_deaths_emp.png")

plot4 <- entries %>% ggplot()+
  geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, entries - employees")
ggsave("figures/naics_by_tercile_entries_emp.png")

plot5 <- exits %>% ggplot()+
  geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, exits - employees")
ggsave("figures/naics_by_tercile_exit_emps.png")

plot1

plot2

plot3

plot4

plot5

Chord Diagrams

nri_cbsa_all <- read_csv("nri_cbsa_all.csv") %>%
  mutate(CBSAFP =as.character(CBSAFP))

## Rows: 940 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_all
## dbl (3): CBSAFP, avg_eal_all, tercile_all
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

regions <- read_csv("top100_regions.csv") %>%
  mutate(GEOID =as.character(GEOID)) %>%
  select(GEOID, Region)

## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): pop_tercile, NAME, Region
## dbl (1): GEOID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

top100_cbsa <- get_acs(geography = "cbsa", variables = c(pop="B01001_001"),output = "wide") %>%
  top_n(100,popE) %>%
  mutate(pop_tercile_num = ntile(popE, 3)) %>%
  mutate(pop_tercile=as_factor(pop_tercile_num)) %>%
  mutate(pop_tercile = factor(pop_tercile, levels = 1:3, labels = c("Pop: Low", "Pop: Moderate", "Pop: High"))) %>% 
  select(pop_tercile,pop_tercile_num,popE,GEOID) %>%
  left_join(regions)

## Getting data from the 2018-2022 5-year ACS
## Joining with `by = join_by(GEOID)`

top100_cbsa_new <- top100_cbsa %>%
  rename_with(~paste0(., "_new"), everything())
top100_cbsa_old <- top100_cbsa %>%
  rename_with(~paste0(., "_old"), everything())

moves <- read_csv("migration_map.csv") %>%
  select(CBSAFP_old,CBSAFP_new,NAME_old,NAME_new,row_count,avg_eal_tercile_old,avg_eal_tercile_new) %>%
  mutate(CBSAFP_old=as.character(CBSAFP_old)) %>%
  mutate(CBSAFP_new=as.character(CBSAFP_new)) %>%
  mutate(NAME_old=ifelse(is.na(avg_eal_tercile_old),"Outside",NAME_old)) %>%
  mutate(NAME_new=ifelse(is.na(avg_eal_tercile_new),"Outside",NAME_new)) %>%
  group_by(CBSAFP_old,CBSAFP_new,NAME_old,NAME_new,avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count)) %>%
  left_join(top100_cbsa_old, by=c("CBSAFP_old"="GEOID_old")) %>%
  left_join(top100_cbsa_new, by=c("CBSAFP_new"="GEOID_new")) %>%
  mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
  mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new)) %>%
  mutate(Region_old=ifelse(is.na(Region_old),"Outside",Region_old)) %>%
  mutate(Region_new=ifelse(is.na(Region_new),"Outside",Region_new))

## Rows: 63970 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): fips_code, lead_county, NAME_old, NAME_new, avg_eal_tercile_old, a...
## dbl (12): archive_version_year, row_count, CBSAFP_old, CBSAFP_new, avg_eal_o...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'CBSAFP_old', 'CBSAFP_new', 'NAME_old', 'NAME_new', 'avg_eal_tercile_old'. You can override using the `.groups` argument.

data <- moves %>%
  group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count))

## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.

data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption="Risk level to Risk level")

Risk level to Risk level
avg_eal_tercile_old	High	Low	Moderate	Outside
High	0.1418442	0.0145098	0.0363402	0.0729597
Low	0.0149617	0.0161366	0.0323389	0.0577516
Moderate	0.0303752	0.0298987	0.0341958	0.0614735
Outside	0.0696075	0.0633796	0.0675453	0.2566818

chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Risk level to Risk level", cex.main = 1)

png(file="figures/risk_to_risk.png")
dev.off()

## quartz_off_screen 
##                 2

data <- moves %>%
  filter(Region_old==Region_new) %>%
  filter(Region_old!="Outside") %>%
  group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count))

## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.

data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Moves within Region")

Moves within Region
avg_eal_tercile_old	High	Low	Moderate
High	0.4732835	0.0165976	0.0817526
Low	0.0194363	0.0371360	0.0974152
Moderate	0.0714333	0.0946433	0.1083022

chordDiagram(
  x=data, directional = 1, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow",
    grid.col = c(High="red",Moderate="yellow", Low="green"))
title("Moves within Region", cex.main = 1)

png(file="figures/moves_within_region.png")
dev.off()

## quartz_off_screen 
##                 2

data <- moves %>%
  filter(Region_old!=Region_new) %>%
  group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count))

## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.

table <- moves %>%
  filter(Region_old!=Region_new) %>%
  filter(avg_eal_tercile_new=="Outside") %>%
  left_join(nri_cbsa_all,by=c("CBSAFP_new"="CBSAFP")) %>%
  group_by(avg_eal_tercile_all) %>%
  summarise(row_count=sum(row_count,na.rm = TRUE)) %>%
  kable(caption="Risk of Destinations outside top100")
table

Risk of Destinations outside top100
avg_eal_tercile_all	row_count
High	13518
Low	3901
Moderate	5972

data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Moves outside Region")

Moves outside Region
avg_eal_tercile_old	High	Low	Moderate	Outside
High	0.0510855	0.0209662	0.0326306	0.1467138
Low	0.0204705	0.0140766	0.0168357	0.1161319
Moderate	0.0257410	0.0133001	0.0151836	0.1236163
Outside	0.1399729	0.1274494	0.1358259	NA

chordDiagram(
  x=data, directional = 1, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow",
    grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Moves outside Region", cex.main = 1)

png(file="figures/moves_outside_region.png")
dev.off()

## quartz_off_screen 
##                 2

This chunk produces the chord diagrams for critical services and non-critical services

data <- read_csv("moves_crit.csv") %>%
  mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
  mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))

## Rows: 16 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Critical: Risk level to Risk level")

Critical: Risk level to Risk level
avg_eal_tercile_old	Low	Moderate	High	Outside
Low	0.0160731	0.0317129	0.0136222	0.0600542
Moderate	0.0289641	0.0325660	0.0281381	0.0618009
High	0.0120379	0.0328097	0.1314421	0.0702234
Outside	0.0668382	0.0712932	0.0700339	0.2723900

chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Critical: Risk level to Risk level", cex.main = 1)

png(file="figures/critical_risk_to_risk.png")
dev.off()

## quartz_off_screen 
##                 2

data <- read_csv("moves_non_crit.csv") %>%
  mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
  mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))

## Rows: 16 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Non-Critical: Risk level to Risk level")

Non-Critical: Risk level to Risk level
avg_eal_tercile_old	Low	Moderate	High	Outside
Low	0.0162345	0.0333048	0.0170285	0.0541986
Moderate	0.0313408	0.0367105	0.0338271	0.0609682
High	0.0183239	0.0417877	0.1578947	0.0771818
Outside	0.0580431	0.0617622	0.0689497	0.2324440

chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Non-Critical: Risk level to Risk level", cex.main = 1)

png(file="figures/non_critical_risk_to_risk.png")
dev.off()

## quartz_off_screen 
##                 2

This chunk produces the chord diagrams for standalone and chains

data <- read_csv("moves_chain.csv") %>%
  mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
  mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))

## Rows: 16 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Chain: Risk level to Risk level")

Chain: Risk level to Risk level
avg_eal_tercile_old	Low	Moderate	High	Outside
Low	0.0239119	0.0384862	0.0292970	0.0452547
Moderate	0.0373499	0.0460452	0.0461934	0.0478237
High	0.0328541	0.0570130	0.1558223	0.0509856
Outside	0.0636826	0.0644731	0.0606689	0.2001383

chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Chain: Risk level to Risk level", cex.main = 1)

png(file="figures/chain_risk_to_risk.png")
dev.off()

## quartz_off_screen 
##                 2

data <- read_csv("moves_standalone.csv") %>%
  mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
  mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))

## Rows: 16 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Standalone: Risk level to Risk level")

Standalone: Risk level to Risk level
avg_eal_tercile_old	Low	Moderate	High	Outside
Low	0.0145856	0.0311126	0.0121021	0.0602444
Moderate	0.0284123	0.0318321	0.0272199	0.0641963
High	0.0108505	0.0322164	0.1390559	0.0773431
Outside	0.0633192	0.0681581	0.0713906	0.2679610

chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Standalone: Risk level to Risk level", cex.main = 1)

png(file="figures/standalone_risk_to_risk.png")
dev.off()

## quartz_off_screen 
##                 2

This chunk produces across population categories. Outside the top100 is excluded.

#within same population category
data <- moves %>%
  filter(!is.na(pop_tercile_old),pop_tercile_num_old==pop_tercile_num_new) %>%
  group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count))

## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.

data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Same pop category: Risk level to Risk level")

Same pop category: Risk level to Risk level
avg_eal_tercile_old	High	Low	Moderate
High	0.4669255	0.0449229	0.0955934
Low	0.0455783	0.0355954	0.0831401
Moderate	0.0779974	0.0715438	0.0787032

chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Same pop category: Risk level to Risk level", cex.main = 1)

png(file="figures/samepop_risk_to_risk.png")

#lower pop to higher pop
data <- moves %>%
  filter(!is.na(pop_tercile_old),pop_tercile_num_old>pop_tercile_num_new) %>%
  group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count))

## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.

data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Towards higher pop: Risk level to Risk level")

Towards higher pop: Risk level to Risk level
avg_eal_tercile_old	High	Low	Moderate
High	0.3530192	0.0613562	0.1576342
Low	0.0191849	0.0562285	0.0869066
Moderate	0.0564937	0.0899125	0.1192644

chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Towards higher pop: Risk level to Risk level", cex.main = 1)

png(file="figures/low_to_high_risk_to_risk.png")

#higher pop to lower pop
data <- moves %>%
  filter(!is.na(pop_tercile_old),pop_tercile_num_old<pop_tercile_num_new) %>%
  group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count))

## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.

data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Toward lower pop: Risk level to Risk level")

Toward lower pop: Risk level to Risk level
avg_eal_tercile_old	High	Low	Moderate
High	0.3478789	0.0157023	0.0645441
Low	0.0607270	0.0539603	0.1131257
Moderate	0.1310835	0.1043637	0.1086146

chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Toward lower pop: Risk level to Risk level", cex.main = 1)

png(file="figures/high_to_low_risk_to_risk.png")

Bar Charts

This chunk produces bar graphs for all 100 CBSAs and corresponding entries/exits and births/deaths for all years of data

library(ggpattern)
data <- read_csv("cbsa_births_deaths_exits_entrys.csv")

## Rows: 1000 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): NAME, avg_eal_tercile
## dbl (8): archive_version_year, CBSAFP, entry, exit, births, deaths, avg_eal,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

summary(data)

##  archive_version_year     CBSAFP          entry             exit       
##  Min.   :2013         Min.   :10420   Min.   :  1.00   Min.   :  1.00  
##  1st Qu.:2015         1st Qu.:19348   1st Qu.: 26.00   1st Qu.: 25.00  
##  Median :2018         Median :32060   Median : 45.00   Median : 44.00  
##  Mean   :2018         Mean   :30209   Mean   : 67.08   Mean   : 67.08  
##  3rd Qu.:2020         3rd Qu.:40080   3rd Qu.: 84.00   3rd Qu.: 79.00  
##  Max.   :2022         Max.   :49340   Max.   :561.00   Max.   :670.00  
##                                                        NA's   :100     
##      births           deaths           NAME              avg_eal     
##  Min.   :  1328   Min.   :  1128   Length:1000        Min.   :38.21  
##  1st Qu.:  3617   1st Qu.:  3416   Class :character   1st Qu.:65.60  
##  Median :  6166   Median :  5294   Mode  :character   Median :75.28  
##  Mean   : 12411   Mean   : 10836                      Mean   :77.28  
##  3rd Qu.: 13648   3rd Qu.: 11576                      3rd Qu.:94.11  
##  Max.   :171670   Max.   :139494                      Max.   :99.91  
##                   NA's   :100                                        
##     tercile     avg_eal_tercile   
##  Min.   :1.00   Length:1000       
##  1st Qu.:1.00   Class :character  
##  Median :2.00   Mode  :character  
##  Mean   :1.99                     
##  3rd Qu.:3.00                     
##  Max.   :3.00                     
##

data <- data %>% gather(type,row_count, c(entry,exit,births,deaths)) %>%
  group_by(NAME,avg_eal_tercile,tercile,type) %>%
  summarise(row_count=sum(row_count,na.rm = TRUE)) %>%
  mutate(label= paste0(type,"-",avg_eal_tercile))

## `summarise()` has grouped output by 'NAME', 'avg_eal_tercile', 'tercile'. You
## can override using the `.groups` argument.

color_ramp <- c("births-Low" = "#00b100", "births-Moderate" = "#007e00", "births-High" = "#004b00",
          "deaths-Low" = "#c600c6", "deaths-Moderate" = "#9f009f", "deaths-High" = "#780078")
data %>%
  filter(type %in% c("births", "deaths")) %>%
  group_by(NAME) %>%
  mutate(sort = sum(row_count,na.rm = T)) %>%
  ungroup() %>%
  ggplot(aes(x = fct_reorder(NAME, sort), y = row_count, fill = label)) + 
  geom_bar(position = "dodge", stat = "identity")+
  coord_flip()+
  theme_minimal()+
  scale_fill_manual(values=color_ramp)+
  ggtitle("Births and Deaths - 2013-2022")+
  theme(legend.position = "bottom")

ggsave("figures/bar_chart_births_deaths.png")

## Saving 10 x 20 in image

color_ramp <- c("entry-Low" = "#00b100", "entry-Moderate" = "#007e00", "entry-High" = "#004b00",
          "exit-Low" = "#c600c6", "exit-Moderate" = "#9f009f", "exit-High" = "#780078")
data %>% filter(type %in% c("entry","exit")) %>%
  group_by(NAME) %>%
  mutate(sort = sum(row_count,na.rm = T)) %>%
  ungroup() %>%
  ggplot(aes(fill = label, y = row_count, x = forcats::fct_reorder(NAME, sort))) + 
  geom_bar(position = "dodge", stat = "identity")+
  coord_flip()+
  theme_minimal()+
  scale_fill_manual(values=color_ramp)+
  ggtitle("Entries and Exits - 2013-2022")+
  theme(legend.position = "bottom")

ggsave("figures/bar_chart_entries_exits.png")

## Saving 10 x 20 in image

#Expansion Test

expansion_size <- read_csv("matrix_expansion_rawdata.csv") %>%
  mutate(growth_count=employee_size_location-lag_employees,
         growth_pct=growth_count/lag_employees)

## Rows: 100600 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): company, avg_eal_tercile, avg_eal_tercile_parent, parent_region
## dbl (13): archive_version_year, parent_number, fips_code, primary_naics_code...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

summary(expansion_size$growth_count)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     1.00     1.00     4.00    23.62    11.00 64975.00

expansion_size %>%
  filter(growth_count <= quantile(growth_count, 0.95)) %>%
  ggplot()+geom_histogram(aes(growth_count))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

summary(expansion_size$growth_pct)

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##     0.001     0.333     0.667     3.626     2.000 10999.000

expansion_size %>%
  filter(growth_pct <= quantile(growth_pct, 0.95)) %>%
  ggplot()+geom_histogram(aes(growth_pct))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

process_data <- function(file_path, caption, include_yearly_column = FALSE) {
  table <- read_csv(file_path) %>%
    mutate(
      avg_eal_tercile_parent = ifelse(is.na(avg_eal_tercile_parent), "Outside100", avg_eal_tercile_parent),
      Total = Low + Moderate + High,
      Low_pct = round(Low / Total, 2),
      Moderate_pct = round(Moderate / Total, 2),
      High_pct = round(High / Total, 2),
    )
  
  if (include_yearly_column) {
    table <- table %>%
      rename(year=archive_version_year) %>%
      select(year,avg_eal_tercile_parent, Low, Low_pct, Moderate, Moderate_pct, High, High_pct, Total)
  } else {
      table <- table %>%
      select(avg_eal_tercile_parent, Low, Low_pct, Moderate, Moderate_pct, High, High_pct, Total)
  }
  
  kable(table, caption = caption)
}



# All expansions
process_data("matrix_expansion_all.csv", "ALL Expansions, rows (hq), columns (branch)")

## Rows: 4 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (4): Low, Moderate, High, <NA>
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

ALL Expansions, rows (hq), columns (branch)
avg_eal_tercile_parent	Low	Low_pct	Moderate	Moderate_pct	High	High_pct	Total
High	3343	0.22	4861	0.32	6970	0.46	15174
Low	5331	0.40	4463	0.34	3412	0.26	13206
Moderate	2857	0.24	5810	0.50	3049	0.26	11716
Outside100	4421	0.31	5441	0.38	4497	0.31	14359

# Yearly expansions
process_data("matrix_expansion_yearly.csv", "A<< Expansions yearly, rows (hq), columns (branch)", TRUE)

## Rows: 40 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (5): archive_version_year, Low, Moderate, High, <NA>
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

A<< Expansions yearly, rows (hq), columns (branch)
year	avg_eal_tercile_parent	Low	Low_pct	Moderate	Moderate_pct	High	High_pct	Total
2013	High	678	0.22	1057	0.34	1356	0.44	3091
2013	Low	673	0.36	630	0.34	558	0.30	1861
2013	Moderate	570	0.26	986	0.45	657	0.30	2213
2013	Outside100	952	0.31	1114	0.36	1028	0.33	3094
2014	High	202	0.20	314	0.31	498	0.49	1014
2014	Low	237	0.43	177	0.32	138	0.25	552
2014	Moderate	205	0.23	453	0.51	237	0.26	895
2014	Outside100	272	0.31	325	0.37	273	0.31	870
2015	High	201	0.25	244	0.31	351	0.44	796
2015	Low	355	0.41	297	0.34	214	0.25	866
2015	Moderate	173	0.23	398	0.54	169	0.23	740
2015	Outside100	212	0.28	301	0.40	245	0.32	758
2016	High	1014	0.21	1502	0.31	2282	0.48	4798
2016	Low	1289	0.43	929	0.31	775	0.26	2993
2016	Moderate	849	0.24	1763	0.50	929	0.26	3541
2016	Outside100	1221	0.29	1622	0.38	1408	0.33	4251
2017	High	230	0.23	321	0.32	449	0.45	1000
2017	Low	313	0.46	217	0.32	154	0.23	684
2017	Moderate	172	0.23	390	0.53	176	0.24	738
2017	Outside100	309	0.32	359	0.38	288	0.30	956
2018	High	163	0.21	249	0.32	366	0.47	778
2018	Low	250	0.44	183	0.32	135	0.24	568
2018	Moderate	180	0.25	362	0.50	180	0.25	722
2018	Outside100	408	0.33	490	0.40	341	0.28	1239
2019	High	119	0.25	148	0.31	210	0.44	477
2019	Low	131	0.43	101	0.33	71	0.23	303
2019	Moderate	97	0.27	172	0.47	96	0.26	365
2019	Outside100	124	0.32	156	0.40	112	0.29	392
2020	High	107	0.21	173	0.33	238	0.46	518
2020	Low	155	0.40	126	0.33	104	0.27	385
2020	Moderate	79	0.25	158	0.50	80	0.25	317
2020	Outside100	120	0.32	153	0.40	106	0.28	379
2021	High	121	0.25	152	0.31	217	0.44	490
2021	Low	210	0.40	149	0.28	170	0.32	529
2021	Moderate	83	0.28	158	0.54	54	0.18	295
2021	Outside100	114	0.28	148	0.36	145	0.36	407
2022	High	508	0.23	701	0.32	1003	0.45	2212
2022	Low	1718	0.38	1654	0.37	1093	0.24	4465
2022	Moderate	449	0.24	970	0.51	471	0.25	1890
2022	Outside100	689	0.34	773	0.38	551	0.27	2013

# All expansions
process_data("matrix_expansion_all_10.csv", "Expansions, rows (hq), columns (branch)")

## Rows: 4 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (4): Low, Moderate, High, <NA>
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Expansions, rows (hq), columns (branch)
avg_eal_tercile_parent	Low	Low_pct	Moderate	Moderate_pct	High	High_pct	Total
High	979	0.21	1424	0.31	2227	0.48	4630
Low	1383	0.47	868	0.29	713	0.24	2964
Moderate	743	0.23	1718	0.53	783	0.24	3244
Outside100	1252	0.31	1530	0.38	1253	0.31	4035

# Yearly expansions
process_data("matrix_expansion_yearly_10.csv", "Expansions yearly, rows (hq), columns (branch)", TRUE)

## Rows: 40 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (5): archive_version_year, Low, Moderate, High, <NA>
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Expansions yearly, rows (hq), columns (branch)
year	avg_eal_tercile_parent	Low	Low_pct	Moderate	Moderate_pct	High	High_pct	Total
2013	High	225	0.22	346	0.33	466	0.45	1037
2013	Low	211	0.39	159	0.29	169	0.31	539
2013	Moderate	150	0.24	287	0.46	189	0.30	626
2013	Outside100	323	0.30	374	0.35	384	0.36	1081
2014	High	49	0.19	77	0.30	134	0.52	260
2014	Low	71	0.44	55	0.34	37	0.23	163
2014	Moderate	41	0.19	128	0.60	43	0.20	212
2014	Outside100	73	0.28	93	0.36	92	0.36	258
2015	High	47	0.20	68	0.30	115	0.50	230
2015	Low	70	0.42	50	0.30	48	0.29	168
2015	Moderate	49	0.25	96	0.49	51	0.26	196
2015	Outside100	58	0.26	88	0.39	77	0.35	223
2016	High	269	0.21	401	0.31	612	0.48	1282
2016	Low	303	0.47	190	0.30	151	0.23	644
2016	Moderate	214	0.24	461	0.52	219	0.24	894
2016	Outside100	255	0.30	334	0.40	251	0.30	840
2017	High	84	0.21	130	0.32	191	0.47	405
2017	Low	119	0.49	72	0.30	53	0.22	244
2017	Moderate	47	0.22	136	0.63	32	0.15	215
2017	Outside100	129	0.36	128	0.36	103	0.29	360
2018	High	60	0.19	93	0.30	161	0.51	314
2018	Low	104	0.50	60	0.29	45	0.22	209
2018	Moderate	56	0.21	140	0.53	67	0.25	263
2018	Outside100	110	0.34	133	0.41	82	0.25	325
2019	High	34	0.24	39	0.27	69	0.49	142
2019	Low	59	0.46	41	0.32	28	0.22	128
2019	Moderate	30	0.27	56	0.51	24	0.22	110
2019	Outside100	49	0.30	70	0.43	45	0.27	164
2020	High	24	0.16	40	0.27	83	0.56	147
2020	Low	49	0.49	35	0.35	16	0.16	100
2020	Moderate	21	0.23	55	0.60	16	0.17	92
2020	Outside100	36	0.29	60	0.48	28	0.23	124
2021	High	43	0.30	33	0.23	65	0.46	141
2021	Low	45	0.46	26	0.27	27	0.28	98
2021	Moderate	23	0.25	56	0.60	14	0.15	93
2021	Outside100	32	0.29	36	0.33	41	0.38	109
2022	High	144	0.21	197	0.29	331	0.49	672
2022	Low	352	0.52	180	0.27	139	0.21	671
2022	Moderate	112	0.21	303	0.56	128	0.24	543
2022	Outside100	187	0.34	214	0.39	150	0.27	551

process_data("matrix_expansion_all_20.csv", "Expansions, rows (hq), columns (branch)")

## Rows: 4 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (4): Low, Moderate, High, <NA>
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Expansions, rows (hq), columns (branch)
avg_eal_tercile_parent	Low	Low_pct	Moderate	Moderate_pct	High	High_pct	Total
High	529	0.20	819	0.31	1337	0.50	2685
Low	886	0.49	522	0.29	409	0.23	1817
Moderate	474	0.23	1127	0.54	480	0.23	2081
Outside100	709	0.29	1006	0.41	723	0.30	2438

# Yearly expansions
process_data("matrix_expansion_yearly_20.csv", "Expansions yearly, rows (hq), columns (branch)", TRUE)

## Rows: 40 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (5): archive_version_year, Low, Moderate, High, <NA>
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Expansions yearly, rows (hq), columns (branch)
year	avg_eal_tercile_parent	Low	Low_pct	Moderate	Moderate_pct	High	High_pct	Total
2013	High	121	0.20	200	0.33	281	0.47	602
2013	Low	115	0.41	78	0.28	90	0.32	283
2013	Moderate	74	0.22	159	0.48	99	0.30	332
2013	Outside100	152	0.26	242	0.41	194	0.33	588
2014	High	25	0.15	50	0.30	92	0.55	167
2014	Low	53	0.49	37	0.34	19	0.17	109
2014	Moderate	25	0.18	87	0.63	26	0.19	138
2014	Outside100	51	0.28	74	0.41	57	0.31	182
2015	High	20	0.14	44	0.31	76	0.54	140
2015	Low	47	0.44	36	0.34	24	0.22	107
2015	Moderate	26	0.22	60	0.51	32	0.27	118
2015	Outside100	33	0.23	62	0.44	47	0.33	142
2016	High	140	0.20	220	0.32	327	0.48	687
2016	Low	185	0.45	122	0.29	108	0.26	415
2016	Moderate	160	0.27	302	0.51	130	0.22	592
2016	Outside100	148	0.27	231	0.43	163	0.30	542
2017	High	53	0.20	83	0.32	127	0.48	263
2017	Low	86	0.52	51	0.31	27	0.16	164
2017	Moderate	28	0.20	93	0.65	22	0.15	143
2017	Outside100	71	0.34	77	0.37	59	0.29	207
2018	High	42	0.20	60	0.29	104	0.50	206
2018	Low	82	0.49	51	0.31	33	0.20	166
2018	Moderate	42	0.21	106	0.54	49	0.25	197
2018	Outside100	63	0.34	73	0.39	49	0.26	185
2019	High	23	0.27	24	0.28	39	0.45	86
2019	Low	45	0.49	29	0.32	18	0.20	92
2019	Moderate	19	0.28	34	0.50	15	0.22	68
2019	Outside100	37	0.33	47	0.42	27	0.24	111
2020	High	14	0.15	20	0.22	58	0.63	92
2020	Low	31	0.49	25	0.40	7	0.11	63
2020	Moderate	13	0.23	32	0.56	12	0.21	57
2020	Outside100	25	0.31	37	0.46	19	0.23	81
2021	High	17	0.24	14	0.20	40	0.56	71
2021	Low	26	0.57	7	0.15	13	0.28	46
2021	Moderate	17	0.27	36	0.56	11	0.17	64
2021	Outside100	18	0.33	17	0.31	20	0.36	55
2022	High	74	0.20	104	0.28	193	0.52	371
2022	Low	216	0.58	86	0.23	70	0.19	372
2022	Moderate	70	0.19	218	0.59	84	0.23	372
2022	Outside100	111	0.32	146	0.42	88	0.26	345

Figures

Census

Line Employees

Chord Diagrams

Bar Charts