Census

3 is highest risk, 2 is moderate, 1 is low

table <- read_csv("census.csv")
## Rows: 4 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): avg_eal_tercile
## dbl (18): asian, avghhsize, bacplus, black, child, density, hispanic, medgro...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
table2 <- read_csv("industry.csv")
## Rows: 4 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): avg_eal_tercile
## dbl (14): FIRE, agriculutre_forestry_etc, armed_forces, arts_ent_rec_etc, co...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
kable(table,caption="Census Variables by Risk Level")
Census Variables by Risk Level
avg_eal_tercile asian avghhsize bacplus black child density hispanic medgrossrents medhhinc medianhhval other owner_occupied poverty senior total unemp vacant white
1 0.0315712 2.492941 0.3567584 0.1567143 0.2055315 345.1562 0.1144124 1042.8824 68181.35 219141.2 0.0381176 0.6606767 0.1227416 0.1556827 1494376.3 0.0503188 0.0879593 0.6591846
2 0.0374453 2.589091 0.3574478 0.1343262 0.2111732 604.2491 0.1572416 1089.0303 71106.09 240212.1 0.0427232 0.6623240 0.1234514 0.1502595 2060857.9 0.0543446 0.0858594 0.6282638
3 0.0889935 2.749091 0.3387351 0.0946081 0.2056592 828.4572 0.2888781 1412.9091 75917.24 387818.2 0.0494946 0.6279769 0.1258272 0.1651189 3149811.3 0.0588635 0.1053852 0.4780258
Outside 0.0150771 2.527414 0.2405747 0.0833987 0.2033337 115.2304 0.1335524 854.9571 56550.76 176321.7 0.0440430 0.6892702 0.1554496 0.1803648 109954.8 0.0553571 0.1511438 0.7239288
kable(table2,caption="Census Industry by Risk Level")
Census Industry by Risk Level
avg_eal_tercile FIRE agriculutre_forestry_etc armed_forces arts_ent_rec_etc construction education_healthcare_etc information manufacturing other professional_science_managment_etc public_admin retail transportation_warehouse_utilities wholesale
1 0.0722513 0.0089891 0.0099165 0.0835972 0.0632696 0.2451436 0.0180693 0.0957320 0.0461523 0.1176257 0.0523191 0.1067635 0.0549619 0.0252088
2 0.0703715 0.0083459 0.0074881 0.0846070 0.0643641 0.2331948 0.0176900 0.1133960 0.0466104 0.1192654 0.0454758 0.1095589 0.0535857 0.0260463
3 0.0670700 0.0173628 0.0086841 0.1020863 0.0728896 0.2182436 0.0198586 0.0764031 0.0474106 0.1310278 0.0458772 0.1119322 0.0562385 0.0249155
Outside 0.0444278 0.0391243 0.0089660 0.0892926 0.0697175 0.2388585 0.0121106 0.1328069 0.0469522 0.0739920 0.0527425 0.1165147 0.0529105 0.0215839

##T-tests 3 is highest risk, 2 is moderate, 1 is low

library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
table <- read_csv("census_cbsas.csv")
## Rows: 939 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): avg_eal_tercile
## dbl (18): asian, avghhsize, black, hispanic, medgrossrents, medhhinc, median...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
names <- names(table)[2:19]
table_tidy <- table %>% pivot_longer(!avg_eal_tercile)
for (i in names) {
  print(i)
  x <- table_tidy %>%
    filter(name==i) %>%
    mutate(avg_eal_tercile=as.character(avg_eal_tercile)) %>%
    select(-name)
  pairwise_result <- x %>% pairwise_t_test(value~avg_eal_tercile, p.adjust.method = "bonferroni") %>%
    mutate(variable=i) %>%
    select(variable, everything()) %>%
    select(-.y.)
  print(pairwise_result)
}
## [1] "asian"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 asian    1      2          34    33 3.9 e- 1 ns       1   e+ 0 ns          
## 2 asian    1      3          34    33 1.55e-16 ****     9.31e-16 ****        
## 3 asian    2      3          33    33 1.57e-13 ****     9.45e-13 ****        
## 4 asian    1      Outside    34   839 7.73e- 4 ***      4.64e- 3 **          
## 5 asian    2      Outside    33   839 7.32e- 6 ****     4.39e- 5 ****        
## 6 asian    3      Outside    33   839 3.22e-45 ****     1.93e-44 ****        
## [1] "avghhsize"
## # A tibble: 6 × 9
##   variable  group1 group2     n1    n2           p p.signif   p.adj p.adj.signif
##   <chr>     <chr>  <chr>   <int> <int>       <dbl> <chr>      <dbl> <chr>       
## 1 avghhsize 1      2          34    33     7.5 e-2 ns       4.5 e-1 ns          
## 2 avghhsize 1      3          34    33     2.37e-6 ****     1.42e-5 ****        
## 3 avghhsize 2      3          33    33     3.32e-3 **       1.99e-2 *           
## 4 avghhsize 1      Outside    34   839     3.72e-1 ns       1   e+0 ns          
## 5 avghhsize 2      Outside    33   839     1.16e-1 ns       6.94e-1 ns          
## 6 avghhsize 3      Outside    33   839     2.03e-8 ****     1.22e-7 ****        
## [1] "black"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif   p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>      <dbl> <chr>       
## 1 black    1      2          34    33 0.462    ns       1       ns          
## 2 black    1      3          34    33 0.0416   *        0.25    ns          
## 3 black    2      3          33    33 0.196    ns       1       ns          
## 4 black    1      Outside    34   839 0.000798 ***      0.00479 **          
## 5 black    2      Outside    33   839 0.0214   *        0.129   ns          
## 6 black    3      Outside    33   839 0.612    ns       1       ns          
## [1] "hispanic"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2         p p.signif     p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>     <dbl> <chr>        <dbl> <chr>       
## 1 hispanic 1      2          34    33 0.354     ns       1         ns          
## 2 hispanic 1      3          34    33 0.00017   ***      0.00102   **          
## 3 hispanic 2      3          33    33 0.00479   **       0.0287    *           
## 4 hispanic 1      Outside    34   839 0.563     ns       1         ns          
## 5 hispanic 2      Outside    33   839 0.48      ns       1         ns          
## 6 hispanic 3      Outside    33   839 0.0000042 ****     0.0000252 ****        
## [1] "medgrossrents"
## # A tibble: 6 × 9
##   variable     group1 group2    n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>        <chr>  <chr>  <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 medgrossren… 1      2         34    33 3.75e- 1 ns       1   e+ 0 ns          
## 2 medgrossren… 1      3         34    33 2.24e-12 ****     1.34e-11 ****        
## 3 medgrossren… 2      3         33    33 9.51e-10 ****     5.71e- 9 ****        
## 4 medgrossren… 1      Outsi…    34   839 5.40e- 7 ****     3.24e- 6 ****        
## 5 medgrossren… 2      Outsi…    33   839 8.64e-10 ****     5.18e- 9 ****        
## 6 medgrossren… 3      Outsi…    33   839 1.57e-44 ****     9.40e-44 ****        
## [1] "medhhinc"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 medhhinc 1      2          34    33 3.35e- 1 ns       1   e+ 0 ns          
## 2 medhhinc 1      3          34    33 1.09e- 2 *        6.54e- 2 ns          
## 3 medhhinc 2      3          33    33 1.16e- 1 ns       6.94e- 1 ns          
## 4 medhhinc 1      Outside    34   839 1.06e- 7 ****     6.39e- 7 ****        
## 5 medhhinc 2      Outside    33   839 6.5 e-11 ****     3.90e-10 ****        
## 6 medhhinc 3      Outside    33   839 6.88e-18 ****     4.13e-17 ****        
## [1] "medianhhval"
## # A tibble: 6 × 9
##   variable    group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>       <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 medianhhval 1      2          34    33 4.07e- 1 ns       1   e+ 0 ns          
## 2 medianhhval 1      3          34    33 5.51e-11 ****     3.31e-10 ****        
## 3 medianhhval 2      3          33    33 1.12e- 8 ****     6.74e- 8 ****        
## 4 medianhhval 1      Outside    34   839 1.89e- 2 *        1.13e- 1 ns          
## 5 medianhhval 2      Outside    33   839 5.64e- 4 ***      3.38e- 3 **          
## 6 medianhhval 3      Outside    33   839 1.61e-28 ****     9.68e-28 ****        
## [1] "owner_occupied"
## # A tibble: 6 × 9
##   variable       group1 group2    n1    n2       p p.signif   p.adj p.adj.signif
##   <chr>          <chr>  <chr>  <int> <int>   <dbl> <chr>      <dbl> <chr>       
## 1 owner_occupied 1      2         34    33 9.14e-1 ns       1   e+0 ns          
## 2 owner_occupied 1      3         34    33 3.16e-2 *        1.89e-1 ns          
## 3 owner_occupied 2      3         33    33 2.5 e-2 *        1.5 e-1 ns          
## 4 owner_occupied 1      Outsi…    34   839 8.68e-3 **       5.21e-2 ns          
## 5 owner_occupied 2      Outsi…    33   839 1.48e-2 *        8.85e-2 ns          
## 6 owner_occupied 3      Outsi…    33   839 3.58e-8 ****     2.15e-7 ****        
## [1] "poverty"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2       p p.signif  p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>   <dbl> <chr>     <dbl> <chr>       
## 1 poverty  1      2          34    33 0.963   ns       1      ns          
## 2 poverty  1      3          34    33 0.839   ns       1      ns          
## 3 poverty  2      3          33    33 0.877   ns       1      ns          
## 4 poverty  1      Outside    34   839 0.00275 **       0.0165 *           
## 5 poverty  2      Outside    33   839 0.00388 **       0.0233 *           
## 6 poverty  3      Outside    33   839 0.00749 **       0.0449 *           
## [1] "total"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 total    1      2          34    33 8.36e- 3 **       5.01e- 2 ns          
## 2 total    1      3          34    33 2.91e-14 ****     1.74e-13 ****        
## 3 total    2      3          33    33 5.50e- 7 ****     3.30e- 6 ****        
## 4 total    1      Outside    34   839 1.02e-18 ****     6.14e-18 ****        
## 5 total    2      Outside    33   839 2.02e-33 ****     1.21e-32 ****        
## 6 total    3      Outside    33   839 1.69e-71 ****     1.01e-70 ****        
## [1] "unemp"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2     p p.signif p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int> <dbl> <chr>    <dbl> <chr>       
## 1 unemp    1      2          34    33 0.444 ns       1     ns          
## 2 unemp    1      3          34    33 0.104 ns       0.626 ns          
## 3 unemp    2      3          33    33 0.394 ns       1     ns          
## 4 unemp    1      Outside    34   839 0.181 ns       1     ns          
## 5 unemp    2      Outside    33   839 0.791 ns       1     ns          
## 6 unemp    3      Outside    33   839 0.358 ns       1     ns          
## [1] "vacant"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2           p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>       <dbl> <chr>       <dbl> <chr>       
## 1 vacant   1      2          34    33 0.907       ns        1   e+0 ns          
## 2 vacant   1      3          34    33 0.333       ns        1   e+0 ns          
## 3 vacant   2      3          33    33 0.282       ns        1   e+0 ns          
## 4 vacant   1      Outside    34   839 0.00000109  ****      6.56e-6 ****        
## 5 vacant   2      Outside    33   839 0.000000693 ****      4.16e-6 ****        
## 6 vacant   3      Outside    33   839 0.000482    ***       2.89e-3 **          
## [1] "white"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 white    1      2          34    33 5.38e- 1 ns       1   e+ 0 ns          
## 2 white    1      3          34    33 3.22e- 4 ***      1.93e- 3 **          
## 3 white    2      3          33    33 3.04e- 3 **       1.82e- 2 *           
## 4 white    1      Outside    34   839 7.18e- 2 ns       4.31e- 1 ns          
## 5 white    2      Outside    33   839 8.8 e- 3 **       5.28e- 2 ns          
## 6 white    3      Outside    33   839 2.63e-11 ****     1.58e-10 ****        
## [1] "other"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2     p p.signif p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int> <dbl> <chr>    <dbl> <chr>       
## 1 other    1      2          34    33 0.721 ns           1 ns          
## 2 other    1      3          34    33 0.377 ns           1 ns          
## 3 other    2      3          33    33 0.602 ns           1 ns          
## 4 other    1      Outside    34   839 0.52  ns           1 ns          
## 5 other    2      Outside    33   839 0.888 ns           1 ns          
## 6 other    3      Outside    33   839 0.56  ns           1 ns          
## [1] "density"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 density  1      2          34    33 9.40e- 8 ****     5.64e- 7 ****        
## 2 density  1      3          34    33 1.4 e-22 ****     8.39e-22 ****        
## 3 density  2      3          33    33 4.34e- 6 ****     2.61e- 5 ****        
## 4 density  1      Outside    34   839 4.39e-11 ****     2.63e-10 ****        
## 5 density  2      Outside    33   839 1.74e-40 ****     1.05e-39 ****        
## 6 density  3      Outside    33   839 9.06e-77 ****     5.43e-76 ****        
## [1] "child"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2     p p.signif p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int> <dbl> <chr>    <dbl> <chr>       
## 1 child    1      2          34    33 0.433 ns       1     ns          
## 2 child    1      3          34    33 0.986 ns       1     ns          
## 3 child    2      3          33    33 0.447 ns       1     ns          
## 4 child    1      Outside    34   839 0.67  ns       1     ns          
## 5 child    2      Outside    33   839 0.134 ns       0.804 ns          
## 6 child    3      Outside    33   839 0.657 ns       1     ns          
## [1] "senior"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2         p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>     <dbl> <chr>       <dbl> <chr>       
## 1 senior   1      2          34    33 0.584     ns       1        ns          
## 2 senior   1      3          34    33 0.34      ns       1        ns          
## 3 senior   2      3          33    33 0.136     ns       0.817    ns          
## 4 senior   1      Outside    34   839 0.000513  ***      0.00308  **          
## 5 senior   2      Outside    33   839 0.0000303 ****     0.000182 ***         
## 6 senior   3      Outside    33   839 0.034     *        0.204    ns          
## [1] "bacplus"
## # A tibble: 6 × 9
##   variable group1 group2     n1    n2        p p.signif    p.adj p.adj.signif
##   <chr>    <chr>  <chr>   <int> <int>    <dbl> <chr>       <dbl> <chr>       
## 1 bacplus  1      2          34    33 9.74e- 1 ns       1   e+ 0 ns          
## 2 bacplus  1      3          34    33 3.89e- 1 ns       1   e+ 0 ns          
## 3 bacplus  2      3          33    33 3.74e- 1 ns       1   e+ 0 ns          
## 4 bacplus  1      Outside    34   839 2.09e-14 ****     1.25e-13 ****        
## 5 bacplus  2      Outside    33   839 3.39e-14 ****     2.03e-13 ****        
## 6 bacplus  3      Outside    33   839 1.58e-10 ****     9.5 e-10 ****

#Line Graphs

data <- read_csv("all_cbsa_summary.csv") %>%
  filter(!is.na(avg_eal))
## Rows: 9400 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile, NAME
## dbl (8): archive_version_year, CBSAFP, rows, with_parent_number, mean_employ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$avg_eal_tercile <- factor(data$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
plot1 <- data %>% ggplot()+
  geom_line(aes(archive_version_year,rows,color=NAME),alpha=0.5)+
  facet_wrap(~avg_eal_tercile,ncol =3)+
  ggtitle("Number of Businesses")
ggsave("figures/number_of_businesses.png")
## Saving 7 x 5 in image
plot2 <- data %>% ggplot()+
  geom_line(aes(archive_version_year,mean_year_established,color=NAME),alpha=0.5)+
  facet_wrap(~avg_eal_tercile,ncol =3)+
  ggtitle("Mean year established")
ggsave("figures/mean_year_established.png")
## Saving 7 x 5 in image
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggplotly(plot1) %>% layout(showlegend = FALSE)
ggplotly(plot2) %>% layout(showlegend = FALSE)
data <- read_csv("cbsa_births_deaths_exits_entrys.csv")
## Rows: 1000 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): NAME, avg_eal_tercile
## dbl (8): archive_version_year, CBSAFP, entry, exit, births, deaths, avg_eal,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$avg_eal_tercile <- factor(data$avg_eal_tercile, levels = c("High", "Moderate", "Low"))

plot1 <- data %>% ggplot()+
  geom_line(aes(archive_version_year,births,color=NAME,group=NAME),alpha=0.5)+
  facet_wrap(~avg_eal_tercile,ncol =3)+
  ggtitle("Births")
ggsave("figures/births.png")
## Saving 7 x 5 in image
plot2 <- data %>% ggplot()+
  geom_line(aes(archive_version_year,deaths,color=NAME,group=NAME),alpha=0.5)+
  facet_wrap(~avg_eal_tercile,ncol =3)+
  ggtitle("Deaths")
ggsave("figures/deaths.png")
## Saving 7 x 5 in image
## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_line()`).
plot3 <- data %>%
  ggplot()+
  geom_line(aes(archive_version_year,entry,color=NAME,group=NAME),alpha=0.5)+
  facet_wrap(~avg_eal_tercile,ncol =3)+
  ggtitle("Entries")
ggsave("figures/entries.png")
## Saving 7 x 5 in image
plot4 <- data %>% ggplot()+
  geom_line(aes(archive_version_year,exit,color=NAME,group=NAME),alpha=0.5)+
  facet_wrap(~avg_eal_tercile,ncol =)+
  ggtitle("Exits")
ggsave("figures/exits.png")
## Saving 7 x 5 in image
## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggplotly(plot1) %>% layout(showlegend = FALSE)
ggplotly(plot2) %>% layout(showlegend = FALSE)
ggplotly(plot3) %>% layout(showlegend = FALSE)
ggplotly(plot4) %>% layout(showlegend = FALSE)
all <- read_csv("critical_pop_all.csv")
all$avg_eal_tercile <- factor(all$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
all$pop_tercile <- factor(all$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

births <- read_csv("critical_pop_births.csv") %>%
  filter(archive_version_year != 2012)
births$avg_eal_tercile <- factor(births$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
births$pop_tercile <- factor(births$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

entries <- read_csv("critical_pop_entries.csv")%>%
  filter(archive_version_year != 2012)
entries$avg_eal_tercile <- factor(entries$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
entries$pop_tercile <- factor(entries$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

deaths <- read_csv("critical_pop_deaths.csv")%>%
  filter(archive_version_year != 2022)
deaths$avg_eal_tercile <- factor(deaths$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
deaths$pop_tercile <- factor(deaths$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

exits <- read_csv("critical_pop_exits.csv")%>%
  filter(archive_version_year != 2022)
exits$avg_eal_tercile <- factor(exits$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
exits$pop_tercile <- factor(exits$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))


plot1 <- all %>%
  ggplot()+
  geom_line(aes(archive_version_year,row_count,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by population tercile, all")
ggsave("figures/critical_by_pop.png")

plot2 <- births %>%ggplot()+
  geom_line(aes(archive_version_year,births,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, births")
ggsave("figures/naics_by_tercile_births.png")


plot3 <- deaths %>% ggplot()+
  geom_line(aes(archive_version_year,deaths,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, deaths")
ggsave("figures/naics_by_tercile_deaths.png")

plot4 <- entries %>% ggplot()+
  geom_line(aes(archive_version_year,entries,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, entries")
ggsave("figures/naics_by_tercile_entries.png")

plot5 <- exits %>% ggplot()+
  geom_line(aes(archive_version_year,exits,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, exits")
ggsave("figures/naics_by_tercile_exits.png")

plot1

plot2

plot3

plot4

plot5

Line Employees

all <- read_csv("critical_pop_all_employees.csv")
all$avg_eal_tercile <- factor(all$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
all$pop_tercile <- factor(all$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

births <- read_csv("critical_pop_births_employees.csv") %>%
  filter(archive_version_year != 2012)
births$avg_eal_tercile <- factor(births$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
births$pop_tercile <- factor(births$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

entries <- read_csv("critical_pop_entries_employees.csv")%>%
  filter(archive_version_year != 2012)
entries$avg_eal_tercile <- factor(entries$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
entries$pop_tercile <- factor(entries$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

deaths <- read_csv("critical_pop_deaths_employees.csv")%>%
  filter(archive_version_year != 2022)
deaths$avg_eal_tercile <- factor(deaths$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
deaths$pop_tercile <- factor(deaths$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))

exits <- read_csv("critical_pop_exits_employees.csv")%>%
  filter(archive_version_year != 2022)
exits$avg_eal_tercile <- factor(exits$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
exits$pop_tercile <- factor(exits$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))


plot1 <- all %>%
  ggplot()+
  geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by population tercile, all - employees")
ggsave("figures/critical_by_pop_emp.png")

plot2 <- births %>%ggplot()+
  geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, births, births - employees")
ggsave("figures/naics_by_tercile_births_emp.png")


plot3 <- deaths %>% ggplot()+
  geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, deaths - employees")
ggsave("figures/naics_by_tercile_deaths_emp.png")

plot4 <- entries %>% ggplot()+
  geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, entries - employees")
ggsave("figures/naics_by_tercile_entries_emp.png")

plot5 <- exits %>% ggplot()+
  geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
  facet_grid(pop_tercile~critical)+
  ggtitle("Critical by tercile, exits - employees")
ggsave("figures/naics_by_tercile_exit_emps.png")

plot1

plot2

plot3

plot4

plot5

Chord Diagrams

nri_cbsa_all <- read_csv("nri_cbsa_all.csv") %>%
  mutate(CBSAFP =as.character(CBSAFP))
## Rows: 940 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_all
## dbl (3): CBSAFP, avg_eal_all, tercile_all
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
regions <- read_csv("top100_regions.csv") %>%
  mutate(GEOID =as.character(GEOID)) %>%
  select(GEOID, Region)
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): pop_tercile, NAME, Region
## dbl (1): GEOID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
top100_cbsa <- get_acs(geography = "cbsa", variables = c(pop="B01001_001"),output = "wide") %>%
  top_n(100,popE) %>%
  mutate(pop_tercile_num = ntile(popE, 3)) %>%
  mutate(pop_tercile=as_factor(pop_tercile_num)) %>%
  mutate(pop_tercile = factor(pop_tercile, levels = 1:3, labels = c("Pop: Low", "Pop: Moderate", "Pop: High"))) %>% 
  select(pop_tercile,pop_tercile_num,popE,GEOID) %>%
  left_join(regions)
## Getting data from the 2018-2022 5-year ACS
## Joining with `by = join_by(GEOID)`
top100_cbsa_new <- top100_cbsa %>%
  rename_with(~paste0(., "_new"), everything())
top100_cbsa_old <- top100_cbsa %>%
  rename_with(~paste0(., "_old"), everything())

moves <- read_csv("migration_map.csv") %>%
  select(CBSAFP_old,CBSAFP_new,NAME_old,NAME_new,row_count,avg_eal_tercile_old,avg_eal_tercile_new) %>%
  mutate(CBSAFP_old=as.character(CBSAFP_old)) %>%
  mutate(CBSAFP_new=as.character(CBSAFP_new)) %>%
  mutate(NAME_old=ifelse(is.na(avg_eal_tercile_old),"Outside",NAME_old)) %>%
  mutate(NAME_new=ifelse(is.na(avg_eal_tercile_new),"Outside",NAME_new)) %>%
  group_by(CBSAFP_old,CBSAFP_new,NAME_old,NAME_new,avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count)) %>%
  left_join(top100_cbsa_old, by=c("CBSAFP_old"="GEOID_old")) %>%
  left_join(top100_cbsa_new, by=c("CBSAFP_new"="GEOID_new")) %>%
  mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
  mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new)) %>%
  mutate(Region_old=ifelse(is.na(Region_old),"Outside",Region_old)) %>%
  mutate(Region_new=ifelse(is.na(Region_new),"Outside",Region_new))
## Rows: 63970 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): fips_code, lead_county, NAME_old, NAME_new, avg_eal_tercile_old, a...
## dbl (12): archive_version_year, row_count, CBSAFP_old, CBSAFP_new, avg_eal_o...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'CBSAFP_old', 'CBSAFP_new', 'NAME_old', 'NAME_new', 'avg_eal_tercile_old'. You can override using the `.groups` argument.
data <- moves %>%
  group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption="Risk level to Risk level")
Risk level to Risk level
avg_eal_tercile_old High Low Moderate Outside
High 0.1418442 0.0145098 0.0363402 0.0729597
Low 0.0149617 0.0161366 0.0323389 0.0577516
Moderate 0.0303752 0.0298987 0.0341958 0.0614735
Outside 0.0696075 0.0633796 0.0675453 0.2566818
chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Risk level to Risk level", cex.main = 1)

png(file="figures/risk_to_risk.png")
dev.off()
## quartz_off_screen 
##                 2
data <- moves %>%
  filter(Region_old==Region_new) %>%
  filter(Region_old!="Outside") %>%
  group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Moves within Region")
Moves within Region
avg_eal_tercile_old High Low Moderate
High 0.4732835 0.0165976 0.0817526
Low 0.0194363 0.0371360 0.0974152
Moderate 0.0714333 0.0946433 0.1083022
chordDiagram(
  x=data, directional = 1, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow",
    grid.col = c(High="red",Moderate="yellow", Low="green"))
title("Moves within Region", cex.main = 1)

png(file="figures/moves_within_region.png")
dev.off()
## quartz_off_screen 
##                 2
data <- moves %>%
  filter(Region_old!=Region_new) %>%
  group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
table <- moves %>%
  filter(Region_old!=Region_new) %>%
  filter(avg_eal_tercile_new=="Outside") %>%
  left_join(nri_cbsa_all,by=c("CBSAFP_new"="CBSAFP")) %>%
  group_by(avg_eal_tercile_all) %>%
  summarise(row_count=sum(row_count,na.rm = TRUE)) %>%
  kable(caption="Risk of Destinations outside top100")
table
Risk of Destinations outside top100
avg_eal_tercile_all row_count
High 13518
Low 3901
Moderate 5972
data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Moves outside Region")
Moves outside Region
avg_eal_tercile_old High Low Moderate Outside
High 0.0510855 0.0209662 0.0326306 0.1467138
Low 0.0204705 0.0140766 0.0168357 0.1161319
Moderate 0.0257410 0.0133001 0.0151836 0.1236163
Outside 0.1399729 0.1274494 0.1358259 NA
chordDiagram(
  x=data, directional = 1, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow",
    grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Moves outside Region", cex.main = 1)

png(file="figures/moves_outside_region.png")
dev.off()
## quartz_off_screen 
##                 2

This chunk produces the chord diagrams for critical services and non-critical services

data <- read_csv("moves_crit.csv") %>%
  mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
  mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))
## Rows: 16 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Critical: Risk level to Risk level")
Critical: Risk level to Risk level
avg_eal_tercile_old Low Moderate High Outside
Low 0.0160731 0.0317129 0.0136222 0.0600542
Moderate 0.0289641 0.0325660 0.0281381 0.0618009
High 0.0120379 0.0328097 0.1314421 0.0702234
Outside 0.0668382 0.0712932 0.0700339 0.2723900
chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Critical: Risk level to Risk level", cex.main = 1)

png(file="figures/critical_risk_to_risk.png")
dev.off()
## quartz_off_screen 
##                 2
data <- read_csv("moves_non_crit.csv") %>%
  mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
  mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))
## Rows: 16 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Non-Critical: Risk level to Risk level")
Non-Critical: Risk level to Risk level
avg_eal_tercile_old Low Moderate High Outside
Low 0.0162345 0.0333048 0.0170285 0.0541986
Moderate 0.0313408 0.0367105 0.0338271 0.0609682
High 0.0183239 0.0417877 0.1578947 0.0771818
Outside 0.0580431 0.0617622 0.0689497 0.2324440
chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Non-Critical: Risk level to Risk level", cex.main = 1)

png(file="figures/non_critical_risk_to_risk.png")
dev.off()
## quartz_off_screen 
##                 2

This chunk produces the chord diagrams for standalone and chains

data <- read_csv("moves_chain.csv") %>%
  mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
  mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))
## Rows: 16 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Chain: Risk level to Risk level")
Chain: Risk level to Risk level
avg_eal_tercile_old Low Moderate High Outside
Low 0.0239119 0.0384862 0.0292970 0.0452547
Moderate 0.0373499 0.0460452 0.0461934 0.0478237
High 0.0328541 0.0570130 0.1558223 0.0509856
Outside 0.0636826 0.0644731 0.0606689 0.2001383
chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Chain: Risk level to Risk level", cex.main = 1)

png(file="figures/chain_risk_to_risk.png")
dev.off()
## quartz_off_screen 
##                 2
data <- read_csv("moves_standalone.csv") %>%
  mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
  mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))
## Rows: 16 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Standalone: Risk level to Risk level")
Standalone: Risk level to Risk level
avg_eal_tercile_old Low Moderate High Outside
Low 0.0145856 0.0311126 0.0121021 0.0602444
Moderate 0.0284123 0.0318321 0.0272199 0.0641963
High 0.0108505 0.0322164 0.1390559 0.0773431
Outside 0.0633192 0.0681581 0.0713906 0.2679610
chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Standalone: Risk level to Risk level", cex.main = 1)

png(file="figures/standalone_risk_to_risk.png")
dev.off()
## quartz_off_screen 
##                 2

This chunk produces across population categories. Outside the top100 is excluded.

#within same population category
data <- moves %>%
  filter(!is.na(pop_tercile_old),pop_tercile_num_old==pop_tercile_num_new) %>%
  group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Same pop category: Risk level to Risk level")
Same pop category: Risk level to Risk level
avg_eal_tercile_old High Low Moderate
High 0.4669255 0.0449229 0.0955934
Low 0.0455783 0.0355954 0.0831401
Moderate 0.0779974 0.0715438 0.0787032
chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Same pop category: Risk level to Risk level", cex.main = 1)

png(file="figures/samepop_risk_to_risk.png")

#lower pop to higher pop
data <- moves %>%
  filter(!is.na(pop_tercile_old),pop_tercile_num_old>pop_tercile_num_new) %>%
  group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Towards higher pop: Risk level to Risk level")
Towards higher pop: Risk level to Risk level
avg_eal_tercile_old High Low Moderate
High 0.3530192 0.0613562 0.1576342
Low 0.0191849 0.0562285 0.0869066
Moderate 0.0564937 0.0899125 0.1192644
chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Towards higher pop: Risk level to Risk level", cex.main = 1)

png(file="figures/low_to_high_risk_to_risk.png")

#higher pop to lower pop
data <- moves %>%
  filter(!is.na(pop_tercile_old),pop_tercile_num_old<pop_tercile_num_new) %>%
  group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
  summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
data %>%
  transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
  pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
  kable(caption = "Toward lower pop: Risk level to Risk level")
Toward lower pop: Risk level to Risk level
avg_eal_tercile_old High Low Moderate
High 0.3478789 0.0157023 0.0645441
Low 0.0607270 0.0539603 0.1131257
Moderate 0.1310835 0.1043637 0.1086146
chordDiagram(
  x=data,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow",
  grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Toward lower pop: Risk level to Risk level", cex.main = 1)

png(file="figures/high_to_low_risk_to_risk.png")

Bar Charts

This chunk produces bar graphs for all 100 CBSAs and corresponding entries/exits and births/deaths for all years of data

library(ggpattern)
data <- read_csv("cbsa_births_deaths_exits_entrys.csv")
## Rows: 1000 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): NAME, avg_eal_tercile
## dbl (8): archive_version_year, CBSAFP, entry, exit, births, deaths, avg_eal,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary(data)
##  archive_version_year     CBSAFP          entry             exit       
##  Min.   :2013         Min.   :10420   Min.   :  1.00   Min.   :  1.00  
##  1st Qu.:2015         1st Qu.:19348   1st Qu.: 26.00   1st Qu.: 25.00  
##  Median :2018         Median :32060   Median : 45.00   Median : 44.00  
##  Mean   :2018         Mean   :30209   Mean   : 67.08   Mean   : 67.08  
##  3rd Qu.:2020         3rd Qu.:40080   3rd Qu.: 84.00   3rd Qu.: 79.00  
##  Max.   :2022         Max.   :49340   Max.   :561.00   Max.   :670.00  
##                                                        NA's   :100     
##      births           deaths           NAME              avg_eal     
##  Min.   :  1328   Min.   :  1128   Length:1000        Min.   :38.21  
##  1st Qu.:  3617   1st Qu.:  3416   Class :character   1st Qu.:65.60  
##  Median :  6166   Median :  5294   Mode  :character   Median :75.28  
##  Mean   : 12411   Mean   : 10836                      Mean   :77.28  
##  3rd Qu.: 13648   3rd Qu.: 11576                      3rd Qu.:94.11  
##  Max.   :171670   Max.   :139494                      Max.   :99.91  
##                   NA's   :100                                        
##     tercile     avg_eal_tercile   
##  Min.   :1.00   Length:1000       
##  1st Qu.:1.00   Class :character  
##  Median :2.00   Mode  :character  
##  Mean   :1.99                     
##  3rd Qu.:3.00                     
##  Max.   :3.00                     
## 
data <- data %>% gather(type,row_count, c(entry,exit,births,deaths)) %>%
  group_by(NAME,avg_eal_tercile,tercile,type) %>%
  summarise(row_count=sum(row_count,na.rm = TRUE)) %>%
  mutate(label= paste0(type,"-",avg_eal_tercile))
## `summarise()` has grouped output by 'NAME', 'avg_eal_tercile', 'tercile'. You
## can override using the `.groups` argument.
color_ramp <- c("births-Low" = "#00b100", "births-Moderate" = "#007e00", "births-High" = "#004b00",
          "deaths-Low" = "#c600c6", "deaths-Moderate" = "#9f009f", "deaths-High" = "#780078")
data %>%
  filter(type %in% c("births", "deaths")) %>%
  group_by(NAME) %>%
  mutate(sort = sum(row_count,na.rm = T)) %>%
  ungroup() %>%
  ggplot(aes(x = fct_reorder(NAME, sort), y = row_count, fill = label)) + 
  geom_bar(position = "dodge", stat = "identity")+
  coord_flip()+
  theme_minimal()+
  scale_fill_manual(values=color_ramp)+
  ggtitle("Births and Deaths - 2013-2022")+
  theme(legend.position = "bottom")

ggsave("figures/bar_chart_births_deaths.png")
## Saving 10 x 20 in image
color_ramp <- c("entry-Low" = "#00b100", "entry-Moderate" = "#007e00", "entry-High" = "#004b00",
          "exit-Low" = "#c600c6", "exit-Moderate" = "#9f009f", "exit-High" = "#780078")
data %>% filter(type %in% c("entry","exit")) %>%
  group_by(NAME) %>%
  mutate(sort = sum(row_count,na.rm = T)) %>%
  ungroup() %>%
  ggplot(aes(fill = label, y = row_count, x = forcats::fct_reorder(NAME, sort))) + 
  geom_bar(position = "dodge", stat = "identity")+
  coord_flip()+
  theme_minimal()+
  scale_fill_manual(values=color_ramp)+
  ggtitle("Entries and Exits - 2013-2022")+
  theme(legend.position = "bottom")

ggsave("figures/bar_chart_entries_exits.png")
## Saving 10 x 20 in image

#Expansion Test

expansion_size <- read_csv("matrix_expansion_rawdata.csv") %>%
  mutate(growth_count=employee_size_location-lag_employees,
         growth_pct=growth_count/lag_employees)
## Rows: 100600 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): company, avg_eal_tercile, avg_eal_tercile_parent, parent_region
## dbl (13): archive_version_year, parent_number, fips_code, primary_naics_code...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary(expansion_size$growth_count)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     1.00     1.00     4.00    23.62    11.00 64975.00
expansion_size %>%
  filter(growth_count <= quantile(growth_count, 0.95)) %>%
  ggplot()+geom_histogram(aes(growth_count))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

summary(expansion_size$growth_pct)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##     0.001     0.333     0.667     3.626     2.000 10999.000
expansion_size %>%
  filter(growth_pct <= quantile(growth_pct, 0.95)) %>%
  ggplot()+geom_histogram(aes(growth_pct))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

process_data <- function(file_path, caption, include_yearly_column = FALSE) {
  table <- read_csv(file_path) %>%
    mutate(
      avg_eal_tercile_parent = ifelse(is.na(avg_eal_tercile_parent), "Outside100", avg_eal_tercile_parent),
      Total = Low + Moderate + High,
      Low_pct = round(Low / Total, 2),
      Moderate_pct = round(Moderate / Total, 2),
      High_pct = round(High / Total, 2),
    )
  
  if (include_yearly_column) {
    table <- table %>%
      rename(year=archive_version_year) %>%
      select(year,avg_eal_tercile_parent, Low, Low_pct, Moderate, Moderate_pct, High, High_pct, Total)
  } else {
      table <- table %>%
      select(avg_eal_tercile_parent, Low, Low_pct, Moderate, Moderate_pct, High, High_pct, Total)
  }
  
  kable(table, caption = caption)
}



# All expansions
process_data("matrix_expansion_all.csv", "ALL Expansions, rows (hq), columns (branch)")
## Rows: 4 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (4): Low, Moderate, High, <NA>
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ALL Expansions, rows (hq), columns (branch)
avg_eal_tercile_parent Low Low_pct Moderate Moderate_pct High High_pct Total
High 3343 0.22 4861 0.32 6970 0.46 15174
Low 5331 0.40 4463 0.34 3412 0.26 13206
Moderate 2857 0.24 5810 0.50 3049 0.26 11716
Outside100 4421 0.31 5441 0.38 4497 0.31 14359
# Yearly expansions
process_data("matrix_expansion_yearly.csv", "A<< Expansions yearly, rows (hq), columns (branch)", TRUE)
## Rows: 40 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (5): archive_version_year, Low, Moderate, High, <NA>
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
A<< Expansions yearly, rows (hq), columns (branch)
year avg_eal_tercile_parent Low Low_pct Moderate Moderate_pct High High_pct Total
2013 High 678 0.22 1057 0.34 1356 0.44 3091
2013 Low 673 0.36 630 0.34 558 0.30 1861
2013 Moderate 570 0.26 986 0.45 657 0.30 2213
2013 Outside100 952 0.31 1114 0.36 1028 0.33 3094
2014 High 202 0.20 314 0.31 498 0.49 1014
2014 Low 237 0.43 177 0.32 138 0.25 552
2014 Moderate 205 0.23 453 0.51 237 0.26 895
2014 Outside100 272 0.31 325 0.37 273 0.31 870
2015 High 201 0.25 244 0.31 351 0.44 796
2015 Low 355 0.41 297 0.34 214 0.25 866
2015 Moderate 173 0.23 398 0.54 169 0.23 740
2015 Outside100 212 0.28 301 0.40 245 0.32 758
2016 High 1014 0.21 1502 0.31 2282 0.48 4798
2016 Low 1289 0.43 929 0.31 775 0.26 2993
2016 Moderate 849 0.24 1763 0.50 929 0.26 3541
2016 Outside100 1221 0.29 1622 0.38 1408 0.33 4251
2017 High 230 0.23 321 0.32 449 0.45 1000
2017 Low 313 0.46 217 0.32 154 0.23 684
2017 Moderate 172 0.23 390 0.53 176 0.24 738
2017 Outside100 309 0.32 359 0.38 288 0.30 956
2018 High 163 0.21 249 0.32 366 0.47 778
2018 Low 250 0.44 183 0.32 135 0.24 568
2018 Moderate 180 0.25 362 0.50 180 0.25 722
2018 Outside100 408 0.33 490 0.40 341 0.28 1239
2019 High 119 0.25 148 0.31 210 0.44 477
2019 Low 131 0.43 101 0.33 71 0.23 303
2019 Moderate 97 0.27 172 0.47 96 0.26 365
2019 Outside100 124 0.32 156 0.40 112 0.29 392
2020 High 107 0.21 173 0.33 238 0.46 518
2020 Low 155 0.40 126 0.33 104 0.27 385
2020 Moderate 79 0.25 158 0.50 80 0.25 317
2020 Outside100 120 0.32 153 0.40 106 0.28 379
2021 High 121 0.25 152 0.31 217 0.44 490
2021 Low 210 0.40 149 0.28 170 0.32 529
2021 Moderate 83 0.28 158 0.54 54 0.18 295
2021 Outside100 114 0.28 148 0.36 145 0.36 407
2022 High 508 0.23 701 0.32 1003 0.45 2212
2022 Low 1718 0.38 1654 0.37 1093 0.24 4465
2022 Moderate 449 0.24 970 0.51 471 0.25 1890
2022 Outside100 689 0.34 773 0.38 551 0.27 2013
# All expansions
process_data("matrix_expansion_all_10.csv", "Expansions, rows (hq), columns (branch)")
## Rows: 4 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (4): Low, Moderate, High, <NA>
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Expansions, rows (hq), columns (branch)
avg_eal_tercile_parent Low Low_pct Moderate Moderate_pct High High_pct Total
High 979 0.21 1424 0.31 2227 0.48 4630
Low 1383 0.47 868 0.29 713 0.24 2964
Moderate 743 0.23 1718 0.53 783 0.24 3244
Outside100 1252 0.31 1530 0.38 1253 0.31 4035
# Yearly expansions
process_data("matrix_expansion_yearly_10.csv", "Expansions yearly, rows (hq), columns (branch)", TRUE)
## Rows: 40 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (5): archive_version_year, Low, Moderate, High, <NA>
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Expansions yearly, rows (hq), columns (branch)
year avg_eal_tercile_parent Low Low_pct Moderate Moderate_pct High High_pct Total
2013 High 225 0.22 346 0.33 466 0.45 1037
2013 Low 211 0.39 159 0.29 169 0.31 539
2013 Moderate 150 0.24 287 0.46 189 0.30 626
2013 Outside100 323 0.30 374 0.35 384 0.36 1081
2014 High 49 0.19 77 0.30 134 0.52 260
2014 Low 71 0.44 55 0.34 37 0.23 163
2014 Moderate 41 0.19 128 0.60 43 0.20 212
2014 Outside100 73 0.28 93 0.36 92 0.36 258
2015 High 47 0.20 68 0.30 115 0.50 230
2015 Low 70 0.42 50 0.30 48 0.29 168
2015 Moderate 49 0.25 96 0.49 51 0.26 196
2015 Outside100 58 0.26 88 0.39 77 0.35 223
2016 High 269 0.21 401 0.31 612 0.48 1282
2016 Low 303 0.47 190 0.30 151 0.23 644
2016 Moderate 214 0.24 461 0.52 219 0.24 894
2016 Outside100 255 0.30 334 0.40 251 0.30 840
2017 High 84 0.21 130 0.32 191 0.47 405
2017 Low 119 0.49 72 0.30 53 0.22 244
2017 Moderate 47 0.22 136 0.63 32 0.15 215
2017 Outside100 129 0.36 128 0.36 103 0.29 360
2018 High 60 0.19 93 0.30 161 0.51 314
2018 Low 104 0.50 60 0.29 45 0.22 209
2018 Moderate 56 0.21 140 0.53 67 0.25 263
2018 Outside100 110 0.34 133 0.41 82 0.25 325
2019 High 34 0.24 39 0.27 69 0.49 142
2019 Low 59 0.46 41 0.32 28 0.22 128
2019 Moderate 30 0.27 56 0.51 24 0.22 110
2019 Outside100 49 0.30 70 0.43 45 0.27 164
2020 High 24 0.16 40 0.27 83 0.56 147
2020 Low 49 0.49 35 0.35 16 0.16 100
2020 Moderate 21 0.23 55 0.60 16 0.17 92
2020 Outside100 36 0.29 60 0.48 28 0.23 124
2021 High 43 0.30 33 0.23 65 0.46 141
2021 Low 45 0.46 26 0.27 27 0.28 98
2021 Moderate 23 0.25 56 0.60 14 0.15 93
2021 Outside100 32 0.29 36 0.33 41 0.38 109
2022 High 144 0.21 197 0.29 331 0.49 672
2022 Low 352 0.52 180 0.27 139 0.21 671
2022 Moderate 112 0.21 303 0.56 128 0.24 543
2022 Outside100 187 0.34 214 0.39 150 0.27 551
process_data("matrix_expansion_all_20.csv", "Expansions, rows (hq), columns (branch)")
## Rows: 4 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (4): Low, Moderate, High, <NA>
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Expansions, rows (hq), columns (branch)
avg_eal_tercile_parent Low Low_pct Moderate Moderate_pct High High_pct Total
High 529 0.20 819 0.31 1337 0.50 2685
Low 886 0.49 522 0.29 409 0.23 1817
Moderate 474 0.23 1127 0.54 480 0.23 2081
Outside100 709 0.29 1006 0.41 723 0.30 2438
# Yearly expansions
process_data("matrix_expansion_yearly_20.csv", "Expansions yearly, rows (hq), columns (branch)", TRUE)
## Rows: 40 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (5): archive_version_year, Low, Moderate, High, <NA>
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Expansions yearly, rows (hq), columns (branch)
year avg_eal_tercile_parent Low Low_pct Moderate Moderate_pct High High_pct Total
2013 High 121 0.20 200 0.33 281 0.47 602
2013 Low 115 0.41 78 0.28 90 0.32 283
2013 Moderate 74 0.22 159 0.48 99 0.30 332
2013 Outside100 152 0.26 242 0.41 194 0.33 588
2014 High 25 0.15 50 0.30 92 0.55 167
2014 Low 53 0.49 37 0.34 19 0.17 109
2014 Moderate 25 0.18 87 0.63 26 0.19 138
2014 Outside100 51 0.28 74 0.41 57 0.31 182
2015 High 20 0.14 44 0.31 76 0.54 140
2015 Low 47 0.44 36 0.34 24 0.22 107
2015 Moderate 26 0.22 60 0.51 32 0.27 118
2015 Outside100 33 0.23 62 0.44 47 0.33 142
2016 High 140 0.20 220 0.32 327 0.48 687
2016 Low 185 0.45 122 0.29 108 0.26 415
2016 Moderate 160 0.27 302 0.51 130 0.22 592
2016 Outside100 148 0.27 231 0.43 163 0.30 542
2017 High 53 0.20 83 0.32 127 0.48 263
2017 Low 86 0.52 51 0.31 27 0.16 164
2017 Moderate 28 0.20 93 0.65 22 0.15 143
2017 Outside100 71 0.34 77 0.37 59 0.29 207
2018 High 42 0.20 60 0.29 104 0.50 206
2018 Low 82 0.49 51 0.31 33 0.20 166
2018 Moderate 42 0.21 106 0.54 49 0.25 197
2018 Outside100 63 0.34 73 0.39 49 0.26 185
2019 High 23 0.27 24 0.28 39 0.45 86
2019 Low 45 0.49 29 0.32 18 0.20 92
2019 Moderate 19 0.28 34 0.50 15 0.22 68
2019 Outside100 37 0.33 47 0.42 27 0.24 111
2020 High 14 0.15 20 0.22 58 0.63 92
2020 Low 31 0.49 25 0.40 7 0.11 63
2020 Moderate 13 0.23 32 0.56 12 0.21 57
2020 Outside100 25 0.31 37 0.46 19 0.23 81
2021 High 17 0.24 14 0.20 40 0.56 71
2021 Low 26 0.57 7 0.15 13 0.28 46
2021 Moderate 17 0.27 36 0.56 11 0.17 64
2021 Outside100 18 0.33 17 0.31 20 0.36 55
2022 High 74 0.20 104 0.28 193 0.52 371
2022 Low 216 0.58 86 0.23 70 0.19 372
2022 Moderate 70 0.19 218 0.59 84 0.23 372
2022 Outside100 111 0.32 146 0.42 88 0.26 345