#Descriptive States
data <- read_csv("all_cbsa_summary.csv")
## Rows: 9400 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile, NAME
## dbl (8): archive_version_year, CBSAFP, rows, with_parent_number, mean_employ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
table1 <- data %>%
mutate(outside_top_100=ifelse(is.na(avg_eal_tercile),"outside_top_100","within_top_100")) %>%
group_by(archive_version_year,outside_top_100) %>%
summarise(rows=sum(rows,na.rm = TRUE)) %>%
pivot_wider(id_cols = archive_version_year,names_from = outside_top_100,values_from = rows) %>%
mutate(outside_prop=outside_top_100/(outside_top_100+within_top_100),
inside_prop=within_top_100/(outside_top_100+within_top_100))
## `summarise()` has grouped output by 'archive_version_year'. You can override
## using the `.groups` argument.
kable(table1,caption="Share of establishments in/out top 100 MSAs")
Share of establishments in/out top 100 MSAs
2013 |
5516520 |
10437838 |
0.3457688 |
0.6542312 |
2014 |
5573021 |
10343882 |
0.3501322 |
0.6498678 |
2015 |
5463901 |
10165015 |
0.3496020 |
0.6503980 |
2016 |
5423220 |
10293413 |
0.3450625 |
0.6549375 |
2017 |
5124766 |
9702386 |
0.3456339 |
0.6543661 |
2018 |
5050960 |
9648466 |
0.3436161 |
0.6563839 |
2019 |
5151613 |
10345141 |
0.3324317 |
0.6675683 |
2020 |
5139349 |
10385438 |
0.3310415 |
0.6689585 |
2021 |
5247277 |
10705948 |
0.3289164 |
0.6710836 |
2022 |
5472590 |
11401758 |
0.3243142 |
0.6756858 |
moves <- read_csv("migration_map.csv") %>%
mutate(old_outside_top_100=ifelse(is.na(avg_eal_tercile_old),"outside_top_100","within_top_100")) %>%
mutate(new_outside_top_100=ifelse(is.na(avg_eal_tercile_new),"outside_top_100","within_top_100")) %>%
mutate(type=paste0(old_outside_top_100,"_to_",new_outside_top_100)) %>%
group_by(archive_version_year,type) %>%
summarise(moves=sum(row_count,na.rm = TRUE)) %>%
pivot_wider(id_cols = archive_version_year,names_from = type,values_from = moves) %>%
rowwise() %>%
mutate(total = sum(c_across(-1), na.rm = TRUE))
## Rows: 63970 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): fips_code, lead_county, avg_eal_tercile_old, NAME_old, avg_eal_ter...
## dbl (12): archive_version_year, row_count, CBSAFP_old, CBSAFP_new, avg_eal_o...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'archive_version_year'. You can override using the `.groups` argument.
kable(moves,caption="Moves in.out top 100 MSAs")
Moves in.out top 100 MSAs
2012 |
3317 |
2218 |
2054 |
3635 |
7907 |
2013 |
3422 |
2435 |
2252 |
3963 |
8650 |
2014 |
2876 |
1899 |
1868 |
3078 |
6845 |
2015 |
2415 |
1740 |
1549 |
2642 |
5931 |
2016 |
3372 |
2437 |
2190 |
3948 |
8575 |
2017 |
2904 |
2411 |
2248 |
3944 |
8603 |
2018 |
4045 |
3731 |
3342 |
6559 |
13632 |
2019 |
3412 |
2721 |
2812 |
4895 |
10428 |
2020 |
2525 |
2143 |
2195 |
4230 |
8568 |
2021 |
2953 |
2672 |
2881 |
5778 |
11331 |
moves_prop <- moves %>%
mutate(prop_outside_top_100_to_outside_top_100=outside_top_100_to_outside_top_100/total,
prop_outside_top_100_to_within_top_100=outside_top_100_to_within_top_100/total,
prop_within_top_100_to_outside_top_100=within_top_100_to_outside_top_100/total,
prop_within_top_100_to_within_top_100=within_top_100_to_within_top_100/total) %>%
select(-c(2:5))
kable(moves,caption="Shares of moves in.out top 100 MSAs")
Shares of moves in.out top 100 MSAs
2012 |
3317 |
2218 |
2054 |
3635 |
7907 |
2013 |
3422 |
2435 |
2252 |
3963 |
8650 |
2014 |
2876 |
1899 |
1868 |
3078 |
6845 |
2015 |
2415 |
1740 |
1549 |
2642 |
5931 |
2016 |
3372 |
2437 |
2190 |
3948 |
8575 |
2017 |
2904 |
2411 |
2248 |
3944 |
8603 |
2018 |
4045 |
3731 |
3342 |
6559 |
13632 |
2019 |
3412 |
2721 |
2812 |
4895 |
10428 |
2020 |
2525 |
2143 |
2195 |
4230 |
8568 |
2021 |
2953 |
2672 |
2881 |
5778 |
11331 |
Census
3 is highest risk, 2 is moderate, 1 is low
table <- read_csv("census.csv")
## Rows: 4 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile
## dbl (18): asian, avghhsize, bacplus, black, child, density, hispanic, medgro...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
table2 <- read_csv("industry.csv")
## Rows: 4 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile
## dbl (14): FIRE, agriculutre_forestry_etc, armed_forces, arts_ent_rec_etc, co...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
kable(table,caption="Census Variables by Risk Level")
Census Variables by Risk Level
1 |
0.0315712 |
2.492941 |
0.3567584 |
0.1567143 |
0.2055315 |
345.1562 |
0.1144124 |
1042.8824 |
68181.35 |
219141.2 |
0.0381176 |
0.6606767 |
0.1227416 |
0.1556827 |
1494376.3 |
0.0503188 |
0.0879593 |
0.6591846 |
2 |
0.0374453 |
2.589091 |
0.3574478 |
0.1343262 |
0.2111732 |
604.2491 |
0.1572416 |
1089.0303 |
71106.09 |
240212.1 |
0.0427232 |
0.6623240 |
0.1234514 |
0.1502595 |
2060857.9 |
0.0543446 |
0.0858594 |
0.6282638 |
3 |
0.0889935 |
2.749091 |
0.3387351 |
0.0946081 |
0.2056592 |
828.4572 |
0.2888781 |
1412.9091 |
75917.24 |
387818.2 |
0.0494946 |
0.6279769 |
0.1258272 |
0.1651189 |
3149811.3 |
0.0588635 |
0.1053852 |
0.4780258 |
Outside |
0.0150771 |
2.527414 |
0.2405747 |
0.0833987 |
0.2033337 |
115.2304 |
0.1335524 |
854.9571 |
56550.76 |
176321.7 |
0.0440430 |
0.6892702 |
0.1554496 |
0.1803648 |
109954.8 |
0.0553571 |
0.1511438 |
0.7239288 |
kable(table2,caption="Census Industry by Risk Level")
Census Industry by Risk Level
1 |
0.0722513 |
0.0089891 |
0.0099165 |
0.0835972 |
0.0632696 |
0.2451436 |
0.0180693 |
0.0957320 |
0.0461523 |
0.1176257 |
0.0523191 |
0.1067635 |
0.0549619 |
0.0252088 |
2 |
0.0703715 |
0.0083459 |
0.0074881 |
0.0846070 |
0.0643641 |
0.2331948 |
0.0176900 |
0.1133960 |
0.0466104 |
0.1192654 |
0.0454758 |
0.1095589 |
0.0535857 |
0.0260463 |
3 |
0.0670700 |
0.0173628 |
0.0086841 |
0.1020863 |
0.0728896 |
0.2182436 |
0.0198586 |
0.0764031 |
0.0474106 |
0.1310278 |
0.0458772 |
0.1119322 |
0.0562385 |
0.0249155 |
Outside |
0.0444278 |
0.0391243 |
0.0089660 |
0.0892926 |
0.0697175 |
0.2388585 |
0.0121106 |
0.1328069 |
0.0469522 |
0.0739920 |
0.0527425 |
0.1165147 |
0.0529105 |
0.0215839 |
##T-tests 3 is highest risk, 2 is moderate, 1 is low
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
table <- read_csv("census_cbsas.csv")
## Rows: 939 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile
## dbl (18): asian, avghhsize, black, hispanic, medgrossrents, medhhinc, median...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
names <- names(table)[2:19]
table_tidy <- table %>% pivot_longer(!avg_eal_tercile)
for (i in names) {
print(i)
x <- table_tidy %>%
filter(name==i) %>%
mutate(avg_eal_tercile=as.character(avg_eal_tercile)) %>%
select(-name)
pairwise_result <- x %>% pairwise_t_test(value~avg_eal_tercile, p.adjust.method = "bonferroni") %>%
mutate(variable=i) %>%
select(variable, everything()) %>%
select(-.y.)
print(pairwise_result)
}
## [1] "asian"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 asian 1 2 34 33 3.9 e- 1 ns 1 e+ 0 ns
## 2 asian 1 3 34 33 1.55e-16 **** 9.31e-16 ****
## 3 asian 2 3 33 33 1.57e-13 **** 9.45e-13 ****
## 4 asian 1 Outside 34 839 7.73e- 4 *** 4.64e- 3 **
## 5 asian 2 Outside 33 839 7.32e- 6 **** 4.39e- 5 ****
## 6 asian 3 Outside 33 839 3.22e-45 **** 1.93e-44 ****
## [1] "avghhsize"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 avghhsize 1 2 34 33 7.5 e-2 ns 4.5 e-1 ns
## 2 avghhsize 1 3 34 33 2.37e-6 **** 1.42e-5 ****
## 3 avghhsize 2 3 33 33 3.32e-3 ** 1.99e-2 *
## 4 avghhsize 1 Outside 34 839 3.72e-1 ns 1 e+0 ns
## 5 avghhsize 2 Outside 33 839 1.16e-1 ns 6.94e-1 ns
## 6 avghhsize 3 Outside 33 839 2.03e-8 **** 1.22e-7 ****
## [1] "black"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 black 1 2 34 33 0.462 ns 1 ns
## 2 black 1 3 34 33 0.0416 * 0.25 ns
## 3 black 2 3 33 33 0.196 ns 1 ns
## 4 black 1 Outside 34 839 0.000798 *** 0.00479 **
## 5 black 2 Outside 33 839 0.0214 * 0.129 ns
## 6 black 3 Outside 33 839 0.612 ns 1 ns
## [1] "hispanic"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 hispanic 1 2 34 33 0.354 ns 1 ns
## 2 hispanic 1 3 34 33 0.00017 *** 0.00102 **
## 3 hispanic 2 3 33 33 0.00479 ** 0.0287 *
## 4 hispanic 1 Outside 34 839 0.563 ns 1 ns
## 5 hispanic 2 Outside 33 839 0.48 ns 1 ns
## 6 hispanic 3 Outside 33 839 0.0000042 **** 0.0000252 ****
## [1] "medgrossrents"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 medgrossren… 1 2 34 33 3.75e- 1 ns 1 e+ 0 ns
## 2 medgrossren… 1 3 34 33 2.24e-12 **** 1.34e-11 ****
## 3 medgrossren… 2 3 33 33 9.51e-10 **** 5.71e- 9 ****
## 4 medgrossren… 1 Outsi… 34 839 5.40e- 7 **** 3.24e- 6 ****
## 5 medgrossren… 2 Outsi… 33 839 8.64e-10 **** 5.18e- 9 ****
## 6 medgrossren… 3 Outsi… 33 839 1.57e-44 **** 9.40e-44 ****
## [1] "medhhinc"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 medhhinc 1 2 34 33 3.35e- 1 ns 1 e+ 0 ns
## 2 medhhinc 1 3 34 33 1.09e- 2 * 6.54e- 2 ns
## 3 medhhinc 2 3 33 33 1.16e- 1 ns 6.94e- 1 ns
## 4 medhhinc 1 Outside 34 839 1.06e- 7 **** 6.39e- 7 ****
## 5 medhhinc 2 Outside 33 839 6.5 e-11 **** 3.90e-10 ****
## 6 medhhinc 3 Outside 33 839 6.88e-18 **** 4.13e-17 ****
## [1] "medianhhval"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 medianhhval 1 2 34 33 4.07e- 1 ns 1 e+ 0 ns
## 2 medianhhval 1 3 34 33 5.51e-11 **** 3.31e-10 ****
## 3 medianhhval 2 3 33 33 1.12e- 8 **** 6.74e- 8 ****
## 4 medianhhval 1 Outside 34 839 1.89e- 2 * 1.13e- 1 ns
## 5 medianhhval 2 Outside 33 839 5.64e- 4 *** 3.38e- 3 **
## 6 medianhhval 3 Outside 33 839 1.61e-28 **** 9.68e-28 ****
## [1] "owner_occupied"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 owner_occupied 1 2 34 33 9.14e-1 ns 1 e+0 ns
## 2 owner_occupied 1 3 34 33 3.16e-2 * 1.89e-1 ns
## 3 owner_occupied 2 3 33 33 2.5 e-2 * 1.5 e-1 ns
## 4 owner_occupied 1 Outsi… 34 839 8.68e-3 ** 5.21e-2 ns
## 5 owner_occupied 2 Outsi… 33 839 1.48e-2 * 8.85e-2 ns
## 6 owner_occupied 3 Outsi… 33 839 3.58e-8 **** 2.15e-7 ****
## [1] "poverty"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 poverty 1 2 34 33 0.963 ns 1 ns
## 2 poverty 1 3 34 33 0.839 ns 1 ns
## 3 poverty 2 3 33 33 0.877 ns 1 ns
## 4 poverty 1 Outside 34 839 0.00275 ** 0.0165 *
## 5 poverty 2 Outside 33 839 0.00388 ** 0.0233 *
## 6 poverty 3 Outside 33 839 0.00749 ** 0.0449 *
## [1] "total"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 total 1 2 34 33 8.36e- 3 ** 5.01e- 2 ns
## 2 total 1 3 34 33 2.91e-14 **** 1.74e-13 ****
## 3 total 2 3 33 33 5.50e- 7 **** 3.30e- 6 ****
## 4 total 1 Outside 34 839 1.02e-18 **** 6.14e-18 ****
## 5 total 2 Outside 33 839 2.02e-33 **** 1.21e-32 ****
## 6 total 3 Outside 33 839 1.69e-71 **** 1.01e-70 ****
## [1] "unemp"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 unemp 1 2 34 33 0.444 ns 1 ns
## 2 unemp 1 3 34 33 0.104 ns 0.626 ns
## 3 unemp 2 3 33 33 0.394 ns 1 ns
## 4 unemp 1 Outside 34 839 0.181 ns 1 ns
## 5 unemp 2 Outside 33 839 0.791 ns 1 ns
## 6 unemp 3 Outside 33 839 0.358 ns 1 ns
## [1] "vacant"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 vacant 1 2 34 33 0.907 ns 1 e+0 ns
## 2 vacant 1 3 34 33 0.333 ns 1 e+0 ns
## 3 vacant 2 3 33 33 0.282 ns 1 e+0 ns
## 4 vacant 1 Outside 34 839 0.00000109 **** 6.56e-6 ****
## 5 vacant 2 Outside 33 839 0.000000693 **** 4.16e-6 ****
## 6 vacant 3 Outside 33 839 0.000482 *** 2.89e-3 **
## [1] "white"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 white 1 2 34 33 5.38e- 1 ns 1 e+ 0 ns
## 2 white 1 3 34 33 3.22e- 4 *** 1.93e- 3 **
## 3 white 2 3 33 33 3.04e- 3 ** 1.82e- 2 *
## 4 white 1 Outside 34 839 7.18e- 2 ns 4.31e- 1 ns
## 5 white 2 Outside 33 839 8.8 e- 3 ** 5.28e- 2 ns
## 6 white 3 Outside 33 839 2.63e-11 **** 1.58e-10 ****
## [1] "other"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 other 1 2 34 33 0.721 ns 1 ns
## 2 other 1 3 34 33 0.377 ns 1 ns
## 3 other 2 3 33 33 0.602 ns 1 ns
## 4 other 1 Outside 34 839 0.52 ns 1 ns
## 5 other 2 Outside 33 839 0.888 ns 1 ns
## 6 other 3 Outside 33 839 0.56 ns 1 ns
## [1] "density"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 density 1 2 34 33 9.40e- 8 **** 5.64e- 7 ****
## 2 density 1 3 34 33 1.4 e-22 **** 8.39e-22 ****
## 3 density 2 3 33 33 4.34e- 6 **** 2.61e- 5 ****
## 4 density 1 Outside 34 839 4.39e-11 **** 2.63e-10 ****
## 5 density 2 Outside 33 839 1.74e-40 **** 1.05e-39 ****
## 6 density 3 Outside 33 839 9.06e-77 **** 5.43e-76 ****
## [1] "child"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 child 1 2 34 33 0.433 ns 1 ns
## 2 child 1 3 34 33 0.986 ns 1 ns
## 3 child 2 3 33 33 0.447 ns 1 ns
## 4 child 1 Outside 34 839 0.67 ns 1 ns
## 5 child 2 Outside 33 839 0.134 ns 0.804 ns
## 6 child 3 Outside 33 839 0.657 ns 1 ns
## [1] "senior"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 senior 1 2 34 33 0.584 ns 1 ns
## 2 senior 1 3 34 33 0.34 ns 1 ns
## 3 senior 2 3 33 33 0.136 ns 0.817 ns
## 4 senior 1 Outside 34 839 0.000513 *** 0.00308 **
## 5 senior 2 Outside 33 839 0.0000303 **** 0.000182 ***
## 6 senior 3 Outside 33 839 0.034 * 0.204 ns
## [1] "bacplus"
## # A tibble: 6 × 9
## variable group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 bacplus 1 2 34 33 9.74e- 1 ns 1 e+ 0 ns
## 2 bacplus 1 3 34 33 3.89e- 1 ns 1 e+ 0 ns
## 3 bacplus 2 3 33 33 3.74e- 1 ns 1 e+ 0 ns
## 4 bacplus 1 Outside 34 839 2.09e-14 **** 1.25e-13 ****
## 5 bacplus 2 Outside 33 839 3.39e-14 **** 2.03e-13 ****
## 6 bacplus 3 Outside 33 839 1.58e-10 **** 9.5 e-10 ****
#Line Graphs
data <- read_csv("all_cbsa_summary.csv") %>%
filter(!is.na(avg_eal))
## Rows: 9400 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile, NAME
## dbl (8): archive_version_year, CBSAFP, rows, with_parent_number, mean_employ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$avg_eal_tercile <- factor(data$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
plot1 <- data %>% ggplot()+
geom_line(aes(archive_version_year,rows,color=NAME),alpha=0.5)+
facet_wrap(~avg_eal_tercile,ncol =3)+
ggtitle("Number of Businesses")
ggsave("figures/number_of_businesses.png")
## Saving 7 x 5 in image
plot2 <- data %>% ggplot()+
geom_line(aes(archive_version_year,mean_year_established,color=NAME),alpha=0.5)+
facet_wrap(~avg_eal_tercile,ncol =3)+
ggtitle("Mean year established")
ggsave("figures/mean_year_established.png")
## Saving 7 x 5 in image
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggplotly(plot1) %>% layout(showlegend = TRUE)
ggplotly(plot2) %>% layout(showlegend = TRUE)
data <- read_csv("cbsa_births_deaths_exits_entrys.csv")
## Rows: 1000 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): NAME, avg_eal_tercile
## dbl (8): archive_version_year, CBSAFP, entry, exit, births, deaths, avg_eal,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$avg_eal_tercile <- factor(data$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
plot1 <- data %>% ggplot()+
geom_line(aes(archive_version_year,births,color=NAME,group=NAME),alpha=0.5)+
facet_wrap(~avg_eal_tercile,ncol =3)+
ggtitle("Births")
ggsave("figures/births.png")
## Saving 7 x 5 in image
plot2 <- data %>% ggplot()+
geom_line(aes(archive_version_year,deaths,color=NAME,group=NAME),alpha=0.5)+
facet_wrap(~avg_eal_tercile,ncol =3)+
ggtitle("Deaths")
ggsave("figures/deaths.png")
## Saving 7 x 5 in image
## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_line()`).
plot3 <- data %>%
ggplot()+
geom_line(aes(archive_version_year,entry,color=NAME,group=NAME),alpha=0.5)+
facet_wrap(~avg_eal_tercile,ncol =3)+
ggtitle("Entries")
ggsave("figures/entries.png")
## Saving 7 x 5 in image
plot4 <- data %>% ggplot()+
geom_line(aes(archive_version_year,exit,color=NAME,group=NAME),alpha=0.5)+
facet_wrap(~avg_eal_tercile,ncol =)+
ggtitle("Exits")
ggsave("figures/exits.png")
## Saving 7 x 5 in image
## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggplotly(plot1)
ggplotly(plot2)
ggplotly(plot3)
ggplotly(plot4)
all <- read_csv("critical_pop_all.csv")
all$avg_eal_tercile <- factor(all$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
all$pop_tercile <- factor(all$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))
births <- read_csv("critical_pop_births.csv") %>%
filter(archive_version_year != 2012)
births$avg_eal_tercile <- factor(births$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
births$pop_tercile <- factor(births$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))
entries <- read_csv("critical_pop_entries.csv")%>%
filter(archive_version_year != 2012)
entries$avg_eal_tercile <- factor(entries$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
entries$pop_tercile <- factor(entries$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))
deaths <- read_csv("critical_pop_deaths.csv")%>%
filter(archive_version_year != 2022)
deaths$avg_eal_tercile <- factor(deaths$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
deaths$pop_tercile <- factor(deaths$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))
exits <- read_csv("critical_pop_exits.csv")%>%
filter(archive_version_year != 2022)
exits$avg_eal_tercile <- factor(exits$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
exits$pop_tercile <- factor(exits$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))
plot1 <- all %>%
ggplot()+
geom_line(aes(archive_version_year,row_count,color=avg_eal_tercile))+
facet_grid(pop_tercile~critical)+
ggtitle("Critical by population tercile, all")
ggsave("figures/critical_by_pop.png")
plot2 <- births %>%ggplot()+
geom_line(aes(archive_version_year,births,color=avg_eal_tercile))+
facet_grid(pop_tercile~critical)+
ggtitle("Critical by tercile, births")
ggsave("figures/naics_by_tercile_births.png")
plot3 <- deaths %>% ggplot()+
geom_line(aes(archive_version_year,deaths,color=avg_eal_tercile))+
facet_grid(pop_tercile~critical)+
ggtitle("Critical by tercile, deaths")
ggsave("figures/naics_by_tercile_deaths.png")
plot4 <- entries %>% ggplot()+
geom_line(aes(archive_version_year,entries,color=avg_eal_tercile))+
facet_grid(pop_tercile~critical)+
ggtitle("Critical by tercile, entries")
ggsave("figures/naics_by_tercile_entries.png")
plot5 <- exits %>% ggplot()+
geom_line(aes(archive_version_year,exits,color=avg_eal_tercile))+
facet_grid(pop_tercile~critical)+
ggtitle("Critical by tercile, exits")
ggsave("figures/naics_by_tercile_exits.png")
plot1

plot2

plot3

plot4

plot5
##By population category
data <- read_csv("all_cbsa_summary.csv") %>%
mutate(CBSAFP=as.character(CBSAFP)) %>%
filter(!is.na(avg_eal)) %>%
left_join(top100_cbsa,by=c("CBSAFP"="GEOID"))
## Rows: 9400 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile, NAME
## dbl (8): archive_version_year, CBSAFP, rows, with_parent_number, mean_employ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$avg_eal_tercile <- factor(data$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
data$pop_tercile <- factor(data$pop_tercile, levels = c("Pop: Low", "Pop: Moderate", "Pop: High"))
plot1 <- data %>% ggplot()+
geom_line(aes(archive_version_year,rows,color=NAME),alpha=0.5)+
facet_grid(pop_tercile~avg_eal_tercile)+
ggtitle("Number of Businesses")
ggsave("figures/number_of_businesses.png")
## Saving 7 x 5 in image
plot2 <- data %>% ggplot()+
geom_line(aes(archive_version_year,mean_year_established,color=NAME),alpha=0.5)+
facet_grid(pop_tercile~avg_eal_tercile)+
ggtitle("Mean year established")
ggsave("figures/mean_year_established.png")
## Saving 7 x 5 in image
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggplotly(plot1) %>% layout(showlegend = TRUE)
ggplotly(plot2) %>% layout(showlegend = TRUE)
data <- read_csv("cbsa_births_deaths_exits_entrys.csv")%>%
mutate(CBSAFP=as.character(CBSAFP)) %>%
filter(!is.na(avg_eal)) %>%
left_join(top100_cbsa,by=c("CBSAFP"="GEOID"))
## Rows: 1000 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): NAME, avg_eal_tercile
## dbl (8): archive_version_year, CBSAFP, entry, exit, births, deaths, avg_eal,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$avg_eal_tercile <- factor(data$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
data$pop_tercile <- factor(data$pop_tercile, levels = c("Pop: Low", "Pop: Moderate", "Pop: High"))
plot1 <- data %>% ggplot()+
geom_line(aes(archive_version_year,births,color=NAME,group=NAME),alpha=0.5)+
facet_grid(pop_tercile~avg_eal_tercile)+
ggtitle("Births")
ggsave("figures/births.png")
## Saving 7 x 5 in image
plot2 <- data %>% ggplot()+
geom_line(aes(archive_version_year,deaths,color=NAME,group=NAME),alpha=0.5)+
facet_grid(pop_tercile~avg_eal_tercile)+
ggtitle("Deaths")
ggsave("figures/deaths.png")
## Saving 7 x 5 in image
## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_line()`).
plot3 <- data %>%
ggplot()+
geom_line(aes(archive_version_year,entry,color=NAME,group=NAME),alpha=0.5)+
facet_grid(pop_tercile~avg_eal_tercile)+
ggtitle("Entries")
ggsave("figures/entries.png")
## Saving 7 x 5 in image
plot4 <- data %>% ggplot()+
geom_line(aes(archive_version_year,exit,color=NAME,group=NAME),alpha=0.5)+
facet_grid(pop_tercile~avg_eal_tercile)+
ggtitle("Exits")
ggsave("figures/exits.png")
## Saving 7 x 5 in image
## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggplotly(plot1)
ggplotly(plot2)
ggplotly(plot3)
ggplotly(plot4)
Line Employees
data <- read_csv("cbsa_births_deaths_exits_entrys_employees.csv")
## Rows: 1000 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): NAME, avg_eal_tercile
## dbl (8): archive_version_year, CBSAFP, entry, exit, births, deaths, avg_eal,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$avg_eal_tercile <- factor(data$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
plot1 <- data %>% ggplot()+
geom_line(aes(archive_version_year,births,color=NAME,group=NAME),alpha=0.5)+
facet_wrap(~avg_eal_tercile,ncol =3)+
ggtitle("Births")
ggsave("figures/births_emp.png")
## Saving 7 x 5 in image
plot2 <- data %>% ggplot()+
geom_line(aes(archive_version_year,deaths,color=NAME,group=NAME),alpha=0.5)+
facet_wrap(~avg_eal_tercile,ncol =3)+
ggtitle("Deaths")
ggsave("figures/deaths_emp.png")
## Saving 7 x 5 in image
## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_line()`).
plot3 <- data %>%
ggplot()+
geom_line(aes(archive_version_year,entry,color=NAME,group=NAME),alpha=0.5)+
facet_wrap(~avg_eal_tercile,ncol =3)+
ggtitle("Entries")
ggsave("figures/entries_emp.png")
## Saving 7 x 5 in image
plot4 <- data %>% ggplot()+
geom_line(aes(archive_version_year,exit,color=NAME,group=NAME),alpha=0.5)+
facet_wrap(~avg_eal_tercile,ncol =)+
ggtitle("Exits")
ggsave("figures/exits_emp.png")
## Saving 7 x 5 in image
## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggplotly(plot1) %>% layout(showlegend = FALSE)
ggplotly(plot2) %>% layout(showlegend = FALSE)
ggplotly(plot3) %>% layout(showlegend = FALSE)
ggplotly(plot4) %>% layout(showlegend = FALSE)
all <- read_csv("critical_pop_all_employees.csv")
all$avg_eal_tercile <- factor(all$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
all$pop_tercile <- factor(all$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))
births <- read_csv("critical_pop_births_employees.csv") %>%
filter(archive_version_year != 2012)
births$avg_eal_tercile <- factor(births$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
births$pop_tercile <- factor(births$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))
entries <- read_csv("critical_pop_entries_employees.csv")%>%
filter(archive_version_year != 2012)
entries$avg_eal_tercile <- factor(entries$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
entries$pop_tercile <- factor(entries$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))
deaths <- read_csv("critical_pop_deaths_employees.csv")%>%
filter(archive_version_year != 2022)
deaths$avg_eal_tercile <- factor(deaths$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
deaths$pop_tercile <- factor(deaths$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))
exits <- read_csv("critical_pop_exits_employees.csv")%>%
filter(archive_version_year != 2022)
exits$avg_eal_tercile <- factor(exits$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
exits$pop_tercile <- factor(exits$pop_tercile, levels = c("Pop: High", "Pop: Moderate", "Pop: Low"))
plot1 <- all %>%
ggplot()+
geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
facet_grid(pop_tercile~critical)+
ggtitle("Critical by population tercile, all - employees")
ggsave("figures/critical_by_pop_emp.png")
plot2 <- births %>%ggplot()+
geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
facet_grid(pop_tercile~critical)+
ggtitle("Critical by tercile, births, births - employees")
ggsave("figures/naics_by_tercile_births_emp.png")
plot3 <- deaths %>% ggplot()+
geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
facet_grid(pop_tercile~critical)+
ggtitle("Critical by tercile, deaths - employees")
ggsave("figures/naics_by_tercile_deaths_emp.png")
plot4 <- entries %>% ggplot()+
geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
facet_grid(pop_tercile~critical)+
ggtitle("Critical by tercile, entries - employees")
ggsave("figures/naics_by_tercile_entries_emp.png")
plot5 <- exits %>% ggplot()+
geom_line(aes(archive_version_year,employees,color=avg_eal_tercile))+
facet_grid(pop_tercile~critical)+
ggtitle("Critical by tercile, exits - employees")
ggsave("figures/naics_by_tercile_exit_emps.png")
plot1

plot2

plot3

plot4

plot5

##By Pop Category
data <- read_csv("all_cbsa_summary.csv") %>%
mutate(CBSAFP=as.character(CBSAFP)) %>%
filter(!is.na(avg_eal)) %>%
left_join(top100_cbsa,by=c("CBSAFP"="GEOID"))
## Rows: 9400 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile, NAME
## dbl (8): archive_version_year, CBSAFP, rows, with_parent_number, mean_employ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$avg_eal_tercile <- factor(data$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
data$pop_tercile <- factor(data$pop_tercile, levels = c("Pop: Low", "Pop: Moderate", "Pop: High"))
plot1 <- data %>% ggplot()+
geom_line(aes(archive_version_year,mean_employees,color=NAME),alpha=0.5)+
facet_grid(pop_tercile~avg_eal_tercile)+
ggtitle("Mean Employees")
ggsave("figures/number_of_employees.png")
## Saving 7 x 5 in image
ggplotly(plot1) %>% layout(showlegend = TRUE)
data <- read_csv("cbsa_births_deaths_exits_entrys_employees.csv")%>%
mutate(CBSAFP=as.character(CBSAFP)) %>%
filter(!is.na(avg_eal)) %>%
left_join(top100_cbsa,by=c("CBSAFP"="GEOID"))
## Rows: 1000 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): NAME, avg_eal_tercile
## dbl (8): archive_version_year, CBSAFP, entry, exit, births, deaths, avg_eal,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data$avg_eal_tercile <- factor(data$avg_eal_tercile, levels = c("High", "Moderate", "Low"))
data$pop_tercile <- factor(data$pop_tercile, levels = c("Pop: Low", "Pop: Moderate", "Pop: High"))
plot1 <- data %>% ggplot()+
geom_line(aes(archive_version_year,births,color=NAME,group=NAME),alpha=0.5)+
facet_grid(pop_tercile~avg_eal_tercile)+
ggtitle("Births")
ggsave("figures/births_emp.png")
## Saving 7 x 5 in image
plot2 <- data %>% ggplot()+
geom_line(aes(archive_version_year,deaths,color=NAME,group=NAME),alpha=0.5)+
facet_grid(pop_tercile~avg_eal_tercile)+
ggtitle("Deaths")
ggsave("figures/deaths_emp.png")
## Saving 7 x 5 in image
## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_line()`).
plot3 <- data %>%
ggplot()+
geom_line(aes(archive_version_year,entry,color=NAME,group=NAME),alpha=0.5)+
facet_grid(pop_tercile~avg_eal_tercile)+
ggtitle("Entries")
ggsave("figures/entries_emp.png")
## Saving 7 x 5 in image
plot4 <- data %>% ggplot()+
geom_line(aes(archive_version_year,exit,color=NAME,group=NAME),alpha=0.5)+
facet_grid(pop_tercile~avg_eal_tercile)+
ggtitle("Exits")
ggsave("figures/exits_emp.png")
## Saving 7 x 5 in image
## Warning: Removed 100 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggplotly(plot1) %>% layout(showlegend = FALSE)
ggplotly(plot2) %>% layout(showlegend = FALSE)
ggplotly(plot3) %>% layout(showlegend = FALSE)
ggplotly(plot4) %>% layout(showlegend = FALSE)
Chord Diagrams
nri_cbsa_all <- read_csv("nri_cbsa_all.csv") %>%
mutate(CBSAFP =as.character(CBSAFP))
## Rows: 940 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_all
## dbl (3): CBSAFP, avg_eal_all, tercile_all
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
regions <- read_csv("top100_regions.csv") %>%
mutate(GEOID =as.character(GEOID)) %>%
select(GEOID, Region)
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): pop_tercile, NAME, Region
## dbl (1): GEOID
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
top100_cbsa <- get_acs(geography = "cbsa", variables = c(pop="B01001_001"),output = "wide") %>%
top_n(100,popE) %>%
mutate(pop_tercile_num = ntile(popE, 3)) %>%
mutate(pop_tercile=as_factor(pop_tercile_num)) %>%
mutate(pop_tercile = factor(pop_tercile, levels = 1:3, labels = c("Pop: Low", "Pop: Moderate", "Pop: High"))) %>%
select(pop_tercile,pop_tercile_num,popE,GEOID) %>%
left_join(regions)
## Getting data from the 2018-2022 5-year ACS
## Joining with `by = join_by(GEOID)`
top100_cbsa_new <- top100_cbsa %>%
rename_with(~paste0(., "_new"), everything())
top100_cbsa_old <- top100_cbsa %>%
rename_with(~paste0(., "_old"), everything())
moves <- read_csv("migration_map.csv") %>%
select(CBSAFP_old,CBSAFP_new,NAME_old,NAME_new,row_count,avg_eal_tercile_old,avg_eal_tercile_new) %>%
mutate(CBSAFP_old=as.character(CBSAFP_old)) %>%
mutate(CBSAFP_new=as.character(CBSAFP_new)) %>%
mutate(NAME_old=ifelse(is.na(avg_eal_tercile_old),"Outside",NAME_old)) %>%
mutate(NAME_new=ifelse(is.na(avg_eal_tercile_new),"Outside",NAME_new)) %>%
group_by(CBSAFP_old,CBSAFP_new,NAME_old,NAME_new,avg_eal_tercile_old,avg_eal_tercile_new) %>%
summarise(row_count=sum(row_count)) %>%
left_join(top100_cbsa_old, by=c("CBSAFP_old"="GEOID_old")) %>%
left_join(top100_cbsa_new, by=c("CBSAFP_new"="GEOID_new")) %>%
mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new)) %>%
mutate(Region_old=ifelse(is.na(Region_old),"Outside",Region_old)) %>%
mutate(Region_new=ifelse(is.na(Region_new),"Outside",Region_new))
## Rows: 63970 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): fips_code, lead_county, avg_eal_tercile_old, NAME_old, avg_eal_ter...
## dbl (12): archive_version_year, row_count, CBSAFP_old, CBSAFP_new, avg_eal_o...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'CBSAFP_old', 'CBSAFP_new', 'NAME_old', 'NAME_new', 'avg_eal_tercile_old'. You can override using the `.groups` argument.
data <- moves %>%
group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption="Risk level to Risk level")
Risk level to Risk level
High |
0.1418442 |
0.0145098 |
0.0363402 |
0.0729597 |
Low |
0.0149617 |
0.0161366 |
0.0323389 |
0.0577516 |
Moderate |
0.0303752 |
0.0298987 |
0.0341958 |
0.0614735 |
Outside |
0.0696075 |
0.0633796 |
0.0675453 |
0.2566818 |
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Risk level to Risk level", cex.main = 1)

png(file="figures/risk_to_risk.png")
dev.off()
## quartz_off_screen
## 2
data <- moves %>%
filter(Region_old==Region_new) %>%
filter(Region_old!="Outside") %>%
group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Moves within Region")
Moves within Region
High |
0.4732835 |
0.0165976 |
0.0817526 |
Low |
0.0194363 |
0.0371360 |
0.0974152 |
Moderate |
0.0714333 |
0.0946433 |
0.1083022 |
chordDiagram(
x=data, directional = 1, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green"))
title("Moves within Region", cex.main = 1)

png(file="figures/moves_within_region.png")
dev.off()
## quartz_off_screen
## 2
data <- moves %>%
filter(Region_old!=Region_new) %>%
group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
table <- moves %>%
filter(Region_old!=Region_new) %>%
filter(avg_eal_tercile_new=="Outside") %>%
left_join(nri_cbsa_all,by=c("CBSAFP_new"="CBSAFP")) %>%
group_by(avg_eal_tercile_all) %>%
summarise(row_count=sum(row_count,na.rm = TRUE)) %>%
kable(caption="Risk of Destinations outside top100")
table
Risk of Destinations outside top100
High |
13518 |
Low |
3901 |
Moderate |
5972 |
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Moves outside Region")
Moves outside Region
High |
0.0510855 |
0.0209662 |
0.0326306 |
0.1467138 |
Low |
0.0204705 |
0.0140766 |
0.0168357 |
0.1161319 |
Moderate |
0.0257410 |
0.0133001 |
0.0151836 |
0.1236163 |
Outside |
0.1399729 |
0.1274494 |
0.1358259 |
NA |
chordDiagram(
x=data, directional = 1, direction.type = c("diffHeight", "arrows"),link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Moves outside Region", cex.main = 1)

png(file="figures/moves_outside_region.png")
dev.off()
## quartz_off_screen
## 2
This chunk produces the chord diagrams for critical services and
non-critical services
data <- read_csv("moves_crit.csv") %>%
mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))
## Rows: 16 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Critical: Risk level to Risk level")
Critical: Risk level to Risk level
Low |
0.0160731 |
0.0317129 |
0.0136222 |
0.0600542 |
Moderate |
0.0289641 |
0.0325660 |
0.0281381 |
0.0618009 |
High |
0.0120379 |
0.0328097 |
0.1314421 |
0.0702234 |
Outside |
0.0668382 |
0.0712932 |
0.0700339 |
0.2723900 |
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Critical: Risk level to Risk level", cex.main = 1)

png(file="figures/critical_risk_to_risk.png")
dev.off()
## quartz_off_screen
## 2
data <- read_csv("moves_non_crit.csv") %>%
mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))
## Rows: 16 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Non-Critical: Risk level to Risk level")
Non-Critical: Risk level to Risk level
Low |
0.0162345 |
0.0333048 |
0.0170285 |
0.0541986 |
Moderate |
0.0313408 |
0.0367105 |
0.0338271 |
0.0609682 |
High |
0.0183239 |
0.0417877 |
0.1578947 |
0.0771818 |
Outside |
0.0580431 |
0.0617622 |
0.0689497 |
0.2324440 |
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Non-Critical: Risk level to Risk level", cex.main = 1)

png(file="figures/non_critical_risk_to_risk.png")
dev.off()
## quartz_off_screen
## 2
This chunk produces the chord diagrams for standalone and chains
data <- read_csv("moves_chain.csv") %>%
mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))
## Rows: 16 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Chain: Risk level to Risk level")
Chain: Risk level to Risk level
Low |
0.0239119 |
0.0384862 |
0.0292970 |
0.0452547 |
Moderate |
0.0373499 |
0.0460452 |
0.0461934 |
0.0478237 |
High |
0.0328541 |
0.0570130 |
0.1558223 |
0.0509856 |
Outside |
0.0636826 |
0.0644731 |
0.0606689 |
0.2001383 |
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Chain: Risk level to Risk level", cex.main = 1)

png(file="figures/chain_risk_to_risk.png")
dev.off()
## quartz_off_screen
## 2
data <- read_csv("moves_standalone.csv") %>%
mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))
## Rows: 16 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Standalone: Risk level to Risk level")
Standalone: Risk level to Risk level
Low |
0.0145856 |
0.0311126 |
0.0121021 |
0.0602444 |
Moderate |
0.0284123 |
0.0318321 |
0.0272199 |
0.0641963 |
High |
0.0108505 |
0.0322164 |
0.1390559 |
0.0773431 |
Outside |
0.0633192 |
0.0681581 |
0.0713906 |
0.2679610 |
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Standalone: Risk level to Risk level", cex.main = 1)

png(file="figures/standalone_risk_to_risk.png")
dev.off()
## quartz_off_screen
## 2
This chunk produces across population categories. Outside the top100
is excluded.
#within same population category
data <- moves %>%
filter(pop_tercile_num_old==pop_tercile_num_new) %>%
group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Same pop category: Risk level to Risk level")
Same pop category: Risk level to Risk level
High |
0.4669255 |
0.0449229 |
0.0955934 |
Low |
0.0455783 |
0.0355954 |
0.0831401 |
Moderate |
0.0779974 |
0.0715438 |
0.0787032 |
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Same pop category: Risk level to Risk level", cex.main = 1)

png(file="figures/samepop_risk_to_risk.png")
#lower pop to higher pop
data <- moves %>%
filter(!is.na(pop_tercile_old),pop_tercile_num_old>pop_tercile_num_new) %>%
group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Towards higher pop: Risk level to Risk level")
Towards higher pop: Risk level to Risk level
High |
0.3530192 |
0.0613562 |
0.1576342 |
Low |
0.0191849 |
0.0562285 |
0.0869066 |
Moderate |
0.0564937 |
0.0899125 |
0.1192644 |
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Towards higher pop: Risk level to Risk level", cex.main = 1)
png(file="figures/low_to_high_risk_to_risk.png")
#higher pop to lower pop
data <- moves %>%
filter(!is.na(pop_tercile_old),pop_tercile_num_old<pop_tercile_num_new) %>%
group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Toward lower pop: Risk level to Risk level")
Toward lower pop: Risk level to Risk level
High |
0.3478789 |
0.0157023 |
0.0645441 |
Low |
0.0607270 |
0.0539603 |
0.1131257 |
Moderate |
0.1310835 |
0.1043637 |
0.1086146 |
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Toward lower pop: Risk level to Risk level", cex.main = 1)
png(file="figures/high_to_low_risk_to_risk.png")
This chunk produces across population categories. Inclusive of
top100.
#within same population category
data <- moves %>%
filter(pop_tercile_num_old==pop_tercile_num_new|avg_eal_tercile_old=="Outside"|avg_eal_tercile_new=="Outside") %>%
group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Same pop category: Risk level to Risk level")
Same pop category: Risk level to Risk level
High |
0.0936656 |
0.0090116 |
0.0191761 |
0.0898122 |
Low |
0.0091430 |
0.0071405 |
0.0166780 |
0.0710912 |
Moderate |
0.0156463 |
0.0143517 |
0.0157879 |
0.0756728 |
Outside |
0.0856857 |
0.0780193 |
0.0831471 |
0.3159710 |
table <- moves %>%
filter(avg_eal_tercile_old=="Outside") %>%
filter(avg_eal_tercile_new=="Outside") %>%
left_join(nri_cbsa_all,by=c("CBSAFP_new"="CBSAFP")) %>%
group_by(avg_eal_tercile_all) %>%
summarise(row_count=sum(row_count,na.rm = TRUE)) %>%
kable(caption="Same pop category:Risk of Destinations outside top100")
table
Same pop category:Risk of Destinations outside top100
High |
13625 |
Low |
8181 |
Moderate |
9435 |
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Same pop category: Risk level to Risk level", cex.main = 1)

png(file="figures/samepop_risk_to_risk.png")
#lower pop to higher pop
data <- moves %>%
filter(!is.na(pop_tercile_old),pop_tercile_num_old>pop_tercile_num_new) %>%
group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Towards higher pop: Risk level to Risk level")
Towards higher pop: Risk level to Risk level
High |
0.3530192 |
0.0613562 |
0.1576342 |
Low |
0.0191849 |
0.0562285 |
0.0869066 |
Moderate |
0.0564937 |
0.0899125 |
0.1192644 |
table <- moves %>%
filter(!is.na(pop_tercile_old),pop_tercile_num_old>pop_tercile_num_new) %>%
filter(avg_eal_tercile_new=="Outside") %>%
left_join(nri_cbsa_all,by=c("CBSAFP_new"="CBSAFP")) %>%
group_by(avg_eal_tercile_all) %>%
summarise(row_count=sum(row_count,na.rm = TRUE)) %>%
kable(caption="Towards higher pop:Risk of Destinations outside top100")
table
Towards higher pop:Risk of
Destinations outside top100
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Towards higher pop: Risk level to Risk level", cex.main = 1)
png(file="figures/low_to_high_risk_to_risk.png")
#higher pop to lower pop
data <- moves %>%
filter(!is.na(pop_tercile_old),pop_tercile_num_old<pop_tercile_num_new) %>%
group_by(avg_eal_tercile_old,avg_eal_tercile_new) %>%
summarise(row_count=sum(row_count))
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Toward lower pop: Risk level to Risk level")
Toward lower pop: Risk level to Risk level
High |
0.3478789 |
0.0157023 |
0.0645441 |
Low |
0.0607270 |
0.0539603 |
0.1131257 |
Moderate |
0.1310835 |
0.1043637 |
0.1086146 |
table <- moves %>%
filter(!is.na(pop_tercile_old),pop_tercile_num_old<pop_tercile_num_new) %>%
filter(avg_eal_tercile_new=="Outside") %>%
left_join(nri_cbsa_all,by=c("CBSAFP_new"="CBSAFP")) %>%
group_by(avg_eal_tercile_all) %>%
summarise(row_count=sum(row_count,na.rm = TRUE)) %>%
kable(caption="Towards lower pop:Risk of Destinations outside top100")
table
Towards lower pop:Risk of
Destinations outside top100
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Toward lower pop: Risk level to Risk level", cex.main = 1)
png(file="figures/high_to_low_risk_to_risk.png")
This chunk produces for same population moves, crit/non,
chain/non-chain
data <- read_csv("moves_crit_same_pop.csv") %>%
mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))
## Rows: 9 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Critical Same Population Moves: Risk level to Risk level")
Critical Same Population Moves: Risk level to Risk
level
High |
0.4626455 |
0.0411587 |
0.0918508 |
Low |
0.0439087 |
0.0399670 |
0.0880924 |
Moderate |
0.0735173 |
0.0747090 |
0.0841507 |
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Critical Same Population Moves: Risk level to Risk level", cex.main = 1)

png(file="figures/chain_same_pop_risk_to_risk.png")
dev.off()
## quartz_off_screen
## 2
#noncrit
data <- read_csv("moves_non_crit_same_pop.csv") %>%
mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))
## Rows: 9 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Non-Critical Same Population Moves: Risk level to Risk level")
Non-Critical Same Population Moves: Risk level to Risk
level
High |
0.4721569 |
0.0495238 |
0.1001681 |
Low |
0.0476190 |
0.0302521 |
0.0770868 |
Moderate |
0.0834734 |
0.0676751 |
0.0720448 |
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Non-Critical Same Population Moves: Risk level to Risk level", cex.main = 1)

png(file="figures/chain_same_pop_risk_to_risk.png")
dev.off()
## quartz_off_screen
## 2
#chain
data <- read_csv("moves_chain_same_pop.csv") %>%
mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))
## Rows: 9 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Chain Same Population Moves: Risk level to Risk level")
Chain Same Population Moves: Risk level to Risk level
High |
0.3718961 |
0.0741097 |
0.1376323 |
Low |
0.0666025 |
0.0381136 |
0.0694899 |
Moderate |
0.1054860 |
0.0625602 |
0.0741097 |
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Chain Same Population Moves: Risk level to Risk level", cex.main = 1)

png(file="figures/chain_same_pop_risk_to_risk.png")
dev.off()
## quartz_off_screen
## 2
#standalone
data <- read_csv("moves_standalone_same_pop.csv") %>%
mutate(avg_eal_tercile_old=ifelse(is.na(avg_eal_tercile_old),"Outside",avg_eal_tercile_old)) %>%
mutate(avg_eal_tercile_new=ifelse(is.na(avg_eal_tercile_new),"Outside",avg_eal_tercile_new))
## Rows: 9 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): avg_eal_tercile_old, avg_eal_tercile_new
## dbl (1): row_count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data %>%
transmute(avg_eal_tercile_old,avg_eal_tercile_new,pct=row_count/sum(data$row_count)) %>%
pivot_wider(names_from = avg_eal_tercile_new,values_from = pct) %>%
kable(caption = "Standalone Same Population Moves: Risk level to Risk level")
Standalone Same Population Moves: Risk level to Risk
level
High |
0.5006490 |
0.0345652 |
0.0806749 |
Low |
0.0381174 |
0.0347018 |
0.0879842 |
Moderate |
0.0682424 |
0.0747319 |
0.0803334 |
chordDiagram(
x=data,
directional = 1,
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
grid.col = c(High="red",Moderate="yellow", Low="green",Outside="gray"))
title("Standalone Same Population Moves: Risk level to Risk level", cex.main = 1)

png(file="figures/chain_same_pop_risk_to_risk.png")
dev.off()
## quartz_off_screen
## 2
Bar Charts
This chunk produces bar graphs for all 100 CBSAs and corresponding
entries/exits and births/deaths for all years of data
library(ggpattern)
data <- read_csv("cbsa_births_deaths_exits_entrys.csv")
## Rows: 1000 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): NAME, avg_eal_tercile
## dbl (8): archive_version_year, CBSAFP, entry, exit, births, deaths, avg_eal,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary(data)
## archive_version_year CBSAFP entry exit
## Min. :2013 Min. :10420 Min. : 1.00 Min. : 1.00
## 1st Qu.:2015 1st Qu.:19348 1st Qu.: 26.00 1st Qu.: 25.00
## Median :2018 Median :32060 Median : 45.00 Median : 44.00
## Mean :2018 Mean :30209 Mean : 67.08 Mean : 67.08
## 3rd Qu.:2020 3rd Qu.:40080 3rd Qu.: 84.00 3rd Qu.: 79.00
## Max. :2022 Max. :49340 Max. :561.00 Max. :670.00
## NA's :100
## births deaths NAME avg_eal
## Min. : 1328 Min. : 1128 Length:1000 Min. :38.21
## 1st Qu.: 3617 1st Qu.: 3416 Class :character 1st Qu.:65.60
## Median : 6166 Median : 5294 Mode :character Median :75.28
## Mean : 12411 Mean : 10836 Mean :77.28
## 3rd Qu.: 13648 3rd Qu.: 11576 3rd Qu.:94.11
## Max. :171670 Max. :139494 Max. :99.91
## NA's :100
## tercile avg_eal_tercile
## Min. :1.00 Length:1000
## 1st Qu.:1.00 Class :character
## Median :2.00 Mode :character
## Mean :1.99
## 3rd Qu.:3.00
## Max. :3.00
##
data <- data %>% gather(type,row_count, c(entry,exit,births,deaths)) %>%
group_by(NAME,avg_eal_tercile,tercile,type) %>%
summarise(row_count=sum(row_count,na.rm = TRUE)) %>%
mutate(label= paste0(type,"-",avg_eal_tercile))
## `summarise()` has grouped output by 'NAME', 'avg_eal_tercile', 'tercile'. You
## can override using the `.groups` argument.
color_ramp <- c("births-Low" = "#00b100", "births-Moderate" = "#007e00", "births-High" = "#004b00",
"deaths-Low" = "#c600c6", "deaths-Moderate" = "#9f009f", "deaths-High" = "#780078")
data %>%
filter(type %in% c("births", "deaths")) %>%
group_by(NAME) %>%
mutate(sort = sum(row_count,na.rm = T)) %>%
ungroup() %>%
ggplot(aes(x = fct_reorder(NAME, sort), y = row_count, fill = label)) +
geom_bar(position = "dodge", stat = "identity")+
coord_flip()+
theme_minimal()+
scale_fill_manual(values=color_ramp)+
ggtitle("Births and Deaths - 2013-2022")+
theme(legend.position = "bottom")

ggsave("figures/bar_chart_births_deaths.png")
## Saving 10 x 20 in image
color_ramp <- c("entry-Low" = "#00b100", "entry-Moderate" = "#007e00", "entry-High" = "#004b00",
"exit-Low" = "#c600c6", "exit-Moderate" = "#9f009f", "exit-High" = "#780078")
data %>% filter(type %in% c("entry","exit")) %>%
group_by(NAME) %>%
mutate(sort = sum(row_count,na.rm = T)) %>%
ungroup() %>%
ggplot(aes(fill = label, y = row_count, x = forcats::fct_reorder(NAME, sort))) +
geom_bar(position = "dodge", stat = "identity")+
coord_flip()+
theme_minimal()+
scale_fill_manual(values=color_ramp)+
ggtitle("Entries and Exits - 2013-2022")+
theme(legend.position = "bottom")

ggsave("figures/bar_chart_entries_exits.png")
## Saving 10 x 20 in image
#Expansion Test
expansion_size <- read_csv("matrices_old/matrix_expansion_rawdata.csv") %>%
mutate(growth_count=employee_size_location-lag_employees,
growth_pct=growth_count/lag_employees)
## Rows: 100600 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): company, avg_eal_tercile, avg_eal_tercile_parent, parent_region
## dbl (13): archive_version_year, parent_number, fips_code, primary_naics_code...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary(expansion_size$growth_count)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 1.00 4.00 23.62 11.00 64975.00
expansion_size %>%
filter(growth_count <= quantile(growth_count, 0.95)) %>%
ggplot()+geom_histogram(aes(growth_count))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

summary(expansion_size$growth_pct)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.001 0.333 0.667 3.626 2.000 10999.000
expansion_size %>%
filter(growth_pct <= quantile(growth_pct, 0.95)) %>%
ggplot()+geom_histogram(aes(growth_pct))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

process_data <- function(file_path, caption, include_yearly_column = FALSE) {
table <- read_csv(file_path) %>%
mutate(
avg_eal_tercile_parent = ifelse(is.na(avg_eal_tercile_parent), "Outside100", avg_eal_tercile_parent),
Total = Low + Moderate + High,
Low_pct = round(Low / Total, 2),
Moderate_pct = round(Moderate / Total, 2),
High_pct = round(High / Total, 2),
)
if (include_yearly_column) {
table <- table %>%
rename(year=archive_version_year) %>%
select(year,avg_eal_tercile_parent, Low, Low_pct, Moderate, Moderate_pct, High, High_pct, Total)
} else {
table <- table %>%
select(avg_eal_tercile_parent, Low, Low_pct, Moderate, Moderate_pct, High, High_pct, Total)
}
kable(table, caption = caption)
}
# All expansions
process_data("matrices_old/matrix_expansion_all.csv", "ALL Expansions, rows (hq), columns (branch)")
## Rows: 4 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (4): Low, Moderate, High, <NA>
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ALL Expansions, rows (hq), columns (branch)
High |
3343 |
0.22 |
4861 |
0.32 |
6970 |
0.46 |
15174 |
Low |
5331 |
0.40 |
4463 |
0.34 |
3412 |
0.26 |
13206 |
Moderate |
2857 |
0.24 |
5810 |
0.50 |
3049 |
0.26 |
11716 |
Outside100 |
4421 |
0.31 |
5441 |
0.38 |
4497 |
0.31 |
14359 |
# Yearly expansions
process_data("matrices_old/matrix_expansion_yearly.csv", "A<< Expansions yearly, rows (hq), columns (branch)", TRUE)
## Rows: 40 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (5): archive_version_year, Low, Moderate, High, <NA>
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
A<< Expansions yearly, rows (hq), columns
(branch)
2013 |
High |
678 |
0.22 |
1057 |
0.34 |
1356 |
0.44 |
3091 |
2013 |
Low |
673 |
0.36 |
630 |
0.34 |
558 |
0.30 |
1861 |
2013 |
Moderate |
570 |
0.26 |
986 |
0.45 |
657 |
0.30 |
2213 |
2013 |
Outside100 |
952 |
0.31 |
1114 |
0.36 |
1028 |
0.33 |
3094 |
2014 |
High |
202 |
0.20 |
314 |
0.31 |
498 |
0.49 |
1014 |
2014 |
Low |
237 |
0.43 |
177 |
0.32 |
138 |
0.25 |
552 |
2014 |
Moderate |
205 |
0.23 |
453 |
0.51 |
237 |
0.26 |
895 |
2014 |
Outside100 |
272 |
0.31 |
325 |
0.37 |
273 |
0.31 |
870 |
2015 |
High |
201 |
0.25 |
244 |
0.31 |
351 |
0.44 |
796 |
2015 |
Low |
355 |
0.41 |
297 |
0.34 |
214 |
0.25 |
866 |
2015 |
Moderate |
173 |
0.23 |
398 |
0.54 |
169 |
0.23 |
740 |
2015 |
Outside100 |
212 |
0.28 |
301 |
0.40 |
245 |
0.32 |
758 |
2016 |
High |
1014 |
0.21 |
1502 |
0.31 |
2282 |
0.48 |
4798 |
2016 |
Low |
1289 |
0.43 |
929 |
0.31 |
775 |
0.26 |
2993 |
2016 |
Moderate |
849 |
0.24 |
1763 |
0.50 |
929 |
0.26 |
3541 |
2016 |
Outside100 |
1221 |
0.29 |
1622 |
0.38 |
1408 |
0.33 |
4251 |
2017 |
High |
230 |
0.23 |
321 |
0.32 |
449 |
0.45 |
1000 |
2017 |
Low |
313 |
0.46 |
217 |
0.32 |
154 |
0.23 |
684 |
2017 |
Moderate |
172 |
0.23 |
390 |
0.53 |
176 |
0.24 |
738 |
2017 |
Outside100 |
309 |
0.32 |
359 |
0.38 |
288 |
0.30 |
956 |
2018 |
High |
163 |
0.21 |
249 |
0.32 |
366 |
0.47 |
778 |
2018 |
Low |
250 |
0.44 |
183 |
0.32 |
135 |
0.24 |
568 |
2018 |
Moderate |
180 |
0.25 |
362 |
0.50 |
180 |
0.25 |
722 |
2018 |
Outside100 |
408 |
0.33 |
490 |
0.40 |
341 |
0.28 |
1239 |
2019 |
High |
119 |
0.25 |
148 |
0.31 |
210 |
0.44 |
477 |
2019 |
Low |
131 |
0.43 |
101 |
0.33 |
71 |
0.23 |
303 |
2019 |
Moderate |
97 |
0.27 |
172 |
0.47 |
96 |
0.26 |
365 |
2019 |
Outside100 |
124 |
0.32 |
156 |
0.40 |
112 |
0.29 |
392 |
2020 |
High |
107 |
0.21 |
173 |
0.33 |
238 |
0.46 |
518 |
2020 |
Low |
155 |
0.40 |
126 |
0.33 |
104 |
0.27 |
385 |
2020 |
Moderate |
79 |
0.25 |
158 |
0.50 |
80 |
0.25 |
317 |
2020 |
Outside100 |
120 |
0.32 |
153 |
0.40 |
106 |
0.28 |
379 |
2021 |
High |
121 |
0.25 |
152 |
0.31 |
217 |
0.44 |
490 |
2021 |
Low |
210 |
0.40 |
149 |
0.28 |
170 |
0.32 |
529 |
2021 |
Moderate |
83 |
0.28 |
158 |
0.54 |
54 |
0.18 |
295 |
2021 |
Outside100 |
114 |
0.28 |
148 |
0.36 |
145 |
0.36 |
407 |
2022 |
High |
508 |
0.23 |
701 |
0.32 |
1003 |
0.45 |
2212 |
2022 |
Low |
1718 |
0.38 |
1654 |
0.37 |
1093 |
0.24 |
4465 |
2022 |
Moderate |
449 |
0.24 |
970 |
0.51 |
471 |
0.25 |
1890 |
2022 |
Outside100 |
689 |
0.34 |
773 |
0.38 |
551 |
0.27 |
2013 |
# All expansions
process_data("matrices_old/matrix_expansion_all_10.csv", "Expansions, rows (hq), columns (branch)")
## Rows: 4 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (4): Low, Moderate, High, <NA>
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Expansions, rows (hq), columns (branch)
High |
979 |
0.21 |
1424 |
0.31 |
2227 |
0.48 |
4630 |
Low |
1383 |
0.47 |
868 |
0.29 |
713 |
0.24 |
2964 |
Moderate |
743 |
0.23 |
1718 |
0.53 |
783 |
0.24 |
3244 |
Outside100 |
1252 |
0.31 |
1530 |
0.38 |
1253 |
0.31 |
4035 |
# Yearly expansions
process_data("matrices_old/matrix_expansion_yearly_10.csv", "Expansions yearly, rows (hq), columns (branch)", TRUE)
## Rows: 40 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (5): archive_version_year, Low, Moderate, High, <NA>
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Expansions yearly, rows (hq), columns (branch)
2013 |
High |
225 |
0.22 |
346 |
0.33 |
466 |
0.45 |
1037 |
2013 |
Low |
211 |
0.39 |
159 |
0.29 |
169 |
0.31 |
539 |
2013 |
Moderate |
150 |
0.24 |
287 |
0.46 |
189 |
0.30 |
626 |
2013 |
Outside100 |
323 |
0.30 |
374 |
0.35 |
384 |
0.36 |
1081 |
2014 |
High |
49 |
0.19 |
77 |
0.30 |
134 |
0.52 |
260 |
2014 |
Low |
71 |
0.44 |
55 |
0.34 |
37 |
0.23 |
163 |
2014 |
Moderate |
41 |
0.19 |
128 |
0.60 |
43 |
0.20 |
212 |
2014 |
Outside100 |
73 |
0.28 |
93 |
0.36 |
92 |
0.36 |
258 |
2015 |
High |
47 |
0.20 |
68 |
0.30 |
115 |
0.50 |
230 |
2015 |
Low |
70 |
0.42 |
50 |
0.30 |
48 |
0.29 |
168 |
2015 |
Moderate |
49 |
0.25 |
96 |
0.49 |
51 |
0.26 |
196 |
2015 |
Outside100 |
58 |
0.26 |
88 |
0.39 |
77 |
0.35 |
223 |
2016 |
High |
269 |
0.21 |
401 |
0.31 |
612 |
0.48 |
1282 |
2016 |
Low |
303 |
0.47 |
190 |
0.30 |
151 |
0.23 |
644 |
2016 |
Moderate |
214 |
0.24 |
461 |
0.52 |
219 |
0.24 |
894 |
2016 |
Outside100 |
255 |
0.30 |
334 |
0.40 |
251 |
0.30 |
840 |
2017 |
High |
84 |
0.21 |
130 |
0.32 |
191 |
0.47 |
405 |
2017 |
Low |
119 |
0.49 |
72 |
0.30 |
53 |
0.22 |
244 |
2017 |
Moderate |
47 |
0.22 |
136 |
0.63 |
32 |
0.15 |
215 |
2017 |
Outside100 |
129 |
0.36 |
128 |
0.36 |
103 |
0.29 |
360 |
2018 |
High |
60 |
0.19 |
93 |
0.30 |
161 |
0.51 |
314 |
2018 |
Low |
104 |
0.50 |
60 |
0.29 |
45 |
0.22 |
209 |
2018 |
Moderate |
56 |
0.21 |
140 |
0.53 |
67 |
0.25 |
263 |
2018 |
Outside100 |
110 |
0.34 |
133 |
0.41 |
82 |
0.25 |
325 |
2019 |
High |
34 |
0.24 |
39 |
0.27 |
69 |
0.49 |
142 |
2019 |
Low |
59 |
0.46 |
41 |
0.32 |
28 |
0.22 |
128 |
2019 |
Moderate |
30 |
0.27 |
56 |
0.51 |
24 |
0.22 |
110 |
2019 |
Outside100 |
49 |
0.30 |
70 |
0.43 |
45 |
0.27 |
164 |
2020 |
High |
24 |
0.16 |
40 |
0.27 |
83 |
0.56 |
147 |
2020 |
Low |
49 |
0.49 |
35 |
0.35 |
16 |
0.16 |
100 |
2020 |
Moderate |
21 |
0.23 |
55 |
0.60 |
16 |
0.17 |
92 |
2020 |
Outside100 |
36 |
0.29 |
60 |
0.48 |
28 |
0.23 |
124 |
2021 |
High |
43 |
0.30 |
33 |
0.23 |
65 |
0.46 |
141 |
2021 |
Low |
45 |
0.46 |
26 |
0.27 |
27 |
0.28 |
98 |
2021 |
Moderate |
23 |
0.25 |
56 |
0.60 |
14 |
0.15 |
93 |
2021 |
Outside100 |
32 |
0.29 |
36 |
0.33 |
41 |
0.38 |
109 |
2022 |
High |
144 |
0.21 |
197 |
0.29 |
331 |
0.49 |
672 |
2022 |
Low |
352 |
0.52 |
180 |
0.27 |
139 |
0.21 |
671 |
2022 |
Moderate |
112 |
0.21 |
303 |
0.56 |
128 |
0.24 |
543 |
2022 |
Outside100 |
187 |
0.34 |
214 |
0.39 |
150 |
0.27 |
551 |
process_data("matrices_old/matrix_expansion_all_20.csv", "Expansions, rows (hq), columns (branch)")
## Rows: 4 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (4): Low, Moderate, High, <NA>
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Expansions, rows (hq), columns (branch)
High |
529 |
0.20 |
819 |
0.31 |
1337 |
0.50 |
2685 |
Low |
886 |
0.49 |
522 |
0.29 |
409 |
0.23 |
1817 |
Moderate |
474 |
0.23 |
1127 |
0.54 |
480 |
0.23 |
2081 |
Outside100 |
709 |
0.29 |
1006 |
0.41 |
723 |
0.30 |
2438 |
# Yearly expansions
process_data("matrices_old/matrix_expansion_yearly_20.csv", "Expansions yearly, rows (hq), columns (branch)", TRUE)
## Rows: 40 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): avg_eal_tercile_parent
## dbl (5): archive_version_year, Low, Moderate, High, <NA>
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Expansions yearly, rows (hq), columns (branch)
2013 |
High |
121 |
0.20 |
200 |
0.33 |
281 |
0.47 |
602 |
2013 |
Low |
115 |
0.41 |
78 |
0.28 |
90 |
0.32 |
283 |
2013 |
Moderate |
74 |
0.22 |
159 |
0.48 |
99 |
0.30 |
332 |
2013 |
Outside100 |
152 |
0.26 |
242 |
0.41 |
194 |
0.33 |
588 |
2014 |
High |
25 |
0.15 |
50 |
0.30 |
92 |
0.55 |
167 |
2014 |
Low |
53 |
0.49 |
37 |
0.34 |
19 |
0.17 |
109 |
2014 |
Moderate |
25 |
0.18 |
87 |
0.63 |
26 |
0.19 |
138 |
2014 |
Outside100 |
51 |
0.28 |
74 |
0.41 |
57 |
0.31 |
182 |
2015 |
High |
20 |
0.14 |
44 |
0.31 |
76 |
0.54 |
140 |
2015 |
Low |
47 |
0.44 |
36 |
0.34 |
24 |
0.22 |
107 |
2015 |
Moderate |
26 |
0.22 |
60 |
0.51 |
32 |
0.27 |
118 |
2015 |
Outside100 |
33 |
0.23 |
62 |
0.44 |
47 |
0.33 |
142 |
2016 |
High |
140 |
0.20 |
220 |
0.32 |
327 |
0.48 |
687 |
2016 |
Low |
185 |
0.45 |
122 |
0.29 |
108 |
0.26 |
415 |
2016 |
Moderate |
160 |
0.27 |
302 |
0.51 |
130 |
0.22 |
592 |
2016 |
Outside100 |
148 |
0.27 |
231 |
0.43 |
163 |
0.30 |
542 |
2017 |
High |
53 |
0.20 |
83 |
0.32 |
127 |
0.48 |
263 |
2017 |
Low |
86 |
0.52 |
51 |
0.31 |
27 |
0.16 |
164 |
2017 |
Moderate |
28 |
0.20 |
93 |
0.65 |
22 |
0.15 |
143 |
2017 |
Outside100 |
71 |
0.34 |
77 |
0.37 |
59 |
0.29 |
207 |
2018 |
High |
42 |
0.20 |
60 |
0.29 |
104 |
0.50 |
206 |
2018 |
Low |
82 |
0.49 |
51 |
0.31 |
33 |
0.20 |
166 |
2018 |
Moderate |
42 |
0.21 |
106 |
0.54 |
49 |
0.25 |
197 |
2018 |
Outside100 |
63 |
0.34 |
73 |
0.39 |
49 |
0.26 |
185 |
2019 |
High |
23 |
0.27 |
24 |
0.28 |
39 |
0.45 |
86 |
2019 |
Low |
45 |
0.49 |
29 |
0.32 |
18 |
0.20 |
92 |
2019 |
Moderate |
19 |
0.28 |
34 |
0.50 |
15 |
0.22 |
68 |
2019 |
Outside100 |
37 |
0.33 |
47 |
0.42 |
27 |
0.24 |
111 |
2020 |
High |
14 |
0.15 |
20 |
0.22 |
58 |
0.63 |
92 |
2020 |
Low |
31 |
0.49 |
25 |
0.40 |
7 |
0.11 |
63 |
2020 |
Moderate |
13 |
0.23 |
32 |
0.56 |
12 |
0.21 |
57 |
2020 |
Outside100 |
25 |
0.31 |
37 |
0.46 |
19 |
0.23 |
81 |
2021 |
High |
17 |
0.24 |
14 |
0.20 |
40 |
0.56 |
71 |
2021 |
Low |
26 |
0.57 |
7 |
0.15 |
13 |
0.28 |
46 |
2021 |
Moderate |
17 |
0.27 |
36 |
0.56 |
11 |
0.17 |
64 |
2021 |
Outside100 |
18 |
0.33 |
17 |
0.31 |
20 |
0.36 |
55 |
2022 |
High |
74 |
0.20 |
104 |
0.28 |
193 |
0.52 |
371 |
2022 |
Low |
216 |
0.58 |
86 |
0.23 |
70 |
0.19 |
372 |
2022 |
Moderate |
70 |
0.19 |
218 |
0.59 |
84 |
0.23 |
372 |
2022 |
Outside100 |
111 |
0.32 |
146 |
0.42 |
88 |
0.26 |
345 |