Migration Diagrams

# Switch the session to the shared OneDrive data folder; the relative
# read_csv() paths used later in this document resolve against it.
# NOTE(review): this is an absolute, user-specific path, so the document only
# knits on this machine. Consider a project-relative root instead of setwd().
wd <- "/Users/stevenli/Library/CloudStorage/OneDrive-SharedLibraries-HarvardUniversity/Teicher, Hannah Meredith - DataAxle Data"
setwd(wd)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(knitr)
library(tidycensus)
library(mapdeck)

## 
## Attaching package: 'mapdeck'
## 
## The following object is masked from 'package:tibble':
## 
##     add_column

library(plotly)

## 
## Attaching package: 'plotly'
## 
## The following objects are masked from 'package:mapdeck':
## 
##     add_heatmap, add_mesh, add_sf, add_text
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

library(networkD3)
library(ggridges)
library(circlize)

## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================

library(flowmapblue)
library(RColorBrewer)
library(kableExtra)

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

# Echo the source of every chunk in the knitted document.
knitr::opts_chunk$set(echo = TRUE)

# Mapbox access token used by mapdeck.
# The token is taken from the MAPBOX_TOKEN environment variable when it is
# set (e.g. via ~/.Renviron), so it can be rotated without editing this file.
# The literal below is kept only as a backward-compatible fallback.
# NOTE(review): this token is committed in plain text and echoed into the
# rendered output; rotate it and drop the fallback once the variable is set.
key <- Sys.getenv(
  "MAPBOX_TOKEN",
  unset = "pk.eyJ1IjoibGlzdGV2ZW45NSIsImEiOiIxQXZVbVUwIn0.FFsQ0Fz095m-pAh1uFGAZg"
)
set_token(key)

Migration Table

## Rows: 33999 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): fips_code, lead_county, NAME_old, NAME_new, avg_eal_tercile_old, a...
## dbl (12): archive_version_year, row_count, CBSAFP_old, CBSAFP_new, avg_eal_o...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'avg_eal_tercile_old', 'avg_eal_tercile_new'. You can override using the `.groups` argument.

Top 100 Moves by CBSA
avg_eal_tercile_old	avg_eal_tercile_new	cbsa_move	row_count
High	High	Los Angeles-Long Beach-Anaheim, CA to Riverside-San Bernardino-Ontario, CA	1966
High	High	Riverside-San Bernardino-Ontario, CA to Los Angeles-Long Beach-Anaheim, CA	1133
High	High	San Jose-Sunnyvale-Santa Clara, CA to San Francisco-Oakland-Berkeley, CA	894
High	High	Los Angeles-Long Beach-Anaheim, CA to Oxnard-Thousand Oaks-Ventura, CA	887
Low	Moderate	Washington-Arlington-Alexandria, DC-VA-MD-WV to Baltimore-Columbia-Towson, MD	695
High	High	San Francisco-Oakland-Berkeley, CA to San Jose-Sunnyvale-Santa Clara, CA	682
High	High	Oxnard-Thousand Oaks-Ventura, CA to Los Angeles-Long Beach-Anaheim, CA	628
Moderate	Low	Baltimore-Columbia-Towson, MD to Washington-Arlington-Alexandria, DC-VA-MD-WV	500
Outside	Low	Boulder, CO to Denver-Aurora-Lakewood, CO	465
Moderate	Moderate	Boston-Cambridge-Newton, MA-NH to Worcester, MA-CT	419
Low	Moderate	Durham-Chapel Hill, NC to Raleigh-Cary, NC	414
Outside	High	Trenton-Princeton, NJ to New York-Newark-Jersey City, NY-NJ-PA	403
Moderate	Moderate	Boston-Cambridge-Newton, MA-NH to Providence-Warwick, RI-MA	398
High	Moderate	New Haven-Milford, CT to Hartford-East Hartford-Middletown, CT	387
Outside	High	Naples-Marco Island, FL to Cape Coral-Fort Myers, FL	387
Outside	Moderate	Ann Arbor, MI to Detroit-Warren-Dearborn, MI	387
Moderate	Low	Raleigh-Cary, NC to Durham-Chapel Hill, NC	368
Moderate	Moderate	Worcester, MA-CT to Boston-Cambridge-Newton, MA-NH	362
Moderate	High	Hartford-East Hartford-Middletown, CT to New Haven-Milford, CT	344
High	High	Bridgeport-Stamford-Norwalk, CT to New Haven-Milford, CT	337
High	High	New York-Newark-Jersey City, NY-NJ-PA to Philadelphia-Camden-Wilmington, PA-NJ-DE-MD	326
Moderate	Moderate	Providence-Warwick, RI-MA to Boston-Cambridge-Newton, MA-NH	325
High	High	San Diego-Chula Vista-Carlsbad, CA to Los Angeles-Long Beach-Anaheim, CA	317
Moderate	Low	Akron, OH to Cleveland-Elyria, OH	306
High	High	San Francisco-Oakland-Berkeley, CA to Los Angeles-Long Beach-Anaheim, CA	305
High	High	Los Angeles-Long Beach-Anaheim, CA to San Diego-Chula Vista-Carlsbad, CA	297
Low	Moderate	Cleveland-Elyria, OH to Akron, OH	289
High	Moderate	Salt Lake City, UT to Provo-Orem, UT	281
Outside	Low	Gainesville, GA to Atlanta-Sandy Springs-Alpharetta, GA	279
High	Moderate	Houston-The Woodlands-Sugar Land, TX to Dallas-Fort Worth-Arlington, TX	273
High	High	Orlando-Kissimmee-Sanford, FL to Deltona-Daytona Beach-Ormond Beach, FL	265
High	Moderate	New York-Newark-Jersey City, NY-NJ-PA to Poughkeepsie-Newburgh-Middletown, NY	264
High	High	New Haven-Milford, CT to Bridgeport-Stamford-Norwalk, CT	254
High	High	Philadelphia-Camden-Wilmington, PA-NJ-DE-MD to New York-Newark-Jersey City, NY-NJ-PA	252
High	Moderate	Salt Lake City, UT to Ogden-Clearfield, UT	241
Moderate	High	Dallas-Fort Worth-Arlington, TX to Houston-The Woodlands-Sugar Land, TX	238
Outside	Moderate	Manchester-Nashua, NH to Boston-Cambridge-Newton, MA-NH	238
Moderate	High	Ogden-Clearfield, UT to Salt Lake City, UT	235
Moderate	High	Provo-Orem, UT to Salt Lake City, UT	235
Outside	High	Santa Cruz-Watsonville, CA to San Jose-Sunnyvale-Santa Clara, CA	232
Moderate	Low	Colorado Springs, CO to Denver-Aurora-Lakewood, CO	231
Moderate	Low	Dayton-Kettering, OH to Cincinnati, OH-KY-IN	214
Low	Moderate	Denver-Aurora-Lakewood, CO to Colorado Springs, CO	211
High	High	Tucson, AZ to Phoenix-Mesa-Chandler, AZ	206
High	High	San Diego-Chula Vista-Carlsbad, CA to Riverside-San Bernardino-Ontario, CA	203
High	High	Los Angeles-Long Beach-Anaheim, CA to San Francisco-Oakland-Berkeley, CA	199
Moderate	Low	Greensboro-High Point, NC to Winston-Salem, NC	189
High	High	Deltona-Daytona Beach-Ormond Beach, FL to Orlando-Kissimmee-Sanford, FL	184
Outside	High	Port St. Lucie, FL to Miami-Fort Lauderdale-Pompano Beach, FL	183
Low	Moderate	Winston-Salem, NC to Greensboro-High Point, NC	182
Moderate	High	Poughkeepsie-Newburgh-Middletown, NY to New York-Newark-Jersey City, NY-NJ-PA	181
High	High	San Francisco-Oakland-Berkeley, CA to Sacramento-Roseville-Folsom, CA	177
High	High	New York-Newark-Jersey City, NY-NJ-PA to Miami-Fort Lauderdale-Pompano Beach, FL	173
High	Moderate	New York-Newark-Jersey City, NY-NJ-PA to Allentown-Bethlehem-Easton, PA-NJ	170
Outside	Moderate	Spartanburg, SC to Greenville-Anderson, SC	159
Low	Moderate	Cincinnati, OH-KY-IN to Dayton-Kettering, OH	157
Moderate	Moderate	Austin-Round Rock-Georgetown, TX to San Antonio-New Braunfels, TX	156
High	High	Riverside-San Bernardino-Ontario, CA to San Diego-Chula Vista-Carlsbad, CA	152
Outside	High	Reading, PA to Philadelphia-Camden-Wilmington, PA-NJ-DE-MD	150
Outside	Moderate	Flint, MI to Detroit-Warren-Dearborn, MI	150
High	High	Bridgeport-Stamford-Norwalk, CT to New York-Newark-Jersey City, NY-NJ-PA	148
Outside	High	Salem, OR to Portland-Vancouver-Hillsboro, OR-WA	146
Outside	High	Santa Rosa-Petaluma, CA to San Francisco-Oakland-Berkeley, CA	145
Outside	Low	Glens Falls, NY to Albany-Schenectady-Troy, NY	141
High	High	Los Angeles-Long Beach-Anaheim, CA to New York-Newark-Jersey City, NY-NJ-PA	140
High	High	Miami-Fort Lauderdale-Pompano Beach, FL to Orlando-Kissimmee-Sanford, FL	139
Low	Moderate	Nashville-Davidson–Murfreesboro–Franklin, TN to Charlotte-Concord-Gastonia, NC-SC	139
Outside	High	Vallejo, CA to San Francisco-Oakland-Berkeley, CA	138
Outside	High	Modesto, CA to Stockton, CA	136
High	High	Miami-Fort Lauderdale-Pompano Beach, FL to Tampa-St. Petersburg-Clearwater, FL	128
Moderate	High	Allentown-Bethlehem-Easton, PA-NJ to New York-Newark-Jersey City, NY-NJ-PA	127
High	High	Orlando-Kissimmee-Sanford, FL to Miami-Fort Lauderdale-Pompano Beach, FL	125
High	High	Sacramento-Roseville-Folsom, CA to San Francisco-Oakland-Berkeley, CA	125
Moderate	Moderate	Austin-Round Rock-Georgetown, TX to Dallas-Fort Worth-Arlington, TX	125
Outside	High	Bremerton-Silverdale-Port Orchard, WA to Seattle-Tacoma-Bellevue, WA	125
Outside	High	Torrington, CT to New Haven-Milford, CT	125
High	High	New York-Newark-Jersey City, NY-NJ-PA to Bridgeport-Stamford-Norwalk, CT	124
High	High	Orlando-Kissimmee-Sanford, FL to Lakeland-Winter Haven, FL	124
Moderate	High	Allentown-Bethlehem-Easton, PA-NJ to Philadelphia-Camden-Wilmington, PA-NJ-DE-MD	122
Outside	Low	Greeley, CO to Denver-Aurora-Lakewood, CO	122
Outside	Moderate	Holland, MI to Grand Rapids-Kentwood, MI	122
High	High	Lakeland-Winter Haven, FL to Orlando-Kissimmee-Sanford, FL	121
Outside	Low	Indiana, PA to Pittsburgh, PA	121
Outside	Moderate	Sherman-Denison, TX to Dallas-Fort Worth-Arlington, TX	121
Outside	High	Trenton-Princeton, NJ to Philadelphia-Camden-Wilmington, PA-NJ-DE-MD	119
Moderate	Low	Greenville-Anderson, SC to Atlanta-Sandy Springs-Alpharetta, GA	118
Outside	High	Punta Gorda, FL to North Port-Sarasota-Bradenton, FL	118
Outside	Moderate	Canton-Massillon, OH to Akron, OH	116
High	High	North Port-Sarasota-Bradenton, FL to Tampa-St. Petersburg-Clearwater, FL	110
High	High	Phoenix-Mesa-Chandler, AZ to Tucson, AZ	110
High	High	Tampa-St. Petersburg-Clearwater, FL to North Port-Sarasota-Bradenton, FL	108
High	Moderate	New Orleans-Metairie, LA to Baton Rouge, LA	108
Moderate	Moderate	San Antonio-New Braunfels, TX to Austin-Round Rock-Georgetown, TX	108
Outside	Moderate	Barnstable Town, MA to Boston-Cambridge-Newton, MA-NH	104
High	High	San Jose-Sunnyvale-Santa Clara, CA to Los Angeles-Long Beach-Anaheim, CA	103
Outside	Moderate	Kingston, NY to Poughkeepsie-Newburgh-Middletown, NY	103
High	Moderate	Houston-The Woodlands-Sugar Land, TX to Austin-Round Rock-Georgetown, TX	102
Outside	High	The Villages, FL to Orlando-Kissimmee-Sanford, FL	102
High	High	New York-Newark-Jersey City, NY-NJ-PA to Los Angeles-Long Beach-Anaheim, CA	100
High	Moderate	Los Angeles-Long Beach-Anaheim, CA to Dallas-Fort Worth-Arlington, TX	100

## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.

Moves by Tercile: rows are the origin (old) tercile, columns are the destination (new) tercile
avg_eal_tercile_old	High	Low	Moderate
High	17264	1766	4423
Low	1821	1964	3936
Moderate	3697	3639	4162
NA	8472	7714	8221

Migration Map

## Rows: 8679 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): cbsa_move, avg_eal_tercile_old, avg_eal_tercile_new, tercile_move, ...
## dbl (7): CBSAFP_old, CBSAFP_new, row_count, lat_old, lon_old, lat_new, lon_new
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Registered S3 method overwritten by 'jsonify':
##   method     from    
##   print.json jsonlite

## animated_arc is an experimental layer and the function may change without warning

Bar Charts

# Load the CBSA-level table of entry, exit, births and deaths counts by
# archive year (path is relative to the working directory set at the top).
data <- read_csv("cbsa_births_deaths_exits_entrys.csv")

## Rows: 1000 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): NAME, avg_eal_tercile
## dbl (8): archive_version_year, CBSAFP, entry, exit, births, deaths, avg_eal,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Quick sanity check of ranges and missing values in each column.
summary(data)

##  archive_version_year     CBSAFP          entry             exit       
##  Min.   :2013         Min.   :10420   Min.   :  1.00   Min.   :  1.00  
##  1st Qu.:2015         1st Qu.:19348   1st Qu.: 26.00   1st Qu.: 25.00  
##  Median :2018         Median :32060   Median : 45.00   Median : 44.00  
##  Mean   :2018         Mean   :30209   Mean   : 67.08   Mean   : 67.08  
##  3rd Qu.:2020         3rd Qu.:40080   3rd Qu.: 84.00   3rd Qu.: 79.00  
##  Max.   :2022         Max.   :49340   Max.   :561.00   Max.   :670.00  
##                                                        NA's   :100     
##      births           deaths           NAME              avg_eal     
##  Min.   :  1328   Min.   :  1128   Length:1000        Min.   :38.21  
##  1st Qu.:  3617   1st Qu.:  3416   Class :character   1st Qu.:65.60  
##  Median :  6166   Median :  5294   Mode  :character   Median :75.28  
##  Mean   : 12411   Mean   : 10836                      Mean   :77.28  
##  3rd Qu.: 13648   3rd Qu.: 11576                      3rd Qu.:94.11  
##  Max.   :171670   Max.   :139494                      Max.   :99.91  
##                   NA's   :100                                        
##     tercile     avg_eal_tercile   
##  Min.   :1.00   Length:1000       
##  1st Qu.:1.00   Class :character  
##  Median :2.00   Mode  :character  
##  Mean   :1.99                     
##  3rd Qu.:3.00                     
##  Max.   :3.00                     
##

# Reshape the four count columns to long form: the measure name goes into
# `type` and its value into `row_count`, all other columns are repeated.
# pivot_longer() replaces the superseded gather(). Rows now come out
# interleaved per original row instead of stacked per column, which does not
# affect the grouped bar charts built from this table.
data <- data %>%
  pivot_longer(
    cols = c(entry, exit, births, deaths),
    names_to = "type",
    values_to = "row_count"
  )

# Horizontal dodged bars of births vs. deaths for each CBSA (NAME), with the
# CBSAs ordered by their combined births + deaths total.
# NOTE(review): `data` appears to hold one row per CBSA and archive year, so
# several bars per NAME/type are drawn on top of each other; confirm whether
# the counts should be summed over 2013-2022 before plotting.
data %>%
  filter(type %in% c("births", "deaths")) %>%
  group_by(NAME) %>%
  # Per-CBSA total, used only to order the axis. Spelled TRUE rather than T,
  # because T is an ordinary variable that can be reassigned.
  mutate(sort = sum(row_count, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(fill = type, y = row_count, x = forcats::fct_reorder(NAME, sort))) +
  geom_bar(position = "dodge", stat = "identity") +
  coord_flip() +
  theme_minimal() +
  ggtitle("Births and Deaths - 2013-2022")

## Warning: Removed 100 rows containing missing values (`geom_bar()`).

# Horizontal dodged bars of entries vs. exits for each CBSA (NAME), with the
# CBSAs ordered by their combined entry + exit total.
# NOTE(review): `data` appears to hold one row per CBSA and archive year, so
# several bars per NAME/type are drawn on top of each other; confirm whether
# the counts should be summed over 2013-2022 before plotting.
data %>%
  filter(type %in% c("entry", "exit")) %>%
  group_by(NAME) %>%
  # Per-CBSA total, used only to order the axis. Spelled TRUE rather than T,
  # because T is an ordinary variable that can be reassigned.
  mutate(sort = sum(row_count, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(fill = type, y = row_count, x = forcats::fct_reorder(NAME, sort))) +
  geom_bar(position = "dodge", stat = "identity") +
  coord_flip() +
  theme_minimal() +
  ggtitle("Entries and Exits - 2013-2022")

## Warning: Removed 100 rows containing missing values (`geom_bar()`).

## Chord

# Flow table for the unfiltered chord diagram.
# Endpoints whose EAL tercile is missing are relabelled "Outside", then the
# move counts are totalled per origin, destination and origin tercile.
# summarise() drops only the last grouping level, so the result stays grouped
# by NAME_old and NAME_new.
moves <- read_csv("migration_map.csv") %>%
  select(
    NAME_old, NAME_new, row_count,
    avg_eal_tercile_old, avg_eal_tercile_new
  ) %>%
  mutate(
    NAME_old = replace(NAME_old, is.na(avg_eal_tercile_old), "Outside"),
    NAME_new = replace(NAME_new, is.na(avg_eal_tercile_new), "Outside")
  ) %>%
  group_by(NAME_old, NAME_new, avg_eal_tercile_old) %>%
  summarise(row_count = sum(row_count))

## Rows: 8679 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): cbsa_move, avg_eal_tercile_old, avg_eal_tercile_new, tercile_move, ...
## dbl (7): CBSAFP_old, CBSAFP_new, row_count, lat_old, lon_old, lat_new, lon_new
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'NAME_old', 'NAME_new'. You can override using the `.groups` argument.

# Directional chord diagram of every CBSA-to-CBSA flow in `moves`.
# The from / to / value columns are passed explicitly: `moves` carries the
# character column avg_eal_tercile_old in third position, and chordDiagram()
# reads a data frame by column position (two sector columns, then the value).
chordDiagram(
  x = dplyr::select(moves, NAME_old, NAME_new, row_count),
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow"
)

# Flow table restricted to origin/destination pairs with more than 150 moves.
# Endpoints whose EAL tercile is missing are relabelled "Outside" before the
# counts are totalled per origin, destination and origin tercile.
# The result stays grouped by NAME_old and NAME_new.
moves <- read_csv("migration_map.csv") %>%
  select(
    NAME_old, NAME_new, row_count,
    avg_eal_tercile_old, avg_eal_tercile_new
  ) %>%
  mutate(
    NAME_old = replace(NAME_old, is.na(avg_eal_tercile_old), "Outside"),
    NAME_new = replace(NAME_new, is.na(avg_eal_tercile_new), "Outside")
  ) %>%
  group_by(NAME_old, NAME_new, avg_eal_tercile_old) %>%
  summarise(row_count = sum(row_count)) %>%
  filter(row_count > 150)

## Rows: 8679 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): cbsa_move, avg_eal_tercile_old, avg_eal_tercile_new, tercile_move, ...
## dbl (7): CBSAFP_old, CBSAFP_new, row_count, lat_old, lon_old, lat_new, lon_new
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'NAME_old', 'NAME_new'. You can override using the `.groups` argument.

# Directional chord diagram of the flows currently held in `moves`.
# The from / to / value columns are passed explicitly: `moves` carries the
# character column avg_eal_tercile_old in third position, and chordDiagram()
# reads a data frame by column position (two sector columns, then the value).
chordDiagram(
  x = dplyr::select(moves, NAME_old, NAME_new, row_count),
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow"
)
# title() annotates the plot that was just drawn, so it must follow it.
title("> 150 flows", cex.main = 2)

# Flow table restricted to origin/destination pairs with more than 250 moves.
# Endpoints whose EAL tercile is missing are relabelled "Outside" before the
# counts are totalled per origin, destination and origin tercile.
# The result stays grouped by NAME_old and NAME_new.
moves <- read_csv("migration_map.csv") %>%
  select(
    NAME_old, NAME_new, row_count,
    avg_eal_tercile_old, avg_eal_tercile_new
  ) %>%
  mutate(
    NAME_old = replace(NAME_old, is.na(avg_eal_tercile_old), "Outside"),
    NAME_new = replace(NAME_new, is.na(avg_eal_tercile_new), "Outside")
  ) %>%
  group_by(NAME_old, NAME_new, avg_eal_tercile_old) %>%
  summarise(row_count = sum(row_count)) %>%
  filter(row_count > 250)

## Rows: 8679 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): cbsa_move, avg_eal_tercile_old, avg_eal_tercile_new, tercile_move, ...
## dbl (7): CBSAFP_old, CBSAFP_new, row_count, lat_old, lon_old, lat_new, lon_new
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'NAME_old', 'NAME_new'. You can override using the `.groups` argument.

# Directional chord diagram of the flows currently held in `moves`.
# The from / to / value columns are passed explicitly: `moves` carries the
# character column avg_eal_tercile_old in third position, and chordDiagram()
# reads a data frame by column position (two sector columns, then the value).
chordDiagram(
  x = dplyr::select(moves, NAME_old, NAME_new, row_count),
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow"
)
# title() annotates the plot that was just drawn, so it must follow it.
title("> 250 flows", cex.main = 2)

# Flow table restricted to origin/destination pairs with more than 500 moves.
# Endpoints whose EAL tercile is missing are relabelled "Outside" before the
# counts are totalled per origin, destination and origin tercile.
# The result stays grouped by NAME_old and NAME_new.
moves <- read_csv("migration_map.csv") %>%
  select(
    NAME_old, NAME_new, row_count,
    avg_eal_tercile_old, avg_eal_tercile_new
  ) %>%
  mutate(
    NAME_old = replace(NAME_old, is.na(avg_eal_tercile_old), "Outside"),
    NAME_new = replace(NAME_new, is.na(avg_eal_tercile_new), "Outside")
  ) %>%
  group_by(NAME_old, NAME_new, avg_eal_tercile_old) %>%
  summarise(row_count = sum(row_count)) %>%
  filter(row_count > 500)

## Rows: 8679 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): cbsa_move, avg_eal_tercile_old, avg_eal_tercile_new, tercile_move, ...
## dbl (7): CBSAFP_old, CBSAFP_new, row_count, lat_old, lon_old, lat_new, lon_new
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'NAME_old', 'NAME_new'. You can override using the `.groups` argument.

# Directional chord diagram of the flows currently held in `moves`.
# The from / to / value columns are passed explicitly: `moves` carries the
# character column avg_eal_tercile_old in third position, and chordDiagram()
# reads a data frame by column position (two sector columns, then the value).
chordDiagram(
  x = dplyr::select(moves, NAME_old, NAME_new, row_count),
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow"
)

# title() annotates the current plot, so it has to run after chordDiagram().
# It previously ran first, so it could not label this diagram.
title("> 500 flows", cex.main = 2)

# Tercile-to-tercile flow table: a missing EAL tercile on either end is
# labelled "Outside", then move counts are totalled per (old, new) pair.
# The last step is filter(), not mutate(): mutate(row_count > 100) only
# appended a logical column named `row_count > 100` and removed nothing,
# unlike the threshold filters used for the CBSA-level tables.
# summarise() leaves the result grouped by avg_eal_tercile_old.
moves <- read_csv("migration_map.csv") %>%
  mutate(
    avg_eal_tercile_old = replace_na(avg_eal_tercile_old, "Outside"),
    avg_eal_tercile_new = replace_na(avg_eal_tercile_new, "Outside")
  ) %>%
  group_by(avg_eal_tercile_old, avg_eal_tercile_new) %>%
  summarise(row_count = sum(row_count)) %>%
  filter(row_count > 100)

## Rows: 8679 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): cbsa_move, avg_eal_tercile_old, avg_eal_tercile_new, tercile_move, ...
## dbl (7): CBSAFP_old, CBSAFP_new, row_count, lat_old, lon_old, lat_new, lon_new
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override using the `.groups` argument.

# Directional chord diagram of flows between EAL terciles.
# Only the from / to / value columns are passed, so any extra column on
# `moves` cannot be picked up by chordDiagram()'s positional column handling.
chordDiagram(
  x = dplyr::select(moves, avg_eal_tercile_old, avg_eal_tercile_new, row_count),
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow"
)
# title() annotates the plot that was just drawn, so it must follow it.
title("Risk level to Risk level", cex.main = 2)

Migration Diagrams

2024-03-17

Migration Table

Migration Map

Bar Charts