# Project data directory. The relative file names passed to read_csv()
# further down are resolved against this location.
# NOTE(review): this is a machine-specific absolute path; the script will not
# run elsewhere without editing it.
wd <- "/Users/stevenli/Library/CloudStorage/OneDrive-SharedLibraries-HarvardUniversity/Teicher, Hannah Meredith - DataAxle Data"
setwd(dir = wd)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
library(tidycensus)
library(mapdeck)
## 
## Attaching package: 'mapdeck'
## 
## The following object is masked from 'package:tibble':
## 
##     add_column
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following objects are masked from 'package:mapdeck':
## 
##     add_heatmap, add_mesh, add_sf, add_text
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(networkD3)
library(ggridges)
library(circlize)
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
library(flowmapblue)
library(RColorBrewer)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Echo the source of every chunk in the knitted report.
knitr::opts_chunk$set(echo = TRUE)

# Mapbox access token handed to mapdeck via set_token().
# The MAPBOX_TOKEN environment variable takes precedence so the credential can
# be supplied or rotated without editing this file. The literal is kept only
# as a fallback so existing runs behave exactly as before.
# NOTE(review): consider removing the hard-coded token from the source.
key <- Sys.getenv(
  "MAPBOX_TOKEN",
  unset = "pk.eyJ1IjoibGlzdGV2ZW45NSIsImEiOiIxQXZVbVUwIn0.FFsQ0Fz095m-pAh1uFGAZg"
)
set_token(key)

Migration Table

## Rows: 33999 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): fips_code, lead_county, NAME_old, NAME_new, avg_eal_tercile_old, a...
## dbl (12): archive_version_year, row_count, CBSAFP_old, CBSAFP_new, avg_eal_o...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'avg_eal_tercile_old', 'avg_eal_tercile_new'. You can override using the `.groups` argument.
Top 100 Moves by CBSA
avg_eal_tercile_old avg_eal_tercile_new cbsa_move row_count
High High Los Angeles-Long Beach-Anaheim, CA to Riverside-San Bernardino-Ontario, CA 1966
High High Riverside-San Bernardino-Ontario, CA to Los Angeles-Long Beach-Anaheim, CA 1133
High High San Jose-Sunnyvale-Santa Clara, CA to San Francisco-Oakland-Berkeley, CA 894
High High Los Angeles-Long Beach-Anaheim, CA to Oxnard-Thousand Oaks-Ventura, CA 887
Low Moderate Washington-Arlington-Alexandria, DC-VA-MD-WV to Baltimore-Columbia-Towson, MD 695
High High San Francisco-Oakland-Berkeley, CA to San Jose-Sunnyvale-Santa Clara, CA 682
High High Oxnard-Thousand Oaks-Ventura, CA to Los Angeles-Long Beach-Anaheim, CA 628
Moderate Low Baltimore-Columbia-Towson, MD to Washington-Arlington-Alexandria, DC-VA-MD-WV 500
Outside Low Boulder, CO to Denver-Aurora-Lakewood, CO 465
Moderate Moderate Boston-Cambridge-Newton, MA-NH to Worcester, MA-CT 419
Low Moderate Durham-Chapel Hill, NC to Raleigh-Cary, NC 414
Outside High Trenton-Princeton, NJ to New York-Newark-Jersey City, NY-NJ-PA 403
Moderate Moderate Boston-Cambridge-Newton, MA-NH to Providence-Warwick, RI-MA 398
High Moderate New Haven-Milford, CT to Hartford-East Hartford-Middletown, CT 387
Outside High Naples-Marco Island, FL to Cape Coral-Fort Myers, FL 387
Outside Moderate Ann Arbor, MI to Detroit-Warren-Dearborn, MI 387
Moderate Low Raleigh-Cary, NC to Durham-Chapel Hill, NC 368
Moderate Moderate Worcester, MA-CT to Boston-Cambridge-Newton, MA-NH 362
Moderate High Hartford-East Hartford-Middletown, CT to New Haven-Milford, CT 344
High High Bridgeport-Stamford-Norwalk, CT to New Haven-Milford, CT 337
High High New York-Newark-Jersey City, NY-NJ-PA to Philadelphia-Camden-Wilmington, PA-NJ-DE-MD 326
Moderate Moderate Providence-Warwick, RI-MA to Boston-Cambridge-Newton, MA-NH 325
High High San Diego-Chula Vista-Carlsbad, CA to Los Angeles-Long Beach-Anaheim, CA 317
Moderate Low Akron, OH to Cleveland-Elyria, OH 306
High High San Francisco-Oakland-Berkeley, CA to Los Angeles-Long Beach-Anaheim, CA 305
High High Los Angeles-Long Beach-Anaheim, CA to San Diego-Chula Vista-Carlsbad, CA 297
Low Moderate Cleveland-Elyria, OH to Akron, OH 289
High Moderate Salt Lake City, UT to Provo-Orem, UT 281
Outside Low Gainesville, GA to Atlanta-Sandy Springs-Alpharetta, GA 279
High Moderate Houston-The Woodlands-Sugar Land, TX to Dallas-Fort Worth-Arlington, TX 273
High High Orlando-Kissimmee-Sanford, FL to Deltona-Daytona Beach-Ormond Beach, FL 265
High Moderate New York-Newark-Jersey City, NY-NJ-PA to Poughkeepsie-Newburgh-Middletown, NY 264
High High New Haven-Milford, CT to Bridgeport-Stamford-Norwalk, CT 254
High High Philadelphia-Camden-Wilmington, PA-NJ-DE-MD to New York-Newark-Jersey City, NY-NJ-PA 252
High Moderate Salt Lake City, UT to Ogden-Clearfield, UT 241
Moderate High Dallas-Fort Worth-Arlington, TX to Houston-The Woodlands-Sugar Land, TX 238
Outside Moderate Manchester-Nashua, NH to Boston-Cambridge-Newton, MA-NH 238
Moderate High Ogden-Clearfield, UT to Salt Lake City, UT 235
Moderate High Provo-Orem, UT to Salt Lake City, UT 235
Outside High Santa Cruz-Watsonville, CA to San Jose-Sunnyvale-Santa Clara, CA 232
Moderate Low Colorado Springs, CO to Denver-Aurora-Lakewood, CO 231
Moderate Low Dayton-Kettering, OH to Cincinnati, OH-KY-IN 214
Low Moderate Denver-Aurora-Lakewood, CO to Colorado Springs, CO 211
High High Tucson, AZ to Phoenix-Mesa-Chandler, AZ 206
High High San Diego-Chula Vista-Carlsbad, CA to Riverside-San Bernardino-Ontario, CA 203
High High Los Angeles-Long Beach-Anaheim, CA to San Francisco-Oakland-Berkeley, CA 199
Moderate Low Greensboro-High Point, NC to Winston-Salem, NC 189
High High Deltona-Daytona Beach-Ormond Beach, FL to Orlando-Kissimmee-Sanford, FL 184
Outside High Port St. Lucie, FL to Miami-Fort Lauderdale-Pompano Beach, FL 183
Low Moderate Winston-Salem, NC to Greensboro-High Point, NC 182
Moderate High Poughkeepsie-Newburgh-Middletown, NY to New York-Newark-Jersey City, NY-NJ-PA 181
High High San Francisco-Oakland-Berkeley, CA to Sacramento-Roseville-Folsom, CA 177
High High New York-Newark-Jersey City, NY-NJ-PA to Miami-Fort Lauderdale-Pompano Beach, FL 173
High Moderate New York-Newark-Jersey City, NY-NJ-PA to Allentown-Bethlehem-Easton, PA-NJ 170
Outside Moderate Spartanburg, SC to Greenville-Anderson, SC 159
Low Moderate Cincinnati, OH-KY-IN to Dayton-Kettering, OH 157
Moderate Moderate Austin-Round Rock-Georgetown, TX to San Antonio-New Braunfels, TX 156
High High Riverside-San Bernardino-Ontario, CA to San Diego-Chula Vista-Carlsbad, CA 152
Outside High Reading, PA to Philadelphia-Camden-Wilmington, PA-NJ-DE-MD 150
Outside Moderate Flint, MI to Detroit-Warren-Dearborn, MI 150
High High Bridgeport-Stamford-Norwalk, CT to New York-Newark-Jersey City, NY-NJ-PA 148
Outside High Salem, OR to Portland-Vancouver-Hillsboro, OR-WA 146
Outside High Santa Rosa-Petaluma, CA to San Francisco-Oakland-Berkeley, CA 145
Outside Low Glens Falls, NY to Albany-Schenectady-Troy, NY 141
High High Los Angeles-Long Beach-Anaheim, CA to New York-Newark-Jersey City, NY-NJ-PA 140
High High Miami-Fort Lauderdale-Pompano Beach, FL to Orlando-Kissimmee-Sanford, FL 139
Low Moderate Nashville-Davidson–Murfreesboro–Franklin, TN to Charlotte-Concord-Gastonia, NC-SC 139
Outside High Vallejo, CA to San Francisco-Oakland-Berkeley, CA 138
Outside High Modesto, CA to Stockton, CA 136
High High Miami-Fort Lauderdale-Pompano Beach, FL to Tampa-St. Petersburg-Clearwater, FL 128
Moderate High Allentown-Bethlehem-Easton, PA-NJ to New York-Newark-Jersey City, NY-NJ-PA 127
High High Orlando-Kissimmee-Sanford, FL to Miami-Fort Lauderdale-Pompano Beach, FL 125
High High Sacramento-Roseville-Folsom, CA to San Francisco-Oakland-Berkeley, CA 125
Moderate Moderate Austin-Round Rock-Georgetown, TX to Dallas-Fort Worth-Arlington, TX 125
Outside High Bremerton-Silverdale-Port Orchard, WA to Seattle-Tacoma-Bellevue, WA 125
Outside High Torrington, CT to New Haven-Milford, CT 125
High High New York-Newark-Jersey City, NY-NJ-PA to Bridgeport-Stamford-Norwalk, CT 124
High High Orlando-Kissimmee-Sanford, FL to Lakeland-Winter Haven, FL 124
Moderate High Allentown-Bethlehem-Easton, PA-NJ to Philadelphia-Camden-Wilmington, PA-NJ-DE-MD 122
Outside Low Greeley, CO to Denver-Aurora-Lakewood, CO 122
Outside Moderate Holland, MI to Grand Rapids-Kentwood, MI 122
High High Lakeland-Winter Haven, FL to Orlando-Kissimmee-Sanford, FL 121
Outside Low Indiana, PA to Pittsburgh, PA 121
Outside Moderate Sherman-Denison, TX to Dallas-Fort Worth-Arlington, TX 121
Outside High Trenton-Princeton, NJ to Philadelphia-Camden-Wilmington, PA-NJ-DE-MD 119
Moderate Low Greenville-Anderson, SC to Atlanta-Sandy Springs-Alpharetta, GA 118
Outside High Punta Gorda, FL to North Port-Sarasota-Bradenton, FL 118
Outside Moderate Canton-Massillon, OH to Akron, OH 116
High High North Port-Sarasota-Bradenton, FL to Tampa-St. Petersburg-Clearwater, FL 110
High High Phoenix-Mesa-Chandler, AZ to Tucson, AZ 110
High High Tampa-St. Petersburg-Clearwater, FL to North Port-Sarasota-Bradenton, FL 108
High Moderate New Orleans-Metairie, LA to Baton Rouge, LA 108
Moderate Moderate San Antonio-New Braunfels, TX to Austin-Round Rock-Georgetown, TX 108
Outside Moderate Barnstable Town, MA to Boston-Cambridge-Newton, MA-NH 104
High High San Jose-Sunnyvale-Santa Clara, CA to Los Angeles-Long Beach-Anaheim, CA 103
Outside Moderate Kingston, NY to Poughkeepsie-Newburgh-Middletown, NY 103
High Moderate Houston-The Woodlands-Sugar Land, TX to Austin-Round Rock-Georgetown, TX 102
Outside High The Villages, FL to Orlando-Kissimmee-Sanford, FL 102
High High New York-Newark-Jersey City, NY-NJ-PA to Los Angeles-Long Beach-Anaheim, CA 100
High Moderate Los Angeles-Long Beach-Anaheim, CA to Dallas-Fort Worth-Arlington, TX 100
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override
## using the `.groups` argument.
Moves by Tercile, rows (old), columns (new)
avg_eal_tercile_old High Low Moderate
High 17264 1766 4423
Low 1821 1964 3936
Moderate 3697 3639 4162
NA 8472 7714 8221

Migration Map

## Rows: 8679 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): cbsa_move, avg_eal_tercile_old, avg_eal_tercile_new, tercile_move, ...
## dbl (7): CBSAFP_old, CBSAFP_new, row_count, lat_old, lon_old, lat_new, lon_new
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Registered S3 method overwritten by 'jsonify':
##   method     from    
##   print.json jsonlite
## animated_arc is an experimental layer and the function may change without warning

Bar Charts

# Load the CBSA-level table of entries, exits, births and deaths
# (file is looked up in the current working directory).
data <- readr::read_csv(file = "cbsa_births_deaths_exits_entrys.csv")
## Rows: 1000 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): NAME, avg_eal_tercile
## dbl (8): archive_version_year, CBSAFP, entry, exit, births, deaths, avg_eal,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-column summary statistics, printed as a quick sanity check of the load.
summary(object = data)
##  archive_version_year     CBSAFP          entry             exit       
##  Min.   :2013         Min.   :10420   Min.   :  1.00   Min.   :  1.00  
##  1st Qu.:2015         1st Qu.:19348   1st Qu.: 26.00   1st Qu.: 25.00  
##  Median :2018         Median :32060   Median : 45.00   Median : 44.00  
##  Mean   :2018         Mean   :30209   Mean   : 67.08   Mean   : 67.08  
##  3rd Qu.:2020         3rd Qu.:40080   3rd Qu.: 84.00   3rd Qu.: 79.00  
##  Max.   :2022         Max.   :49340   Max.   :561.00   Max.   :670.00  
##                                                        NA's   :100     
##      births           deaths           NAME              avg_eal     
##  Min.   :  1328   Min.   :  1128   Length:1000        Min.   :38.21  
##  1st Qu.:  3617   1st Qu.:  3416   Class :character   1st Qu.:65.60  
##  Median :  6166   Median :  5294   Mode  :character   Median :75.28  
##  Mean   : 12411   Mean   : 10836                      Mean   :77.28  
##  3rd Qu.: 13648   3rd Qu.: 11576                      3rd Qu.:94.11  
##  Max.   :171670   Max.   :139494                      Max.   :99.91  
##                   NA's   :100                                        
##     tercile     avg_eal_tercile   
##  Min.   :1.00   Length:1000       
##  1st Qu.:1.00   Class :character  
##  Median :2.00   Mode  :character  
##  Mean   :1.99                     
##  3rd Qu.:3.00                     
##  Max.   :3.00                     
## 
# Reshape to long format: the four count columns are stacked into a `type`
# column (measure name) and a `row_count` column (its value).
# pivot_longer() replaces the superseded gather(). Column names and contents
# are the same; only the row order differs, which the plots below do not
# depend on.
data <- data %>%
  pivot_longer(
    cols = c(entry, exit, births, deaths),
    names_to = "type",
    values_to = "row_count"
  )

# Horizontal dodged bars of births vs. deaths per CBSA, ordered by the
# combined total of both measures.
#
# Counts are summed per NAME/type before plotting. The long table holds
# several rows per NAME/type, and with position = "dodge" those rows are drawn
# on top of each other, so only the largest single value would be visible
# while the ordering key (and the title) refer to the total.
# Missing counts are treated as zero in the totals (na.rm = TRUE), which also
# removes the "missing values" warning from the bar layer.
data %>%
  filter(type %in% c("births", "deaths")) %>%
  group_by(NAME, type) %>%
  summarise(row_count = sum(row_count, na.rm = TRUE), .groups = "drop") %>%
  group_by(NAME) %>%
  mutate(sort = sum(row_count)) %>%
  ungroup() %>%
  ggplot(aes(
    x = forcats::fct_reorder(NAME, sort),
    y = row_count,
    fill = type
  )) +
  geom_col(position = "dodge") +
  coord_flip() +
  theme_minimal() +
  ggtitle("Births and Deaths - 2013-2022")
## Warning: Removed 100 rows containing missing values (`geom_bar()`).

# Horizontal dodged bars of entries vs. exits per CBSA, ordered by the
# combined total of both measures.
#
# Counts are summed per NAME/type before plotting. The long table holds
# several rows per NAME/type, and with position = "dodge" those rows are drawn
# on top of each other, so only the largest single value would be visible
# while the ordering key (and the title) refer to the total.
# Missing counts are treated as zero in the totals (na.rm = TRUE), which also
# removes the "missing values" warning from the bar layer.
data %>%
  filter(type %in% c("entry", "exit")) %>%
  group_by(NAME, type) %>%
  summarise(row_count = sum(row_count, na.rm = TRUE), .groups = "drop") %>%
  group_by(NAME) %>%
  mutate(sort = sum(row_count)) %>%
  ungroup() %>%
  ggplot(aes(
    x = forcats::fct_reorder(NAME, sort),
    y = row_count,
    fill = type
  )) +
  geom_col(position = "dodge") +
  coord_flip() +
  theme_minimal() +
  ggtitle("Entries and Exits - 2013-2022")
## Warning: Removed 100 rows containing missing values (`geom_bar()`).

## Chord

# Flow table for the chord diagram: one row per origin/destination pair with
# the summed move count.
# Endpoints that have no risk tercile are pooled into one "Outside" sector.
#
# The result is ungrouped and reduced to exactly (from, to, value).
# chordDiagram() documents the third column of a data frame as the numeric
# link width; previously that position held the character tercile column and
# the tibble was still grouped.
moves <- read_csv("migration_map.csv") %>%
  select(
    NAME_old, NAME_new, row_count,
    avg_eal_tercile_old, avg_eal_tercile_new
  ) %>%
  mutate(
    NAME_old = ifelse(is.na(avg_eal_tercile_old), "Outside", NAME_old),
    NAME_new = ifelse(is.na(avg_eal_tercile_new), "Outside", NAME_new)
  ) %>%
  group_by(NAME_old, NAME_new, avg_eal_tercile_old) %>%
  summarise(row_count = sum(row_count), .groups = "drop") %>%
  select(NAME_old, NAME_new, row_count)
## Rows: 8679 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): cbsa_move, avg_eal_tercile_old, avg_eal_tercile_new, tercile_move, ...
## dbl (7): CBSAFP_old, CBSAFP_new, row_count, lat_old, lon_old, lat_new, lon_new
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'NAME_old', 'NAME_new'. You can override using the `.groups` argument.
# Directional chord diagram of every flow in `moves`; direction is encoded
# with both differing link heights and big-arrow link ends.
chordDiagram(
  x = moves,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow"
)

# Flow table for the chord diagram, restricted to pairs with more than 150
# moves. Endpoints that have no risk tercile are pooled into "Outside".
#
# The result is ungrouped and reduced to exactly (from, to, value).
# chordDiagram() documents the third column of a data frame as the numeric
# link width; previously that position held the character tercile column and
# the tibble was still grouped.
moves <- read_csv("migration_map.csv") %>%
  select(
    NAME_old, NAME_new, row_count,
    avg_eal_tercile_old, avg_eal_tercile_new
  ) %>%
  mutate(
    NAME_old = ifelse(is.na(avg_eal_tercile_old), "Outside", NAME_old),
    NAME_new = ifelse(is.na(avg_eal_tercile_new), "Outside", NAME_new)
  ) %>%
  group_by(NAME_old, NAME_new, avg_eal_tercile_old) %>%
  summarise(row_count = sum(row_count), .groups = "drop") %>%
  filter(row_count > 150) %>%
  select(NAME_old, NAME_new, row_count)
## Rows: 8679 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): cbsa_move, avg_eal_tercile_old, avg_eal_tercile_new, tercile_move, ...
## dbl (7): CBSAFP_old, CBSAFP_new, row_count, lat_old, lon_old, lat_new, lon_new
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'NAME_old', 'NAME_new'. You can override using the `.groups` argument.
# Directional chord diagram of the filtered flows, then a heading on the
# finished plot.
chordDiagram(
  x = moves,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow"
)
title(main = "> 150 flows", cex.main = 2)

# Flow table for the chord diagram, restricted to pairs with more than 250
# moves. Endpoints that have no risk tercile are pooled into "Outside".
#
# The result is ungrouped and reduced to exactly (from, to, value).
# chordDiagram() documents the third column of a data frame as the numeric
# link width; previously that position held the character tercile column and
# the tibble was still grouped.
moves <- read_csv("migration_map.csv") %>%
  select(
    NAME_old, NAME_new, row_count,
    avg_eal_tercile_old, avg_eal_tercile_new
  ) %>%
  mutate(
    NAME_old = ifelse(is.na(avg_eal_tercile_old), "Outside", NAME_old),
    NAME_new = ifelse(is.na(avg_eal_tercile_new), "Outside", NAME_new)
  ) %>%
  group_by(NAME_old, NAME_new, avg_eal_tercile_old) %>%
  summarise(row_count = sum(row_count), .groups = "drop") %>%
  filter(row_count > 250) %>%
  select(NAME_old, NAME_new, row_count)
## Rows: 8679 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): cbsa_move, avg_eal_tercile_old, avg_eal_tercile_new, tercile_move, ...
## dbl (7): CBSAFP_old, CBSAFP_new, row_count, lat_old, lon_old, lat_new, lon_new
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'NAME_old', 'NAME_new'. You can override using the `.groups` argument.
# Directional chord diagram of the filtered flows, then a heading on the
# finished plot.
chordDiagram(
  x = moves,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow"
)
title(main = "> 250 flows", cex.main = 2)

# Flow table for the chord diagram, restricted to pairs with more than 500
# moves. Endpoints that have no risk tercile are pooled into "Outside".
#
# The result is ungrouped and reduced to exactly (from, to, value).
# chordDiagram() documents the third column of a data frame as the numeric
# link width; previously that position held the character tercile column and
# the tibble was still grouped.
moves <- read_csv("migration_map.csv") %>%
  select(
    NAME_old, NAME_new, row_count,
    avg_eal_tercile_old, avg_eal_tercile_new
  ) %>%
  mutate(
    NAME_old = ifelse(is.na(avg_eal_tercile_old), "Outside", NAME_old),
    NAME_new = ifelse(is.na(avg_eal_tercile_new), "Outside", NAME_new)
  ) %>%
  group_by(NAME_old, NAME_new, avg_eal_tercile_old) %>%
  summarise(row_count = sum(row_count), .groups = "drop") %>%
  filter(row_count > 500) %>%
  select(NAME_old, NAME_new, row_count)
## Rows: 8679 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): cbsa_move, avg_eal_tercile_old, avg_eal_tercile_new, tercile_move, ...
## dbl (7): CBSAFP_old, CBSAFP_new, row_count, lat_old, lon_old, lat_new, lon_new
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'NAME_old', 'NAME_new'. You can override using the `.groups` argument.
# Draw the diagram first, then add the heading. title() annotates the plot
# that is current when it is called, so calling it before chordDiagram()
# (as before) labels whatever was drawn previously, or fails if nothing was,
# and leaves this diagram without a title.
chordDiagram(
  x = moves,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow"
)
title("> 500 flows", cex.main = 2)

# Tercile-to-tercile flow table: total moves for every pair of origin and
# destination risk tercile, with missing terciles relabelled "Outside".
#
# The final step is a filter, matching the other flow tables. The earlier
# mutate(row_count > 100) kept every row and appended an unnamed logical
# column to the data passed to chordDiagram(). The result is also ungrouped
# so a plain (from, to, value) table is handed on.
moves <- read_csv("migration_map.csv") %>%
  mutate(
    avg_eal_tercile_old = ifelse(
      is.na(avg_eal_tercile_old), "Outside", avg_eal_tercile_old
    ),
    avg_eal_tercile_new = ifelse(
      is.na(avg_eal_tercile_new), "Outside", avg_eal_tercile_new
    )
  ) %>%
  group_by(avg_eal_tercile_old, avg_eal_tercile_new) %>%
  summarise(row_count = sum(row_count), .groups = "drop") %>%
  filter(row_count > 100)
## Rows: 8679 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): cbsa_move, avg_eal_tercile_old, avg_eal_tercile_new, tercile_move, ...
## dbl (7): CBSAFP_old, CBSAFP_new, row_count, lat_old, lon_old, lat_new, lon_new
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'avg_eal_tercile_old'. You can override using the `.groups` argument.
# Directional chord diagram of the tercile-level flows, then a heading on the
# finished plot.
chordDiagram(
  x = moves,
  directional = 1,
  direction.type = c("diffHeight", "arrows"),
  link.arr.type = "big.arrow"
)
title(main = "Risk level to Risk level", cex.main = 2)