# devtools::install_github('walkerke/networkD3@fixes/arrow')
# Credit for the fix goes to Tim Bock/Numbers International; I'm just
# combining a couple of PRs.
library(readxl)
library(tidyr)
library(dplyr)
library(networkD3)
# Data from http://www.acf.hhs.gov/programs/orr/resource/fiscal-year-2014-refugee-arrivals
# Read the arrivals workbook, skipping its first row, and label the first
# column "state".
ref <- read_excel("fy14_arrivals_by_state_nationality.xls", skip = 1)
names(ref)[1] <- "state"

# Keep only the first 49 rows (presumably the rows below are totals or
# footnotes -- TODO confirm against the workbook).
ref <- ref[seq_len(49), ]

# Reshape to long form (one row per state/country pair with its arrival
# count), drop the grand-total column, then keep only flows of more than
# 100 arrivals from the five nationalities of interest.
ref2 <- ref %>%
  gather(country, total, AFGHANISTAN:ZIMBABWE) %>%
  select(-`Grand Total`) %>%
  mutate(country = as.character(country)) %>%
  filter(country %in% c("SOMALIA", "BURMA", "BHUTAN", "IRAQ", "DEM. REP. CONGO")) %>%
  filter(total > 100)
# Prep for Sankey diagram
# Node table: the unique countries come first, followed by the unique states.
name_vec <- c(unique(ref2$country), unique(ref2$state))

# Node ids are zero-based, as sankeyNetwork's Source/Target columns must be.
# They are derived from the number of nodes actually present rather than a
# hard-coded 0:39, so a change in the filtered data (different countries,
# a different threshold, a new fiscal year) no longer breaks data.frame().
# stringsAsFactors = FALSE keeps `name` as character on R < 4.0 so the joins
# below compare like with like.
name_df <- data.frame(
  name = name_vec,
  id = seq_along(name_vec) - 1L,
  stringsAsFactors = FALSE
)

# Attach the node id of each link's state (target) and country (source).
# `id` is renamed after each join so the second join can add it again.
ref3 <- ref2 %>%
  left_join(name_df, by = c("state" = "name")) %>%
  rename(state_id = id) %>%
  left_join(name_df, by = c("country" = "name")) %>%
  rename(country_id = id)

# Draw flows from country of origin to receiving state, sized by arrivals.
# NOTE(review): "d3.scale.category10()" is D3 v3 syntax; newer networkD3
# releases appear to expect a D3 v4+ scale wrapped in JS() -- confirm against
# the installed version before upgrading.
sankeyNetwork(
  Links = ref3, Nodes = name_df, Source = "country_id",
  Target = "state_id", Value = "total", NodeID = "name",
  fontSize = 12, nodeWidth = 30, colourScale = "d3.scale.category10()"
)