Testing out the ipumsr package
Internal migration flows between provinces in China 1995-2000
Data from China’s 2000 Census Data from IPUMS International, small numbers suppressed to maintain confidentiality. Map made with the ipumsr package and several other R packages, see below for the code and the MIGCN variable page for more details about the definition.
suppressPackageStartupMessages({
library(ipumsr)
library(dplyr)
library(tidyr)
library(leaflet)
library(sf)
library(purrr)
library(geosphere)
library(rmapshaper)
library(htmltools)
library(scales)
})
# Calculate net migration by province from IPUMS data ----
# IPUMS Extract from https://international.ipums.org/
# sample: China 2000
# variables: AGE, GEO1_CN2000, MIGCN and PERWT
ddi <- read_ipums_ddi("ipumsi_00019.xml")
data <- read_ipums_micro(ddi, verbose = FALSE)
data <- data %>%
filter(AGE >= 5) %>%
mutate_at(vars(GEO1_CN2000, MIGCN), zap_ipums_attributes)
migration_by_province_in <- data %>%
filter(GEO1_CN2000 != MIGCN) %>%
group_by(YEAR, GEO1START = GEO1_CN2000, GEO1END = MIGCN) %>%
summarize(N_IN = n(), WT_N_IN = sum(PERWT))
migration_by_province_out <- data %>%
filter(GEO1_CN2000 != MIGCN) %>%
group_by(YEAR, GEO1START = MIGCN, GEO1END = GEO1_CN2000) %>%
summarize(N_OUT = n(), WT_N_OUT = sum(PERWT))
migration_by_province <- full_join(
migration_by_province_in,
migration_by_province_out,
by = c("YEAR", "GEO1START", "GEO1END")
) %>%
mutate_at(vars(N_IN, N_OUT, WT_N_IN, WT_N_OUT), ~ifelse(is.na(.), 0, .)) %>%
mutate(
N_ABS = N_IN + N_OUT,
N_NET = N_IN - N_OUT,
WT_N_NET = WT_N_IN - WT_N_OUT
) %>%
filter(N_ABS >= 50) %>%
ungroup()
# Create map objects ----
# China 2000 Shape File from:
# https://international.ipums.org/international/gis_yrspecific_1st.shtml
proj4_china <- "+proj=lcc +lat_1=18 +lat_2=24 +lat_0=21 +lon_0=114 +x_0=500000 +y_0=500000 +ellps=WGS72 +towgs84=0,0,1.9,0,0,0.814,-0.38 +units=m +no_defs "
proj4_latlon <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
provinces <- read_ipums_sf("geo1_cn2000.zip", verbose = FALSE) %>%
ms_simplify(keep = 0.01)
prov_point <- provinces %>%
st_transform(proj4_china) %>%
st_point_on_surface() %>%
st_transform(proj4_latlon) %>%
select(IPUM2000, ADMIN_NAME)
prov_lines <- prov_point %>%
st_transform(proj4_latlon) %>%
crossing(., .) %>%
as_data_frame() %>%
rename(GEO1START = IPUM2000, NAMESTART = ADMIN_NAME, GEO1END = IPUM20001, NAMEEND = ADMIN_NAME1) %>%
filter(GEO1START != GEO1END) %>%
mutate(
geometry = gcIntermediate(as(geometry, "Spatial"), as(geometry1, "Spatial")) %>%
map(st_linestring) %>%
st_sfc()
) %>%
select(-geometry1) %>%
st_as_sf()
prov_lines_data <- ipums_shape_inner_join(
migration_by_province,
prov_lines,
by = c("GEO1START", "GEO1END"),
verbose = FALSE
) %>%
mutate(
ANON = N_IN < 20 | N_OUT < 20,
WT_N_IN = ifelse(ANON, NA, WT_N_IN),
WT_N_OUT = ifelse(ANON, NA, WT_N_OUT),
LINE_WIDTH = sqrt(abs(WT_N_NET)) / 100,
COLOR = ifelse(WT_N_NET < 0, "#f1a340", "#998ec3"),
LABEL = map(paste0(
NAMESTART, " - ", NAMEEND,
"<br><strong>In:</strong> ", comma(WT_N_IN),
"<br><strong>Out:</strong> ", comma(WT_N_OUT),
"<br><strong>Net:</strong> ", comma(WT_N_NET)
), HTML)
)
# Create Map -----
# Add base layer from ESRI and polygons from IPUMS
leaflet_map <- leaflet(provinces, height = 550, width = 800) %>%
addProviderTiles(providers$Esri.WorldGrayCanvas) %>%
addPolygons(
color = "black", weight = 1, smoothFactor = 0.5,
opacity = 1.0, fillOpacity = 0.1,
highlightOptions = highlightOptions(color = "black", weight = 2),
label = ~ADMIN_NAME,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto"
)
)
# Add lines connecting each province
all_provs <- sort(unique(prov_lines_data$NAMESTART))
walk(all_provs, function(prov_name) {
leaflet_map <<- leaflet_map %>% addPolylines(
data = prov_lines_data %>% filter(NAMESTART == prov_name),
color = ~COLOR, opacity = 0.8, weight = ~LINE_WIDTH,
label = ~LABEL, group = prov_name,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto"
)
) %>% hideGroup(prov_name)
})
# Add Control items and legend
leaflet_map <- leaflet_map %>%
addLayersControl(
baseGroups = all_provs,
options = layersControlOptions(collapsed = FALSE)
) %>%
showGroup("Anhui") %>%
addLegend(
position = "bottomleft", colors = c("#f1a340", "#998ec3"),
labels = c("Out", "In"), opacity = 1,
title = "Direction of net migration"
)
leaflet_map