Abstract
This document provides the methodology in R to estimate the density
of WIC stores for each state in the US, plus Porto Rico and DC. It
explains the mechanims to fetch census data, census boundaries, reading
RUCA code and ADI value for each census boundaries. Some data was
provided by Beth Racine team (location of WIC stores), but most data was
downloaded from internet locally or interactively in R using the
tidycensus and tigris libraries. Because some
of the geographic units do not match (e.g. blockgroups are smaller than
census tracts), and information is only available at certain scales,
some geospatial processing is needed (such as spatial overlay). The
output are a table summarizing the density of WIC stores per states, but
also cross tabluated by high and low ADI, and also by
rurality/urbanicity. joining information prepare maps that shows the
density of WIC stores, statewide. The addition of maps showing spatial
clustering of states (by means of the LISA statistic) is a powerful
addition. We still need to (1) prepare maps that include Porto Rico,
Alaska and Hawai - this can be done in ArcGIS, (2) add the source of
where the data came from and (3) export (and share) the results in a
CSV.
rm(list=ls(all=TRUE))
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir = "/Users/delmelle/UNC Charlotte Dropbox/Eric Delmelle/ACTIVE-PROJECTS/WIC/June19B/")
if (!require('dplyr')) install.packages('dplyr'); library('dplyr')
if (!require('ggplot2')) install.packages('ggplot2'); library('ggplot2')
if (!require('purrr')) install.packages('purrr'); library('purrr')
if (!require('RColorBrewer')) install.packages('RColorBrewer'); library('RColorBrewer')
if (!require('readr')) install.packages('readr'); library('readr')
if (!require('rmapshaper')) install.packages('rmapshaper'); library('rmapshaper')
if (!require('sf')) install.packages('sf'); library('sf')
if (!require('spdep')) install.packages('spdep'); library('spdep')
if (!require('stringr')) install.packages('stringr'); library('stringr')
if (!require('tidycensus')) install.packages('tidycensus'); library('tidycensus')
if (!require('tidyverse')) install.packages('tidyverse'); library('tidyverse')
if (!require('tigris')) install.packages('tigris'); library('tigris')
census_api_key("bd48ccbbe80006071141a76230184957dd248abe", install = TRUE, overwrite = TRUE)
readRenviron("~/.Renviron")
state_list <- c(state.abb, "DC", "PR")
quiet_get_acs <- quietly(get_acs)
acs_pop <- map_dfr(
state_list,
~ quiet_get_acs(
geography = "block group",
variables = "B01003_001",
state = .x,
year = 2022,
survey = "acs5",
geometry = FALSE,
output = "wide"
)$result
)
block_groups_sf <- map_dfr(
state_list,
~ {
bg <- suppressWarnings(suppressMessages(
block_groups(state = .x, year = 2022, class = "sf")
))
bg %>% left_join(acs_pop, by = "GEOID")
}
)
block_groups_sf <- block_groups_sf %>%
rename(total_pop = B01003_001E)
block_groups_sf <- block_groups_sf %>%
rename(
MoE = B01003_001M
)
ADI <- read_csv("US_2023_ADI_Census_Block_Group.csv", col_types = cols(.default = "c"))
ADI <- ADI[, -1]
joined_data <- block_groups_sf %>%
left_join(ADI, by = c("GEOID" = "FIPS"))
rm(block_groups_sf)
rural or
urban# Load urban area shapefile (adjust the path as needed)
urban_areas <- st_read("tl_2020_us_uac20.shp")
# Ensure CRS is the same before spatial join
urban_areas <- st_transform(urban_areas, st_crs(joined_data))
# Spatially tag block groups as urban if they intersect with any urban area polygon
joined_data <- joined_data %>%
mutate(area_type = if_else(
lengths(st_intersects(geometry, urban_areas)) > 0,
"urban", "rural"
))
# List of unique state FIPS
state_fips_list <- unique(substr(joined_data$GEOID, 1, 2))
# Split and simplify by state, then recombine
joined_data_simplified <- map_dfr(state_fips_list, function(fips) {
state_data <- joined_data %>% filter(substr(GEOID, 1, 2) == fips)
rmapshaper::ms_simplify(state_data, keep = 0.02, keep_shapes = TRUE)
})
ggplot(joined_data_simplified) +
geom_sf(aes(fill = area_type), color = NA) +
scale_fill_manual(values = c("urban" = "steelblue", "rural" = "forestgreen")) +
theme_void() +
labs(title = "Urban vs Rural Classification (Simplified Geometry)")
# Filter for the 48 contiguous states
contiguous_states <- setdiff(sprintf("%02d", 1:56), c("02", "15", "72", "78", "60", "66", "69", "74", "78", "11")) # remove AK, HI, PR, DC, territories
joined_data_48 <- joined_data_simplified %>%
filter(STATEFP %in% contiguous_states)
# Plot
ggplot(joined_data_48) +
geom_sf(aes(fill = area_type), color = NA) +
scale_fill_manual(values = c("urban" = "steelblue", "rural" = "forestgreen")) +
theme_void() +
labs(title = "Urban vs Rural Classification (48 Contiguous States)")
# Step 1: Load WIC store shapefile
wic_points <- st_read("WIC_stores_USA.shp")
# Step 2: Reproject WIC store points to match block group CRS
wic_points <- st_transform(wic_points, st_crs(joined_data))
# Step 3: Create ADI category if not already present
joined_data <- joined_data %>%
mutate(
adi_category = case_when(
ADI_STATERNK %in% as.character(6:10) ~ "highADI",
ADI_STATERNK %in% as.character(1:5) ~ "lowADI",
TRUE ~ NA_character_
)
)
# Step 4: Spatial join — assign each WIC store the block group’s STATEFP, ADI, and area_type
wic_with_data <- st_join(
wic_points,
joined_data %>% select(GEOID, STATEFP, adi_category, area_type),
left = FALSE
)
# Step 5: Optional check — number of stores with complete classification
table(wic_with_data$adi_category, wic_with_data$area_type, useNA = "ifany")
# Step 1: Count WIC stores by STATEFP, adi_category, and area_type
wic_counts <- wic_with_data %>%
st_drop_geometry() %>%
group_by(STATEFP, adi_category, area_type) %>%
summarise(total_wic = n(), .groups = "drop")
# Step 2: Compute total population by STATEFP, adi_category, and area_type
pop_counts <- joined_data %>%
st_drop_geometry() %>%
filter(!is.na(adi_category), !is.na(area_type)) %>%
group_by(STATEFP, adi_category, area_type) %>%
summarise(total_pop = sum(as.numeric(total_pop), na.rm = TRUE), .groups = "drop")
# Step 3: Merge WIC store counts with population counts
wic_summary <- left_join(wic_counts, pop_counts, by = c("STATEFP", "adi_category", "area_type")) %>%
mutate(WIC_per_10k = 10000 * total_wic / total_pop)
# Step 4: Create statewide summary (combining urban + rural)
wic_counts_statewide <- wic_with_data %>%
st_drop_geometry() %>%
filter(!is.na(adi_category)) %>%
group_by(STATEFP, adi_category) %>%
summarise(total_wic = n(), .groups = "drop")
pop_counts_statewide <- joined_data %>%
st_drop_geometry() %>%
filter(!is.na(adi_category)) %>%
group_by(STATEFP, adi_category) %>%
summarise(total_pop = sum(as.numeric(total_pop), na.rm = TRUE), .groups = "drop")
wic_summary_statewide <- left_join(wic_counts_statewide, pop_counts_statewide, by = c("STATEFP", "adi_category")) %>%
mutate(
WIC_per_10k = 10000 * total_wic / total_pop,
area_type = "statewide"
)
# Step 5: Combine both tables
wic_summary_combined <- bind_rows(wic_summary, wic_summary_statewide) %>%
select(STATEFP, adi_category, area_type, total_wic, total_pop, WIC_per_10k)
# Step 6: View results
print(wic_summary_combined)
# Optional: Save to CSV
write_csv(wic_summary_combined, "wic_summary_by_state_ADI_area.csv")
# Load state geometries (2022 TIGER, 20m resolution)
states_sf <- states(cb = TRUE, resolution = "20m", year = 2022) %>%
st_transform(4326)
# Filter to contiguous US + DC
contig_states <- states_sf %>%
filter(!STUSPS %in% c("AK", "HI", "PR"))
# Make sure STATEFP in summary is character
wic_summary_statewide$STATEFP <- as.character(wic_summary_statewide$STATEFP)
# Join geometry to summary data
wic_summary_geom <- wic_summary_statewide %>%
left_join(contig_states %>% select(STATEFP, geometry), by = "STATEFP") %>%
st_as_sf()
# Filter only "statewide" area_type
wic_summary_geom <- wic_summary_geom %>%
filter(area_type == "statewide")
# Plot faceted map by ADI category
ggplot(wic_summary_geom) +
geom_sf(aes(fill = WIC_per_10k), color = "white", size = 0.2) +
scale_fill_distiller(
palette = "YlGnBu", direction = 1, na.value = "grey90",
name = "WIC Stores per 10k"
) +
facet_wrap(~adi_category, ncol = 2) +
theme_void() +
theme(
strip.text = element_text(size = 12, margin = margin(b = 2)), # Move label text up
strip.background = element_blank(), # Remove background strip
strip.placement = "outside", # Place strip outside the panel
panel.spacing = unit(1.5, "lines"), # Add more space between panels
legend.position = "right"
)
options(tigris_use_cache = TRUE)
# Load US states geometry
states_sf <- states(cb = TRUE, resolution = "20m", year = 2022) %>%
st_transform(4326) %>%
filter(!STUSPS %in% c("AK", "HI", "PR")) # Only 48 + DC
# Ensure STATEFP is character
states_sf$STATEFP <- as.character(states_sf$STATEFP)
wic_summary_combined$STATEFP <- as.character(wic_summary_combined$STATEFP)
# Join state geometries
wic_summary_detailed <- wic_summary_combined %>%
filter(area_type %in% c("urban", "rural"),
adi_category %in% c("highADI", "lowADI")) %>%
mutate(facet_group = paste(area_type, adi_category, sep = " - ")) %>%
left_join(states_sf %>% select(STATEFP, geometry), by = "STATEFP") %>%
st_as_sf()
# Plot faceted map
ggplot(wic_summary_detailed) +
geom_sf(aes(fill = WIC_per_10k), color = "white", size = 0.2) +
scale_fill_distiller(palette = "YlGnBu", direction = 1, na.value = "grey90",
name = "WIC Stores per 10k") +
facet_wrap(~facet_group, ncol = 2) +
theme_void() +
theme(
strip.text = element_text(size = 12, face = "bold", margin = margin(b = 10)),
strip.placement = "outside",
legend.position = "right"
)
# 1. Load geometry of states (48 + DC)
states_sf <- states(cb = TRUE, resolution = "20m", year = 2022) %>%
st_transform(4326) %>%
filter(!STUSPS %in% c("AK", "HI", "PR"))
# 2. Join geometry to WIC data
wic_with_geom <- wic_summary_statewide %>%
filter(area_type == "statewide") %>%
left_join(states_sf %>% select(STATEFP, geometry), by = "STATEFP") %>%
st_as_sf() %>%
filter(!is.na(WIC_per_10k)) %>%
filter(!st_is_empty(geometry)) %>%
st_make_valid()
# 3. Create neighbors and weights
neighbors <- poly2nb(wic_with_geom, queen = TRUE)
weights <- nb2listw(neighbors, style = "W", zero.policy = TRUE)
# 4. Global Moran
print(moran.test(wic_with_geom$WIC_per_10k, weights, zero.policy = TRUE))
# 5. Local Moran and clusters
local_moran <- localmoran(wic_with_geom$WIC_per_10k, weights)
wic_with_geom <- wic_with_geom %>%
mutate(
local_I = local_moran[, "Ii"],
local_p = local_moran[, "Pr(z != E(Ii))"],
lagged_value = lag.listw(weights, WIC_per_10k),
lisa_cluster = case_when(
WIC_per_10k > mean(WIC_per_10k, na.rm = TRUE) & lagged_value > mean(WIC_per_10k, na.rm = TRUE) & local_p <= 0.05 ~ "High-High",
WIC_per_10k < mean(WIC_per_10k, na.rm = TRUE) & lagged_value < mean(WIC_per_10k, na.rm = TRUE) & local_p <= 0.05 ~ "Low-Low",
WIC_per_10k > mean(WIC_per_10k, na.rm = TRUE) & lagged_value < mean(WIC_per_10k, na.rm = TRUE) & local_p <= 0.05 ~ "High-Low",
WIC_per_10k < mean(WIC_per_10k, na.rm = TRUE) & lagged_value > mean(WIC_per_10k, na.rm = TRUE) & local_p <= 0.05 ~ "Low-High",
TRUE ~ "Not Significant"
)
)
# 6. LISA Map
ggplot(wic_with_geom) +
geom_sf(aes(fill = lisa_cluster), color = "white", size = 0.2) +
scale_fill_manual(
values = c("High-High" = "#B2182B", "Low-Low" = "#2166AC",
"High-Low" = "#EF8A62", "Low-High" = "#67A9CF",
"Not Significant" = "grey80"),
name = "LISA Cluster"
) +
labs(title = "Local Indicators of Spatial Association (LISA)",
subtitle = "WIC Stores per 10,000 People (Statewide)",
caption = "48 Contiguous States + DC") +
theme_minimal()
# 1. Load state geometry (48 + DC)
states_sf <- states(cb = TRUE, resolution = "20m", year = 2022) %>%
st_transform(4326) %>%
filter(!STUSPS %in% c("AK", "HI", "PR"))
# 2. Join geometries to the detailed summary (assumes `wic_summary_detailed` exists)
wic_detailed_geom <- states_sf %>%
left_join(st_drop_geometry(wic_summary_detailed), by = "STATEFP") %>%
filter(
area_type %in% c("urban", "rural"),
adi_category %in% c("highADI", "lowADI"),
!is.na(WIC_per_10k),
!st_is_empty(geometry)
) %>%
st_make_valid() %>%
mutate(group = paste(area_type, adi_category, sep = "-"))
# 3. Function to compute LISA
compute_lisa <- function(df) {
neighbors <- poly2nb(df, queen = TRUE)
weights <- nb2listw(neighbors, style = "W", zero.policy = TRUE)
local_moran <- localmoran(df$WIC_per_10k, weights)
df %>%
mutate(
local_I = local_moran[, "Ii"],
local_p = local_moran[, "Pr(z != E(Ii))"],
lagged_value = lag.listw(weights, df$WIC_per_10k),
lisa_cluster = case_when(
WIC_per_10k > mean(WIC_per_10k, na.rm = TRUE) &
lagged_value > mean(WIC_per_10k, na.rm = TRUE) &
local_p <= 0.05 ~ "High-High",
WIC_per_10k < mean(WIC_per_10k, na.rm = TRUE) &
lagged_value < mean(WIC_per_10k, na.rm = TRUE) &
local_p <= 0.05 ~ "Low-Low",
WIC_per_10k > mean(WIC_per_10k, na.rm = TRUE) &
lagged_value < mean(WIC_per_10k, na.rm = TRUE) &
local_p <= 0.05 ~ "High-Low",
WIC_per_10k < mean(WIC_per_10k, na.rm = TRUE) &
lagged_value > mean(WIC_per_10k, na.rm = TRUE) &
local_p <= 0.05 ~ "Low-High",
TRUE ~ "Not Significant"
),
group = unique(df$group)
)
}
# 4. Apply function by group
wic_lisa_all <- wic_detailed_geom %>%
group_split(group, .keep = TRUE) %>%
map_dfr(compute_lisa)
# 5. Faceted map
ggplot(wic_lisa_all) +
geom_sf(aes(fill = lisa_cluster), color = "white", size = 0.2) +
facet_wrap(~group, ncol = 2) +
scale_fill_manual(
values = c(
"High-High" = "#B2182B",
"Low-Low" = "#2166AC",
"High-Low" = "#EF8A62",
"Low-High" = "#67A9CF",
"Not Significant" = "grey80"
),
name = "LISA Cluster"
) +
labs(
title = "Local Moran's I – WIC Stores per 10k",
subtitle = "By Area Type and ADI Category",
caption = "Contiguous US + DC"
) +
theme_minimal() +
theme(
strip.text = element_text(face = "bold", size = 12),
panel.grid = element_blank(), # Remove gridlines
axis.text = element_blank(), # Remove axis labels
axis.ticks = element_blank(), # Remove axis ticks
axis.title = element_blank() # Remove axis titles
)