Preambule

Set up environment, and indicate where the data we need is stored

rm(list=ls(all=TRUE))
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir = "/Users/delmelle/UNC Charlotte Dropbox/Eric Delmelle/ACTIVE-PROJECTS/WIC/June19B/") 

Install necessary libraries

  • we use data for data manipulation, spatial operations and plotting.
  • we nee libraries that contain functions to do just that.
if (!require('dplyr')) install.packages('dplyr'); library('dplyr')
if (!require('ggplot2')) install.packages('ggplot2'); library('ggplot2')
if (!require('purrr')) install.packages('purrr'); library('purrr')
if (!require('RColorBrewer')) install.packages('RColorBrewer'); library('RColorBrewer')
if (!require('readr')) install.packages('readr'); library('readr')
if (!require('rmapshaper')) install.packages('rmapshaper'); library('rmapshaper')
if (!require('sf')) install.packages('sf'); library('sf')
if (!require('spdep')) install.packages('spdep'); library('spdep')
if (!require('stringr')) install.packages('stringr'); library('stringr')
if (!require('tidycensus')) install.packages('tidycensus'); library('tidycensus')
if (!require('tidyverse')) install.packages('tidyverse'); library('tidyverse')
if (!require('tigris')) install.packages('tigris'); library('tigris')

Set your Census API Key

  • to get access to the census, we need a Key.
census_api_key("bd48ccbbe80006071141a76230184957dd248abe", install = TRUE, overwrite = TRUE)
readRenviron("~/.Renviron")

List of all states + DC + Puerto Rico

  • list the states for which we will need data for.
  • note that DC and Porto Rico are included.
  • territories like Guam, Virgin Islands, etc… are not included
state_list <- c(state.abb, "DC", "PR")

DATA PREPARATION

Load ACS 5-year estimates: Total Population (B01003_001)

  • get population data for each block group in 2022.
  • this data is not in a shapefile format.
  • later, we will download the shapefile and join
quiet_get_acs <- quietly(get_acs)

acs_pop <- map_dfr(
  state_list,
  ~ quiet_get_acs(
      geography = "block group",
      variables = "B01003_001",
      state = .x,
      year = 2022,
      survey = "acs5",
      geometry = FALSE,
      output = "wide"
    )$result
)

Download 2022 TIGER block groups and join population estimates

  • Now we get spatial data for each blockgroup.
  • This can take some time, depending on your connection.
  • We get the geographies of the block groups (shapefile)
  • We then use a left join this data to the shapefile
block_groups_sf <- map_dfr(
  state_list,
  ~ {
    bg <- suppressWarnings(suppressMessages(
      block_groups(state = .x, year = 2022, class = "sf")
    ))
    bg %>% left_join(acs_pop, by = "GEOID")
  }
)

Rename population estimate

  • for easier interpretation, we rename the population field
  • that field is called “B01003_001E”; ‘E’ is for estimate
block_groups_sf <- block_groups_sf %>%
  rename(total_pop = B01003_001E)
block_groups_sf <- block_groups_sf %>%
  rename(
    MoE = B01003_001M
  )

Load ADI file for 2023

  • we now load the ADI file for the entire US.
  • because it is for 2023, it will match easily the FIPS of the census block groups
  • remove the first column which is not useful
ADI <- read_csv("US_2023_ADI_Census_Block_Group.csv", col_types = cols(.default = "c"))
ADI <- ADI[, -1]

Join ADI ranking to blockgroup

  • we join ADI information to each block group, based on the tract ID!
  • this may leave a few block groups not assigned a proper ADI
joined_data <- block_groups_sf %>%
  left_join(ADI, by = c("GEOID" = "FIPS"))
rm(block_groups_sf)

Add Urban/Rural Classification via Spatial Intersection

  • here, we use the 2020 tract level urban area shapefile
  • we make sure the coordinate system is the same for both
  • we compute a spatial intersection
  • each block group is assigned rural or urban
# Load urban area shapefile (adjust the path as needed)
urban_areas <- st_read("tl_2020_us_uac20.shp")

# Ensure CRS is the same before spatial join
urban_areas <- st_transform(urban_areas, st_crs(joined_data))

# Spatially tag block groups as urban if they intersect with any urban area polygon
joined_data <- joined_data %>%
  mutate(area_type = if_else(
    lengths(st_intersects(geometry, urban_areas)) > 0,
    "urban", "rural"
  ))

For visualization: Simplify in Chunks by State

  • (optional) - VISUALIZATION ONLY
  • we need to simplify the geometry of the block groups
  • this is necessary for speeding up the visualization
  • note that it is done after the intersection is completely
# List of unique state FIPS
state_fips_list <- unique(substr(joined_data$GEOID, 1, 2))

# Split and simplify by state, then recombine
joined_data_simplified <- map_dfr(state_fips_list, function(fips) {
  state_data <- joined_data %>% filter(substr(GEOID, 1, 2) == fips)
  rmapshaper::ms_simplify(state_data, keep = 0.02, keep_shapes = TRUE)
})

SANITY CHECK

  • we can check visually if the join was well done
  • this still take a while
ggplot(joined_data_simplified) +
  geom_sf(aes(fill = area_type), color = NA) +
  scale_fill_manual(values = c("urban" = "steelblue", "rural" = "forestgreen")) +
  theme_void() +
  labs(title = "Urban vs Rural Classification (Simplified Geometry)")

  • and also for the 48 states
# Filter for the 48 contiguous states
contiguous_states <- setdiff(sprintf("%02d", 1:56), c("02", "15", "72", "78", "60", "66", "69", "74", "78", "11"))  # remove AK, HI, PR, DC, territories

joined_data_48 <- joined_data_simplified %>%
  filter(STATEFP %in% contiguous_states)

# Plot
ggplot(joined_data_48) +
  geom_sf(aes(fill = area_type), color = NA) +
  scale_fill_manual(values = c("urban" = "steelblue", "rural" = "forestgreen")) +
  theme_void() +
  labs(title = "Urban vs Rural Classification (48 Contiguous States)")

LOAD WIC STORES and OTHER THINGS!

  • Here we load the dataset that contains the locations of the WIC store
  • we reproject to make sure that any spatial intersection (point in polygon) follows the same projection
  • we spatially join the WIC stores to the block groups
# Step 1: Load WIC store shapefile
wic_points <- st_read("WIC_stores_USA.shp")

# Step 2: Reproject WIC store points to match block group CRS
wic_points <- st_transform(wic_points, st_crs(joined_data))

# Step 3: Create ADI category if not already present
joined_data <- joined_data %>%
  mutate(
    adi_category = case_when(
      ADI_STATERNK %in% as.character(6:10) ~ "highADI",
      ADI_STATERNK %in% as.character(1:5) ~ "lowADI",
      TRUE ~ NA_character_
    )
  )

# Step 4: Spatial join — assign each WIC store the block group’s STATEFP, ADI, and area_type
wic_with_data <- st_join(
  wic_points,
  joined_data %>% select(GEOID, STATEFP, adi_category, area_type),
  left = FALSE
)

# Step 5: Optional check — number of stores with complete classification
table(wic_with_data$adi_category, wic_with_data$area_type, useNA = "ifany")

Count number of WIC stores in each gblock group

  • Here we count the number of WIC per 10,000k and per category
  • Categories include urban/rural, and high/low ADI
# Step 1: Count WIC stores by STATEFP, adi_category, and area_type
wic_counts <- wic_with_data %>%
  st_drop_geometry() %>%
  group_by(STATEFP, adi_category, area_type) %>%
  summarise(total_wic = n(), .groups = "drop")

# Step 2: Compute total population by STATEFP, adi_category, and area_type
pop_counts <- joined_data %>%
  st_drop_geometry() %>%
  filter(!is.na(adi_category), !is.na(area_type)) %>%
  group_by(STATEFP, adi_category, area_type) %>%
  summarise(total_pop = sum(as.numeric(total_pop), na.rm = TRUE), .groups = "drop")

# Step 3: Merge WIC store counts with population counts
wic_summary <- left_join(wic_counts, pop_counts, by = c("STATEFP", "adi_category", "area_type")) %>%
  mutate(WIC_per_10k = 10000 * total_wic / total_pop)

# Step 4: Create statewide summary (combining urban + rural)
wic_counts_statewide <- wic_with_data %>%
  st_drop_geometry() %>%
  filter(!is.na(adi_category)) %>%
  group_by(STATEFP, adi_category) %>%
  summarise(total_wic = n(), .groups = "drop")

pop_counts_statewide <- joined_data %>%
  st_drop_geometry() %>%
  filter(!is.na(adi_category)) %>%
  group_by(STATEFP, adi_category) %>%
  summarise(total_pop = sum(as.numeric(total_pop), na.rm = TRUE), .groups = "drop")

wic_summary_statewide <- left_join(wic_counts_statewide, pop_counts_statewide, by = c("STATEFP", "adi_category")) %>%
  mutate(
    WIC_per_10k = 10000 * total_wic / total_pop,
    area_type = "statewide"
  )

# Step 5: Combine both tables
wic_summary_combined <- bind_rows(wic_summary, wic_summary_statewide) %>%
  select(STATEFP, adi_category, area_type, total_wic, total_pop, WIC_per_10k)

# Step 6: View results
print(wic_summary_combined)

# Optional: Save to CSV
write_csv(wic_summary_combined, "wic_summary_by_state_ADI_area.csv")

SUMMARIZE RESULTS BY STATES

  • this code provide a summary of the measures (# wic) per state
  • they are then plotted on a map (48 states only + DC)
# Load state geometries (2022 TIGER, 20m resolution)
states_sf <- states(cb = TRUE, resolution = "20m", year = 2022) %>%
  st_transform(4326)

# Filter to contiguous US + DC
contig_states <- states_sf %>%
  filter(!STUSPS %in% c("AK", "HI", "PR"))

# Make sure STATEFP in summary is character
wic_summary_statewide$STATEFP <- as.character(wic_summary_statewide$STATEFP)

# Join geometry to summary data
wic_summary_geom <- wic_summary_statewide %>%
  left_join(contig_states %>% select(STATEFP, geometry), by = "STATEFP") %>%
  st_as_sf()

# Filter only "statewide" area_type
wic_summary_geom <- wic_summary_geom %>%
  filter(area_type == "statewide")

# Plot faceted map by ADI category
ggplot(wic_summary_geom) +
  geom_sf(aes(fill = WIC_per_10k), color = "white", size = 0.2) +
  scale_fill_distiller(
    palette = "YlGnBu", direction = 1, na.value = "grey90",
    name = "WIC Stores per 10k"
  ) +
  facet_wrap(~adi_category, ncol = 2) +
  theme_void() +
  theme(
    strip.text = element_text(size = 12, margin = margin(b = 2)),  # Move label text up
    strip.background = element_blank(),                           # Remove background strip
    strip.placement = "outside",                                  # Place strip outside the panel
    panel.spacing = unit(1.5, "lines"),                           # Add more space between panels
    legend.position = "right"
  )

CHOROPLETH MAP for each state & by category (2*2)

  • urban low ADI
  • urban high ADI
  • rural low ADi
  • rural high ADI
  • we use Color Brewer and we make sure each map follows the same legend for comparison
options(tigris_use_cache = TRUE)

# Load US states geometry
states_sf <- states(cb = TRUE, resolution = "20m", year = 2022) %>%
  st_transform(4326) %>%
  filter(!STUSPS %in% c("AK", "HI", "PR"))  # Only 48 + DC

# Ensure STATEFP is character
states_sf$STATEFP <- as.character(states_sf$STATEFP)
wic_summary_combined$STATEFP <- as.character(wic_summary_combined$STATEFP)

# Join state geometries
wic_summary_detailed <- wic_summary_combined %>%
  filter(area_type %in% c("urban", "rural"),
         adi_category %in% c("highADI", "lowADI")) %>%
  mutate(facet_group = paste(area_type, adi_category, sep = " - ")) %>%
  left_join(states_sf %>% select(STATEFP, geometry), by = "STATEFP") %>%
  st_as_sf()

# Plot faceted map
ggplot(wic_summary_detailed) +
  geom_sf(aes(fill = WIC_per_10k), color = "white", size = 0.2) +
  scale_fill_distiller(palette = "YlGnBu", direction = 1, na.value = "grey90",
                       name = "WIC Stores per 10k") +
  facet_wrap(~facet_group, ncol = 2) +
  theme_void() +
  theme(
    strip.text = element_text(size = 12, face = "bold", margin = margin(b = 10)),
    strip.placement = "outside",
    legend.position = "right"
  )

LISA for each state

  • based on the number of WIC stores per 10,000K
  • regardless of urban, rural, lowADI or highADI
  • objective is to evaluate whether some states experience high levels of WIC stores, and how they do in regards to neighborhing states.
# 1. Load geometry of states (48 + DC)
states_sf <- states(cb = TRUE, resolution = "20m", year = 2022) %>%
  st_transform(4326) %>%
  filter(!STUSPS %in% c("AK", "HI", "PR"))

# 2. Join geometry to WIC data
wic_with_geom <- wic_summary_statewide %>%
  filter(area_type == "statewide") %>%
  left_join(states_sf %>% select(STATEFP, geometry), by = "STATEFP") %>%
  st_as_sf() %>%
  filter(!is.na(WIC_per_10k)) %>%
  filter(!st_is_empty(geometry)) %>%
  st_make_valid()

# 3. Create neighbors and weights
neighbors <- poly2nb(wic_with_geom, queen = TRUE)
weights <- nb2listw(neighbors, style = "W", zero.policy = TRUE)

# 4. Global Moran
print(moran.test(wic_with_geom$WIC_per_10k, weights, zero.policy = TRUE))

# 5. Local Moran and clusters
local_moran <- localmoran(wic_with_geom$WIC_per_10k, weights)
wic_with_geom <- wic_with_geom %>%
  mutate(
    local_I = local_moran[, "Ii"],
    local_p = local_moran[, "Pr(z != E(Ii))"],
    lagged_value = lag.listw(weights, WIC_per_10k),
    lisa_cluster = case_when(
      WIC_per_10k > mean(WIC_per_10k, na.rm = TRUE) & lagged_value > mean(WIC_per_10k, na.rm = TRUE) & local_p <= 0.05 ~ "High-High",
      WIC_per_10k < mean(WIC_per_10k, na.rm = TRUE) & lagged_value < mean(WIC_per_10k, na.rm = TRUE) & local_p <= 0.05 ~ "Low-Low",
      WIC_per_10k > mean(WIC_per_10k, na.rm = TRUE) & lagged_value < mean(WIC_per_10k, na.rm = TRUE) & local_p <= 0.05 ~ "High-Low",
      WIC_per_10k < mean(WIC_per_10k, na.rm = TRUE) & lagged_value > mean(WIC_per_10k, na.rm = TRUE) & local_p <= 0.05 ~ "Low-High",
      TRUE ~ "Not Significant"
    )
  )

# 6. LISA Map
ggplot(wic_with_geom) +
  geom_sf(aes(fill = lisa_cluster), color = "white", size = 0.2) +
  scale_fill_manual(
    values = c("High-High" = "#B2182B", "Low-Low" = "#2166AC", 
               "High-Low" = "#EF8A62", "Low-High" = "#67A9CF", 
               "Not Significant" = "grey80"),
    name = "LISA Cluster"
  ) +
  labs(title = "Local Indicators of Spatial Association (LISA)",
       subtitle = "WIC Stores per 10,000 People (Statewide)",
       caption = "48 Contiguous States + DC") +
  theme_minimal()

LISA for each possible combination

  • urban low ADI
  • urban high ADI
  • rural low ADi
  • rural high ADI
# 1. Load state geometry (48 + DC)
states_sf <- states(cb = TRUE, resolution = "20m", year = 2022) %>%
  st_transform(4326) %>%
  filter(!STUSPS %in% c("AK", "HI", "PR"))

# 2. Join geometries to the detailed summary (assumes `wic_summary_detailed` exists)
wic_detailed_geom <- states_sf %>%
  left_join(st_drop_geometry(wic_summary_detailed), by = "STATEFP") %>%
  filter(
    area_type %in% c("urban", "rural"),
    adi_category %in% c("highADI", "lowADI"),
    !is.na(WIC_per_10k),
    !st_is_empty(geometry)
  ) %>%
  st_make_valid() %>%
  mutate(group = paste(area_type, adi_category, sep = "-"))

# 3. Function to compute LISA
compute_lisa <- function(df) {
  neighbors <- poly2nb(df, queen = TRUE)
  weights <- nb2listw(neighbors, style = "W", zero.policy = TRUE)
  local_moran <- localmoran(df$WIC_per_10k, weights)

  df %>%
    mutate(
      local_I = local_moran[, "Ii"],
      local_p = local_moran[, "Pr(z != E(Ii))"],
      lagged_value = lag.listw(weights, df$WIC_per_10k),
      lisa_cluster = case_when(
        WIC_per_10k > mean(WIC_per_10k, na.rm = TRUE) &
          lagged_value > mean(WIC_per_10k, na.rm = TRUE) &
          local_p <= 0.05 ~ "High-High",
        WIC_per_10k < mean(WIC_per_10k, na.rm = TRUE) &
          lagged_value < mean(WIC_per_10k, na.rm = TRUE) &
          local_p <= 0.05 ~ "Low-Low",
        WIC_per_10k > mean(WIC_per_10k, na.rm = TRUE) &
          lagged_value < mean(WIC_per_10k, na.rm = TRUE) &
          local_p <= 0.05 ~ "High-Low",
        WIC_per_10k < mean(WIC_per_10k, na.rm = TRUE) &
          lagged_value > mean(WIC_per_10k, na.rm = TRUE) &
          local_p <= 0.05 ~ "Low-High",
        TRUE ~ "Not Significant"
      ),
      group = unique(df$group)
    )
}

# 4. Apply function by group
wic_lisa_all <- wic_detailed_geom %>%
  group_split(group, .keep = TRUE) %>%
  map_dfr(compute_lisa)

# 5. Faceted map
ggplot(wic_lisa_all) +
  geom_sf(aes(fill = lisa_cluster), color = "white", size = 0.2) +
  facet_wrap(~group, ncol = 2) +
  scale_fill_manual(
    values = c(
      "High-High" = "#B2182B",
      "Low-Low" = "#2166AC",
      "High-Low" = "#EF8A62",
      "Low-High" = "#67A9CF",
      "Not Significant" = "grey80"
    ),
    name = "LISA Cluster"
  ) +
  labs(
    title = "Local Moran's I – WIC Stores per 10k",
    subtitle = "By Area Type and ADI Category",
    caption = "Contiguous US + DC"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 12),
    panel.grid = element_blank(),           # Remove gridlines
    axis.text = element_blank(),            # Remove axis labels
    axis.ticks = element_blank(),           # Remove axis ticks
    axis.title = element_blank()            # Remove axis titles
  )