Packages Used

Show Packages Used
library(tidyverse)
library(janitor)
library(data.table)
library(scales)
library(usmap)
library(ggalluvial)
library(broom)

Load/Tidy Data

Show code for loading and tidying data

Source: IPUMS Current Population Survey

food_poverty_data <- read_csv("food_poverty_data2.csv")

Source: National Longitudinal Survey of Youth 1997 (NLSY97)

 child_dev_data <- read_csv("child_dev_data.csv") |> 
  clean_names()

Data Tidy

df <- food_poverty_data |> 
  mutate(

    pubhous  = str_to_lower(pubhous),
    rentsub  = str_to_lower(rentsub),
    foodstmp = str_to_lower(foodstmp),

    pubhous_yes  = as.integer(pubhous  == "yes"),
    rentsub_yes  = as.integer(rentsub  == "yes"),
    foodstmp_yes = as.integer(foodstmp == "yes"),

    age_group = cut(
      age,
      breaks = c(0, 5, 12, 17, 24, 34, 44, 54, 64, Inf),
      labels = c("0–5", "6–12", "13–17", "18–24", "25–34", "35–44", "45–54", "55–64", "65+"),
      right = TRUE, include.lowest = TRUE
    ),

    life_stage = case_when(
      age <= 17 ~ "Children (0–17)",
      age >= 18 & age <= 64 ~ "Adults (18–64)",
      age >= 65 ~ "Seniors (65+)",
      TRUE ~ NA_character_
    ),

    sex = toupper(sex)
  ) |> 
  filter(!is.na(state), !is.na(year))


df <- df |> 
  mutate(income_quint = ntile(hhincome, 5)) |> 
  mutate(
    income_quint = factor(
      income_quint,
      levels = 1:5,
      labels = c("Q1 (lowest)", "Q2", "Q3", "Q4", "Q5 (highest)")
    )
  )

Visualizations

latest_year <- max(df$year, na.rm = TRUE)

state_latest <- df |> 
  filter(year == latest_year) |> 
  group_by(state) |> 
  summarise(
    n = n(),
    snap_rate = mean(foodstmp_yes, na.rm = TRUE),
    med_income = median(hhincome, na.rm = TRUE),
    .groups = "drop"
  )

plot_usmap(data = state_latest, values = "snap_rate", labels = FALSE) +
  scale_fill_continuous(
    label = percent_format(accuracy = 1),
    name = "SNAP rate"
  ) +
  labs(
    title = paste0("Food insecurity risk is not 'elsewhere' — SNAP reliance by state (", latest_year, ")"),
    subtitle = "Higher SNAP rates signal greater household food hardship / economic stress",
  ) +
  theme(legend.position = "right")

q1_range <- df |> 
  filter(income_quint == "Q1 (lowest)") |> 
  summarise(
    lo = dollar(min(hhincome, na.rm = TRUE)),
    hi = dollar(max(hhincome, na.rm = TRUE))
  )

q1_label <- paste0("<", q1_range$hi)

# ---- aggregate ----
inc_gradient <- df |> 
  group_by(income_quint) |> 
  summarise(
    snap_rate = mean(foodstmp_yes, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) |> 
  mutate(
    income_quint = as.character(income_quint),
    income_quint = if_else(
      income_quint == "Q1 (lowest)",
      q1_label,
      income_quint
    ),
    income_quint = factor(
      income_quint,
      levels = c(q1_label, "Q2", "Q3", "Q4", "Q5 (highest)")
    )
  )

# ---- plot ----
ggplot(inc_gradient,
       aes(x = income_quint, y = snap_rate, fill = income_quint)) +
  geom_col() +
  scale_fill_manual(
    values = c(
      "<$18,320" = "red",
      "Q2" = "grey90",
      "Q3" = "grey90",
      "Q4" = "grey90",
      "Q5 (highest)" = "grey90"
    ),
    guide = "none"
  ) +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  labs(
    title = "Food hardship is tightly linked to poverty",
    subtitle = "SNAP receipt rises steeply as household income falls (income quintiles)",
    x = NULL,
    y = "SNAP receipt rate",
    caption = "Income quintiles computed from household income."
  ) +
  theme_minimal(base_size = 13)

age_sex <- df |> 
  group_by(life_stage) |> 
  summarise(
    snap_rate = mean(foodstmp_yes, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) |> 
  filter(!is.na(life_stage))

ggplot(
  age_sex,
  aes(
    x = life_stage,
    y = snap_rate,
    fill = ifelse(life_stage == "Children (0–17)", "Children", "Other")
  )
) +
  geom_col() +
  scale_fill_manual(
    values = c(
      "Children" = "red",   # red
      "Other"    = "grey90"    # light grey
    ),
    guide = "none"
  ) +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  labs(
    title = "Children show the highest SNAP reliance",
    subtitle = "A clear signal of food hardship risk among households with kids (split by gender)",
    x = NULL,
    y = "SNAP receipt rate"
  ) +
  theme_minimal(base_size = 13) +
  theme(axis.text.x = element_text(angle = 35, hjust = 1))

More Data Tidy

Show code for tidying data
df <- child_dev_data

demo <- df |>
  transmute(
    pubid = pubid_1997,

    sex = case_when(
      key_sex_1997 == 1 ~ "Male",
      key_sex_1997 == 2 ~ "Female",
      TRUE ~ NA_character_
    ),

    race_eth = case_when(
      key_race_ethnicity_1997 == 1 ~ "Hispanic/Latino",
      key_race_ethnicity_1997 == 2 ~ "Black",
      key_race_ethnicity_1997 == 3 ~ "Non-Black / Non-Hispanic",
      key_race_ethnicity_1997 == 4 ~ "Mixed / Non-Hispanic",
      TRUE ~ NA_character_
    ),

    birth_year = key_bdate_y_1997
  )


# 1. Childhood household income (1997–2003)

income_long <- df |>
  select(pubid = pubid_1997, matches("^cv_income_gross_yr_\\d{4}$")) |>
  pivot_longer(
    cols = -pubid,
    names_to = "var",
    values_to = "income"
  ) |>
  mutate(
    year = as.integer(str_extract(var, "\\d{4}")),
    income = if_else(income < 0, NA_real_, income)
  ) |>
  filter(year %in% 1997:2003)

child_income <- income_long |>
  group_by(pubid) |>
  summarise(
    child_income_median = median(income, na.rm = TRUE),
    child_income_years  = sum(!is.na(income)),
    .groups = "drop"
  ) |>
  mutate(
    child_income_group = case_when(
      child_income_median <= 17600 ~ "Low childhood income",
      child_income_median <= 100000 & child_income_median > 17600 ~ "Middle childhood income",
      child_income_median > 100000 ~ "High childhood income",
      .default = NA
    ),
    child_income_group = factor(
      child_income_group,
      levels = c(
        "Low childhood income",
        "Middle childhood income",
        "High childhood income"
      )
    )
  )


# 2. UI spells (counts)

ui_spells <- df |>
  select(pubid = pubid_1997,
         matches("^cvc_ui_spells_yr_\\d{2}_xrnd$")) |>
  pivot_longer(
    cols = -pubid,
    names_to = "var",
    values_to = "ui_spells"
  ) |>
  mutate(
    yy   = as.integer(str_extract(var, "(?<=yr_)\\d{2}(?=_xrnd$)")),
    year = if_else(yy >= 90, 1900L + yy, 2000L + yy),
    ui_spells = if_else(ui_spells < 0, NA_real_, as.numeric(ui_spells))
  ) |>
  select(pubid, year, ui_spells)


# 3. UI receipt (indicator)

ui_recv <- df |>
  select(pubid = pubid_1997,
         matches("^cvc_ui_yr_\\d{2}_xrnd$")) |>
  pivot_longer(
    cols = -pubid,
    names_to = "var",
    values_to = "ui_recv"
  ) |>
  mutate(
    yy   = as.integer(str_extract(var, "(?<=yr_)\\d{2}(?=_xrnd$)")),
    year = if_else(yy >= 90, 1900L + yy, 2000L + yy),
    ui_recv = if_else(ui_recv < 0, NA_real_, as.numeric(ui_recv))
  ) |>
  select(pubid, year, ui_recv)


# 4. UI amount (dollars)

ui_amt <- df |>
  select(pubid = pubid_1997,
         matches("^cvc_amt_ui_yr_\\d{2}_xrnd$")) |>
  pivot_longer(
    cols = -pubid,
    names_to = "var",
    values_to = "ui_amount"
  ) |>
  mutate(
    yy   = as.integer(str_extract(var, "(?<=yr_)\\d{2}(?=_xrnd$)")),
    year = if_else(yy >= 90, 1900L + yy, 2000L + yy),
    ui_amount = if_else(ui_amount < 0, NA_real_, as.numeric(ui_amount))
  ) |>
  select(pubid, year, ui_amount)


# 5. Combine UI + derive age

ui_long <- ui_spells |>
  full_join(ui_recv, by = c("pubid", "year")) |>
  full_join(ui_amt,  by = c("pubid", "year")) |>
  left_join(demo, by = "pubid") |>
  mutate(
    age = year - birth_year,
    any_ui = case_when(
      !is.na(ui_recv)   ~ as.integer(ui_recv > 0),
      !is.na(ui_spells) ~ as.integer(ui_spells > 0),
      TRUE              ~ NA_integer_
    )
  )


# 6. Adult outcome summary (age 25+)

adult_ui <- ui_long |>
  filter(age >= 25) |>
  group_by(pubid) |>
  summarise(
    adult_ui_spells_total = sum(ui_spells, na.rm = TRUE),
    adult_any_ui = as.integer(any(any_ui == 1, na.rm = TRUE)),
    .groups = "drop"
  ) |>
  mutate(
    adult_support_level = case_when(
      adult_ui_spells_total == 0 ~ "Economically stable (no UI)",
      adult_ui_spells_total <= 2 ~ "Occasional instability (1–2 UI spells)",
      adult_ui_spells_total >= 3 ~ "Chronic instability (3+ UI spells)"
    ),
    adult_support_level = factor(
      adult_support_level,
      levels = c(
        "Economically stable (no UI)",
        "Occasional instability (1–2 UI spells)",
        "Chronic instability (3+ UI spells)"
      )
    )
  )

analysis_df <- demo |>
  left_join(child_income, by = "pubid") |>
  left_join(adult_ui,    by = "pubid")
# VISUALIZATION
plot_df <- analysis_df |>
  filter(!is.na(child_income_group),
         !is.na(adult_support_level)) |>
  count(child_income_group, adult_support_level) |>
  group_by(child_income_group) |>
  mutate(prop = n / sum(n)) |>
  ungroup() |>
  mutate(
    fill_group = if_else(
      child_income_group == "Low childhood income",
      "Low childhood income",
      "Other"
    ),
    draw_order = if_else(
      child_income_group == "Low childhood income",
      2,
      1
    )
  )

ggplot(
  plot_df,
  aes(
    axis1 = child_income_group,
    axis2 = adult_support_level,
    y = prop
  )
) +
  geom_alluvium(
    aes(
      fill  = fill_group,
      alpha = fill_group,
      order = draw_order
    )
  ) +
  geom_stratum(
    fill = "grey92",
    color = "grey40"
  ) +
  geom_text(
    stat = "stratum",
    aes(label = after_stat(stratum)),
    size = 3
  ) +
  scale_fill_manual(
    values = c(
      "Low childhood income" = "blue",
      "Other" = "grey70"
    )
  ) +
  scale_alpha_manual(
    values = c(
      "Low childhood income" = 0.95,
      "Other" = 0.95
    )
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(
    title = "Childhood poverty predicts adult economic instability",
    subtitle = "NLSY97: pathways from youth income to adult reliance on unemployment insurance",
    x = NULL,
    y = NULL
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text  = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )