Visualizations
latest_year <- max(df$year, na.rm = TRUE)
state_latest <- df |>
filter(year == latest_year) |>
group_by(state) |>
summarise(
n = n(),
snap_rate = mean(foodstmp_yes, na.rm = TRUE),
med_income = median(hhincome, na.rm = TRUE),
.groups = "drop"
)
plot_usmap(data = state_latest, values = "snap_rate", labels = FALSE) +
scale_fill_continuous(
label = percent_format(accuracy = 1),
name = "SNAP rate"
) +
labs(
title = paste0("Food insecurity risk is not 'elsewhere' — SNAP reliance by state (", latest_year, ")"),
subtitle = "Higher SNAP rates signal greater household food hardship / economic stress",
) +
theme(legend.position = "right")

q1_range <- df |>
filter(income_quint == "Q1 (lowest)") |>
summarise(
lo = dollar(min(hhincome, na.rm = TRUE)),
hi = dollar(max(hhincome, na.rm = TRUE))
)
q1_label <- paste0("<", q1_range$hi)
# ---- aggregate ----
inc_gradient <- df |>
group_by(income_quint) |>
summarise(
snap_rate = mean(foodstmp_yes, na.rm = TRUE),
n = n(),
.groups = "drop"
) |>
mutate(
income_quint = as.character(income_quint),
income_quint = if_else(
income_quint == "Q1 (lowest)",
q1_label,
income_quint
),
income_quint = factor(
income_quint,
levels = c(q1_label, "Q2", "Q3", "Q4", "Q5 (highest)")
)
)
# ---- plot ----
ggplot(inc_gradient,
aes(x = income_quint, y = snap_rate, fill = income_quint)) +
geom_col() +
scale_fill_manual(
values = c(
"<$18,320" = "red",
"Q2" = "grey90",
"Q3" = "grey90",
"Q4" = "grey90",
"Q5 (highest)" = "grey90"
),
guide = "none"
) +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
labs(
title = "Food hardship is tightly linked to poverty",
subtitle = "SNAP receipt rises steeply as household income falls (income quintiles)",
x = NULL,
y = "SNAP receipt rate",
caption = "Income quintiles computed from household income."
) +
theme_minimal(base_size = 13)

age_sex <- df |>
group_by(life_stage) |>
summarise(
snap_rate = mean(foodstmp_yes, na.rm = TRUE),
n = n(),
.groups = "drop"
) |>
filter(!is.na(life_stage))
ggplot(
age_sex,
aes(
x = life_stage,
y = snap_rate,
fill = ifelse(life_stage == "Children (0–17)", "Children", "Other")
)
) +
geom_col() +
scale_fill_manual(
values = c(
"Children" = "red", # red
"Other" = "grey90" # light grey
),
guide = "none"
) +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
labs(
title = "Children show the highest SNAP reliance",
subtitle = "A clear signal of food hardship risk among households with kids (split by gender)",
x = NULL,
y = "SNAP receipt rate"
) +
theme_minimal(base_size = 13) +
theme(axis.text.x = element_text(angle = 35, hjust = 1))

More Data Tidy
Show code for tidying data
df <- child_dev_data
demo <- df |>
transmute(
pubid = pubid_1997,
sex = case_when(
key_sex_1997 == 1 ~ "Male",
key_sex_1997 == 2 ~ "Female",
TRUE ~ NA_character_
),
race_eth = case_when(
key_race_ethnicity_1997 == 1 ~ "Hispanic/Latino",
key_race_ethnicity_1997 == 2 ~ "Black",
key_race_ethnicity_1997 == 3 ~ "Non-Black / Non-Hispanic",
key_race_ethnicity_1997 == 4 ~ "Mixed / Non-Hispanic",
TRUE ~ NA_character_
),
birth_year = key_bdate_y_1997
)
# 1. Childhood household income (1997–2003)
income_long <- df |>
select(pubid = pubid_1997, matches("^cv_income_gross_yr_\\d{4}$")) |>
pivot_longer(
cols = -pubid,
names_to = "var",
values_to = "income"
) |>
mutate(
year = as.integer(str_extract(var, "\\d{4}")),
income = if_else(income < 0, NA_real_, income)
) |>
filter(year %in% 1997:2003)
child_income <- income_long |>
group_by(pubid) |>
summarise(
child_income_median = median(income, na.rm = TRUE),
child_income_years = sum(!is.na(income)),
.groups = "drop"
) |>
mutate(
child_income_group = case_when(
child_income_median <= 17600 ~ "Low childhood income",
child_income_median <= 100000 & child_income_median > 17600 ~ "Middle childhood income",
child_income_median > 100000 ~ "High childhood income",
.default = NA
),
child_income_group = factor(
child_income_group,
levels = c(
"Low childhood income",
"Middle childhood income",
"High childhood income"
)
)
)
# 2. UI spells (counts)
ui_spells <- df |>
select(pubid = pubid_1997,
matches("^cvc_ui_spells_yr_\\d{2}_xrnd$")) |>
pivot_longer(
cols = -pubid,
names_to = "var",
values_to = "ui_spells"
) |>
mutate(
yy = as.integer(str_extract(var, "(?<=yr_)\\d{2}(?=_xrnd$)")),
year = if_else(yy >= 90, 1900L + yy, 2000L + yy),
ui_spells = if_else(ui_spells < 0, NA_real_, as.numeric(ui_spells))
) |>
select(pubid, year, ui_spells)
# 3. UI receipt (indicator)
ui_recv <- df |>
select(pubid = pubid_1997,
matches("^cvc_ui_yr_\\d{2}_xrnd$")) |>
pivot_longer(
cols = -pubid,
names_to = "var",
values_to = "ui_recv"
) |>
mutate(
yy = as.integer(str_extract(var, "(?<=yr_)\\d{2}(?=_xrnd$)")),
year = if_else(yy >= 90, 1900L + yy, 2000L + yy),
ui_recv = if_else(ui_recv < 0, NA_real_, as.numeric(ui_recv))
) |>
select(pubid, year, ui_recv)
# 4. UI amount (dollars)
ui_amt <- df |>
select(pubid = pubid_1997,
matches("^cvc_amt_ui_yr_\\d{2}_xrnd$")) |>
pivot_longer(
cols = -pubid,
names_to = "var",
values_to = "ui_amount"
) |>
mutate(
yy = as.integer(str_extract(var, "(?<=yr_)\\d{2}(?=_xrnd$)")),
year = if_else(yy >= 90, 1900L + yy, 2000L + yy),
ui_amount = if_else(ui_amount < 0, NA_real_, as.numeric(ui_amount))
) |>
select(pubid, year, ui_amount)
# 5. Combine UI + derive age
ui_long <- ui_spells |>
full_join(ui_recv, by = c("pubid", "year")) |>
full_join(ui_amt, by = c("pubid", "year")) |>
left_join(demo, by = "pubid") |>
mutate(
age = year - birth_year,
any_ui = case_when(
!is.na(ui_recv) ~ as.integer(ui_recv > 0),
!is.na(ui_spells) ~ as.integer(ui_spells > 0),
TRUE ~ NA_integer_
)
)
# 6. Adult outcome summary (age 25+)
adult_ui <- ui_long |>
filter(age >= 25) |>
group_by(pubid) |>
summarise(
adult_ui_spells_total = sum(ui_spells, na.rm = TRUE),
adult_any_ui = as.integer(any(any_ui == 1, na.rm = TRUE)),
.groups = "drop"
) |>
mutate(
adult_support_level = case_when(
adult_ui_spells_total == 0 ~ "Economically stable (no UI)",
adult_ui_spells_total <= 2 ~ "Occasional instability (1–2 UI spells)",
adult_ui_spells_total >= 3 ~ "Chronic instability (3+ UI spells)"
),
adult_support_level = factor(
adult_support_level,
levels = c(
"Economically stable (no UI)",
"Occasional instability (1–2 UI spells)",
"Chronic instability (3+ UI spells)"
)
)
)
analysis_df <- demo |>
left_join(child_income, by = "pubid") |>
left_join(adult_ui, by = "pubid")
# VISUALIZATION
plot_df <- analysis_df |>
filter(!is.na(child_income_group),
!is.na(adult_support_level)) |>
count(child_income_group, adult_support_level) |>
group_by(child_income_group) |>
mutate(prop = n / sum(n)) |>
ungroup() |>
mutate(
fill_group = if_else(
child_income_group == "Low childhood income",
"Low childhood income",
"Other"
),
draw_order = if_else(
child_income_group == "Low childhood income",
2,
1
)
)
ggplot(
plot_df,
aes(
axis1 = child_income_group,
axis2 = adult_support_level,
y = prop
)
) +
geom_alluvium(
aes(
fill = fill_group,
alpha = fill_group,
order = draw_order
)
) +
geom_stratum(
fill = "grey92",
color = "grey40"
) +
geom_text(
stat = "stratum",
aes(label = after_stat(stratum)),
size = 3
) +
scale_fill_manual(
values = c(
"Low childhood income" = "blue",
"Other" = "grey70"
)
) +
scale_alpha_manual(
values = c(
"Low childhood income" = 0.95,
"Other" = 0.95
)
) +
scale_y_continuous(expand = c(0, 0)) +
labs(
title = "Childhood poverty predicts adult economic instability",
subtitle = "NLSY97: pathways from youth income to adult reliance on unemployment insurance",
x = NULL,
y = NULL
) +
theme_minimal() +
theme(
legend.position = "none",
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank()
)
