Household Type Estimates

Author

Kaitlan Wong

Estimates of households for the different scenarios used in the compounding effects analysis.

Data extract was filtered to Texas before download.

The ACS has no variable that identifies whether a person is pregnant. As an alternative, I included scenarios for single-mother households.

library(ipumsr)
library(dplyr)
library(writexl)

# Parse the extract's DDI codebook, then use it to read the matching
# microdata file. Both files are expected in the working directory.
ddi <- read_ipums_ddi(ddi_file = "usa_00051.xml")
ipums_data <- read_ipums_micro(
  ddi = ddi,
  data_file = "usa_00051.dat",
  verbose = FALSE
)

# Keep only person records where every field used downstream is populated.
# NOTE(review): this drops true NA values only; if the extract encodes
# "not applicable"/missing as numeric codes, those rows pass through here.
# Confirm against the codebook.
ipums_clean <- ipums_data %>%
  filter(
    !(
      is.na(AGE) | is.na(SEX) | is.na(RELATE) |
        is.na(HHINCOME) | is.na(HHWT) | is.na(SERIAL)
    )
  )

# Tag every person record with logical role and age flags that are later
# aggregated per household.
# NOTE(review): the RELATE codes (1, 2, 3) and SEX code (2) are interpreted
# as the flag names suggest; verify them against the extract's codebook.
hh_data <- mutate(
  ipums_clean,
  # Under 18 and recorded with RELATE code 3.
  is_child = AGE < 18 & RELATE == 3,
  # Aged 65 or older, regardless of role in the household.
  is_elderly = AGE >= 65,
  is_female = SEX == 2,
  is_householder = RELATE == 1,
  is_spouse = RELATE == 2
)

# Collapse person records to one row per household (keyed by SERIAL).
# NOTE(review): assumes SERIAL alone uniquely identifies a household in this
# extract (i.e. a single sample) -- TODO confirm.
by_household <- group_by(hh_data, SERIAL)

hh_summary <- summarise(
  by_household,
  n_children = sum(is_child),
  # Counts every member aged 65+, including a householder or spouse who is
  # 65 or older.
  n_elderly = sum(is_elderly),
  has_female_householder = sum(is_householder & is_female) > 0,
  # "Parents" here means the householder plus a spouse, if present.
  n_parents = sum(is_householder | is_spouse),
  # Taking the max yields a single household-level value from the
  # person-level rows.
  hh_income = max(HHINCOME),
  hh_weight = max(HHWT),
  .groups = "drop"
)

# Household income ceilings (dollars) applied in the scenarios below: one
# for single-parent households and one for two-parent households.
income_cap_one_parent <- 19307
income_cap_two_parents <- 25678

# Every scenario requires exactly one child in the household.
one_child_hh <- hh_summary %>%
  filter(n_children == 1)

# Scenario 1: Female parent + 1 child + income ≤ $19,307
# (no household member aged 65+)
scenario1 <- one_child_hh %>%
  filter(
    has_female_householder &
      n_parents == 1 &
      n_elderly == 0 &
      hh_income <= income_cap_one_parent
  )

# Scenario 2: Single parent (any gender) + 1 child + 1 elderly + income ≤ $19,307
scenario2 <- one_child_hh %>%
  filter(
    n_parents == 1 &
      n_elderly == 1 &
      hh_income <= income_cap_one_parent
  )

# Scenario 3: Two parents + 1 child + 1 elderly + income ≤ $25,678
scenario3 <- one_child_hh %>%
  filter(
    n_parents == 2 &
      n_elderly == 1 &
      hh_income <= income_cap_two_parents
  )

# Scenario 4: Female parent + 1 child + 1 elderly + income ≤ $19,307
# This is scenario 2 restricted to households with a female householder.
scenario4 <- scenario2 %>%
  filter(has_female_householder)

# Weighted household counts: for each scenario, add up the household
# weights of the households that matched it.
scenario_counts <- lapply(
  list(
    Scenario1 = scenario1,
    Scenario2 = scenario2,
    Scenario3 = scenario3,
    Scenario4 = scenario4
  ),
  function(scenario) sum(scenario$hh_weight)
)

print(scenario_counts)

$Scenario1
[1] 57685

$Scenario2
[1] 1890

$Scenario3
[1] 926

$Scenario4
[1] 567

# Save to Excel: the named list becomes a one-row data frame with one
# column per scenario.
counts_df <- as.data.frame(scenario_counts)
write_xlsx(counts_df, path = "hh_types.xlsx")