library(ipumsr)
library(dplyr)
library(writexl)

# Household Type Estimates (under 200% poverty threshold)
#
# Estimates of households for the different scenarios used in the compounding
# effects analysis.
# For this second draft, instead of using income cutoffs, I filter by poverty
# status (households under 200% of the poverty line).
# The data extract was filtered to Texas before download.
# There are no ACS variables that measure whether a person is pregnant.
# Instead, I included scenarios for single-mother households.
# Load the IPUMS extract: parse the DDI codebook, then use it to read the
# accompanying microdata file (progress messages suppressed).
ddi <- read_ipums_ddi(ddi_file = "usa_00051.xml")
ipums_data <- ddi %>%
  read_ipums_micro(data_file = "usa_00051.dat", verbose = FALSE)
# remove NAs
# Keep only person records where every variable used downstream is non-NA.
# NOTE(review): IPUMS usually encodes missing / not-applicable values as
# numeric codes rather than NA, so this filter may drop nothing -- confirm.
ipums_clean <- ipums_data %>%
  filter(
    if_all(
      c(AGE, SEX, RELATE, HHINCOME, HHWT, SERIAL, POVERTY),
      ~ !is.na(.x)
    )
  )
# define roles and flags
# Person-level flags built from the RELATE, AGE and SEX codes.
hh_data <- ipums_clean %>%
  mutate(
    is_child = RELATE == 3 & AGE < 18, # RELATE 3 and under 18
    # Any member aged 65+, whatever their relationship to the householder.
    # NOTE(review): a householder or spouse aged 65+ is therefore also counted
    # as "elderly" -- confirm this is intended for the scenarios.
    is_elderly = AGE >= 65,
    is_female = SEX == 2,
    is_householder = RELATE == 1,
    is_spouse = RELATE == 2
  )

# Collapse the person records to one row per household (SERIAL).
hh_summary <- hh_data %>%
  group_by(SERIAL) %>%
  summarise(
    n_children = sum(is_child),
    n_elderly = sum(is_elderly),
    has_female_householder = any(is_householder & is_female),
    # "Parents" are approximated by the householder and spouse records.
    n_parents = sum(RELATE %in% c(1, 2)),
    hh_income = max(HHINCOME),
    hh_weight = max(HHWT),
    # Poverty value of the first person record in the household.
    poverty = first(POVERTY),
    .groups = "drop"
  )
# filter for households under 200% poverty
# Keep households below 200% of the poverty threshold.
# POVERTY code 0 is the IPUMS "N/A" code (poverty status not determined), not
# 0% of the threshold, so it is excluded instead of passing the `< 200` test.
hh_summary <- hh_summary %>%
  filter(poverty > 0, poverty < 200)
# Scenario 1: Female parent + 1 child, no elderly member (under 200% poverty)
# Households with a female householder and no spouse record, exactly one
# child under 18, and no member aged 65 or older.
scenario1 <- filter(
  hh_summary,
  has_female_householder & n_parents == 1 & n_children == 1 & n_elderly == 0
)
# Scenario 2: Single parent (any gender) + 1 child + 1 elderly member.
# No income cutoff is applied here; hh_summary has already been restricted by
# poverty status.
# NOTE(review): n_elderly counts every member aged 65+, so the single parent
# may themselves be the "elderly" member -- confirm this is intended.
scenario2 <- filter(
  hh_summary,
  n_parents == 1 & n_children == 1 & n_elderly == 1
)
# Scenario 3: Two parents (householder + spouse) + 1 child + 1 elderly member.
# No income cutoff is applied here; hh_summary has already been restricted by
# poverty status.
scenario3 <- filter(
  hh_summary,
  n_parents == 2 & n_children == 1 & n_elderly == 1
)
# Scenario 4: Female parent + 1 child + 1 elderly member.
# Same composition as Scenario 2, limited to female householders. No income
# cutoff is applied here; hh_summary has already been restricted by poverty
# status.
scenario4 <- filter(
  hh_summary,
  has_female_householder & n_parents == 1 & n_children == 1 & n_elderly == 1
)
# Weighted household counts: for each scenario, sum the household weights
# (hh_weight) of the households it selected.
scenario_counts <- list(
  Scenario1 = sum(scenario1$hh_weight),
  Scenario2 = sum(scenario2$hh_weight),
  Scenario3 = sum(scenario3$hh_weight),
  Scenario4 = sum(scenario4$hh_weight)
)
print(scenario_counts)
# Console output recorded alongside the script (kept as comments so the file
# still parses as R):
#> $Scenario1
#> [1] 158966
#> $Scenario2
#> [1] 7096
#> $Scenario3
#> [1] 7217
#> $Scenario4
#> [1] 4874
# Save to Excel: the named list becomes a one-row data frame with one column
# per scenario.
write_xlsx(x = as.data.frame(scenario_counts), path = "hh_pov.xlsx")