Poverty Reduction Estimate for Women in Texas, 2024

Author

Kaitlan Wong, based on Coda Rayo-Garza’s original code

Estimating Poverty Reduction for Women in Texas Given Gender Pay Equality

Source: 2024 Current Population Survey Annual Social and Economic supplement (CPS-ASEC) via IPUMS CPS

Sources, Methods, and Data Notes

This analysis calculates the estimated reduction of poverty for women in Texas if there was no gender pay inequality for the year 2024. The code is based on Coda Rayo-Garza’s original code.

For the analysis, I use 2024 CPS-ASEC Cross-sectional data from IPUMS CPS. I did not use any of the basic monthly samples. I filtered the data to be for Texas (FIPS=48) before downloading the data extract.

Data Prep

library(ipumsr)
library(tidyverse)
library(survey)
library(writexl)
library(purrr)

# Load CPS-ASEC data extract (2024)
ddi <- read_ipums_ddi("cps_00021.xml")
data <- read_ipums_micro(ddi, data_file = "cps_00021.dat", verbose = FALSE)

data <- rename_with(data, tolower)

Cleaning Data

# CRG's code (from gender wage gap analysis)
data <- data %>%
  mutate(
    sex_c = case_when(
      sex == 1 ~ "Men",
      sex == 2 ~ "Women",
      TRUE ~ NA_character_
    ) %>% factor(),

    age_18up = if_else(age > 17, 1, 0),
    uhrsworkly = na_if(uhrsworkly, 999),
    incwage = na_if(incwage, 99999999) %>% as.numeric(),

    #ed categories
    educ_c = case_when(
      educ >= 2 & educ <= 71 ~ "1. Below HS",
      educ == 73 ~ "2. HS Diploma or Equivalent",
      educ >= 81 & educ <= 92 ~ "3. Some College or Associates",
      educ == 111 ~ "4. Bachelors Degree",
      educ >= 123 & educ <= 125 ~ "5. Higher Degree",
      TRUE ~ NA_character_
    ) |> factor(levels = c("1. Below HS", "2. HS Diploma or Equivalent", 
                            "3. Some College or Associates", "4. Bachelors Degree", "5. Higher Degree")),

    #metro
    metro_c = case_when(
      metro == 1 ~ "1. Not in Metro",
      metro %in% c(2, 3, 4) ~ "2. In Metro",
      TRUE ~ NA_character_
    ) %>% factor(levels = c("1. Not in Metro", "2. In Metro")),
    
    # KW add: make sure poverty is numeric
    poverty = as.numeric(poverty),

    ln_ann_earn = log(incwage),
    age2 = age^2
  ) |>
  
  filter(
    age_18up == 1, 
    !is.na(incwage) & incwage > 0, 
    !is.na(uhrsworkly) & uhrsworkly > 0,
    !is.na(wkswork1) & wkswork1 > 0,
    !is.na(educ_c),
    !is.na(metro_c),
    !is.na(hhincome) & hhincome > 0, # added household income to use for poverty analysis
    !is.na(cutoff), # added cutoff for poverty analysis
  ) |>
  mutate(
    hourswrk_LY = uhrsworkly * wkswork1, 
    hourly_wage = incwage / hourswrk_LY,
    in_poverty = if_else(poverty == 10, 1, 0) # added poverty vairable for poverty analysis
  )

Calculating the actual poverty rate for working women

Here I am just doing some debugging because I can’t figure out why my poverty rates are so low!! So I have double checked the actual poverty rate for working women three different ways.

# Method 1: using svymean()

working_women <- data %>%
  filter(
    year == 2024,
    sex_c == "Women",
  )

#define survey design
design <- svydesign(ids = ~1, weights = ~asecwt, data = working_women)

#calculate weighted poverty rate
poverty_rate_women <- svymean(~in_poverty, design, na.rm = TRUE)

print(poverty_rate_women)

               mean     SE
in_poverty 0.057148 0.0056

# I end up getting the same rate as I did below, which is an actual poverty rate of 5.7% for working women age 18+. This seems low.

# Method 2: using svytotal()

weighted_poverty_count <- svytotal(~in_poverty, design, na.rm = TRUE)

# Total weighted count of all working women
weighted_total_women <- svytotal(~as.numeric(sex_c == "Women"), design, na.rm = TRUE)

# Calculate weighted poverty rate manually
poverty_rate_women_manual <- as.numeric(weighted_poverty_count) / as.numeric(weighted_total_women)

print(poverty_rate_women_manual)

[1] 0.05714815

# still get 5.7%

# Method 3: using survey::svyratio()

working_women_3 <- working_women %>%
  mutate(one = 1)

# redefine sd with 'one' column
design <- svydesign(ids = ~1, weights = ~asecwt, data = working_women_3)

#calculate weighted poverty rate
poverty_rate_women_ratio <- svyratio(~in_poverty, ~one, design, na.rm = TRUE)

print(poverty_rate_women_ratio)

Ratio estimator: svyratio.survey.design2(~in_poverty, ~one, design, na.rm = TRUE)
Ratios=
                  one
in_poverty 0.05714815
SEs=
                   one
in_poverty 0.005632614

# still get 5.7%

# Method 4: Post-Stratification with srvyr Package

library(srvyr)

Warning: package 'srvyr' was built under R version 4.2.3


Attaching package: 'srvyr'

The following object is masked from 'package:stats':

    filter

# convert design into srvyr object
design_srvyr <- as_survey_design(design)

# calculate poverty rate
poverty_rate_women_srvyr <- design_srvyr %>%
  summarize(poverty_rate = survey_mean(in_poverty, na.rm = TRUE))

print(poverty_rate_women_srvyr)

# A tibble: 1 × 2
  poverty_rate poverty_rate_se
         <dbl>           <dbl>
1       0.0571         0.00563

# still get 5.7%

Analysis - Wage Gap

# CRG's code (from gender wage gap analysis)

#compute 90th percentile earnings cutoff for men per year (not hardcoded..reasoning is that dynamic allows us to capture time changes..in other words...The 2017 report dynamically determined the 90th percentile using the CPS dataset at the time of analysis.Hardcoding ignores actual wage distribution changes over time)

men_90th_percentile <- data |>
  filter(sex_c == "Men") |>
  group_by(year) |>
  summarise(value_90th_percentile = quantile(incwage, 0.9, na.rm = TRUE), .groups = "drop")

#merge cutoff into dataset
data <- left_join(data, men_90th_percentile, by = "year")

#subset men below 90th percentile
data_men <- data |> filter(sex_c == "Men", incwage < value_90th_percentile)

#alternative to SS FOR LOOP: Run OLS models per year using `map()`
models <- data_men |>
  group_split(year) |>
  set_names(unique(data$year)) |>
  map(~{
    if (nrow(.x) > 0) {
      des <- svydesign(id = ~1, weights = ~as.numeric(.x$asecwt), data = .x)
      return(svyglm(ln_ann_earn ~ age + age2 + educ_c + metro_c + hourswrk_LY, design = des))
    } else {
      return(NULL)
    }
  })

#apply men's wage model to predict women's earnings
data_women <- data %>% filter(sex_c == "Women")

#ensure predictions return numeric values to avoid svystat issue
data_women <- data_women |>
  group_split(year) |>
  set_names(unique(data_women$year)) |>
  map_dfr(~{
    y <- as.character(.x$year[1])
    model <- models[[y]]
    
    if (!is.null(model)) {
      .x <- .x |>
        filter(complete.cases(age, age2, educ_c, metro_c, hourswrk_LY)) %>%
        mutate(
          Predict_log_annincome = as.numeric(predict(model, newdata = .)),
          Predict_annincome = exp(Predict_log_annincome),
          Predict_difference = as.numeric(Predict_annincome) - as.numeric(incwage),
          Predict_difference_Positive = if_else(as.numeric(Predict_difference) > 0, as.numeric(Predict_difference), 0)
        )
    }
    return(.x)
  })

total_wage_gap <- data_women |>
  group_by(year) |>
  summarise(total_wage_increase = sum(as.numeric(Predict_difference_Positive) * as.numeric(asecwt), na.rm = TRUE) / 1e9)

print(total_wage_gap)

# A tibble: 10 × 2
    year total_wage_increase
   <dbl>               <dbl>
 1  2015                42.5
 2  2016                43.2
 3  2017                45.4
 4  2018                46.2
 5  2019                47.0
 6  2020                53.7
 7  2021                45.7
 8  2022                56.6
 9  2023                61.6
10  2024                60.1

Analysis - Poverty Reduction

The below is my original code.

I use the variable CUTOFF. The CUTOFF variable is located on the person record, although it treats respondents who live in families collectively. CUTOFF is the official poverty threshold used by the Census Bureau to evaluate the poverty status (POVERTY) of each family in the sample. For instance, for a family consisting of 2 individuals, both of whom are under 65, with no children, the poverty threshold in 1989 was 7,495 dollars. If a sampled family of this composition reported income below 7,495 dollars in that year, then they would be coded as “below poverty” in the POVERTY variable. CUTOFF uses Census-defined family units, which do not necessarily correspond to the IPUMS-derived family units as specified in FAMUNIT. See FTYPE, FAMKIND, and FAMREL for more on Census family units.

# calculate new predicted household income
data_women <- data_women %>%
  mutate(
    Predict_household_income = if_else(
      Predict_difference_Positive > 0,
      hhincome + Predict_difference_Positive,
      hhincome
    ),
    predict_pov_status = if_else(Predict_household_income < cutoff, 10, 20) # 10 = Below Poverty, 20 = Above Poverty
  )

# calculate actual poverty rate
actual_pov_rate <- data_women %>%
  group_by(year) %>%
  summarise(
    tot_women_w = sum(asecwt, na.rm = TRUE),
    women_pov_w = sum(asecwt * (poverty == 10), na.rm = TRUE),
    actual_pov_rate = women_pov_w / tot_women_w,
    .groups = "drop"
  )

# calculate new predicted poverty rate
predicted_pov_rate <- data_women %>%
  group_by(year) %>%
  summarise(
    tot_women_w = sum(asecwt, na.rm = TRUE),
    predicted_pov_w = sum(asecwt * (predict_pov_status == 10), na.rm = TRUE),
    predicted_pov_rate = predicted_pov_w / tot_women_w,
    .groups = "drop"
  )

# merge
difference_pov_rate <- actual_pov_rate %>%
  left_join(predicted_pov_rate, by = "year") %>%
  mutate(
    difference = actual_pov_rate - predicted_pov_rate,
    percent_change_poverty_number = ((predicted_pov_w - women_pov_w) / women_pov_w) * 100
  )


print(difference_pov_rate)

# A tibble: 10 × 9
    year tot_women_w.x women_pov_w actual_pov_rate tot_women_w.y predicted_pov_w
   <dbl>         <dbl>       <dbl>           <dbl>         <dbl>           <dbl>
 1  2015      5708381.     497475.          0.0871      5708381.         166139.
 2  2016      5742270.     414481.          0.0722      5742270.         150830.
 3  2017      5814314.     405280.          0.0697      5814314.         153749.
 4  2018      5986619.     402983.          0.0673      5986619.         129519.
 5  2019      6251022.     423918           0.0678      6251022.         160713.
 6  2020      6466837.     314778.          0.0487      6466837.         101584.
 7  2021      6184068.     400106.          0.0647      6184068.         172653.
 8  2022      6286258.     406543.          0.0647      6286258.         150564.
 9  2023      6731254.     406290.          0.0604      6731254.         123780.
10  2024      6762147.     386444.          0.0571      6762147.         160064.
# ℹ 3 more variables: predicted_pov_rate <dbl>, difference <dbl>,
#   percent_change_poverty_number <dbl>

write_xlsx(difference_pov_rate, "difference_poverty_rate.xlsx")

Notes:

The total weighted number for working women looks right at 6,762,147. However, the poverty rate I am getting is low at 5.7%. For reference, the 2016 IWPR study reported 10.5% of working women were in poverty, which decreased to 4.8% assuming gender pay equality. The 2021 IWPR study reported 10.3% of working women were in poverty, which decreased to 5.8% assuming gender pay equality.