Cost of Paid Family Leave for Texas, 2022

Author

Kaitlan Wong

Methods - Calculating Number of Employed People and Max Benefits Using ACS Data


For this estimate, I am utilizing 2022 American Community Survey (ACS) 1-Year estimates from IPUMS USA. The parameters for estimating costs to Texas include:

  • Max benefit of 12 weeks paid parental leave for all workers that have worked a minimum of 3 months of full time employment, a minimum of 3 months of part time employments, or 540 hours in the last year. I provide a final breakdown of cost estimates by these three worker types.

  • Salary replacement between 80 and 100 percent of worker’s wages based on their weekly earnings.

  • Maximum weekly benefit of $1,117.69 (per Amanda’s email).

The ACS variables I am using include:

  • INCWAGE: total pre-tax wage and salary income - that is, money received as an employee - for the previous year.

  • EMPSTAT: indicates whether the respondent was a part of the labor force -- working or seeking work -- and, if so, whether the person was currently unemployed

  • UHRSWORK: number of hours per week that the respondent usually worked, if the person worked during the previous year.

  • WKSWORK1: number of weeks that the respondent worked for profit, pay, or as an unpaid family worker during the previous year.

I filtered the data to only be for the state of Texas (FIPS = 48) in IPUMS before downloading the data.

Load ACS Data

library(tidyverse)
library (ipumsr)
library(writexl)
# Read in ipums data

ddi <- read_ipums_ddi("usa_00038.xml")
data <- read_ipums_micro(ddi, data_file = ("usa_00038.datgz"), verbose = FALSE)
# make variable name lowercase
names(data) <- tolower(names(data))

# preview data
head(data)
# A tibble: 6 × 17
   year sample             serial cbserial  hhwt cluster statefip strata gq     
  <int> <int+lbl>           <dbl>    <dbl> <dbl>   <dbl> <int+lb>  <dbl> <int+l>
1  2022 202201 [2022 ACS] 1248440  2.02e12    55 2.02e12 48 [Tex…  40048 3 [Gro…
2  2022 202201 [2022 ACS] 1248441  2.02e12    50 2.02e12 48 [Tex… 530848 3 [Gro…
3  2022 202201 [2022 ACS] 1248442  2.02e12    80 2.02e12 48 [Tex… 380248 3 [Gro…
4  2022 202201 [2022 ACS] 1248443  2.02e12    13 2.02e12 48 [Tex… 530748 4 [Oth…
5  2022 202201 [2022 ACS] 1248444  2.02e12     5 2.02e12 48 [Tex…  20048 3 [Gro…
6  2022 202201 [2022 ACS] 1248445  2.02e12    10 2.02e12 48 [Tex… 462048 3 [Gro…
# ℹ 8 more variables: pernum <dbl>, perwt <dbl>, age <int+lbl>,
#   empstat <int+lbl>, empstatd <int+lbl>, wkswork1 <dbl+lbl>,
#   uhrswork <int+lbl>, incwage <dbl+lbl>
# check variable names are correct
names(data)
 [1] "year"     "sample"   "serial"   "cbserial" "hhwt"     "cluster" 
 [7] "statefip" "strata"   "gq"       "pernum"   "perwt"    "age"     
[13] "empstat"  "empstatd" "wkswork1" "uhrswork" "incwage" 

ACS Data Prep

# convert data to tibble for better readability 
work_data <- as_tibble(data)

# Clean data
work_data <- work_data %>%
  mutate(
    hryear = uhrswork * wkswork1, # create a variable for the hours worked a year
    ft_workers = wkswork1 >= 12 & uhrswork >= 40, # full time workers
    pt_workers = wkswork1 >= 12 & uhrswork < 40, # part time workers
    hrs540_workers = hryear >=540
         ) %>% 
  
  filter(
    incwage != 999999, incwage != 999998, incwage > 0, # Remove invalid/missing income and unpaid family workers
    empstat == 1, # keep only people employed last year
    age >= 18, age <= 50 # keep only ages 18-50 years
  )
# count full-time and part-time workers
worker_counts <- work_data %>%
  summarize(
    full_time_count = sum(ft_workers, na.rm = TRUE),
    part_time_count = sum(pt_workers, na.rm = TRUE)
  )

print(worker_counts)
# A tibble: 1 × 2
  full_time_count part_time_count
            <int>           <int>
1           65075           16595

ACS Data Analysis

The Formula

# Function to calculate weighted and unweighted means for each worker type
calculate_means <- function(data, replacement_rate) {
  data %>%
    mutate(
      wkwage = incwage / wkswork1,
      mxbnft = wkwage * replacement_rate,
      capped = pmin(mxbnft, 1117.69) # Cap at 1117.69
    ) %>%
    summarise(
      ft_avg_ben_weighted = weighted.mean(capped[ft_workers], perwt[ft_workers]),
      pt_avg_ben_weighted = weighted.mean(capped[pt_workers], perwt[pt_workers]),
      hrs540_avg_ben_weighted = weighted.mean(capped[hrs540_workers], perwt[hrs540_workers])
    ) %>%
    mutate(replacement_rate = paste0(replacement_rate))
}

# Calculate means for each replacement rate and combine results
replacement_rates <- c(0.8, 0.9, 1.0)
final_results <- map_dfr(replacement_rates, ~ calculate_means(work_data, .x))

print(final_results)
# A tibble: 3 × 4
  ft_avg_ben_weighted pt_avg_ben_weighted hrs540_avg_ben_weighted
                <dbl>               <dbl>                   <dbl>
1                764.                362.                    697.
2                814.                398.                    745.
3                856.                433.                    786.
# ℹ 1 more variable: replacement_rate <chr>

Compare Number of Observations in ACS Data

# I was curious how many observations I dropped after adding all my criteria

cat("Number of observations in data:", nrow(data), "\n")
Number of observations in data: 292919 
cat("Number of observations in work_data:", nrow(work_data), "\n")
Number of observations in work_data: 84110 

Average Benefits using mxbnft

Coda left a comment about how she was unsure if we should use mxbnft or capped to calculate the average max benefits. I am including the estimates using mxbnft as an option.

# function to calculate weighted and unweighted means for each worker type using mxbnft instead of capped

calculate_means_mxbnft  <- function(data, replacement_rate) {
  data %>%
    mutate(
      wkwage = incwage / wkswork1,
      mxbnft = wkwage * replacement_rate,
    ) %>%
    summarise(
      ft_avg_ben_weighted = weighted.mean(mxbnft [ft_workers], perwt[ft_workers]),
      pt_avg_ben_weighted = weighted.mean(mxbnft [pt_workers], perwt[pt_workers]),
      hrs540_avg_ben_weighted = weighted.mean(mxbnft [hrs540_workers], perwt[hrs540_workers])
    ) %>%
    mutate(replacement_rate = paste0(replacement_rate))
}

# Calculate means for each replacement rate and combine results
replacement_rates <- c(0.8, 0.9, 1.0)
final_results_mxbnft <- map_dfr(replacement_rates, ~ calculate_means_mxbnft (work_data, .x))

print(final_results_mxbnft)
# A tibble: 3 × 4
  ft_avg_ben_weighted pt_avg_ben_weighted hrs540_avg_ben_weighted
                <dbl>               <dbl>                   <dbl>
1               1038.                409.                    933.
2               1167.                460.                   1049.
3               1297.                511.                   1166.
# ℹ 1 more variable: replacement_rate <chr>

Figure out Eligible Workers

# Calculate the total weight of the sample (result is total Texans eligible for parental leave)

total_eligible <- sum(work_data$perwt)
print(total_eligible)
[1] 9925102

Using California Usage Rates

Data for California is used since CPS data is unreliable for Texas.

According to the California Employment Development Department (EDD), in 2022 the average monthly labor force participation rate in California was 61.65% and the average number of people employed was 18,350,233 (Source: EDD Tableau dashboard).

In 2022, there were a total of 298,503 total paid family leave claims paid by the state of California (Source: California Open Data Portal).

The formula for take-up rates is the # of family leave claims divided by total # of people employed.

The CA usage rate is therefore: 1.6% , which is much higher than the .79% rate from CS’s 2013 paper.

usage_rate = (298503 / 18350233)
print(usage_rate)
[1] 0.01626699

Estimate of Texans Who Will Use Paid Leave

Updated Numbers:

  • 2022 Number of Eligible Workers in Texas: 9,925,102

  • If I calculate by California’s 2022 usage rate, the result is 161,451.

# calculate number of people who took parental leave
took_leave <- total_eligible * usage_rate
print(paste("# Took Leave:", took_leave))
[1] "# Took Leave: 161451.504310926"
# Calculate weekly costs for each worker type and replacement rate
weekly_costs <- final_results %>%
  pivot_longer(
    cols = starts_with("ft_"):starts_with("hrs540_"),
    names_to = "worker_type",
    values_to = "avg_ben_weighted"
  ) %>%
  mutate(
    weekly_cost = took_leave * avg_ben_weighted,
    worker_type = recode(worker_type,
                         "ft_avg_ben_weighted" = "Full-Time",
                         "pt_avg_ben_weighted" = "Part-Time",
                         "hrs540_avg_ben_weighted" = "540+ Hours")
  )

# overall weekly cost per replacement rate (just summing full and part time)
overall_totals_weekly <- weekly_costs %>%
  filter(worker_type %in% c("Full-Time", "Part-Time")) %>%
  group_by(replacement_rate) %>%
  summarise(overall_weekly_cost = sum(weekly_cost))

# annual costs by multiplying each weekly cost by 12
annual_costs <- weekly_costs %>%
  mutate(annual_cost = weekly_cost * 12) %>%
  left_join(overall_totals_weekly, by = "replacement_rate")

# overall annual cost per replacement rate (just summing full and part time)
overall_totals_annual <- annual_costs %>%
  filter(worker_type %in% c("Full-Time", "Part-Time")) %>%
  group_by(replacement_rate) %>%
  summarise(overall_annual_cost = sum(annual_cost))

# Combine all into a single table with the specified columns
final_table <- annual_costs %>%
  left_join(overall_totals_annual, by = "replacement_rate") %>%
  select(
    replacement_rate,
    worker_type,
    avg_ben_weighted,
    weekly_cost,
    overall_weekly_cost,
    annual_cost,
    overall_annual_cost
  )

print(final_table)
# A tibble: 9 × 7
  replacement_rate worker_type avg_ben_weighted weekly_cost overall_weekly_cost
  <chr>            <chr>                  <dbl>       <dbl>               <dbl>
1 0.8              Full-Time               764.  123275219.          181644542.
2 0.8              Part-Time               362.   58369322.          181644542.
3 0.8              540+ Hours              697.  112574820.          181644542.
4 0.9              Full-Time               814.  131368319.          195636199.
5 0.9              Part-Time               398.   64267881.          195636199.
6 0.9              540+ Hours              745.  120331239.          195636199.
7 1                Full-Time               856.  138188131.          208021130.
8 1                Part-Time               433.   69832999.          208021130.
9 1                540+ Hours              786.  126977464.          208021130.
# ℹ 2 more variables: annual_cost <dbl>, overall_annual_cost <dbl>

Save Data To Excel

write_xlsx(final_table, "final_pfl_2022.xlsx")

Summary of Findings (for Texas in 2022)

High level findings

Workers Eligible 9,925,102
Number People Using Family Leave 161,451

Average Annual Costs by Worker Status and Cap Levels, 2022

80% Cap 90% Cap 100% Cap
Full-time (last 3 months) $1,479,302,629 $1,576,419,826 $1,658,257,574
Part-time (last 3 months) $700,431,869 $771,214,566 $837,995,986
Worked at least 540 hours in last 12 months $1,350,897,842 $1,443,974,873 $1,523,729,566
Total Program Benefits Annual Costs (full-time + part-time workers) $2,179,734,498 $2,347,634,392 $2,496,253,560

Note on CPS Data


Note from Coda’s 2019 analysis: “Tried with CPS data using the”Why Absent” variables and observations dropped too low. The problem I encountered was that the #’s for the maternal/paternal variable were too low.”

I tried using more recent CPS data. Here are the umber of observations I got:

  • 42 in 2024

  • 76 in 2023

  • 87 in 2022

  • 50 in 2021

  • 0 in 2020

  • 0 in 2019

  • 0 in 2018

  • 0 in 2017

Since these are very small population sizes, I decided not to use CPS data for the final analysis and instead use California Usage Rates.

Methods - Calculating the Number of People Taking Parental Leave Using CPS Data


I am using Current Population Survey data from IPUMS CPS to calculate the rate of working Texas adults who took parental leave.

The CPS variable I am using is:

  • WHYABSNT: reports the reason for the absence of jobholders who were not working during the preceding week

  • WTFINL: final person-level weight that should be used in analyses of basic monthly data.

I filtered the data to only be for the state of Texas (FIPS = 48) in IPUMS before downloading the data.

Load CPS Data

cps_ddi<- read_ipums_ddi("cps_00008.xml")
cps_data <- read_ipums_micro(cps_ddi, data_file = ("cps_00008.dat.gz"), verbose = FALSE)

# check out data
names(cps_data) <- tolower(names(cps_data))
head(cps_data)
# A tibble: 6 × 15
   year serial month     hwtfinl   cpsid asecflag  hflag asecwth statefip pernum
  <dbl>  <dbl> <int+lbl>   <dbl>   <dbl> <int+lbl> <int>   <dbl> <int+lb>  <dbl>
1  2009  67616 3 [March]      NA 2.01e13 1 [ASEC]  NA      1475. 48 [Tex…      1
2  2009  67616 3 [March]      NA 2.01e13 1 [ASEC]  NA      1475. 48 [Tex…      2
3  2009  67616 3 [March]      NA 2.01e13 1 [ASEC]  NA      1475. 48 [Tex…      3
4  2009  67619 3 [March]      NA 2.01e13 1 [ASEC]  NA      1719. 48 [Tex…      1
5  2009  67620 3 [March]      NA 2.01e13 1 [ASEC]  NA      1252. 48 [Tex…      1
6  2009  67620 3 [March]      NA 2.01e13 1 [ASEC]  NA      1252. 48 [Tex…      2
# ℹ 5 more variables: wtfinl <dbl>, cpsidp <dbl>, cpsidv <dbl>, asecwt <dbl>,
#   whyabsnt <int+lbl>
names(cps_data)
 [1] "year"     "serial"   "month"    "hwtfinl"  "cpsid"    "asecflag"
 [7] "hflag"    "asecwth"  "statefip" "pernum"   "wtfinl"   "cpsidp"  
[13] "cpsidv"   "asecwt"   "whyabsnt"

CPS Data Prep

# filter data
cps_data2 <- filter(cps_data,
                    whyabsnt == 9, # select only people who took parental leave
                    year == 2024, # select year
                    !is.na(wtfinl)) # remove missing values from final person weight  

# # Weight the data by the final weight
# cps_data3 <- svydesign(ids = ~pernum, weights = ~wtfinl, data = cps_data2)
# # Calculate the number of workers who were absent from work on maternity/paternity leave
# num_parental_leave <- svytotal(~1, design = cps_data3)
# #Print the result
# print(num_parental_leave)
# #Count the number of observations for each value in the WHYABSNT variable in original dataset
# table(cps_data$whyabsnt)