library(tidyverse)
library (ipumsr)
library(writexl)Cost of Paid Family Leave for Texas, 2022
Methods - Calculating Number of Employed People and Max Benefits Using ACS Data
For this estimate, I am utilizing 2022 American Community Survey (ACS) 1-Year estimates from IPUMS USA. The parameters for estimating costs to Texas include:
Max benefit of 12 weeks paid parental leave for all workers that have worked a minimum of 3 months of full time employment, a minimum of 3 months of part time employments, or 540 hours in the last year. I provide a final breakdown of cost estimates by these three worker types.
Salary replacement between 80 and 100 percent of worker’s wages based on their weekly earnings.
Maximum weekly benefit of $1,117.69 (per Amanda’s email).
The ACS variables I am using include:
INCWAGE: total pre-tax wage and salary income - that is, money received as an employee - for the previous year.
EMPSTAT: indicates whether the respondent was a part of the labor force -- working or seeking work -- and, if so, whether the person was currently unemployed
UHRSWORK: number of hours per week that the respondent usually worked, if the person worked during the previous year.
WKSWORK1: number of weeks that the respondent worked for profit, pay, or as an unpaid family worker during the previous year.
I filtered the data to only be for the state of Texas (FIPS = 48) in IPUMS before downloading the data.
Load ACS Data
# Read in ipums data
ddi <- read_ipums_ddi("usa_00038.xml")
data <- read_ipums_micro(ddi, data_file = ("usa_00038.datgz"), verbose = FALSE)# make variable name lowercase
names(data) <- tolower(names(data))
# preview data
head(data)# A tibble: 6 × 17
year sample serial cbserial hhwt cluster statefip strata gq
<int> <int+lbl> <dbl> <dbl> <dbl> <dbl> <int+lb> <dbl> <int+l>
1 2022 202201 [2022 ACS] 1248440 2.02e12 55 2.02e12 48 [Tex… 40048 3 [Gro…
2 2022 202201 [2022 ACS] 1248441 2.02e12 50 2.02e12 48 [Tex… 530848 3 [Gro…
3 2022 202201 [2022 ACS] 1248442 2.02e12 80 2.02e12 48 [Tex… 380248 3 [Gro…
4 2022 202201 [2022 ACS] 1248443 2.02e12 13 2.02e12 48 [Tex… 530748 4 [Oth…
5 2022 202201 [2022 ACS] 1248444 2.02e12 5 2.02e12 48 [Tex… 20048 3 [Gro…
6 2022 202201 [2022 ACS] 1248445 2.02e12 10 2.02e12 48 [Tex… 462048 3 [Gro…
# ℹ 8 more variables: pernum <dbl>, perwt <dbl>, age <int+lbl>,
# empstat <int+lbl>, empstatd <int+lbl>, wkswork1 <dbl+lbl>,
# uhrswork <int+lbl>, incwage <dbl+lbl>
# check variable names are correct
names(data) [1] "year" "sample" "serial" "cbserial" "hhwt" "cluster"
[7] "statefip" "strata" "gq" "pernum" "perwt" "age"
[13] "empstat" "empstatd" "wkswork1" "uhrswork" "incwage"
ACS Data Prep
# convert data to tibble for better readability
work_data <- as_tibble(data)
# Clean data
work_data <- work_data %>%
mutate(
hryear = uhrswork * wkswork1, # create a variable for the hours worked a year
ft_workers = wkswork1 >= 12 & uhrswork >= 40, # full time workers
pt_workers = wkswork1 >= 12 & uhrswork < 40, # part time workers
hrs540_workers = hryear >=540
) %>%
filter(
incwage != 999999, incwage != 999998, incwage > 0, # Remove invalid/missing income and unpaid family workers
empstat == 1, # keep only people employed last year
age >= 18, age <= 50 # keep only ages 18-50 years
)# count full-time and part-time workers
worker_counts <- work_data %>%
summarize(
full_time_count = sum(ft_workers, na.rm = TRUE),
part_time_count = sum(pt_workers, na.rm = TRUE)
)
print(worker_counts)# A tibble: 1 × 2
full_time_count part_time_count
<int> <int>
1 65075 16595
ACS Data Analysis
The Formula
# Function to calculate weighted and unweighted means for each worker type
calculate_means <- function(data, replacement_rate) {
data %>%
mutate(
wkwage = incwage / wkswork1,
mxbnft = wkwage * replacement_rate,
capped = pmin(mxbnft, 1117.69) # Cap at 1117.69
) %>%
summarise(
ft_avg_ben_weighted = weighted.mean(capped[ft_workers], perwt[ft_workers]),
pt_avg_ben_weighted = weighted.mean(capped[pt_workers], perwt[pt_workers]),
hrs540_avg_ben_weighted = weighted.mean(capped[hrs540_workers], perwt[hrs540_workers])
) %>%
mutate(replacement_rate = paste0(replacement_rate))
}
# Calculate means for each replacement rate and combine results
replacement_rates <- c(0.8, 0.9, 1.0)
final_results <- map_dfr(replacement_rates, ~ calculate_means(work_data, .x))
print(final_results)# A tibble: 3 × 4
ft_avg_ben_weighted pt_avg_ben_weighted hrs540_avg_ben_weighted
<dbl> <dbl> <dbl>
1 764. 362. 697.
2 814. 398. 745.
3 856. 433. 786.
# ℹ 1 more variable: replacement_rate <chr>
Compare Number of Observations in ACS Data
# I was curious how many observations I dropped after adding all my criteria
cat("Number of observations in data:", nrow(data), "\n")Number of observations in data: 292919
cat("Number of observations in work_data:", nrow(work_data), "\n")Number of observations in work_data: 84110
Average Benefits using mxbnft
Coda left a comment about how she was unsure if we should use mxbnft or capped to calculate the average max benefits. I am including the estimates using mxbnft as an option.
# function to calculate weighted and unweighted means for each worker type using mxbnft instead of capped
calculate_means_mxbnft <- function(data, replacement_rate) {
data %>%
mutate(
wkwage = incwage / wkswork1,
mxbnft = wkwage * replacement_rate,
) %>%
summarise(
ft_avg_ben_weighted = weighted.mean(mxbnft [ft_workers], perwt[ft_workers]),
pt_avg_ben_weighted = weighted.mean(mxbnft [pt_workers], perwt[pt_workers]),
hrs540_avg_ben_weighted = weighted.mean(mxbnft [hrs540_workers], perwt[hrs540_workers])
) %>%
mutate(replacement_rate = paste0(replacement_rate))
}
# Calculate means for each replacement rate and combine results
replacement_rates <- c(0.8, 0.9, 1.0)
final_results_mxbnft <- map_dfr(replacement_rates, ~ calculate_means_mxbnft (work_data, .x))
print(final_results_mxbnft)# A tibble: 3 × 4
ft_avg_ben_weighted pt_avg_ben_weighted hrs540_avg_ben_weighted
<dbl> <dbl> <dbl>
1 1038. 409. 933.
2 1167. 460. 1049.
3 1297. 511. 1166.
# ℹ 1 more variable: replacement_rate <chr>
Figure out Eligible Workers
# Calculate the total weight of the sample (result is total Texans eligible for parental leave)
total_eligible <- sum(work_data$perwt)
print(total_eligible)[1] 9925102
Using California Usage Rates
Data for California is used since CPS data is unreliable for Texas.
According to the California Employment Development Department (EDD), in 2022 the average monthly labor force participation rate in California was 61.65% and the average number of people employed was 18,350,233 (Source: EDD Tableau dashboard).
In 2022, there were a total of 298,503 total paid family leave claims paid by the state of California (Source: California Open Data Portal).
The formula for take-up rates is the # of family leave claims divided by total # of people employed.
The CA usage rate is therefore: 1.6% , which is much higher than the .79% rate from CS’s 2013 paper.
usage_rate = (298503 / 18350233)
print(usage_rate)[1] 0.01626699
Estimate of Texans Who Will Use Paid Leave
Updated Numbers:
2022 Number of Eligible Workers in Texas: 9,925,102
If I calculate by California’s 2022 usage rate, the result is 161,451.
# calculate number of people who took parental leave
took_leave <- total_eligible * usage_rate
print(paste("# Took Leave:", took_leave))[1] "# Took Leave: 161451.504310926"
# Calculate weekly costs for each worker type and replacement rate
weekly_costs <- final_results %>%
pivot_longer(
cols = starts_with("ft_"):starts_with("hrs540_"),
names_to = "worker_type",
values_to = "avg_ben_weighted"
) %>%
mutate(
weekly_cost = took_leave * avg_ben_weighted,
worker_type = recode(worker_type,
"ft_avg_ben_weighted" = "Full-Time",
"pt_avg_ben_weighted" = "Part-Time",
"hrs540_avg_ben_weighted" = "540+ Hours")
)
# overall weekly cost per replacement rate (just summing full and part time)
overall_totals_weekly <- weekly_costs %>%
filter(worker_type %in% c("Full-Time", "Part-Time")) %>%
group_by(replacement_rate) %>%
summarise(overall_weekly_cost = sum(weekly_cost))
# annual costs by multiplying each weekly cost by 12
annual_costs <- weekly_costs %>%
mutate(annual_cost = weekly_cost * 12) %>%
left_join(overall_totals_weekly, by = "replacement_rate")
# overall annual cost per replacement rate (just summing full and part time)
overall_totals_annual <- annual_costs %>%
filter(worker_type %in% c("Full-Time", "Part-Time")) %>%
group_by(replacement_rate) %>%
summarise(overall_annual_cost = sum(annual_cost))
# Combine all into a single table with the specified columns
final_table <- annual_costs %>%
left_join(overall_totals_annual, by = "replacement_rate") %>%
select(
replacement_rate,
worker_type,
avg_ben_weighted,
weekly_cost,
overall_weekly_cost,
annual_cost,
overall_annual_cost
)
print(final_table)# A tibble: 9 × 7
replacement_rate worker_type avg_ben_weighted weekly_cost overall_weekly_cost
<chr> <chr> <dbl> <dbl> <dbl>
1 0.8 Full-Time 764. 123275219. 181644542.
2 0.8 Part-Time 362. 58369322. 181644542.
3 0.8 540+ Hours 697. 112574820. 181644542.
4 0.9 Full-Time 814. 131368319. 195636199.
5 0.9 Part-Time 398. 64267881. 195636199.
6 0.9 540+ Hours 745. 120331239. 195636199.
7 1 Full-Time 856. 138188131. 208021130.
8 1 Part-Time 433. 69832999. 208021130.
9 1 540+ Hours 786. 126977464. 208021130.
# ℹ 2 more variables: annual_cost <dbl>, overall_annual_cost <dbl>
Save Data To Excel
write_xlsx(final_table, "final_pfl_2022.xlsx")Summary of Findings (for Texas in 2022)
High level findings
| Workers Eligible | 9,925,102 |
| Number People Using Family Leave | 161,451 |
Average Annual Costs by Worker Status and Cap Levels, 2022
| 80% Cap | 90% Cap | 100% Cap | |
|---|---|---|---|
| Full-time (last 3 months) | $1,479,302,629 | $1,576,419,826 | $1,658,257,574 |
| Part-time (last 3 months) | $700,431,869 | $771,214,566 | $837,995,986 |
| Worked at least 540 hours in last 12 months | $1,350,897,842 | $1,443,974,873 | $1,523,729,566 |
| Total Program Benefits Annual Costs (full-time + part-time workers) | $2,179,734,498 | $2,347,634,392 | $2,496,253,560 |
Note on CPS Data
Note from Coda’s 2019 analysis: “Tried with CPS data using the”Why Absent” variables and observations dropped too low. The problem I encountered was that the #’s for the maternal/paternal variable were too low.”
I tried using more recent CPS data. Here are the umber of observations I got:
42 in 2024
76 in 2023
87 in 2022
50 in 2021
0 in 2020
0 in 2019
0 in 2018
0 in 2017
Since these are very small population sizes, I decided not to use CPS data for the final analysis and instead use California Usage Rates.
Methods - Calculating the Number of People Taking Parental Leave Using CPS Data
I am using Current Population Survey data from IPUMS CPS to calculate the rate of working Texas adults who took parental leave.
The CPS variable I am using is:
WHYABSNT: reports the reason for the absence of jobholders who were not working during the preceding week
WTFINL: final person-level weight that should be used in analyses of basic monthly data.
I filtered the data to only be for the state of Texas (FIPS = 48) in IPUMS before downloading the data.
Load CPS Data
cps_ddi<- read_ipums_ddi("cps_00008.xml")
cps_data <- read_ipums_micro(cps_ddi, data_file = ("cps_00008.dat.gz"), verbose = FALSE)
# check out data
names(cps_data) <- tolower(names(cps_data))
head(cps_data)# A tibble: 6 × 15
year serial month hwtfinl cpsid asecflag hflag asecwth statefip pernum
<dbl> <dbl> <int+lbl> <dbl> <dbl> <int+lbl> <int> <dbl> <int+lb> <dbl>
1 2009 67616 3 [March] NA 2.01e13 1 [ASEC] NA 1475. 48 [Tex… 1
2 2009 67616 3 [March] NA 2.01e13 1 [ASEC] NA 1475. 48 [Tex… 2
3 2009 67616 3 [March] NA 2.01e13 1 [ASEC] NA 1475. 48 [Tex… 3
4 2009 67619 3 [March] NA 2.01e13 1 [ASEC] NA 1719. 48 [Tex… 1
5 2009 67620 3 [March] NA 2.01e13 1 [ASEC] NA 1252. 48 [Tex… 1
6 2009 67620 3 [March] NA 2.01e13 1 [ASEC] NA 1252. 48 [Tex… 2
# ℹ 5 more variables: wtfinl <dbl>, cpsidp <dbl>, cpsidv <dbl>, asecwt <dbl>,
# whyabsnt <int+lbl>
names(cps_data) [1] "year" "serial" "month" "hwtfinl" "cpsid" "asecflag"
[7] "hflag" "asecwth" "statefip" "pernum" "wtfinl" "cpsidp"
[13] "cpsidv" "asecwt" "whyabsnt"
CPS Data Prep
# filter data
cps_data2 <- filter(cps_data,
whyabsnt == 9, # select only people who took parental leave
year == 2024, # select year
!is.na(wtfinl)) # remove missing values from final person weight
# # Weight the data by the final weight
# cps_data3 <- svydesign(ids = ~pernum, weights = ~wtfinl, data = cps_data2)
# # Calculate the number of workers who were absent from work on maternity/paternity leave
# num_parental_leave <- svytotal(~1, design = cps_data3)
# #Print the result
# print(num_parental_leave)
# #Count the number of observations for each value in the WHYABSNT variable in original dataset
# table(cps_data$whyabsnt)