library(ipumsr)
library(tidyverse)
library(writexl)

Women’s Lifetime Loss of Earnings Due to Gender Inequality in Texas
Estimating Lifetime Loss of Earnings for Women in Texas Due to Gender Pay Inequality
Source: 2015-2024 Current Population Survey Annual Social and Economic Supplement (CPS-ASEC), via IPUMS CPS.
Sources, Methods, and Data Notes
This analysis calculates women’s estimated lifetime loss of earnings due to gender pay inequality in Texas.
For the analysis, I use 2015-2024 CPS-ASEC cross-sectional data from IPUMS CPS; I did not use any of the basic monthly samples. I restricted the data to Texas (FIPS = 48) before downloading the extract. Note that the code below keeps only the 2024 sample (`year == 2024`), so the reported medians are for 2024.
The methodology was inspired by TIMES’ calculations from this article. Whereas they used BLS data, I used CPS data.
Data Prep
# Load the CPS-ASEC data extract (2015-2024).
# The DDI (.xml) file describes the layout of the fixed-width .dat file, so it
# must be read first and passed to the microdata reader.
ddi <- read_ipums_ddi("cps_00025.xml")

# Read the microdata and lower-case every column name so the rest of the
# script can refer to variables as `age`, `sex`, `incwage`, `asecwt`, ...
data <- read_ipums_micro(
  ddi,
  data_file = "cps_00025.dat",
  verbose = FALSE
) %>%
  rename_with(tolower)

Cleaning Data
data <- data %>%
  # Recode the wage sentinel to NA *before* filtering. If this is done after
  # the filter, sentinel rows pass the `incwage > 0` test and stay in the data
  # as NA wages. 99999999 is treated as "no real wage" (presumably the IPUMS
  # not-in-universe code -- confirm against the codebook).
  mutate(
    incwage = na_if(incwage, 99999999) %>% as.numeric()
  ) %>%
  # Keep people aged 16 and up with a positive, non-missing wage income.
  # Only the 2024 sample is kept, even though the extract spans 2015-2024.
  filter(
    age > 15,
    !is.na(incwage) & incwage > 0,
    year == 2024
  ) %>%
  # Recode sex and age into labelled groups.
  mutate(
    sex_c = case_when(
      sex == 1 ~ "Men",
      sex == 2 ~ "Women",
      TRUE ~ NA_character_
    ) %>% factor(),
    # Ages above 70 match no bracket and get NA here; they show up downstream
    # as an <NA> age group with no time span.
    age_group = case_when(
      age >= 16 & age <= 19 ~ "16 to 19 years",
      age >= 20 & age <= 24 ~ "20 to 24 years",
      age >= 25 & age <= 34 ~ "25 to 34 years",
      age >= 35 & age <= 44 ~ "35 to 44 years",
      age >= 45 & age <= 54 ~ "45 to 54 years",
      age >= 55 & age <= 64 ~ "55 to 64 years",
      age >= 65 & age <= 70 ~ "65 to 70 years",
      TRUE ~ NA_character_
    )
  )

Calculating Lifetime Loss of Earnings
library(matrixStats) # for weighted median calculation
Warning: package 'matrixStats' was built under R version 4.2.3
Attaching package: 'matrixStats'
The following object is masked from 'package:dplyr':
count
# Lookup table: one row per age bracket, with the number of years a person
# spends in that bracket (both end ages inclusive, e.g. 16-19 is 4 years and
# 65-70 is 6 years). The labels must match the `age_group` values exactly
# because the table is joined on that column.
age_spans <- tribble(
  ~age_group,       ~time_span,
  "16 to 19 years", 4,
  "20 to 24 years", 5,
  "25 to 34 years", 10,
  "35 to 44 years", 10,
  "45 to 54 years", 10,
  "55 to 64 years", 10,
  "65 to 70 years", 6
)
# Weighted median of `x` using weights `w`.
#   x: numeric vector of values.
#   w: weights passed alongside `x` to matrixStats::weightedMedian().
# Returns NA_real_ when `x` is empty or entirely NA; otherwise the weighted
# median computed with na.rm = TRUE.
weighted_median <- function(x, w) {
  has_values <- !all(is.na(x))
  if (has_values) {
    matrixStats::weightedMedian(x, w, na.rm = TRUE)
  } else {
    NA_real_
  }
}
# Weighted median annual wage income for every sex x age-group cell, using
# `asecwt` as the weight. `.groups = "drop"` returns an ungrouped tibble.
median_income <- summarise(
  group_by(data, sex_c, age_group),
  median_income = weighted_median(incwage, asecwt),
  .groups = "drop"
)
# Attach the number of years in each age bracket, then estimate the income
# accrued over the bracket as (median annual income) x (years in bracket).
# Brackets with no match in `age_spans` (the NA age group) get an NA span and
# therefore an NA accrued income.
income_with_spans <- mutate(
  left_join(median_income, age_spans, by = "age_group"),
  tot_accrued_inc = median_income * time_span
)
# Running total of accrued income across age brackets, separately per sex.
# `age_group` is a character column, so the sort is lexical; for these labels
# that matches chronological order, and NA age groups sort last within each
# sex, so only their own cumulative value is NA.
# The result is left grouped by `sex_c`.
income_with_cumulative <- mutate(
  group_by(
    arrange(income_with_spans, sex_c, age_group),
    sex_c
  ),
  cumulative_lifetime_income = cumsum(tot_accrued_inc)
)
print(income_with_cumulative)
# A tibble: 16 × 6
# Groups: sex_c [2]
sex_c age_group median_income time_span tot_accrued_inc
<fct> <chr> <dbl> <dbl> <dbl>
1 Men 16 to 19 years 10371. 4 41482.
2 Men 20 to 24 years 25000 5 125000
3 Men 25 to 34 years 52000 10 520000
4 Men 35 to 44 years 65000 10 650000
5 Men 45 to 54 years 72471. 10 724709.
6 Men 55 to 64 years 70000 10 700000
7 Men 65 to 70 years 50000 6 300000
8 Men <NA> 37672. NA NA
9 Women 16 to 19 years 8000 4 32000
10 Women 20 to 24 years 21000 5 105000
11 Women 25 to 34 years 43080. 10 430800.
12 Women 35 to 44 years 50647. 10 506474.
13 Women 45 to 54 years 50000 10 500000
14 Women 55 to 64 years 43676. 10 436756.
15 Women 65 to 70 years 26000 6 156000
16 Women <NA> 23374. NA NA
# ℹ 1 more variable: cumulative_lifetime_income <dbl>
# Write the per-sex, per-age-group table (including the cumulative lifetime
# income column) to an Excel workbook in the working directory.
# Note: the sheet holds cumulative income for each sex; the men-minus-women
# gap itself is not computed in this script.
writexl::write_xlsx(
  x = income_with_cumulative,
  path = "lifetime_earnings_loss.xlsx"
)