Gender Wage Gap Cost to Texas, 2024

Author

Kaitlan Wong

Methods


For this estimate, I am utilizing 2024 Current Population Survey (CPS) ASEC data from IPUMS CPS.

The CPS variables I am using include:

  • INCWAGE: indicates each respondent’s total pre-tax wage and salary income–that is, money received as an employee–for the previous calendar year. Amounts are expressed as they were reported to the interviewer; users must adjust for inflation using Consumer Price Index adjustment factors.

  • UHRSWORKLY: reports the number of hours per week that respondents usually worked if they worked during the previous calendar year. Individuals were asked this question if: 1) they reported working at a job or business at any time during the previous year or 2) they acknowledged doing “any temporary, part-time, or seasonal work even for a few days” during the previous year.

  • WKSWORK1: reports the number of weeks, in single weeks, that the respondent worked for profit, pay, or as an unpaid family worker during the preceding calendar year. Respondents were prompted to count weeks in which they worked for even a few hours and to include paid vacation and sick leave as work. Information on weeks worked during the preceding year is available in the form of intervals for 1962 forward in the WKSWORK2 variable.

I restricted the extract to Texas (STATEFIP = 48) in IPUMS before downloading the data, so every record in the file is a Texas respondent.

Load CPS Data

library(tidyverse)
library (ipumsr)
library(writexl)
# Load the IPUMS CPS extract: parse the DDI codebook first, then read the
# microdata file that it describes.
ddi <- read_ipums_ddi("cps_00015.xml")
data <- read_ipums_micro(ddi, data_file = "cps_00015.datgz", verbose = FALSE)

# Convert every column name to lower case so later code can refer to
# variables as `incwage`, `sex`, etc.
data <- setNames(data, tolower(names(data)))

# Quick look at the first rows
head(data)
# A tibble: 6 × 21
   year serial month      cpsid asecflag asecwth statefip pernum  cpsidv  cpsidp
  <dbl>  <dbl> <int+lb>   <dbl> <int+lb>   <dbl> <int+lb>  <dbl>   <dbl>   <dbl>
1  2024  60100 3 [Marc… 2.02e13 1 [ASEC]   3554. 48 [Tex…      1 2.02e14 2.02e13
2  2024  60100 3 [Marc… 2.02e13 1 [ASEC]   3554. 48 [Tex…      2 2.02e14 2.02e13
3  2024  60102 3 [Marc… 2.02e13 1 [ASEC]   4355. 48 [Tex…      1 2.02e14 2.02e13
4  2024  60102 3 [Marc… 2.02e13 1 [ASEC]   4355. 48 [Tex…      2 2.02e14 2.02e13
5  2024  60103 3 [Marc… 2.02e13 1 [ASEC]   1901. 48 [Tex…      1 2.02e14 2.02e13
6  2024  60103 3 [Marc… 2.02e13 1 [ASEC]   1901. 48 [Tex…      2 2.02e14 2.02e13
# ℹ 11 more variables: asecwt <dbl>, hourwage2 <dbl>, age <int+lbl>,
#   sex <int+lbl>, empstat <int+lbl>, uhrswork1 <int+lbl>, earnwt <dbl>,
#   wkswork1 <dbl>, uhrsworkly <dbl+lbl>, incwage <dbl+lbl>, paidhour <int+lbl>
# List the column names to confirm the lower-casing worked
colnames(data)
 [1] "year"       "serial"     "month"      "cpsid"      "asecflag"  
 [6] "asecwth"    "statefip"   "pernum"     "cpsidv"     "cpsidp"    
[11] "asecwt"     "hourwage2"  "age"        "sex"        "empstat"   
[16] "uhrswork1"  "earnwt"     "wkswork1"   "uhrsworkly" "incwage"   
[21] "paidhour"  

Analysis

# Build the analysis sample: employed respondents with usable wage income,
# usual weekly hours, and weeks worked, plus a derived hourly wage.
#
# `> 0` alone does not remove IPUMS sentinel codes, which are very large
# positive numbers rather than NA: INCWAGE uses 99999998 (missing) and
# 99999999 (not in universe), and UHRSWORKLY uses 999 (not in universe).
# Explicit upper bounds keep those codes from being treated as real values.
wage_data <- data %>%
  filter(
    empstat %in% c(10, 12), # only employed people
    !is.na(incwage), incwage > 0, incwage < 99999998, # valid wage income only
    !is.na(uhrsworkly), uhrsworkly > 0, uhrsworkly < 999, # valid usual hours only
    !is.na(wkswork1), wkswork1 > 0 # worked at least one week last year
  ) %>%
  # hourly wage = annual wage income / (usual weekly hours * weeks worked)
  mutate(hourly_wage = incwage / (uhrsworkly * wkswork1))
# Survey-weighted (asecwt) mean hourly wage for each sex
gender_data <- wage_data %>%
  group_by(sex) %>%
  summarise(
    avg_hourly_wage = weighted.mean(x = hourly_wage, w = asecwt, na.rm = TRUE),
    .groups = "drop"
  )

gender_data
# A tibble: 2 × 2
  sex        avg_hourly_wage
  <int+lbl>            <dbl>
1 1 [Male]              39.1
2 2 [Female]            29.4
# Men's weighted mean hourly wage (sex == 1), taken from the summary table
men_avg_hourly_wage <- with(gender_data, avg_hourly_wage[which(sex == 1)])

men_avg_hourly_wage
[1] 39.099
# Counterfactual: pay each woman men's average hourly wage for the hours and
# weeks she actually worked, then compare with her reported wage income.
women_earnings_adjusted <- wage_data %>%
  filter(sex == 2) %>% # women only
  mutate(
    counterfactual_pay = men_avg_hourly_wage * uhrsworkly * wkswork1,
    pay_gain = counterfactual_pay - incwage
  ) %>%
  summarise(
    # weighted average of reported annual wage income
    current_avg = weighted.mean(incwage, asecwt, na.rm = TRUE),
    # weighted average of counterfactual annual wage income
    adjusted_avg = weighted.mean(counterfactual_pay, asecwt, na.rm = TRUE),
    # weighted sum of individual gains = statewide total
    state_increase = sum(pay_gain * asecwt, na.rm = TRUE),
    .groups = "drop"
  )

women_earnings_adjusted
# A tibble: 1 × 3
  current_avg adjusted_avg state_increase
        <dbl>        <dbl>          <dbl>
1      55238.       73926.  115524968698.
# statewide total increase would be $115 billion
# Average per-person gain if the hourly wage gap were closed:
# counterfactual average minus reported average
adjusted_earnings <- women_earnings_adjusted[["adjusted_avg"]]
current_earnings <- women_earnings_adjusted[["current_avg"]]
increase <- adjusted_earnings - current_earnings

increase
[1] 18688.18
# There would be an average individual increase of $18,688 per employed woman in the sample (women with positive wage income, hours, and weeks worked)

Alternate method: Removing the hours and weeks worked considerations.

# Survey-weighted mean annual wage income for each sex
# (no adjustment for hours or weeks worked)
gender_annual_earnings <- wage_data %>%
  group_by(sex) %>%
  summarise(
    avg_annual_earnings = weighted.mean(x = incwage, w = asecwt, na.rm = TRUE),
    .groups = "drop"
  )

# Men's weighted mean annual wage income (sex == 1)
men_avg_annual_earnings <- with(
  gender_annual_earnings,
  avg_annual_earnings[which(sex == 1)]
)

# Alternate counterfactual: every woman earns men's average annual wage
# income, regardless of her hours or weeks worked.
women_earnings_adjusted <- wage_data %>%
  filter(sex == 2) %>% # filter for women
  mutate(
    adjusted_annual_earnings = men_avg_annual_earnings,
    # Unweighted per-person gain. The survey weight is applied exactly once,
    # inside sum() below; multiplying by asecwt here as well would square the
    # weights and inflate the statewide total by several orders of magnitude.
    earnings_increase = adjusted_annual_earnings - incwage
  ) %>%
  summarize(
    current_avg = weighted.mean(incwage, asecwt, na.rm = TRUE),
    adjusted_avg = weighted.mean(adjusted_annual_earnings, asecwt, na.rm = TRUE),
    # weighted sum of individual gains = statewide total
    total_increase = sum(earnings_increase * asecwt, na.rm = TRUE),
    .groups = "drop"
  )

women_earnings_adjusted
# A tibble: 1 × 3
  current_avg adjusted_avg total_increase
        <dbl>        <dbl>          <dbl>
1      55238.       82157.        5.89e14
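# The $589 trillion total printed above is far too high: that output was produced with asecwt applied twice (once inside earnings_increase and again inside sum()), which inflates the total. This method also does not follow IWPR's standard of keeping things "comparable"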

Coda’s method: Multiply the wage gap percentage by total wages and use it to estimate the “lost” wages due to the gap.

# Per-sex summary: survey-weighted mean hourly wage and weighted total wages
gender_data <- wage_data %>%
  group_by(sex) %>%
  summarise(
    avg_hourly_wage = weighted.mean(x = hourly_wage, w = asecwt, na.rm = TRUE),
    # weighted sum of wage income = total wages for each gender
    total_wages = sum(incwage * asecwt, na.rm = TRUE),
    .groups = "drop"
  )


# Pull each sex's weighted mean hourly wage out of the summary table
men_avg_hourly_wage <- with(gender_data, avg_hourly_wage[which(sex == 1)])
women_avg_hourly_wage <- with(gender_data, avg_hourly_wage[which(sex == 2)])

# Women's weighted total wage income
women_total_wages <- with(gender_data, total_wages[which(sex == 2)])

# Wage gap as a share of men's average hourly wage
wage_gap_pct <- (men_avg_hourly_wage - women_avg_hourly_wage) / men_avg_hourly_wage

# Estimated "lost" wages: the gap share applied to women's total wages
wage_gap_loss <- wage_gap_pct * women_total_wages

# Collect the headline figures in a one-row table
# (columns take their names from the variables passed in)
result <- tibble(
  men_avg_hourly_wage,
  women_avg_hourly_wage,
  wage_gap_pct,
  women_total_wages,
  wage_gap_loss
)

result
# A tibble: 1 × 5
  men_avg_hourly_wage women_avg_hourly_wage wage_gap_pct women_total_wages
                <dbl>                 <dbl>        <dbl>             <dbl>
1                39.1                  29.4        0.247     341463491989.
# ℹ 1 more variable: wage_gap_loss <dbl>
# women total wages: 341,463,491,989    
# Wage gap loss: $84,474,245,208

Replicate Coda’s method using 2021 CPS ASEC data.

library(tidyverse)
library (ipumsr)
library(writexl)

# Load the 2021 IPUMS CPS extract: parse the DDI codebook, then read the
# microdata file that it describes. This overwrites `ddi` and `data`.
ddi <- read_ipums_ddi("cps_00016.xml")
data <- read_ipums_micro(ddi, data_file = "cps_00016.datgz", verbose = FALSE)

# Convert every column name to lower case
data <- setNames(data, tolower(names(data)))


# Build the analysis sample: employed respondents with usable wage income,
# usual weekly hours, and weeks worked, plus a derived hourly wage.
#
# `> 0` alone does not remove IPUMS sentinel codes, which are very large
# positive numbers rather than NA: INCWAGE uses 99999998 (missing) and
# 99999999 (not in universe), and UHRSWORKLY uses 999 (not in universe).
# Explicit upper bounds keep those codes from being treated as real values.
wage_data <- data %>%
  filter(
    empstat %in% c(10, 12), # only employed people
    !is.na(incwage), incwage > 0, incwage < 99999998, # valid wage income only
    !is.na(uhrsworkly), uhrsworkly > 0, uhrsworkly < 999, # valid usual hours only
    !is.na(wkswork1), wkswork1 > 0 # worked at least one week last year
  ) %>%
  # hourly wage = annual wage income / (usual weekly hours * weeks worked)
  mutate(hourly_wage = incwage / (uhrsworkly * wkswork1))


# Per-sex summary: survey-weighted mean hourly wage and weighted total wages
gender_data <- wage_data %>%
  group_by(sex) %>%
  summarise(
    avg_hourly_wage = weighted.mean(x = hourly_wage, w = asecwt, na.rm = TRUE),
    # weighted sum of wage income = total wages for each gender
    total_wages = sum(incwage * asecwt, na.rm = TRUE),
    .groups = "drop"
  )


# Pull each sex's weighted mean hourly wage out of the summary table
men_avg_hourly_wage <- with(gender_data, avg_hourly_wage[which(sex == 1)])
women_avg_hourly_wage <- with(gender_data, avg_hourly_wage[which(sex == 2)])

# Women's weighted total wage income
women_total_wages <- with(gender_data, total_wages[which(sex == 2)])

# Wage gap as a share of men's average hourly wage
wage_gap_pct <- (men_avg_hourly_wage - women_avg_hourly_wage) / men_avg_hourly_wage

# Estimated "lost" wages: the gap share applied to women's total wages
wage_gap_loss <- wage_gap_pct * women_total_wages

# Collect the headline figures in a one-row table
# (columns take their names from the variables passed in)
result <- tibble(
  men_avg_hourly_wage,
  women_avg_hourly_wage,
  wage_gap_pct,
  women_total_wages,
  wage_gap_loss
)

result
# A tibble: 1 × 5
  men_avg_hourly_wage women_avg_hourly_wage wage_gap_pct women_total_wages
                <dbl>                 <dbl>        <dbl>             <dbl>
1                34.9                  29.2        0.164     287533645725.
# ℹ 1 more variable: wage_gap_loss <dbl>
# Wage gap loss: $47,142,046,105

# IWPR's estimate: $47.87 billion (https://cppp-my.sharepoint.com/personal/rayo-garza_everytexan_org/_layouts/15/onedrive.aspx?id=%2Fpersonal%2Frayo%2Dgarza%5Feverytexan%5Forg%2FDocuments%2FFFS%2FTXWF%2F2024%2F2024%20Draft%20Report%2FEconomic%2DImpact%2Dof%2DEqual%2DPay%2Dby%2DState%5FFINAL%20%281%29%2Epdf&parent=%2Fpersonal%2Frayo%2Dgarza%5Feverytexan%5Forg%2FDocuments%2FFFS%2FTXWF%2F2024%2F2024%20Draft%20Report&ga=1)