Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:dplyr':
recode
The following object is masked from 'package:purrr':
some
# Read in the CPS extract (IPUMS DDI codebook + fixed-width data file).
# Original note: this data extract only selected cases for TX (Fips = 48).
# NOTE(review): region_c below recodes all four Census regions, which is
# redundant if the extract really is Texas-only -- confirm the extract scope.
ddi <- read_ipums_ddi("Equal Earnings OLS/cps_00002.xml")
data <- read_ipums_micro(
  ddi,
  data_file = ("Equal Earnings OLS/cps_00002.dat"),
  verbose = FALSE
)

# Make variable names lowercase.
names(data) <- tolower(names(data))

# Cleaning and recoding ----

data$sex_c <- Recode(
  as.numeric(data$sex),
  recodes = "1='Men'; 2='Women'; else = NA",
  as.factor = TRUE
)

# Indicator: 1 for respondents aged 18 or older, 0 otherwise.
data$age_18up <- ifelse(data$age > 17, 1, 0)

# 999 = NIU (not in universe) for usual hours worked last year.
data$uhrsworkly <- ifelse(data$uhrsworkly == 999, NA, data$uhrsworkly)

# INCWAGE sentinel codes: 99999999 = NIU, 99999998 = Missing. Both must become
# NA; otherwise the "Missing" code passes the `incwage > 0` filter below and is
# treated as real earnings.
incwage_raw <- as.numeric(data$incwage)
data$incwage <- ifelse(
  incwage_raw %in% c(99999998, 99999999),
  NA,
  incwage_raw
)

data$educ_c <- Recode(
  as.numeric(data$educ),
  recodes = "2:71='1. Below HS'; 73='2. HS Diploma or Equivalent'; 81:92 = '3. Some College or Associates'; 111 = '4. Bachelors Degree'; 123:125 = '5. Higher Degree'; else = NA",
  as.factor = TRUE
)

data$metro_c <- Recode(
  as.numeric(data$metro),
  recodes = "1='1. Not in a Metro Area'; 2:4='2. In a Metro Area'; else = NA",
  as.factor = TRUE
)

data$region_c <- Recode(
  as.numeric(data$region),
  recodes = "11:12='Northeast'; 21:22='Midwest'; 31:33='South';41:42='West'; else = NA",
  as.factor = TRUE
)

# Natural log of annual earnings. Zero earnings give -Inf here; those rows are
# dropped by the `incwage > 0` filter below.
data$ln_ann_earn <- log(data$incwage)

# Age squared.
data$age2 <- data$age^2

# Keep only the columns we want.
keep_cols <- c(
  "year", "serial", "cpsid", "asecflag", "asecwth", "region", "metro",
  "pernum", "cpsidp", "asecwt", "age", "wkswork1", "uhrsworkly", "incwage",
  "sex_c", "age_18up", "educ_c", "metro_c", "region_c", "ln_ann_earn", "age2"
)
data_filter <- data[, keep_cols]

# Remove those below 18 years of age and without "positive earnings and
# positive hours of work", plus rows missing any recoded covariate.
data_filter <- data_filter %>%
  filter(
    age_18up == 1,                          # only those 18+
    !is.na(incwage), incwage > 0,           # remove missing/invalid income data
    !is.na(uhrsworkly), uhrsworkly > 0,
    !is.na(wkswork1), wkswork1 > 0,
    !is.na(educ_c), !is.na(metro_c), !is.na(region_c)
  ) %>%
  mutate(
    hourswrk_LY = uhrsworkly * wkswork1,    # hours worked last year
    hourly_wage = incwage / hourswrk_LY     # hourly wage
  )

# Men only.
data_men <- data_filter %>%
  filter(sex_c == "Men")

# 90th percentile for wage for men is $150,000 in 2024, $136,000 in 2021.
names(data_men)
Call:
svyglm(formula = ln_ann_earn ~ age + age2 + educ_c + metro_c +
hourswrk_LY, design = des21)
Survey design:
svydesign(id = ~1, weights = ~asecwt, data = data_men21)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.353e+00 1.855e-01 39.643 < 2e-16 ***
age 5.273e-02 7.561e-03 6.974 4.09e-12 ***
age2 -4.898e-04 8.500e-05 -5.762 9.51e-09 ***
educ_c2. HS Diploma or Equivalent 3.000e-01 5.503e-02 5.451 5.59e-08 ***
educ_c3. Some College or Associates 2.997e-01 5.576e-02 5.374 8.56e-08 ***
educ_c4. Bachelors Degree 6.799e-01 5.427e-02 12.528 < 2e-16 ***
educ_c5. Higher Degree 7.012e-01 7.125e-02 9.842 < 2e-16 ***
metro_c2. In a Metro Area 1.789e-02 8.249e-02 0.217 0.828
hourswrk_LY 7.700e-04 3.745e-05 20.563 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 0.4626209)
Number of Fisher Scoring iterations: 2
Estimate Women’s Earnings with Men’s Coefficients
# Predict women's earnings using the men's coefficients.
#
# Method (quoted from the source methodology):
# "Women's earnings are predicted using the coefficients from the men's
# earnings equation (this method assumes that women retain their own human
# capital but are rewarded at the same rates as men would be) and calculated
# only for the actual hours that women worked during the year. Those with
# reduced predicted earnings are assigned their actual earnings during the
# year. The average earnings increase calculated for working women includes
# those with no predicted earnings increases under equal pay."

# Add the equal-pay prediction columns to a women's data frame.
#
# women:     data frame of women's records; must contain `incwage` and every
#            predictor used by `men_model`.
# men_model: model fitted on men's records with log annual earnings as the
#            response (here `ols24` / `ols21`).
#
# Returns `women` with four added columns:
#   Predict_log_annincome       - predicted natural log of annual income
#   Predict_annincome           - prediction back-transformed with exp()
#   Predict_difference          - predicted minus actual annual earnings
#   Predict_difference_Positive - the difference where positive, otherwise 0
add_equal_pay_prediction <- function(women, men_model) {
  # The model predicts the natural log of annual income.
  women$Predict_log_annincome <- predict(men_model, newdata = women)

  # Reverse the natural log with the exp function.
  women$Predict_annincome <- exp(women$Predict_log_annincome)

  # Predicted minus actual earnings.
  women$Predict_difference <- women$Predict_annincome - women$incwage

  # Only keep positive differences; women whose predicted earnings are lower
  # keep their actual earnings, i.e. a gain of 0.
  women$Predict_difference_Positive <- ifelse(
    women$Predict_difference > 0,
    women$Predict_difference,
    0
  )

  women
}

data_women24 <- add_equal_pay_prediction(data_women24, ols24)
data_women21 <- add_equal_pay_prediction(data_women21, ols21)

# Total predicted earnings gain for women in the 2021 sample.
# NOTE(review): this is an unweighted sum over sample rows; the survey weight
# (asecwt) is not applied here -- confirm that is intended.
sum(data_women21$Predict_difference_Positive)