This document includes descriptive plots of the relationship between hours and wages over time in the U.S. in the past 4 decades using data from CPS-ORG and CPS-ASEC surveys.
Patterns from the literature. Costa (2000): while the elasticity between hours per day and hourly wages was negative at the end of the 19th century and in 1973, by 1991 it had become positive.
Mantovani (2024, not published):
# Embed the Mantovani (2024) figure from a local PNG file.
# NOTE(review): this is an absolute, user-specific path, so the document will
# not knit on another machine; other chunks build paths from `dir` instead.
knitr::include_graphics("/Users/tamary/Dropbox/WFH/Empirical work/pictures/mantovani.png")
1982-2024; ages 25-65; more than 10 hours per week; non self-employed workers.
Variables used:
# Load the IPUMS CPS-ORG extract with data.table's fread().
# NOTE(review): the path is anchored at the home directory (`~`), whereas the
# CEPR and ASEC files are read relative to `dir` — consider unifying.
ipums_org <- fread("~/Dropbox/WFH/Empirical work/Data/IPUMS CPS ORG/ipums_org.csv")
|--------------------------------------------------|
|==================================================|
# Sample restrictions (see the text above): ages 25-65, more than 10 and fewer
# than 100 usual weekly hours, and class-of-worker codes >= 20 excluding 99
# (described in the text as non self-employed workers).
ipums_org <- ipums_org %>%
  filter(
    AGE >= 25, AGE <= 65,
    UHRSWORK1 > 10, UHRSWORK1 < 100,
    CLASSWKR >= 20, CLASSWKR != 99
  )

# Hours bins: left-closed 5-hour intervals from 10 to 80, plus an open-ended
# top bin [80, Inf).
bin_breaks <- seq(10, 80, by = 5)
# NOTE(review): `bin_labels` is not passed to cut() below, so the `bin` factor
# keeps cut()'s default interval labels; kept in case later code uses it.
bin_labels <- c(
  "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49",
  "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80+"
)

# Derived variables. `ln_hrwage` depends on `HOURWAGE_computed`, so the order
# inside mutate() matters.
# NOTE(review): presumably PAIDHOUR == 1 codes "not paid by the hour" — confirm
# against the codebook.
ipums_org <- ipums_org %>%
  mutate(
    AGE2 = AGE * AGE,
    HOURWAGE_computed = EARNWEEK / UHRSWORK1,
    ln_hrwage = log(HOURWAGE_computed),
    bin = cut(UHRSWORK1, breaks = c(bin_breaks, Inf), right = FALSE),
    salaried = PAIDHOUR == 1
  )
Below I replicate a plot from Bick et al. (QJE 2022) to verify that the data cleaning is correct. This plot includes both the distribution of hours worked and the coefficients from a regression of log wages on a set of hours bins for men, controlling for a quadratic in age, education, marital status, union status, and race, as well as year and month fixed effects, for the years 1995-2007.
# Replication sample for the Bick et al. plot: 1995-2007, SEX == 1 (men),
# single-job holders only.
bick_et_al_data <- ipums_org %>%
  filter(
    YEAR >= 1995, YEAR <= 2007,
    SEX == 1,
    MULTJOB == 1 # not multijobbed
  )

# Log hourly wage on hours-bin dummies (no intercept, so every bin gets its own
# coefficient) plus demographic controls and year/month fixed effects.
formula_bins <- as.formula("ln_hrwage ~ bin + AGE + AGE2 +
factor(MARST) + factor(EDUC) +
factor(RACE) + factor(YEAR) + factor(MONTH) + UNION - 1")

# `combined()` is defined outside this section; called here with plotting on.
combined(
  bick_et_al_data,
  weight_var = "EARNWT",
  title = NULL,
  filename = NULL,
  bin_var = "bin",
  formula = formula_bins,
  plot = TRUE
)
Now I look at aggregate trends in hours worked over time. First, while women have consistently increased their working hours, men’s working hours have decreased slightly.
# Weighted (EARNWT) yearly summary of usual weekly hours by sex: mean hours and
# the shares working below, exactly, and above 40 hours.
# The column name `share_exacctly_40` (sic) is kept as-is so that any code
# relying on it keeps working.
summary_by_year_sex <- ipums_org %>%
  group_by(YEAR, SEX) %>%
  summarise(
    mean_hours = weighted.mean(UHRSWORK1, EARNWT),
    share_less_40 = weighted.mean(UHRSWORK1 < 40, EARNWT),
    share_exacctly_40 = weighted.mean(UHRSWORK1 == 40, EARNWT),
    share_above_40 = weighted.mean(UHRSWORK1 > 40, EARNWT),
    .groups = "drop"
  )

# Mean hours over time, one line per sex. The legend labels rely on the factor
# level order of SEX (1 = men, 2 = women, as used elsewhere in this file).
summary_by_year_sex %>%
  ggplot(aes(x = YEAR, y = mean_hours, color = factor(SEX))) +
  geom_point(size = 0.5) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(linewidth = 0.5) +
  scale_color_brewer(
    palette = "Set2",
    labels = c("Male", "Female")
  ) +
  labs(
    title = "Mean Hours Worked Over Time",
    x = "Year",
    y = "Mean Hours Worked",
    color = "Sex"
  ) +
  theme_minimal()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# Hours-distribution plots for all workers, men (SEX == 1) and women (SEX == 2).
# `plot_dist_40()` is defined outside this section; it receives the data, the
# hours column, the weight column and a title suffix.
plot_dist_40(ipums_org, UHRSWORK1, EARNWT, "all Workers, CPS-ORG")
plot_dist_40(filter(ipums_org, SEX == 1), UHRSWORK1, EARNWT, "Men, CPS-ORG")
plot_dist_40(filter(ipums_org, SEX == 2), UHRSWORK1, EARNWT, "Women, CPS-ORG")
Now I look at how this changed across weekly earnings deciles:
# Attach within-year weighted deciles of weekly earnings; the loop below reads
# the resulting `EARNWEEK_decile` column.
ipums_org <- add_weighted_decile_by_year(ipums_org, "EARNWEEK", "EARNWT", "YEAR")

# Ordinal suffixes for deciles 1..10, so titles read "1st", "2nd", "3rd"
# instead of "1th", "2th", "3th".
decile_suffix <- c("st", "nd", "rd", rep("th", 7))

# One hours-distribution plot per earnings decile.
# NOTE(review): values are not auto-printed inside a `for` loop, so this only
# shows plots if plot_dist_40() prints them itself — confirm.
for (i in seq_len(10)) {
  plot_dist_40(
    ipums_org %>% filter(EARNWEEK_decile == i),
    UHRSWORK1, EARNWT,
    paste0("all Workers, ", i, decile_suffix[i], " decile")
  )
}
Below, I plot the mean of hours worked by decile: - There seems to be something weird about the data from 2023-2024 - The Mantovani fact (D1 going down, D10 going down) does not replicate
# Mean usual weekly hours (UHRSWORK1) by decile of a second variable, over
# YEAR, weighted by EARNWT. `plot_v1_mean_by_v2_decile_over_v3()` is defined
# outside this section; the `[1] "..."` lines are its echoed output, kept
# verbatim.

# Deciles of weekly earnings, all observations.
plot_v1_mean_by_v2_decile_over_v3(ipums_org, "UHRSWORK1", "EARNWEEK", "YEAR", "EARNWT",
color_lab = "EARNWEEK Decile",
caption = "Source: CPS-ORG IPUMS")
[1] "UHRSWORK1 mean by EARNWEEK deciles, All Observations"
# Deciles of the computed hourly wage (EARNWEEK / UHRSWORK1), all observations.
plot_v1_mean_by_v2_decile_over_v3(ipums_org, "UHRSWORK1", "HOURWAGE_computed", "YEAR", "EARNWT",
caption = "Source: CPS-ORG IPUMS")
[1] "UHRSWORK1 mean by HOURWAGE_computed deciles, All Observations"
# Same, restricted to salaried workers.
plot_v1_mean_by_v2_decile_over_v3(ipums_org, "UHRSWORK1", "HOURWAGE_computed", "YEAR", "EARNWT",
filter_expr = salaried == 1,
caption = "Source: CPS-ORG IPUMS")
[1] "UHRSWORK1 mean by HOURWAGE_computed deciles, salaried == 1"
# Salaried workers, showing only deciles 1, 5 and 10.
plot_v1_mean_by_v2_decile_over_v3(ipums_org, "UHRSWORK1", "HOURWAGE_computed", "YEAR", "EARNWT",
filter_expr = salaried == 1,
caption = "Source: CPS-ORG IPUMS", deciles_to_plot = c(1,5,10))
[1] "UHRSWORK1 mean by HOURWAGE_computed deciles, salaried == 1"
# Non-salaried workers, computed hourly wage.
plot_v1_mean_by_v2_decile_over_v3(ipums_org, "UHRSWORK1", "HOURWAGE_computed", "YEAR", "EARNWT",
filter_expr = salaried == 0,
caption = "Source: CPS-ORG IPUMS")
[1] "UHRSWORK1 mean by HOURWAGE_computed deciles, salaried == 0"
# Non-salaried workers, using the HOURWAGE column instead of the computed wage.
plot_v1_mean_by_v2_decile_over_v3(ipums_org, "UHRSWORK1", "HOURWAGE", "YEAR", "EARNWT",
filter_expr = salaried == 0,
caption = "Source: CPS-ORG IPUMS")
[1] "UHRSWORK1 mean by HOURWAGE deciles, salaried == 0"
# Sanity check on the replication sample: correlation between the computed and
# the HOURWAGE-column hourly wage among non-salaried workers.
# NOTE(review): only NA values of HOURWAGE are dropped here. If the extract
# stores "not in universe" as a numeric sentinel rather than NA (the ASEC code
# in this file treats INCWAGE that way), those rows would distort this
# correlation and the HOURWAGE deciles above — verify against the codebook.
hourly_workers <- bick_et_al_data %>% filter(salaried == 0) %>% filter(!is.na(HOURWAGE))
cor(hourly_workers$HOURWAGE_computed, hourly_workers$HOURWAGE)
[1] 0.5235918
I also tried to replicate the Mantovani plot from CEPR CPS-MORG data that he states he used but the results are very weird. I am not sure which variables he used.
# Load the CEPR ORG extract and build the analysis variables in one pipeline:
#   1. flag workers with a non-missing `wage1` / `wage2`,
#   2. drop rows with missing usual hours (`uhourse`),
#   3. add log wage, log hours and age squared.
# NOTE(review): treating non-missing `wage1` as "hourly" and `wage2` as
# "salaried" is this file's own convention — confirm against the CEPR codebook.
ORG <- fread(paste0(dir,"/data/CEPR/cepr_data.csv")) %>%
  mutate(
    hourly_worker = !is.na(wage1),
    salaried_worker = !is.na(wage2)
  ) %>%
  filter(!is.na(uhourse)) %>%
  mutate(
    log_wage3 = log(wage3),
    log_uhoursw = log(uhourse),
    age_sq = age * age
  )
# a <- feols(log_uhoursw ~ log_wage3, data = ORG %>% filter(wage3> 0, uhourse >0), weights = ~fnlwgt, fsplit = ~year)
# Plot mean `uhourse` by `wage3` decile over `year` for the CEPR ORG data,
# weighted by `fnlwgt`, on a caller-chosen subsample.
#
# filter_expr: unquoted filtering expression over the columns of `ORG`
#   (e.g. `uhourse > 35 & salaried_worker == 1`). It is captured with enquo()
#   and unquoted into the helper, which therefore has to capture its own
#   `filter_expr` argument as a quosure.
#
# Returns whatever plot_v1_mean_by_v2_decile_over_v3() returns (defined outside
# this section). Reads the global `ORG`.
plot_uhourse_mean_by_wage3_decile_over_year_CPSORG <- function(filter_expr) {
  # No trailing comma after the last argument: in a plain R call a trailing
  # comma passes an extra, empty positional argument to the helper.
  plot_v1_mean_by_v2_decile_over_v3(
    ORG, "uhourse", "wage3", "year", "fnlwgt",
    filter_expr = !!enquo(filter_expr),
    caption = "Source: CPS-ORG"
  )
}
# Mean usual hours by wage3 decile on the CEPR data, for two hours thresholds
# (> 35 and > 10) crossed with all / salaried / hourly workers. The
# `[1] "..."` lines are echoed output, kept verbatim.

# Usual hours above 35: all, salaried, hourly.
plot_uhourse_mean_by_wage3_decile_over_year_CPSORG(uhourse > 35)
[1] "uhourse mean by wage3 deciles, uhourse > 35"
plot_uhourse_mean_by_wage3_decile_over_year_CPSORG(uhourse > 35 & salaried_worker == 1)
[1] "uhourse mean by wage3 deciles, uhourse > 35 & salaried_worker == 1"
plot_uhourse_mean_by_wage3_decile_over_year_CPSORG(uhourse > 35 & hourly_worker == 1)
[1] "uhourse mean by wage3 deciles, uhourse > 35 & hourly_worker == 1"
# Usual hours above 10 (matches the CPS-ORG IPUMS sample restriction): all,
# salaried, hourly.
plot_uhourse_mean_by_wage3_decile_over_year_CPSORG(uhourse > 10)
[1] "uhourse mean by wage3 deciles, uhourse > 10"
plot_uhourse_mean_by_wage3_decile_over_year_CPSORG(uhourse > 10 & salaried_worker == 1)
[1] "uhourse mean by wage3 deciles, uhourse > 10 & salaried_worker == 1"
plot_uhourse_mean_by_wage3_decile_over_year_CPSORG(uhourse > 10 & hourly_worker == 1)
[1] "uhourse mean by wage3 deciles, uhourse > 10 & hourly_worker == 1"
Note that abrupt changes in the topcode occurred in 1989 and 1998; I am not sure what happened in 1994.
I conducted a similar analysis for ASEC data, for the years 1962-2024, workers of ages 25-65. ASEC asks respondents about both actual hours worked last week and usual hours worked last year. Income is documented only on an annual basis - for the previous calendar year.
This document first plots patterns with annual income and then uses an estimate of hourly wages constructed by dividing the annual income by (# weeks per year X # hours per week.)
n = 3696401
# Load the ASEC extract and keep workers aged 25-65 with positive wage income,
# excluding the two sentinel codes for INCWAGE.
asec <- fread(paste0(dir,"/data/CPS_long_term/ASEC/ASEC_62_24.csv")) %>%
  filter(
    AGE >= 25,
    AGE <= 65,
    INCWAGE > 0,
    INCWAGE != 99999998, # missing
    INCWAGE != 99999999  # NIU
  )

# Subsamples with a valid hours measure; 999 is excluded as a sentinel code.
asec_worked_LY <- filter(asec, UHRSWORKLY != 999) # usual hours last year
asec_worked_LW <- filter(asec, AHRSWORKT != 999)  # actual hours last week
# Weighted (ASECWT) yearly summary of usual hours worked last year by sex: mean
# hours and the shares working below, exactly, and above 40 hours.
# The column name `share_exacctly_40` (sic) is kept as-is so that any code
# relying on it keeps working.
summary_by_year_sex_asec <- asec_worked_LY %>%
  group_by(YEAR, SEX) %>%
  summarise(
    mean_hours = weighted.mean(UHRSWORKLY, ASECWT),
    share_less_40 = weighted.mean(UHRSWORKLY < 40, ASECWT),
    share_exacctly_40 = weighted.mean(UHRSWORKLY == 40, ASECWT),
    share_above_40 = weighted.mean(UHRSWORKLY > 40, ASECWT),
    .groups = "drop"
  )

# Mean hours over time, one line per sex. The legend labels rely on the factor
# level order of SEX (1 = men, 2 = women, as used elsewhere in this file).
summary_by_year_sex_asec %>%
  ggplot(aes(x = YEAR, y = mean_hours, color = factor(SEX))) +
  geom_point(size = 0.5) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(linewidth = 0.5) +
  scale_color_brewer(
    palette = "Set2",
    labels = c("Male", "Female")
  ) +
  labs(
    title = "Mean Hours Worked Last Year Over Time, ASEC",
    x = "Year",
    y = "Mean Hours Worked",
    color = "Sex"
  ) +
  theme_minimal()
NA
NA
# Hours-distribution plots (usual hours last year) for all workers, men
# (SEX == 1) and women (SEX == 2). `plot_dist_40()` is defined outside this
# section.
plot_dist_40(asec_worked_LY, UHRSWORKLY, ASECWT, "all Workers, ASEC")
plot_dist_40(filter(asec_worked_LY, SEX == 1), UHRSWORKLY, ASECWT, "Men, ASEC")
plot_dist_40(filter(asec_worked_LY, SEX == 2), UHRSWORKLY, ASECWT, "Women, ASEC")

# Mean usual hours by annual wage-income decile over time: first all deciles,
# then only deciles 1, 5 and 10. Both plots share the same title and caption.
asec_all_title <- "Usual Hours Worked Last Year over Annual Income, all Workers"
asec_all_caption <- "Source: ASEC 1976-2024"

plot_v1_mean_by_v2_decile_over_v3(
  asec_worked_LY, "UHRSWORKLY", "INCWAGE", "YEAR", "ASECWT",
  title = asec_all_title,
  caption = asec_all_caption
)
plot_v1_mean_by_v2_decile_over_v3(
  asec_worked_LY, "UHRSWORKLY", "INCWAGE", "YEAR", "ASECWT",
  deciles_to_plot = c(1, 5, 10),
  title = asec_all_title,
  caption = asec_all_caption
)
Now, limit attention to workers who worked 50-52 weeks last year
# Full-year workers: WKSWORK2 == 6, described in the plot titles as 50-52 weeks
# worked last year.
asec_worked_LY_full <- filter(asec_worked_LY, WKSWORK2 == 6)

# Hours by annual wage-income decile: all deciles, then deciles 1, 5 and 10.
full_year_income_title <- "Usual Hours Worked Last Year over Annual Income, 50-52 weeks worked last year"
full_year_income_caption <- "Source: ASEC 1976-2024"

plot_v1_mean_by_v2_decile_over_v3(
  asec_worked_LY_full, "UHRSWORKLY", "INCWAGE", "YEAR", "ASECWT",
  title = full_year_income_title,
  caption = full_year_income_caption
)
plot_v1_mean_by_v2_decile_over_v3(
  asec_worked_LY_full, "UHRSWORKLY", "INCWAGE", "YEAR", "ASECWT",
  deciles_to_plot = c(1, 5, 10),
  title = full_year_income_title,
  caption = full_year_income_caption
)

# Hours by computed-hourly-wage decile. The wage is annual wage income divided
# by 51 weeks and by usual weekly hours, as stated in the caption.
full_year_with_wage <- mutate(
  asec_worked_LY_full,
  h_wage_usual = INCWAGE / 51 / UHRSWORKLY
)
full_year_wage_title <- "Usual Hours Worked Last Year by Computed Hourly Wage, 50-52 weeks worked last year"
full_year_wage_caption <- "Source: ASEC. Note: computed hourly wage = annual income / 51 / usual hours worked last year"

plot_v1_mean_by_v2_decile_over_v3(
  full_year_with_wage, "UHRSWORKLY", "h_wage_usual", "YEAR", "ASECWT",
  title = full_year_wage_title,
  caption = full_year_wage_caption
)
plot_v1_mean_by_v2_decile_over_v3(
  full_year_with_wage, "UHRSWORKLY", "h_wage_usual", "YEAR", "ASECWT",
  deciles_to_plot = c(1, 5, 10),
  title = full_year_wage_title,
  caption = full_year_wage_caption
)
Now, limit attention to workers with at least 40 hours worked in usual weeks
# Full-year workers (WKSWORK2 == 6) above an hours threshold. Each sample is
# built once and reused, and each pair of plots (all deciles / deciles 1, 5,
# 10) shares a single title. Previously the second plot of some pairs read
# ">= than 40 hours" where its twin read ">= 40 hours".

# Adds the computed hourly wage: annual wage income / 51 weeks / usual weekly
# hours, as stated in the wage captions.
add_usual_hourly_wage <- function(df) {
  df %>% mutate(h_wage_usual = INCWAGE / 51 / UHRSWORKLY)
}

# Analysis samples.
asec_fy_40 <- asec_worked_LY %>% filter(WKSWORK2 == 6, UHRSWORKLY >= 40)
asec_fy_40_wage <- add_usual_hourly_wage(asec_fy_40)
asec_fy_35_wage <- asec_worked_LY %>%
  filter(WKSWORK2 == 6, UHRSWORKLY >= 35) %>%
  add_usual_hourly_wage()
asec_fy_40_wage_men <- asec_fy_40_wage %>% filter(SEX == 1)
asec_fy_40_wage_women <- asec_fy_40_wage %>% filter(SEX == 2)

# Shared captions and titles.
fy_income_caption <- "Source: ASEC 1976-2024"
fy_wage_caption <- "Source: ASEC. Note: computed hourly wage = annual income / 51 / usual hours worked last year"
fy_40_income_title <- "Usual Hours Worked Last Year over Annual Income,\n50-52 weeks worked last year, >= 40h per usual week"
fy_40_wage_title <- "Usual Hours Worked Last Year by Computed Hourly Wage,\n50-52 weeks worked last year, >= 40 hours per usual week"
fy_35_wage_title <- "Usual Hours Worked Last Year by Computed Hourly Wage,\n50-52 weeks worked last year, >= 35 hours per usual week"
fy_40_wage_title_men <- "Usual Hours Worked Last Year by Computed Hourly Wage, Men\n50-52 weeks worked last year, >= 40 hours per usual week"
fy_40_wage_title_women <- "Usual Hours Worked Last Year by Computed Hourly Wage, Women\n50-52 weeks worked last year, >= 40 hours per usual week"

# >= 40 usual hours, by annual wage-income decile.
plot_v1_mean_by_v2_decile_over_v3(
  asec_fy_40, "UHRSWORKLY", "INCWAGE", "YEAR", "ASECWT",
  title = fy_40_income_title,
  caption = fy_income_caption
)
plot_v1_mean_by_v2_decile_over_v3(
  asec_fy_40, "UHRSWORKLY", "INCWAGE", "YEAR", "ASECWT",
  deciles_to_plot = c(1, 5, 10),
  title = fy_40_income_title,
  caption = fy_income_caption
)

# >= 40 usual hours, by computed-hourly-wage decile.
plot_v1_mean_by_v2_decile_over_v3(
  asec_fy_40_wage, "UHRSWORKLY", "h_wage_usual", "YEAR", "ASECWT",
  title = fy_40_wage_title,
  caption = fy_wage_caption
)
plot_v1_mean_by_v2_decile_over_v3(
  asec_fy_40_wage, "UHRSWORKLY", "h_wage_usual", "YEAR", "ASECWT",
  deciles_to_plot = c(1, 5, 10),
  title = fy_40_wage_title,
  caption = fy_wage_caption
)

# >= 35 usual hours, by computed-hourly-wage decile.
plot_v1_mean_by_v2_decile_over_v3(
  asec_fy_35_wage, "UHRSWORKLY", "h_wage_usual", "YEAR", "ASECWT",
  title = fy_35_wage_title,
  caption = fy_wage_caption
)
plot_v1_mean_by_v2_decile_over_v3(
  asec_fy_35_wage, "UHRSWORKLY", "h_wage_usual", "YEAR", "ASECWT",
  deciles_to_plot = c(1, 5, 10),
  title = fy_35_wage_title,
  caption = fy_wage_caption
)

# Men, >= 40 usual hours, by computed-hourly-wage decile.
plot_v1_mean_by_v2_decile_over_v3(
  asec_fy_40_wage_men, "UHRSWORKLY", "h_wage_usual", "YEAR", "ASECWT",
  title = fy_40_wage_title_men,
  caption = fy_wage_caption
)
plot_v1_mean_by_v2_decile_over_v3(
  asec_fy_40_wage_men, "UHRSWORKLY", "h_wage_usual", "YEAR", "ASECWT",
  deciles_to_plot = c(1, 5, 10),
  title = fy_40_wage_title_men,
  caption = fy_wage_caption
)

# Women, >= 40 usual hours, by computed-hourly-wage decile.
plot_v1_mean_by_v2_decile_over_v3(
  asec_fy_40_wage_women, "UHRSWORKLY", "h_wage_usual", "YEAR", "ASECWT",
  title = fy_40_wage_title_women,
  caption = fy_wage_caption
)
plot_v1_mean_by_v2_decile_over_v3(
  asec_fy_40_wage_women, "UHRSWORKLY", "h_wage_usual", "YEAR", "ASECWT",
  deciles_to_plot = c(1, 5, 10),
  title = fy_40_wage_title_women,
  caption = fy_wage_caption
)
Within-Between Variance Decomposition - To be completed
# Within - between variance decomposition of usual weekly hours across
# occupations (OCC2010), separately by year.
#
# With occupation shares s_k = n_k / N, the identity
#   Var(h) = sum_k s_k * Var_k(h) + sum_k s_k * (mean_k - mean)^2
# holds exactly only for POPULATION variances (denominator n). stats::var()
# uses n - 1, so the two sides would not match, and it returns NA for an
# occupation-year with a single observation, which turns that year's sums into
# NA. pop_var() avoids both problems (it is 0 for a single observation).
#
# NOTE(review): unlike the other summaries in this file, this decomposition is
# unweighted (EARNWT is not used) — TODO confirm that this is intended.
pop_var <- function(x) {
  mean((x - mean(x))^2)
}

# Per occupation-year: population variance, mean, cell size and the cell's
# share of that year's observations.
summary_decomp_df <- ipums_org %>%
  filter(!is.na(UHRSWORK1)) %>%
  group_by(YEAR) %>%
  mutate(year_total = n()) %>% # total count for each year
  group_by(YEAR, OCC2010) %>%
  summarise(
    variance = pop_var(UHRSWORK1),
    mean = mean(UHRSWORK1),
    n = n(),
    share = n() / unique(year_total),
    .groups = "drop"
  )

var_decomposition_df <- data.frame(YEAR = unique(summary_decomp_df$YEAR))

for (i in seq_len(nrow(var_decomposition_df))) {
  year <- var_decomposition_df[i, "YEAR"]

  # All hours observations of this year, pulled once and reused for the overall
  # mean and the directly computed total variance. Missing hours are dropped to
  # match the sample used for `summary_decomp_df`.
  hours_year <- ipums_org %>%
    filter(YEAR == year, !is.na(UHRSWORK1)) %>%
    pull(UHRSWORK1)
  mean_all <- mean(hours_year)

  summary_decomp_df_year <- summary_decomp_df %>% filter(YEAR == year)
  var_within_occ_vec <- summary_decomp_df_year %>% pull(variance)
  mean_within_occ_vec <- summary_decomp_df_year %>% pull(mean)
  share_occ_vec <- summary_decomp_df_year %>% pull(share)

  var_decomposition_df[i, "within_var"] <-
    sum(var_within_occ_vec * share_occ_vec)
  var_decomposition_df[i, "between_var"] <-
    sum(share_occ_vec * (mean_within_occ_vec - mean_all)^2)
  var_decomposition_df[i, "total_var_computed_as_sum"] <-
    var_decomposition_df[i, "within_var"] +
    var_decomposition_df[i, "between_var"]
  # Total variance computed directly on the same (population) scale, so it
  # should equal `total_var_computed_as_sum` up to floating-point error.
  var_decomposition_df[i, "total_var_computed_as_normal"] <- pop_var(hours_year)
}
# Pooled (all-years) version of the decomposition; the section is marked
# "To be completed".
# NOTE(review): `occ_year_within_var_vector`, `occ_year_share_vector`,
# `share_vector` and `mean_vector` are not defined anywhere in the visible
# part of this file, and `mean_all` is whatever value the last iteration of the
# per-year loop above left behind — confirm these exist before running.
within_var <- sum(occ_year_within_var_vector * occ_year_share_vector)
between_var <- sum(share_vector * (mean_vector - mean_all) ^ 2)
total_var_decomposed_calc <- within_var+between_var
# Total variance of usual weekly hours over the full pooled sample, for
# comparison with `total_var_decomposed_calc`.
var(ipums_org$UHRSWORK1)