1 1. Cross-sectional earnings inequality
2 Variance decompositions: firms, activities, occupations, and regions
3 3. Earnings growth by firm mobility
4 Earnings growth by move type
5 5. Regression evidence on mobility and growth
6 Event study around firm moves
7 Part-time dynamics
8 8. Five-year transition matrices
9 9. Ten-year rank-rank mobility
10 10. Gender earnings gap decomposition
11 11. Firm-level shocks and pass-through to workers
12 12. Event study around firm exit
13 13. Pay-component shares and volatility

The main paper already documents the evolution of earnings levels, inequality, volatility, mobility, and recession-entry scarring over 2002–2023. The below extensions ask more micro-level questions: whether inequality comes from sorting across firms/sectors/regions or dispersion within them; whether earnings losses reflect daily pay or days worked; whether job mobility insures workers or exposes them to losses; and whether gender gaps are mostly sorting gaps or within-firm gaps.

so the purpose is to look at how the crisis reshaped earnings inequality through changes in work intensity, firm sorting, and mobility. For example, the matched employer-employee structure can show whether workers lost because they earned less per day, worked fewer day, moved to worse firms (characterized by the firm’ average pay), decompose variance into within- and between-firm/sector/occupation/region components.

How earnings change: decompose annual earnings growth into daily pay and days worked.

Mobility and reallocation: study firm, sector, and region moves as sources of gains or losses.

Part-time and low-intensity employment: show whether reduced days and part-time transitions drive bottom-tail losses.

Firm shocks and displacement: quantify the importance of firm-level shocks and firm exit.

Gender gaps: decompose raw gaps into occupation, sector, region, firm, and firm-occupation components.

expected_files <- c(
  "ineq_by_year_gender.csv",
  "variance_decomp_firm_id_year.csv",
  "variance_decomp_firm_year.csv",
  "variance_decomp_kad2_id_year.csv",
  "variance_decomp_kad4_id_year.csv",
  "variance_decomp_kad8_id_year.csv",
  "variance_decomp_occ_id_year.csv",
  "variance_decomp_nuts3_id_year.csv",
  "growth_by_firm_move_year_gender.csv",
  "growth_by_move_type_year_gender.csv",
  "mobility_growth_regressions.rtf",
  "eventstudy_firm_move.rtf",
  "parttime_dynamics_year_gender.csv",
  "parttime_growth_regression.rtf",
  "transition_matrix_5yr_quintiles.csv",
  "rank_rank_10yr.csv",
  "gender_gap_decomposition.rtf",
  "firm_shock_pass_through.rtf",
  "eventstudy_firm_exit.rtf",
  "pay_component_shares_volatility.csv"
)

availability <- tibble(
  file = expected_files,
  exists = file.exists(file.path(out_dir, expected_files))
)

availability %>%
  mutate(exists = if_else(exists, "yes", "no")) %>%
  kable(caption = "Expected output files from the Stata do-file") %>%
  kable_styling(full_width = FALSE)

Expected output files from the Stata do-file
file	exists
ineq_by_year_gender.csv	yes
variance_decomp_firm_id_year.csv	yes
variance_decomp_firm_year.csv	yes
variance_decomp_kad2_id_year.csv	yes
variance_decomp_kad4_id_year.csv	yes
variance_decomp_kad8_id_year.csv	yes
variance_decomp_occ_id_year.csv	yes
variance_decomp_nuts3_id_year.csv	yes
growth_by_firm_move_year_gender.csv	yes
growth_by_move_type_year_gender.csv	yes
mobility_growth_regressions.rtf	yes
eventstudy_firm_move.rtf	yes
parttime_dynamics_year_gender.csv	yes
parttime_growth_regression.rtf	yes
transition_matrix_5yr_quintiles.csv	yes
rank_rank_10yr.csv	yes
gender_gap_decomposition.rtf	yes
firm_shock_pass_through.rtf	yes
eventstudy_firm_exit.rtf	yes
pay_component_shares_volatility.csv	yes

1 1. Cross-sectional earnings inequality

ineq <- read_csv_out("ineq_by_year_gender.csv") %>% clean_female()

check whether the broad inequality facts from the paper survive the restricted sample and the specific cleaning choices in the do-file. The key patterns are:

Whether p90_p10 rises sharply during 2009–2013.
Whether bottom inequality, p50_p10, drives the crisis-era rise more than top inequality, p90_p50.
Whether the patterns differ by gender.
Whether inequality declines during the post-2014 recovery.

if (!is.null(ineq)) {
  ineq_long <- ineq %>%
    select(year, gender, p90_p10, p90_p50, p50_p10, sd_256) %>%
    pivot_longer(cols = c(p90_p10, p90_p50, p50_p10, sd_256),
                 names_to = "measure", values_to = "value") %>%
    mutate(measure = recode(measure,
      p90_p10 = "P90-P10",
      p90_p50 = "P90-P50",
      p50_p10 = "P50-P10",
      sd_256 = "2.56 x SD"
    ))

  ggplot(ineq_long, aes(year, value, color = gender, linetype = measure)) +
    shade_periods() +
    geom_line(linewidth = 0.8) +
    facet_wrap(~ measure, scales = "free_y") +
    labs(
      title = "Cross-sectional log earnings dispersion",
      subtitle = "Check whether crisis-era inequality is mainly bottom-tail or top-tail driven",
      x = NULL,
      y = "Log earnings dispersion",
      color = NULL,
      linetype = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "bottom")
}

if (!is.null(ineq)) {
  ineq_summary <- ineq %>%
    mutate(period = case_when(
      between(year, 2002, 2008) ~ "2002-2008 pre-crisis",
      between(year, 2009, 2013) ~ "2009-2013 crisis",
      between(year, 2014, 2023) ~ "2014-2023 recovery",
      TRUE ~ NA_character_
    )) %>%
    filter(!is.na(period)) %>%
    group_by(gender, period) %>%
    summarise(
      mean_p90_p10 = mean(p90_p10, na.rm = TRUE),
      mean_p90_p50 = mean(p90_p50, na.rm = TRUE),
      mean_p50_p10 = mean(p50_p10, na.rm = TRUE),
      mean_N = mean(N, na.rm = TRUE),
      .groups = "drop"
    )

  ineq_summary %>%
    mutate(across(starts_with("mean_"), ~ round(.x, 3))) %>%
    kable(caption = "Average inequality measures by period and gender") %>%
    kable_styling(full_width = FALSE)
}

Average inequality measures by period and gender
gender	period	mean_p90_p10	mean_p90_p50	mean_p50_p10	mean_N
Men	2002-2008 pre-crisis	3.020	1.210	1.810	32110.29
Men	2009-2013 crisis	3.258	1.260	1.997	35180.80
Men	2014-2023 recovery	4.177	1.274	2.903	69932.70
Women	2002-2008 pre-crisis	2.843	1.113	1.730	23310.71
Women	2009-2013 crisis	2.961	1.132	1.829	29125.20
Women	2014-2023 recovery	3.400	1.045	2.354	54349.30
NA	2002-2008 pre-crisis	0.000	0.000	0.000	1.00
NA	2014-2023 recovery	2.752	0.810	1.942	6.70

2 Variance decompositions: firms, activities, occupations, and regions

decomp_files <- tibble(
  file = c(
    "variance_decomp_firm_year.csv",
    "variance_decomp_firm_id_year.csv",
    "variance_decomp_kad2_id_year.csv",
    "variance_decomp_kad4_id_year.csv",
    "variance_decomp_kad8_id_year.csv",
    "variance_decomp_occ_id_year.csv",
    "variance_decomp_nuts3_id_year.csv"
  ),
  dimension = c(
    "Firm", "Firm ID loop", "2-digit KAD", "4-digit KAD", "8-digit KAD", "Occupation", "NUTS-3 region"
  )
)

decomp <- decomp_files %>%
  mutate(data = map(file, read_csv_out)) %>%
  filter(map_lgl(data, ~ !is.null(.x))) %>%
  unnest(data) %>%
  select(dimension, everything())

# Drop duplicate firm decomposition if both are available and identical in purpose.
if (nrow(decomp) > 0 && any(decomp$dimension == "Firm") && any(decomp$dimension == "Firm ID loop")) {
  decomp <- decomp %>% filter(dimension != "Firm ID loop")
}

The decomposition writes total earnings variance as an employment-weighted sum of within-group and between-group components. The most important object is share_between or share_between_firm.

Between-firm share: how has it changed before during and after the crisis/recovery
Within-firm share
KAD level gradient: industry cells vs. more broad sectors.
Region share: looks at uneven regional exposure of the crisis.

if (nrow(decomp) > 0) {
  decomp_plot <- decomp %>%
    mutate(
      share_between_clean = coalesce(share_between, share_between_firm),
      share_within_clean  = coalesce(share_within, share_within_firm)
    ) %>%
    select(dimension, year, share_between_clean, share_within_clean) %>%
    pivot_longer(cols = c(share_between_clean, share_within_clean),
                 names_to = "component", values_to = "share") %>%
    mutate(component = recode(component,
      share_between_clean = "Between component",
      share_within_clean = "Within component"
    ))

  ggplot(decomp_plot, aes(year, share, color = dimension)) +
    shade_periods() +
    geom_line(linewidth = 0.85) +
    facet_wrap(~ component, scales = "free_y") +
    scale_y_continuous(labels = percent_format(accuracy = 1)) +
    labs(
      title = "Within- and between-group shares of earnings variance",
      subtitle = "Between shares show how much inequality is explained by sorting across firms, sectors, occupations, or regions",
      x = NULL,
      y = "Share of total variance",
      color = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "bottom")
}

if (nrow(decomp) > 0) {
  decomp_period <- decomp %>%
    mutate(
      share_between_clean = coalesce(share_between, share_between_firm),
      period = case_when(
        between(year, 2002, 2008) ~ "2002-2008",
        between(year, 2009, 2013) ~ "2009-2013",
        between(year, 2014, 2023) ~ "2014-2023",
        TRUE ~ NA_character_
      )
    ) %>%
    filter(!is.na(period)) %>%
    group_by(dimension, period) %>%
    summarise(mean_between_share = mean(share_between_clean, na.rm = TRUE), .groups = "drop") %>%
    pivot_wider(names_from = period, values_from = mean_between_share) %>%
    mutate(across(where(is.numeric), ~ percent(.x, accuracy = 0.1)))

  decomp_period %>%
    kable(caption = "Average between-group share of earnings variance by period") %>%
    kable_styling(full_width = FALSE)
}

Average between-group share of earnings variance by period
dimension	2002-2008	2009-2013	2014-2023
2-digit KAD	10.9%	11.0%	14.0%
4-digit KAD	14.5%	14.8%	17.5%
8-digit KAD	19.4%	20.8%	22.6%
Firm	57.5%	NA	NA
NUTS-3 region	2.8%	2.2%	2.6%
Occupation	18.0%	17.3%	27.4%

3 3. Earnings growth by firm mobility

growth_firm <- read_csv_out("growth_by_firm_move_year_gender.csv") %>% clean_female()

Comparing workers who changed firms with those who stayed at the same firm. Is mobility a ladder or a trap?

Mover premium before the crisis:
Mover penalty during the crisis: if moves become displacement-driven, movers may experience lower growth than stayers.
Days versus daily pay: compare mean_g, mean_g_daily, and mean_g_days. whether earnings changes come from pay rates or work intensity.
Volatility: p90_p10_g and sd_g show whether movers face more risk.

if (!is.null(growth_firm)) {
  gf <- growth_firm %>%
    mutate(move_status = if_else(firm_move == 1, "Firm movers", "Firm stayers"))

  p1 <- ggplot(gf, aes(year, mean_g, color = move_status, linetype = gender)) +
    shade_periods() +
    geom_hline(yintercept = 0, linewidth = 0.3) +
    geom_line(linewidth = 0.85) +
    labs(title = "Mean annual log earnings growth", x = NULL, y = "Mean growth", color = NULL, linetype = NULL) +
    theme_minimal(base_size = 12) + theme(legend.position = "bottom")

  p2 <- ggplot(gf, aes(year, p90_p10_g, color = move_status, linetype = gender)) +
    shade_periods() +
    geom_line(linewidth = 0.85) +
    labs(title = "Dispersion of annual earnings growth", x = NULL, y = "P90-P10 of growth", color = NULL, linetype = NULL) +
    theme_minimal(base_size = 12) + theme(legend.position = "bottom")

  print(p1)
  print(p2)
}

if (!is.null(growth_firm)) {
  gf_long <- growth_firm %>%
    mutate(move_status = if_else(firm_move == 1, "Firm movers", "Firm stayers")) %>%
    select(year, gender, move_status, mean_g, mean_g_daily, mean_g_days) %>%
    pivot_longer(cols = c(mean_g, mean_g_daily, mean_g_days),
                 names_to = "component", values_to = "growth") %>%
    mutate(component = recode(component,
      mean_g = "Annual earnings",
      mean_g_daily = "Daily earnings",
      mean_g_days = "Days worked"
    ))

  ggplot(gf_long, aes(year, growth, color = component)) +
    shade_periods() +
    geom_hline(yintercept = 0, linewidth = 0.3) +
    geom_line(linewidth = 0.85) +
    facet_grid(gender ~ move_status) +
    labs(
      title = "Decomposing earnings growth: daily pay versus days worked",
      subtitle = "Annual earnings growth should approximately equal daily earnings growth plus days-worked growth",
      x = NULL,
      y = "Mean log growth",
      color = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "bottom")
}

4 Earnings growth by move type

growth_type <- read_csv_out("growth_by_move_type_year_gender.csv") %>% clean_female()

separate firm moves from changes in broad activity (kad2_move), more detailed activity (kad4_move), and workplace region. helps distinguish ordinary employer changes from deeper reallocations. Whether firm moves within the same KAD sector are associated with gains, while firm moves across KAD sectors are associated with losses etc.

Whether women and men differ in returns to mobility?

if (!is.null(growth_type)) {
  gt <- growth_type %>%
    mutate(
      move_type = case_when(
        firm_move == 0 ~ "No firm move",
        firm_move == 1 & kad2_move == 0 & region_move == 0 ~ "New firm, same KAD2 and region",
        firm_move == 1 & kad2_move == 1 & region_move == 0 ~ "New firm, new KAD2, same region",
        firm_move == 1 & kad2_move == 0 & region_move == 1 ~ "New firm, same KAD2, new region",
        firm_move == 1 & kad2_move == 1 & region_move == 1 ~ "New firm, new KAD2 and region",
        TRUE ~ "Other/unclear"
      )
    ) %>%
    group_by(year, gender, move_type) %>%
    summarise(mean_g = weighted.mean(mean_g, w = N, na.rm = TRUE), N = sum(N, na.rm = TRUE), .groups = "drop")

  ggplot(gt, aes(year, mean_g, color = move_type)) +
    shade_periods() +
    geom_hline(yintercept = 0, linewidth = 0.3) +
    geom_line(linewidth = 0.8) +
    facet_wrap(~ gender) +
    labs(
      title = "Mean earnings growth by type of job move",
      subtitle = "Look for whether cross-sector or cross-region moves become loss-making during crisis years",
      x = NULL,
      y = "Mean log earnings growth",
      color = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "bottom")
}

5 5. Regression evidence on mobility and growth

mobility_reg <- read_esttab_rtf("mobility_growth_regressions.rtf")

growth_mobility_1: earnings growth on firm moves, occupation moves, KAD2 moves, regional moves, gender, age controls, and fixed effects.
growth_decomp_1: annual earnings growth on daily earnings growth and days-worked growth.

if (!is.null(mobility_reg)) {
  mobility_reg %>%
    kable(caption = "Raw parsed lines from mobility growth regressions (.rtf)") %>%
    kable_styling(full_width = TRUE, font_size = 11)
}

Raw parsed lines from mobility growth regressions (.rtf)
line
Times New Roman;
d0.0585***
d(0.00419)
d0.00925*
d(0.00469)
d-0.0617***
d(0.00621)
dChanged workplace NUTS-3 region
d-0.0222***
d(0.00632)
d-0.0454***
d-5.12e-10
d(0.00156)
d(3.87e-10)
d0.000650***
d8.51e-14
d(0.0000238)
d(6.10e-12)
d-0.0340***
d5.55e-09***
d(0.00235)
d(5.42e-10)
d1.000***
d(1.56e-09)
d1.000***
d(1.92e-10)
d0.796***
d3.49e-08***
d(0.0246)
d(5.90e-09)
dObservations
d0.034
d1.000
d* p < 0.05, p < 0.01, * p < 0.001

6 Event study around firm moves

event_move <- read_esttab_rtf("eventstudy_firm_move.rtf")

whether earnings fall before the move, jump at the move, or recover afterward.

if (!is.null(event_move)) {
  event_move %>%
    kable(caption = "Raw parsed lines from event study around firm moves (.rtf)") %>%
    kable_styling(full_width = TRUE, font_size = 11)
}

Raw parsed lines from event study around firm moves (.rtf)
line
Times New Roman;
d0.0519***
d(0.00900)
d0.195***
d(0.00571)
d0.0837***
d(0.00405)
d0.206***
d(0.00526)
d0.130***
d(0.00643)
d0.0878***
d(0.00743)
d0.0398***
d(0.00793)
d-0.000738***
d(0.0000850)
d7.854***
d(0.0740)
dObservations
d0.630
d* p < 0.05, p < 0.01, * p < 0.001

7 Part-time dynamics

pt <- read_csv_out("parttime_dynamics_year_gender.csv") %>% clean_female()

annual earnings fall either daily pay falls or because workers work fewer days? —> ASK THIS

if (!is.null(pt)) {
  pt_long <- pt %>%
    select(year, gender, share_mostly_pt, share_any_pt, mean_pt_share, enter_pt, exit_pt) %>%
    pivot_longer(cols = -c(year, gender), names_to = "measure", values_to = "value") %>%
    mutate(measure = recode(measure,
      share_mostly_pt = "Mostly part-time",
      share_any_pt = "Any part-time",
      mean_pt_share = "Mean part-time days share",
      enter_pt = "Enter mostly part-time",
      exit_pt = "Exit mostly part-time"
    ))

  ggplot(pt_long, aes(year, value, color = gender)) +
    shade_periods() +
    geom_line(linewidth = 0.85) +
    facet_wrap(~ measure, scales = "free_y") +
    scale_y_continuous(labels = percent_format(accuracy = 1)) +
    labs(
      title = "Part-time incidence and transitions",
      subtitle = "Check whether the crisis operated through reduced work intensity and part-time transitions",
      x = NULL,
      y = NULL,
      color = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "bottom")
}

if (!is.null(pt)) {
  pt_growth <- pt %>%
    select(year, gender, mean_g_earnings, mean_g_daily, mean_g_days) %>%
    pivot_longer(cols = starts_with("mean_g"), names_to = "component", values_to = "growth") %>%
    mutate(component = recode(component,
      mean_g_earnings = "Annual earnings",
      mean_g_daily = "Daily earnings",
      mean_g_days = "Days worked"
    ))

  ggplot(pt_growth, aes(year, growth, color = component)) +
    shade_periods() +
    geom_hline(yintercept = 0, linewidth = 0.3) +
    geom_line(linewidth = 0.85) +
    facet_wrap(~ gender) +
    labs(
      title = "Mean growth among observations used in part-time dynamics",
      subtitle = "Use this to separate pay-rate changes from days-worked changes",
      x = NULL,
      y = "Mean log growth",
      color = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "bottom")
}

pt_reg <- read_esttab_rtf("parttime_growth_regression.rtf")

if (!is.null(pt_reg)) {
  pt_reg %>%
    kable(caption = "Raw parsed lines from part-time growth regression (.rtf)") %>%
    kable_styling(full_width = TRUE, font_size = 11)
}

Raw parsed lines from part-time growth regression (.rtf)
line
Times New Roman;
d-0.113***
d(0.00560)
d0.175***
d(0.00556)
d-0.0358***
d(0.00234)
d-0.0442***
d(0.00156)
d0.000631***
d(0.0000238)
d0.782***
d(0.0245)
dObservations
d0.036
d* p < 0.05, p < 0.01, * p < 0.001

8 8. Five-year transition matrices

trans5 <- read_csv_out("transition_matrix_5yr_quintiles.csv") %>% clean_female()

if (!is.null(trans5)) {
  trans_period <- trans5 %>%
    mutate(period = case_when(
      between(year, 2002, 2008) ~ "2002-2008",
      between(year, 2009, 2013) ~ "2009-2013",
      between(year, 2014, 2018) ~ "2014-2018",
      TRUE ~ NA_character_
    )) %>%
    filter(!is.na(period)) %>%
    group_by(gender, period, q_earn, F5_q_earn) %>%
    summarise(trans_prob = weighted.mean(trans_prob, w = `_freq`, na.rm = TRUE), .groups = "drop")

  ggplot(trans_period, aes(x = factor(F5_q_earn), y = factor(q_earn), fill = trans_prob)) +
    geom_tile(color = "white") +
    geom_text(aes(label = percent(trans_prob, accuracy = 1)), size = 3) +
    facet_grid(gender ~ period) +
    scale_fill_continuous(labels = percent_format(accuracy = 1)) +
    labs(
      title = "Five-year earnings quintile transition matrices",
      subtitle = "Rows are origin quintiles; columns are destination quintiles five years later",
      x = "Destination quintile after five years",
      y = "Origin quintile",
      fill = "Probability"
    ) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "bottom")
}

if (!is.null(trans5)) {
  mobility_summary <- trans5 %>%
    mutate(period = case_when(
      between(year, 2002, 2008) ~ "2002-2008 pre-crisis",
      between(year, 2009, 2013) ~ "2009-2013 crisis",
      between(year, 2014, 2018) ~ "2014-2018 recovery origin years",
      TRUE ~ NA_character_
    )) %>%
    filter(!is.na(period)) %>%
    group_by(gender, period) %>%
    summarise(
      bottom_stays_bottom = weighted.mean(trans_prob[q_earn == 1 & F5_q_earn == 1], `_freq`[q_earn == 1 & F5_q_earn == 1], na.rm = TRUE),
      top_stays_top = weighted.mean(trans_prob[q_earn == 5 & F5_q_earn == 5], `_freq`[q_earn == 5 & F5_q_earn == 5], na.rm = TRUE),
      bottom_to_top_half = sum(trans_prob[q_earn == 1 & F5_q_earn >= 3] * `_freq`[q_earn == 1 & F5_q_earn >= 3], na.rm = TRUE) / sum(`_freq`[q_earn == 1 & F5_q_earn >= 3], na.rm = TRUE),
      .groups = "drop"
    ) %>%
    mutate(across(where(is.numeric), ~ percent(.x, accuracy = 0.1)))

  mobility_summary %>%
    kable(caption = "Selected five-year mobility statistics") %>%
    kable_styling(full_width = FALSE)
}

Selected five-year mobility statistics
gender	period	bottom_stays_bottom	top_stays_top	bottom_to_top_half
Men	2002-2008 pre-crisis	100.0%	NA	NA
Men	2009-2013 crisis	100.0%	NA	NA
Men	2014-2018 recovery origin years	100.0%	NA	NA
Women	2002-2008 pre-crisis	100.0%	NA	NA
Women	2009-2013 crisis	100.0%	NA	NA
Women	2014-2018 recovery origin years	100.0%	NA	NA

9 9. Ten-year rank-rank mobility

rank10 <- read_csv_out("rank_rank_10yr.csv") %>% clean_female()

The rank-rank graph summarizes long-run mobility.

if (!is.null(rank10)) {
  rank10_period <- rank10 %>%
    mutate(period = case_when(
      between(year, 2002, 2008) ~ "2002-2008 origin years",
      between(year, 2009, 2013) ~ "2009-2013 origin years",
      TRUE ~ NA_character_
    )) %>%
    filter(!is.na(period)) %>%
    group_by(gender, period, rank_bin) %>%
    summarise(mean_future_rank = weighted.mean(mean_future_rank, w = N, na.rm = TRUE), N = sum(N, na.rm = TRUE), .groups = "drop")

  ggplot(rank10_period, aes(rank_bin, mean_future_rank, color = period)) +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed", linewidth = 0.4) +
    geom_hline(yintercept = 50, linetype = "dotted", linewidth = 0.4) +
    geom_line(linewidth = 0.9) +
    facet_wrap(~ gender) +
    labs(
      title = "Ten-year rank-rank earnings mobility",
      subtitle = "Dashed line is perfect persistence; dotted line is complete mean reversion to median rank",
      x = "Initial earnings percentile bin",
      y = "Mean percentile rank ten years later",
      color = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "bottom")
}

10 10. Gender earnings gap decomposition

gender_gap <- read_esttab_rtf("gender_gap_decomposition.rtf")

if (!is.null(gender_gap)) {
  gender_gap %>%
    kable(caption = "Raw parsed lines from gender earnings gap decomposition (.rtf)") %>%
    kable_styling(full_width = TRUE, font_size = 11)
}

Raw parsed lines from gender earnings gap decomposition (.rtf)
line
Times New Roman;
d0.1792***
d-0.0501***
d-0.0473***
d-0.0344***
d0.0099***
d-0.0351***
d(0.0031)
d(0.0028)
d(0.0028)
d(0.0027)
d(0.0025)
d(0.0023)
d-0.0080***
d0.0774***
d0.0747***
d0.0731***
d0.0501***
d0.0504***
d(0.0019)
d(0.0016)
d(0.0016)
d(0.0016)
d(0.0015)
d(0.0014)
d0.0001***
d-0.0010***
d-0.0010***
d-0.0009***
d-0.0007***
d-0.0006***
d(0.0000)
d(0.0000)
d(0.0000)
d(0.0000)
d(0.0000)
d(0.0000)
d7.5404***
d6.1713***
d6.2231***
d6.2494***
d6.6358***
d6.6246***
d(0.0281)
d(0.0241)
d(0.0239)
d(0.0236)
d(0.0220)
d(0.0211)
dObservations
d0.067
d0.281
d0.298
d0.309
d0.484
d0.574
d* p < 0.05, p < 0.01, * p < 0.001

11 11. Firm-level shocks and pass-through to workers

firm_shock <- read_esttab_rtf("firm_shock_pass_through.rtf")

How much of worker earnings risk is firm-level risk?

if (!is.null(firm_shock)) {
  firm_shock %>%
    kable(caption = "Raw parsed lines from firm shock pass-through regression (.rtf)") %>%
    kable_styling(full_width = TRUE, font_size = 11)
}

Raw parsed lines from firm shock pass-through regression (.rtf)
line
Times New Roman;
d0.654***
d(0.00310)
d0.208***
d(0.00944)
d-0.0262***
d(0.00296)
d-0.0357***
d(0.00640)
d0.000524***
d(0.0000907)
d0.594***
d(0.106)
dObservations
d0.199
d* p < 0.05, p < 0.01, * p < 0.001

12 12. Event study around firm exit

firm_exit <- read_esttab_rtf("eventstudy_firm_exit.rtf")

estimates earnings around exposure to a firm’s last observed year.

if (!is.null(firm_exit)) {
  firm_exit %>%
    kable(caption = "Raw parsed lines from firm-exit event study (.rtf)") %>%
    kable_styling(full_width = TRUE, font_size = 11)
}

Raw parsed lines from firm-exit event study (.rtf)
line
Times New Roman;
d-0.156***
d(0.00866)
d-0.109***
d(0.00619)
d-0.225***
d(0.00426)
d-0.228***
d(0.00761)
d-0.106***
d(0.00890)
d-0.0432***
d(0.00993)
d-0.0163
d(0.0104)
d-0.00191***
d(0.000102)
d9.295***
d(0.0949)
dObservations
d0.665
d* p < 0.05, p < 0.01, * p < 0.001

13 13. Pay-component shares and volatility

paycomp <- read_csv_out("pay_component_shares_volatility.csv") %>% clean_female()

whether earnings dynamics are driven by base pay or variable pay.

if (!is.null(paycomp)) {
  shares <- paycomp %>%
    select(year, gender, share_base, share_extra, share_bonus_ot) %>%
    pivot_longer(cols = starts_with("share"), names_to = "component", values_to = "share") %>%
    mutate(component = recode(component,
      share_base = "Base pay",
      share_extra = "All extra pay",
      share_bonus_ot = "Bonuses + overtime"
    ))

  ggplot(shares, aes(year, share, color = component)) +
    shade_periods() +
    geom_line(linewidth = 0.85) +
    facet_wrap(~ gender) +
    scale_y_continuous(labels = percent_format(accuracy = 1)) +
    labs(
      title = "Pay-component shares",
      subtitle = "Check whether bonuses and overtime collapse during downturns",
      x = NULL,
      y = "Share of annual earnings",
      color = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "bottom")
}

if (!is.null(paycomp)) {
  vol <- paycomp %>%
    select(year, gender, sd_g_earnings, sd_g_base, sd_g_extra) %>%
    pivot_longer(cols = starts_with("sd_g"), names_to = "component", values_to = "sd_growth") %>%
    mutate(component = recode(component,
      sd_g_earnings = "Annual earnings",
      sd_g_base = "Base pay",
      sd_g_extra = "Extra pay"
    ))

  ggplot(vol, aes(year, sd_growth, color = component)) +
    shade_periods() +
    geom_line(linewidth = 0.85) +
    facet_wrap(~ gender) +
    labs(
      title = "Volatility by pay component",
      subtitle = "Extra pay should be more volatile if bonuses and overtime absorb shocks",
      x = NULL,
      y = "Standard deviation of log growth",
      color = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(legend.position = "bottom")
}

Earnings Dynamics Extensions: Output Analysis

08 May 2026