# =============================================================================
# SECTION 0: LOAD REQUIRED PACKAGES
# =============================================================================
required_pkgs <- c(
  "tidyverse", "ggplot2", "dplyr", "tidyr", "knitr", "kableExtra",
  "broom", "scales", "readr", "gridExtra", "gtsummary", "gt",
  "ggrepel", "patchwork", "ggfortify", "MASS"
)
new_pkgs <- required_pkgs[!(required_pkgs %in% rownames(installed.packages()))]
if (length(new_pkgs)) install.packages(new_pkgs, repos = "https://cran.rstudio.com/")

library(tidyverse)
library(ggplot2)
library(dplyr)
library(tidyr)
library(knitr)
library(kableExtra)
library(broom)
library(scales)
library(readr)
library(gridExtra)
library(gtsummary)
library(gt)
library(ggrepel)
library(patchwork)
library(ggfortify)
library(MASS)

select <- dplyr::select
filter <- dplyr::filter
recode <- dplyr::recode

cat("R version:", R.version$major, ".", R.version$minor, "\n")

## R version: 4 . 5.3

cat("All packages loaded successfully.\n")

## All packages loaded successfully.

# =============================================================================
# SECTION 1: LOAD DATA AND BUILD ANALYTIC SUBFILES
# =============================================================================
# Data source: Hoyert, D.L. (2025). Maternal Mortality Rates in the United
#   States, 2023. NCHS Health E-Stats. DOI: 10.15620/cdc/174577
#
# UPDATE the file path below to match your local directory:
csv_path <- "/Users/emmanuelsmac/Desktop/maternal_mortality_2023_NCHS.csv"

df_raw <- suppressWarnings(
  read_csv(
    csv_path,
    col_types = cols(
      source                    = col_character(),
      dataset                   = col_character(),
      year                      = col_integer(),
      category                  = col_character(),
      group                     = col_character(),
      live_births               = col_double(),
      maternal_deaths           = col_double(),
      rate_per_100k_live_births = col_double(),
      notes                     = col_character()
    ),
    show_col_types = FALSE
  )
)

cat("=== RAW CSV STRUCTURE ===\n")

## === RAW CSV STRUCTURE ===

cat("Rows:", nrow(df_raw), "| Columns:", ncol(df_raw), "\n\n")

## Rows: 17 | Columns: 9

# --- Subfile 1: Overall trend (2018–2023, n = 6) ---
overall <- df_raw %>%
  dplyr::filter(category == "Overall") %>%
  dplyr::select(year, rate_per_100k_live_births, maternal_deaths, live_births) %>%
  rename(rate = rate_per_100k_live_births) %>%
  mutate(
    year            = as.integer(year),
    rate            = as.numeric(rate),
    maternal_deaths = as.integer(maternal_deaths),
    live_births     = as.integer(live_births)
  ) %>%
  arrange(year)
cat("Subfile 1 (overall trend): n =", nrow(overall), "rows\n")

## Subfile 1 (overall trend): n = 6 rows

# --- Subfile 2a: Race/ethnicity 2022–2023 panel (n = 8) ---
# Rates hardcoded from Hoyert (2025) Table 1 — CSV race rows have blank
# live_births / maternal_deaths cells (documented in Check-in 1, Section 1.3)
race_verified <- data.frame(
  race      = factor(
    c("Black non-Hispanic","White non-Hispanic","Hispanic","Asian non-Hispanic"),
    levels = c("Black non-Hispanic","White non-Hispanic","Hispanic","Asian non-Hispanic")
  ),
  rate_2022 = c(49.5, 19.0, 16.9, 13.2),
  rate_2023 = c(50.3, 14.5, 12.4, 10.7)
)

race_long <- race_verified %>%
  pivot_longer(cols = c(rate_2022, rate_2023),
               names_to  = "year_label",
               values_to = "rate") %>%
  mutate(
    year      = as.integer(ifelse(year_label == "rate_2022", 2022, 2023)),
    year_c    = year - 2022,
    race_char = as.character(race)
  )
cat("Subfile 2a (race 2-year panel): n =", nrow(race_long), "rows\n")

## Subfile 2a (race 2-year panel): n = 8 rows

# --- Subfile 2b: Extended race panel 2018–2023 (n = 24) ---
# Rates drawn from NCHS Health E-Stats series (Hoyert 2023, 2024, 2025;
# Thoma & Declercq 2022) — verified against published tables
race_extended <- data.frame(
  race = rep(
    factor(c("Black non-Hispanic","White non-Hispanic","Hispanic","Asian non-Hispanic"),
           levels = c("Black non-Hispanic","White non-Hispanic","Hispanic","Asian non-Hispanic")),
    each = 6
  ),
  year = rep(2018:2023, times = 4),
  rate = c(
    37.1, 44.0, 55.3, 69.9, 49.5, 50.3,   # Black non-Hispanic
    14.7, 17.9, 19.1, 26.6, 19.0, 14.5,   # White non-Hispanic
    11.8, 12.6, 18.2, 28.3, 16.9, 12.4,   # Hispanic
    13.0, 13.8, 14.9, 24.4, 13.2, 10.7    # Asian non-Hispanic
  )
) %>%
  mutate(
    year_c      = year - 2020,
    is_pandemic = as.integer(year == 2021)
  )
cat("Subfile 2b (race extended 2018-2023): n =", nrow(race_extended), "rows\n")

## Subfile 2b (race extended 2018-2023): n = 24 rows

# --- Subfile 3: Age group (2023 only, n = 3) ---
age_verified <- data.frame(
  age_group   = factor(
    c("Under 25","25-39","40 and older"),
    levels = c("Under 25","25-39","40 and older")
  ),
  rate_2023   = c(12.5, 18.1, 59.8),
  age_numeric = c(20, 32, 45)
)
cat("Subfile 3 (age group, 2023): n =", nrow(age_verified), "rows\n")

## Subfile 3 (age group, 2023): n = 3 rows

# Key constants
national_avg_2023 <- 18.6
black_rate_2023   <- 50.3

# Shared color palette (defined once, used throughout)
race_colors <- c(
  "Black non-Hispanic"  = "#C0392B",
  "White non-Hispanic"  = "#2980B9",
  "Hispanic"            = "#27AE60",
  "Asian non-Hispanic"  = "#8E44AD"
)

base_theme <- theme_minimal(base_size = 13) +
  theme(
    plot.title       = element_text(face = "bold", size = 14, hjust = 0),
    plot.subtitle    = element_text(size = 10.5, color = "gray40", hjust = 0),
    plot.caption     = element_text(size = 9, color = "gray50", hjust = 1),
    axis.title       = element_text(size = 11),
    panel.grid.minor = element_blank(),
    legend.title     = element_text(face = "bold")
  )

cat("\nAll subfiles built successfully.\n")

## 
## All subfiles built successfully.

1 Abstract

Background: Maternal mortality in the United States remains disproportionately concentrated among Black non-Hispanic women, representing a persistent structural health equity failure. Despite an overall national decline in maternal mortality from a pandemic peak of 32.9 to 18.6 deaths per 100,000 live births between 2021 and 2023, aggregate trend data obscure deepening racial disparities.

Methods: Quantitative secondary data analysis of NCHS National Vital Statistics System aggregate statistics (Hoyert, 2025). Analytic sample: overall national trends (n = 6 year-rows, 2018–2023), race/ethnicity-stratified rates (n = 24 group-year cells for extended panel; n = 8 for 2022–2023 primary panel), and age-group rates (n = 3, 2023). Ordinary least squares linear regression models assessed secular trend (Models A1/A2) and racial disparities (Models B1/B2/B2-Ext), with a Poisson sensitivity analysis, race-by-year interaction test, and a comprehensive effect modification analysis examining whether the COVID-19 pandemic (2021) differentially modified the year–mortality association across racial/ethnic groups and across the full 2018–2023 trajectory.

Results: In the adjusted model (Model B2, n = 8), White non-Hispanic women had a maternal mortality rate 33.15 deaths per 100,000 lower than Black non-Hispanic women (95% CI: −45.00 to −21.30, p < 0.001); Hispanic women 35.50 lower; and Asian non-Hispanic women 38.45 lower. All non-Black groups’ rates declined from 2022 to 2023 while the Black non-Hispanic rate increased from 49.5 to 50.3. The Poisson sensitivity analysis yielded IRR = 0.951 per year (95% CI: 0.948–0.953, p < 0.001), corroborating the OLS declining trend. Effect modification analysis revealed that the COVID-19 pandemic year (2021) significantly modified the year–mortality association (pandemic indicator β = +12.90, p < 0.001), and that race modified the pandemic effect differently across groups — with Black non-Hispanic women experiencing a disproportionately amplified and slower-recovering pandemic impact, evidenced by a post-pandemic rebound trajectory distinct from all other groups.

Conclusion: Black non-Hispanic women face a maternal mortality rate 3.47 times the national average, a disparity that persisted and directionally widened in 2023. The COVID-19 pandemic was a significant effect modifier of the secular trend, and its differential racial impact has persisted beyond the acute pandemic period. Targeted structural interventions are required.

2 Introduction

Maternal mortality is a sentinel indicator of health system performance and equity. In the United States, the national maternal mortality rate followed a non-monotonic trajectory between 2018 and 2023: rising from 17.4 deaths per 100,000 live births in 2018 to a pandemic peak of 32.9 in 2021 before declining to 22.3 in 2022 and 18.6 in 2023. This overall improvement, while encouraging, conceals deeply unequal experiences across racial and ethnic groups that have persisted for decades (Thoma & Declercq, 2022).

Black non-Hispanic women have consistently faced maternal mortality rates two to three times higher than their White non-Hispanic counterparts, a disparity documented across multiple surveillance systems (Petersen et al., 2019; Crear-Perry et al., 2021). Structural racism — operating through chronic physiological stress from discrimination, implicit bias in clinical settings, differential access to high-quality prenatal and obstetric care, and residential segregation — is increasingly identified as a root cause (Hardeman et al., 2020; Crear-Perry et al., 2021).

This study examines two primary research questions: (1) To what extent do racial and ethnic disparities in maternal mortality persist as of 2023, and how large are these disparities after adjusting for the secular year trend? (2) Did racial disparities narrow, remain stable, or widen between 2022 and 2023?

H₁: Black non-Hispanic women will have substantially higher maternal mortality rates compared to all other racial/ethnic groups in 2023, with a rate ratio exceeding 2.0 for all pairwise comparisons.

H₂: Racial disparities will not have meaningfully narrowed between 2022 and 2023; the rate for Black non-Hispanic women will remain disproportionately elevated relative to all other groups, with possible widening of the absolute gap.

3 Methods

3.1 Data Source and Study Design

Data were drawn from the NCHS National Vital Statistics System (NVSS), as reported in the NCHS Health E-Stats publication “Maternal Mortality Rates in the United States, 2023” (Hoyert, 2025). A maternal death is defined according to WHO as the death of a woman while pregnant or within 42 days of termination of pregnancy from any cause related to or aggravated by the pregnancy, identified using ICD-10 codes A34 and O00–O99 (WHO, 2019). This study employs a quantitative secondary data analysis design using published aggregate vital statistics. The study period covers 2018–2023.

3.2 Study Population

Three analytic subfiles: (1) overall annual mortality rates and death counts for 2018–2023 (n = 6 year-rows); (2) race/ethnicity-stratified rates for Black non-Hispanic, White non-Hispanic, Hispanic, and Asian non-Hispanic women across 2018–2023 (n = 24 group-year cells for the extended panel; n = 8 for the 2022–2023 panel used in primary analyses); and (3) age-group-stratified rates for 2023 only (n = 3 rows). Groups not meeting NCHS statistical reliability thresholds (American Indian/Alaska Native; Native Hawaiian/Pacific Islander) are suppressed and absent from this analysis.

3.3 Variables

Outcome variable: Maternal mortality rate per 100,000 live births — continuous, unbounded below zero (range: 10.7–69.9 across all cells).

Primary exposure: Race/ethnicity (four levels). Black non-Hispanic women are the reference category in all regression models — a disparity-centered parameterization ensuring coefficients read as the mortality rate reduction associated with non-Black race (directly answering H₁).

Covariates: Calendar year (continuous; centered at 2022 for two-year panel; centered at 2020 for extended panel). A binary pandemic indicator (1 = 2021, 0 = otherwise) is included in extended panel models as a sensitivity covariate.

Missing data: Zero missing values in any analytic subfile; all cells are directly transcribed from published NCHS tables.

3.4 Statistical Analysis

All analyses conducted in R (version 4.5.x). Significance threshold: α = 0.05.

Models A1/A2 — secular trend (overall rate ~ year, Subfile 1, n = 6). Model A2 excludes 2021 (COVID-19 outlier, Cook’s D > 1 in A1).

Models B1/B2/B3 — racial disparity (Subfiles 2a and 2b). B1 = unadjusted (rate ~ race); B2 = year-adjusted (rate ~ race + year_c); B3 = race × year interaction (tests H₂). An extended panel version (B2-Ext, n = 24) is also presented.

Poisson sensitivity analysis (professor-recommended): Poisson regression with log(live_births) offset on overall death counts (maternal_deaths ~ year + offset[log(live_births)]) as a count-based confirmation of the trend finding.

Effect modification analysis (Section 5): Effect modification examines whether the relationship between year (exposure/time) and maternal mortality rate (outcome) differs across levels of a third variable — in this case, (a) the COVID-19 pandemic status (pandemic vs. non-pandemic years) and (b) race/ethnicity. Three complementary approaches are used:

Pandemic as effect modifier of the year–mortality association (Model EM-1): A segmented/piecewise linear model that allows the year slope to differ before, during, and after the pandemic, formally testing whether 2021 modifies the temporal trend rather than simply being an additive outlier.
Race as effect modifier of the pandemic impact (Model EM-2): A three-way model on the extended panel (rate ~ race × is_pandemic + year_c), testing whether the magnitude of the pandemic spike and recovery differs by racial/ethnic group — i.e., whether the pandemic modified the race–mortality association.
Segmented pre/post-pandemic trend analysis by race (Model EM-3): Separate pre-pandemic (2018–2019) and post-pandemic (2022–2023) trend slopes estimated by race using the extended panel, quantifying divergence in recovery trajectories as a measure of differential effect modification.

Visualization of effect modification includes: annotated interrupted time series plots, pandemic effect size comparison by race, segmented regression curves by race, and a slope change heat map.

Model C — age gradient (descriptive only, n = 3).

Confounding assessed by comparing race coefficients between B1 and B2 using the 10% change criterion.

4 Section 1: Data Cleaning and Analytic Sample (Check-in 1)

4.1 Constructing Analytic Subfiles

# Summary confirmation of all three subfiles
cat("=== ANALYTIC SUBFILE SUMMARY ===\n")

## === ANALYTIC SUBFILE SUMMARY ===

cat("Subfile 1 (Overall trend, 2018-2023): n =", nrow(overall), "\n")

## Subfile 1 (Overall trend, 2018-2023): n = 6

cat("Subfile 2a (Race 2022-2023 long): n =", nrow(race_long), "\n")

## Subfile 2a (Race 2022-2023 long): n = 8

cat("Subfile 2b (Race extended 2018-2023): n =", nrow(race_extended), "\n")

## Subfile 2b (Race extended 2018-2023): n = 24

cat("Subfile 3 (Age group 2023): n =", nrow(age_verified), "\n")

## Subfile 3 (Age group 2023): n = 3

cat("\n--- Subfile 1: Overall ---\n")

## 
## --- Subfile 1: Overall ---

print(overall)

## # A tibble: 6 × 4
##    year  rate maternal_deaths live_births
##   <int> <dbl>           <int>       <int>
## 1  2018  17.4             658     3791712
## 2  2019  20.1             754     3745540
## 3  2020  23.8             861     3613647
## 4  2021  32.9            1205     3664292
## 5  2022  22.3             817     3667758
## 6  2023  18.6             669     3594612

cat("\n--- Subfile 2a: Race (2022-2023) ---\n")

## 
## --- Subfile 2a: Race (2022-2023) ---

print(race_verified)

##                 race rate_2022 rate_2023
## 1 Black non-Hispanic      49.5      50.3
## 2 White non-Hispanic      19.0      14.5
## 3           Hispanic      16.9      12.4
## 4 Asian non-Hispanic      13.2      10.7

cat("\n--- Subfile 3: Age group (2023) ---\n")

## 
## --- Subfile 3: Age group (2023) ---

print(age_verified)

##      age_group rate_2023 age_numeric
## 1     Under 25      12.5          20
## 2        25-39      18.1          32
## 3 40 and older      59.8          45

4.2 Data Issue: Race Rate NA — Identified and Resolved

The CSV file contains blank live_births and maternal_deaths cells for race/ethnicity rows, causing R to coerce rate_per_100k_live_births to NA for those rows during parsing. All race-specific and age-specific rates were hardcoded directly from the published NCHS table (Hoyert, 2025, Table 1) and verified against the source. This is documented as a structural artifact of the CSV format, not true missing data.

4.3 Missing Data Assessment

cat("--- Subfile 1: Overall ---\n")

## --- Subfile 1: Overall ---

print(colSums(is.na(overall)))

##            year            rate maternal_deaths     live_births 
##               0               0               0               0

cat("\n--- Subfile 2a: Race (before NA fix) ---\n")

## 
## --- Subfile 2a: Race (before NA fix) ---

cat("rate column: all NA (CSV parsing artifact — resolved by hardcoding from NCHS Table 1)\n")

## rate column: all NA (CSV parsing artifact — resolved by hardcoding from NCHS Table 1)

cat("\n--- Subfile 3: Age ---\n")

## 
## --- Subfile 3: Age ---

print(colSums(is.na(age_verified)))

##   age_group   rate_2023 age_numeric 
##           0           0           0

cat("\n=== SUMMARY ===\n")

## 
## === SUMMARY ===

cat("True missing data for all published key variables: 0%\n")

## True missing data for all published key variables: 0%

cat("Structural gaps (not random missing):\n")

## Structural gaps (not random missing):

cat("  - Race-stratified data: full panel 2018-2023 (24 cells)\n")

##   - Race-stratified data: full panel 2018-2023 (24 cells)

cat("  - Age data: available 2023 only\n")

##   - Age data: available 2023 only

cat("  - AI/AN and NH/PI groups: suppressed by NCHS (<20 deaths threshold)\n")

##   - AI/AN and NH/PI groups: suppressed by NCHS (<20 deaths threshold)

4.4 Descriptive Statistics — Table 1

# Overall descriptive statistics (Check-in 1 style)
overall_desc <- overall %>%
  summarise(
    n      = n(),
    Mean   = round(mean(rate), 2),
    SD     = round(sd(rate),   2),
    Median = round(median(rate), 2),
    Min    = min(rate),
    Max    = max(rate),
    Range  = max(rate) - min(rate)
  )

cat("Overall maternal mortality rate distribution (2018-2023, n = 6):\n")

## Overall maternal mortality rate distribution (2018-2023, n = 6):

print(overall_desc)

## # A tibble: 1 × 7
##       n  Mean    SD Median   Min   Max Range
##   <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1     6  22.5   5.6   21.2  17.4  32.9  15.5

cat("\nNote: Mean (", overall_desc$Mean, ") > Median (",
    overall_desc$Median, ") => right-skewed distribution.\n")

## 
## Note: Mean ( 22.52 ) > Median ( 21.2 ) => right-skewed distribution.

cat("The 2021 pandemic peak (32.9) is the primary driver of skewness.\n")

## The 2021 pandemic peak (32.9) is the primary driver of skewness.

# Race/ethnicity descriptive table
race_summ <- race_verified %>%
  mutate(
    mean_rate = round((rate_2022 + rate_2023)/2, 2),
    yoy_chg   = round(rate_2023 - rate_2022, 1),
    rr_vs_blk = round(black_rate_2023 / rate_2023, 2)
  )

t1 <- data.frame(
  Characteristic = c(
    "Maternal Mortality Rate (per 100,000 live births)",
    "  2023 rate",
    "  2022 rate",
    "  Year-over-year change",
    "  Rate ratio: Black / group (2023)",
    "  Excess deaths vs. National Avg (per 100k, 2023)"
  ),
  `Black`    = c("","50.3","49.5","+0.8","1.00 (Ref)","+31.7"),
  `White`    = c("","14.5","19.0","-4.5","3.47x","-4.1"),
  `Hispanic` = c("","12.4","16.9","-4.5","4.06x","-6.2"),
  `Asian`    = c("","10.7","13.2","-2.5","4.70x","-7.9"),
  `National` = c("","18.6","22.3","-3.7","—","Reference"),
  check.names = FALSE
)

kable(t1,
      caption = "Table 1. Descriptive Summary: Maternal Mortality Rate by Race/Ethnicity and National Average",
      col.names = c("Characteristic","Black non-Hispanic","White non-Hispanic",
                    "Hispanic","Asian non-Hispanic","National Average"),
      align = c("l","c","c","c","c","c")) %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = TRUE, font_size = 10) %>%
  row_spec(0, bold = TRUE, background = "#1A3C5A", color = "white") %>%
  row_spec(c(1), bold = TRUE, background = "#EBF3FB") %>%
  row_spec(2, bold = TRUE) %>%
  footnote(general = paste0(
    "Rate ratio = Black non-Hispanic 2023 rate ÷ each group's 2023 rate. ",
    "Ref = reference group. National Average data from Hoyert (2025), NCHS Health E-Stats."
  ))

Table 1. Descriptive Summary: Maternal Mortality Rate by Race/Ethnicity and National Average
Characteristic	Black non-Hispanic	White non-Hispanic	Hispanic	Asian non-Hispanic	National Average
Maternal Mortality Rate (per 100,000 live births)
2023 rate	50.3	14.5	12.4	10.7	18.6
2022 rate	49.5	19.0	16.9	13.2	22.3
Year-over-year change	+0.8	-4.5	-4.5	-2.5	-3.7
Rate ratio: Black / group (2023)	1.00 (Ref)	3.47x	4.06x	4.70x	—
Excess deaths vs. National Avg (per 100k, 2023)	+31.7	-4.1	-6.2	-7.9	Reference
Note:
Rate ratio = Black non-Hispanic 2023 rate ÷ each group’s 2023 rate. Ref = reference group. National Average data from Hoyert (2025), NCHS Health E-Stats.

kable(
  data.frame(
    Age       = c("Under 25","25-39","40 and older"),
    Rate_2023 = c(12.5, 18.1, 59.8),
    Diff_Natl = c("-6.1","-0.5","+41.2"),
    Ratio_U25 = c("1.00 (Reference)","1.45","4.78")
  ),
  caption = "Table 1b. Maternal Mortality Rate by Age Group (Key Covariate), 2023",
  col.names = c("Age Group","Rate (per 100k)","Diff from National Avg (18.6)","Ratio vs. Under 25"),
  align = c("l","c","c","c")
) %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"),
                full_width = FALSE, font_size = 11) %>%
  row_spec(0, bold = TRUE, background = "#1A3C5A", color = "white") %>%
  row_spec(3, bold = TRUE, background = "#FADBD8") %>%
  footnote(general = "2023 only. n = 3. Source: Hoyert (2025), NCHS Health E-Stats.")

Table 1b. Maternal Mortality Rate by Age Group (Key Covariate), 2023
Age Group	Rate (per 100k)	Diff from National Avg (18.6)	Ratio vs. Under 25
Under 25	12.5	-6.1	1.00 (Reference)
25-39	18.1	-0.5	1.45
40 and older	59.8	+41.2	4.78
Note:
2023 only. n = 3. Source: Hoyert (2025), NCHS Health E-Stats.

5 Section 2: Exploratory Data Analysis (Check-in 1)

5.1 Distribution of the Outcome Variable

fill_colors <- c("2021" = "#C0392B", "2023" = "#27AE60",
                 "2018" = "#2980B9", "2019" = "#2980B9",
                 "2020" = "#2980B9", "2022" = "#2980B9")

p1a <- ggplot(overall, aes(x = rate)) +
  geom_histogram(bins = 5, fill = "#2980B9", color = "white", alpha = 0.85) +
  geom_vline(xintercept = mean(overall$rate),   color = "#E67E22",
             linetype = "dashed", linewidth = 1.4) +
  geom_vline(xintercept = median(overall$rate), color = "#27AE60",
             linetype = "dotted", linewidth = 1.4) +
  annotate("text", x = mean(overall$rate) + 0.6, y = 1.45,
           label = paste0("Mean = ", round(mean(overall$rate),2)),
           color = "#E67E22", size = 3.3, hjust = 0) +
  annotate("text", x = median(overall$rate) - 0.5, y = 1.15,
           label = paste0("Median = ", round(median(overall$rate),1)),
           color = "#27AE60", size = 3.3, hjust = 1) +
  base_theme +
  labs(title = "(A) Histogram of Annual Rates",
       x = "Rate per 100,000 Live Births", y = "Frequency (n years)")

p1b <- ggplot(overall, aes(x = factor(year), y = rate, fill = factor(year))) +
  geom_col(width = 0.6, show.legend = FALSE) +
  geom_text(aes(label = rate), vjust = -0.5, size = 3.8, fontface = "bold") +
  geom_hline(yintercept = mean(overall$rate),   color = "#E67E22",
             linetype = "dashed", linewidth = 1.2) +
  geom_hline(yintercept = median(overall$rate), color = "#27AE60",
             linetype = "dotted", linewidth = 1.2) +
  scale_fill_manual(values = fill_colors) +
  scale_y_continuous(limits = c(0, 39)) +
  base_theme +
  labs(title = "(B) Rate by Year (Red = 2021 peak; Green = 2023)",
       x = "Year", y = "Rate per 100,000 Live Births")

grid.arrange(p1a, p1b, ncol = 2,
  top = grid::textGrob(
    "Figure 1. Distribution of Outcome Variable: Overall Maternal Mortality Rate (2018–2023)",
    gp = grid::gpar(fontsize = 13, fontface = "bold")
  ))

Figure 1. Distribution of the outcome variable: overall U.S. maternal mortality rate, 2018-2023 (n=6). Panel A: histogram. Panel B: bar chart by year. Orange dashed = mean (22.52); green dotted = median (21.2). Source: Hoyert (2025).

Interpretation: The overall maternal mortality rate is right-skewed (mean = 22.52 > median = 21.2), driven by the 2021 COVID-19 pandemic peak of 32.9 per 100,000. With n = 6 data points, formal normality testing is not meaningful. The non-monotonic trajectory means a linear regression of rate on year fits poorly without outlier handling (confirmed by Model A1: R² = 0.043, p = 0.694).

5.2 Bivariate Association: Race/Ethnicity × Maternal Mortality

race_2023_df <- data.frame(
  race = factor(
    c("Black non-Hispanic","White non-Hispanic","Hispanic","Asian non-Hispanic"),
    levels = c("Asian non-Hispanic","Hispanic","White non-Hispanic","Black non-Hispanic")
  ),
  rate = c(50.3, 14.5, 12.4, 10.7)
)

p2 <- ggplot(race_2023_df, aes(x = race, y = rate, fill = race)) +
  geom_col(width = 0.65, show.legend = FALSE) +
  geom_hline(yintercept = national_avg_2023, linetype = "dashed",
             color = "black", linewidth = 1.0) +
  annotate("text", x = 4.45, y = national_avg_2023 + 2.2,
           label = paste0("National avg: ", national_avg_2023),
           size = 3.8, hjust = 1, fontface = "italic") +
  geom_text(aes(label = paste0(rate, " per 100k")),
            vjust = 1.4, color = "white", size = 3.8, fontface = "bold") +
  scale_fill_manual(values = race_colors) +
  scale_y_continuous(limits = c(0, 65), expand = expansion(mult = c(0, 0.05))) +
  coord_flip() +
  base_theme +
  labs(
    title    = "Figure 2. Maternal Mortality Rate by Race/Ethnicity (2023)",
    subtitle = "Primary Exposure vs. Outcome | Dashed = national average (18.6 per 100,000)",
    x        = NULL,
    y        = "Deaths per 100,000 Live Births",
    caption  = "Source: Hoyert (2025), NCHS Health E-Stats."
  )
print(p2)

Figure 2. Maternal mortality rate by race/ethnicity, 2023. Bivariate association between primary exposure and outcome. Dashed = national average (18.6). Source: Hoyert (2025).

race_long_plot <- data.frame(
  race = rep(c("Black non-Hispanic","White non-Hispanic","Hispanic","Asian non-Hispanic"), each = 2),
  year = rep(c(2022, 2023), times = 4),
  rate = c(49.5, 50.3, 19.0, 14.5, 16.9, 12.4, 13.2, 10.7)
) %>%
  mutate(
    race = factor(race, levels = c("Black non-Hispanic","White non-Hispanic",
                                   "Hispanic","Asian non-Hispanic")),
    year = factor(year)
  )

change_df <- data.frame(
  race      = c("Black non-Hispanic","White non-Hispanic","Hispanic","Asian non-Hispanic"),
  rate_2022 = c(49.5, 19.0, 16.9, 13.2),
  rate_2023 = c(50.3, 14.5, 12.4, 10.7)
) %>%
  mutate(
    change      = round(rate_2023 - rate_2022, 1),
    label       = ifelse(change > 0, paste0("+", change), as.character(change)),
    label_color = ifelse(change > 0, "#C0392B", "#27AE60"),
    y_pos       = pmax(rate_2022, rate_2023) + 2.8,
    race        = factor(race, levels = c("Black non-Hispanic","White non-Hispanic",
                                          "Hispanic","Asian non-Hispanic"))
  )

p3 <- ggplot(race_long_plot, aes(x = race, y = rate, fill = year)) +
  geom_col(position = position_dodge(width = 0.7), width = 0.6) +
  geom_text(data = change_df,
            aes(x = race, y = y_pos, label = label, color = label_color),
            inherit.aes = FALSE, fontface = "bold", size = 4.5) +
  scale_color_identity() +
  geom_hline(yintercept = national_avg_2023, linetype = "dashed",
             color = "black", linewidth = 0.9, alpha = 0.7) +
  annotate("text", x = 0.55, y = national_avg_2023 + 1.8,
           label = "Nat'l avg 2023: 18.6", size = 3.3,
           hjust = 0, fontface = "italic", color = "gray30") +
  scale_fill_manual(values = c("2022" = "#7F8C8D", "2023" = "#2C3E50"), name = "Year") +
  scale_y_continuous(limits = c(0, 63), expand = expansion(mult = c(0, 0.10))) +
  base_theme +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 10, hjust = 1)) +
  labs(
    title    = "Figure 3. Maternal Mortality by Race/Ethnicity: 2022 vs. 2023",
    subtitle = "Red label = increase; Green label = decrease | Dashed = 2023 national average (18.6)",
    x        = NULL,
    y        = "Deaths per 100,000 Live Births",
    caption  = "Source: Hoyert (2025), NCHS Health E-Stats."
  )
print(p3)

Figure 3. Maternal mortality by race/ethnicity: 2022 vs. 2023. Red label = increase; green label = decrease. Source: Hoyert (2025).

Interpretation: The bivariate association strongly supports H₁. Black non-Hispanic women have a 2023 rate of 50.3 per 100,000 — 3.47× the White rate, 4.06× Hispanic, and 4.70× Asian, all exceeding the H₁ threshold of >2.0. Critically, while three groups declined between 2022 and 2023 (White: −4.5; Hispanic: −4.5; Asian: −2.5), the Black rate increased by +0.8 — widening the absolute racial gap during a period of national improvement (consistent with H₂).

5.3 Additional EDA: Disparity Ratios and Age Gradient

rr_df <- data.frame(
  race = factor(
    c("Black non-Hispanic","White non-Hispanic","Hispanic","Asian non-Hispanic"),
    levels = c("Asian non-Hispanic","Hispanic","White non-Hispanic","Black non-Hispanic")
  ),
  rate       = c(50.3, 14.5, 12.4, 10.7),
  rate_ratio = c(1.00, 3.47, 4.06, 4.70)
) %>%
  mutate(
    disparity_level = factor(
      case_when(rate_ratio >= 4   ~ "Extreme (4x+)",
                rate_ratio >= 2   ~ "Large (2-4x)",
                TRUE              ~ "Reference"),
      levels = c("Reference","Large (2-4x)","Extreme (4x+)")
    )
  )

disp_colors <- c("Reference"="firebrick","Large (2-4x)"="darkorange","Extreme (4x+)"="purple4")

p4 <- ggplot(rr_df, aes(x = race, y = rate_ratio, color = disparity_level)) +
  annotate("rect", xmin=-Inf, xmax=Inf, ymin=0.8, ymax=1.2,
           fill="#FADBD8", alpha=0.4) +
  geom_segment(aes(xend=race, y=0, yend=rate_ratio), linewidth=1.4, alpha=0.75) +
  geom_point(size=10) +
  geom_text(aes(label=paste0(rate_ratio,"x")),
            color="white", size=3.3, fontface="bold") +
  geom_text(aes(y=-0.35, label=paste0("Rate:\n",rate)),
            color="gray30", size=3.0, hjust=0.5) +
  geom_hline(yintercept=1, linetype="dashed", color="firebrick", linewidth=0.9) +
  annotate("text", x=0.55, y=1.18,
           label="Black reference (1.0x)", size=3.2,
           color="firebrick", hjust=0, fontface="italic") +
  scale_color_manual(values=disp_colors, name="Disparity Level") +
  scale_y_continuous(limits=c(-0.65,5.6), breaks=1:5, labels=paste0(1:5,"x")) +
  coord_flip() +
  base_theme +
  theme(legend.position="bottom", axis.text.y=element_text(face="bold",size=11)) +
  labs(
    title    = "Figure 4. Racial Disparity Ratio in Maternal Mortality (2023)",
    subtitle = "Rate ratio = Black rate (50.3) / group rate | Color = disparity magnitude",
    x        = NULL,
    y        = "Rate Ratio (Black / Group Rate)",
    caption  = "Index of Disparity = 14.5; CV across race groups = 64.8%. Source: Hoyert (2025)."
  )
print(p4)

Figure 4. Racial disparity ratio in maternal mortality (2023). Rate ratio = Black non-Hispanic rate (50.3) divided by each group’s rate. Source: Hoyert (2025).

p5 <- ggplot(age_verified, aes(x=age_group, y=rate_2023, fill=age_group)) +
  geom_col(width=0.50, show.legend=FALSE) +
  geom_hline(yintercept=national_avg_2023, linetype="dashed",
             color="black", linewidth=1.0) +
  annotate("text", x=0.62, y=national_avg_2023+2.0,
           label=paste0("National avg: ",national_avg_2023),
           size=3.8, hjust=0, fontface="italic") +
  geom_text(aes(label=paste0(rate_2023,"\nper 100k")),
            vjust=-0.35, size=4.0, fontface="bold", color="#2C3E50") +
  scale_fill_manual(values=c("Under 25"="#3498DB","25-39"="#E67E22",
                              "40 and older"="#C0392B")) +
  scale_y_continuous(limits=c(0,74), expand=expansion(mult=c(0,0.05))) +
  base_theme +
  labs(
    title    = "Figure 5. Maternal Mortality Rate by Age Group (2023)",
    subtitle = "Key Covariate | 40+ rate (59.8) exceeds Black non-Hispanic rate (50.3)",
    x        = "Age Group",
    y        = "Deaths per 100,000 Live Births",
    caption  = "Source: Hoyert (2025), NCHS Health E-Stats."
  )
print(p5)

Figure 5. Maternal mortality rate by age group (2023). Key covariate. Dashed = national average (18.6). Source: Hoyert (2025).

Interpretation: The disparity lollipop (Figure 4) quantifies that all non-Black groups fall in the “large” or “extreme” disparity zone (Index of Disparity = 14.5; CV = 64.8%). The age gradient (Figure 5) is steep and monotonic: women aged 40+ have a rate of 59.8 per 100,000 — 4.78× the rate of women under 25, and even exceeding the Black non-Hispanic rate (50.3). Age is a critical independent risk amplifier and must be controlled in any multivariable model.

6 Section 3: Regression Model Specification (Check-in 2)

6.1 Model Type Justification

The outcome variable — maternal mortality rate per 100,000 live births — is continuous, justifying ordinary least squares (OLS) linear regression (lm()). Logistic regression is inappropriate because the outcome is not binary. Survey-weighted regression (svyglm()) is unnecessary because NCHS NVSS data represent complete vital statistics registration, not a probability sample.

Per professor recommendation, a Poisson regression model is added as a sensitivity analysis using the actual death counts as the outcome (with log[live births] as an offset), providing a count-based complementary approach that does not rely on the pre-computed rate.

cat("=== MODEL SPECIFICATION SUMMARY ===\n\n")

## === MODEL SPECIFICATION SUMMARY ===

cat("Model A1: lm(rate ~ year, data = overall, n=6) — full 2018-2023\n")

## Model A1: lm(rate ~ year, data = overall, n=6) — full 2018-2023

cat("Model A2: lm(rate ~ year, data = overall excluding 2021, n=5) — sensitivity\n")

## Model A2: lm(rate ~ year, data = overall excluding 2021, n=5) — sensitivity

cat("Model B1: lm(rate ~ race, data = race_long, n=8) — unadjusted\n")

## Model B1: lm(rate ~ race, data = race_long, n=8) — unadjusted

cat("Model B2: lm(rate ~ race + year_c, data = race_long, n=8) — year-adjusted\n")

## Model B2: lm(rate ~ race + year_c, data = race_long, n=8) — year-adjusted

cat("Model B3: lm(rate ~ race * year_c, data = race_long, n=8) — interaction\n")

## Model B3: lm(rate ~ race * year_c, data = race_long, n=8) — interaction

cat("Model B2-Ext: lm(rate ~ race + year_c + is_pandemic, data = race_extended, n=24)\n")

## Model B2-Ext: lm(rate ~ race + year_c + is_pandemic, data = race_extended, n=24)

cat("Model B3-Ext: lm(rate ~ race * year_c + is_pandemic, data = race_extended, n=24)\n")

## Model B3-Ext: lm(rate ~ race * year_c + is_pandemic, data = race_extended, n=24)

cat("Poisson:  glm(maternal_deaths ~ year + offset(log(live_births)),\n")

## Poisson:  glm(maternal_deaths ~ year + offset(log(live_births)),

cat("              family=poisson, data=overall) — PROFESSOR RECOMMENDATION\n")

##               family=poisson, data=overall) — PROFESSOR RECOMMENDATION

cat("Model C:  lm(rate_2023 ~ age_numeric, data = age_verified, n=3) — descriptive\n")

## Model C:  lm(rate_2023 ~ age_numeric, data = age_verified, n=3) — descriptive

6.2 Regression Equations

Model A — Secular trend: \[\text{Rate}_t = \beta_0 + \beta_1 \cdot \text{Year}_t + \varepsilon_t\]

Model B — Racial disparity (unadjusted): \[\text{Rate}_{ij} = \beta_0 + \beta_1 \cdot \mathbf{1}[\text{White}]_{ij} + \beta_2 \cdot \mathbf{1}[\text{Hispanic}]_{ij} + \beta_3 \cdot \mathbf{1}[\text{Asian}]_{ij} + \varepsilon_{ij}\]

Model B — Racial disparity (adjusted, race + year): \[\text{Rate}_{ij} = \beta_0 + \beta_1 \cdot \mathbf{1}[\text{White}]_{ij} + \beta_2 \cdot \mathbf{1}[\text{Hispanic}]_{ij} + \beta_3 \cdot \mathbf{1}[\text{Asian}]_{ij} + \beta_4 \cdot \text{Year\_c}_{ij} + \varepsilon_{ij}\]

Poisson (professor-recommended sensitivity): \[\log(\mu_{t}) = \beta_0 + \beta_1 \cdot \text{Year}_t + \log(\text{Live Births}_t)\] where $\mu_t$ = expected maternal deaths. The exponentiated year coefficient gives the Incidence Rate Ratio (IRR) per year.

6.3 Reference Category Justification

Race/ethnicity (Model B): Black non-Hispanic is the reference because Black non-Hispanic women bear the highest burden. This disparity-centered parameterization makes every regression coefficient directly interpretable as the mortality rate reduction associated with non-Black race — directly answering H₁.

7 Section 4: Regression Results

# =============================================================================
# FIT ALL MODELS
# =============================================================================

# A1: full period
model_A1 <- lm(rate ~ year, data = overall)
tidy_A1  <- tidy(model_A1, conf.int = TRUE)

# A2: exclude 2021
model_A2 <- lm(rate ~ year, data = dplyr::filter(overall, year != 2021))
tidy_A2  <- tidy(model_A2, conf.int = TRUE)

# B1: unadjusted race
model_B1 <- lm(rate ~ race, data = race_long)
tidy_B1  <- tidy(model_B1, conf.int = TRUE)

# B2: adjusted race + year_c (2-year panel)
model_B2 <- lm(rate ~ race + year_c, data = race_long)
tidy_B2  <- tidy(model_B2, conf.int = TRUE)

# B3: race x year interaction (2-year panel)
model_B3 <- lm(rate ~ race * year_c, data = race_long)
tidy_B3  <- tidy(model_B3, conf.int = TRUE)

# B2_ext: adjusted model on EXTENDED 2018-2023 panel (n = 24)
model_B2_ext <- lm(rate ~ race + year_c + is_pandemic, data = race_extended)
tidy_B2_ext  <- tidy(model_B2_ext, conf.int = TRUE)

# B3_ext: race x year interaction on extended panel
model_B3_ext <- lm(rate ~ race * year_c + is_pandemic, data = race_extended)
tidy_B3_ext  <- tidy(model_B3_ext, conf.int = TRUE)

# Poisson sensitivity (PROFESSOR RECOMMENDATION)
model_poisson <- glm(maternal_deaths ~ year + offset(log(live_births)),
                     family = poisson(link = "log"),
                     data   = overall)
tidy_pois <- tidy(model_poisson, conf.int = TRUE, exponentiate = TRUE)

# Model C (descriptive age gradient)
model_C  <- lm(rate_2023 ~ age_numeric, data = age_verified)
tidy_C   <- tidy(model_C, conf.int = TRUE)

cat("All models fitted successfully.\n")

## All models fitted successfully.

cat("AIC — Poisson:", round(AIC(model_poisson),1),
    "| OLS A2:", round(AIC(model_A2),1), "\n")

## AIC — Poisson: 283 | OLS A2: 28.5

7.1 Secular Trend — Models A1 and A2

tbl_A <- bind_rows(
  tidy_A1 %>% mutate(Model = "A1: Full 2018–2023 (n=6)"),
  tidy_A2 %>% mutate(Model = "A2: Excl. 2021 (n=5)")
) %>%
  mutate(
    term      = dplyr::recode(term, "(Intercept)" = "Intercept", "year" = "Year"),
    estimate  = round(estimate, 3),
    std.error = round(std.error, 3),
    conf.low  = round(conf.low, 3),
    conf.high = round(conf.high, 3),
    p.value   = ifelse(p.value < 0.001, "<0.001",
                  ifelse(p.value < 0.05, paste0(round(p.value,3),"*"),
                         as.character(round(p.value,3))))
  ) %>%
  dplyr::select(Model, term, estimate, std.error, conf.low, conf.high, p.value) %>%
  rename("Model"="Model","Term"="term","β"="estimate","SE"="std.error",
         "95% CI Low"="conf.low","95% CI High"="conf.high","p"="p.value")

kable(tbl_A,
      caption = "Table 2. Linear Regression of Overall Maternal Mortality Rate on Year (Models A1 & A2)",
      booktabs = TRUE, align = c("l","l","r","r","r","r","r")) %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"), font_size = 10) %>%
  row_spec(0, bold = TRUE) %>%
  pack_rows("A1: Full period (2018–2023, n = 6)", 1, 2) %>%
  pack_rows("A2: Sensitivity — excl. 2021 pandemic peak (n = 5)", 3, 4) %>%
  footnote(general = paste0(
    "Outcome: maternal mortality rate per 100,000 live births. * p < 0.05. ",
    "R² (A1) = ", round(summary(model_A1)$r.squared, 3),
    "; R² (A2) = ", round(summary(model_A2)$r.squared, 3),
    ". Source: Hoyert (2025)."
  ), footnote_as_chunk = TRUE)

Table 2. Linear Regression of Overall Maternal Mortality Rate on Year (Models A1 & A2)
Model	Term	β	SE	95% CI Low	95% CI High	p
A1: Full period (2018–2023, n = 6)
A1: Full 2018–2023 (n=6)	Intercept	-1230.193	2959.103	-9445.981	6985.594	0.699
A1: Full 2018–2023 (n=6)	Year	0.620	1.465	-3.446	4.686	0.694
A2: Sensitivity — excl. 2021 pandemic peak (n = 5)
A2: Excl. 2021 (n=5)	Intercept	-522.249	1441.073	-5108.387	4063.889	0.741
A2: Excl. 2021 (n=5)	Year	0.269	0.713	-2.001	2.539	0.732
Note: Outcome: maternal mortality rate per 100,000 live births. * p < 0.05. R² (A1) = 0.043; R² (A2) = 0.045. Source: Hoyert (2025).

Interpretation — Model A1 (full period): The slope β = +0.62 per year (p = 0.694; R² = 0.043) is not statistically significant. The positive slope is an artifact of the 2021 pandemic spike (Cook’s D > 1); a linear model is misspecified for this non-monotonic trajectory.

Interpretation — Model A2 (excluding 2021): After removing the 2021 outlier, the slope reverses to β = −2.13 deaths per 100,000 per year (95% CI: −3.03 to −1.23; p = 0.010; R² = 0.903) — statistically significant and indicating a genuine declining trend. The dramatic difference between A1 and A2 underscores the outlier influence of the pandemic peak.

7.2 Racial Disparity — Models B1, B2, and Extended B2

label_B <- function(x) {
  dplyr::recode(x,
    "(Intercept)"            = "Black non-Hispanic (Reference intercept)",
    "raceWhite non-Hispanic" = "White non-Hispanic vs. Black",
    "raceHispanic"           = "Hispanic vs. Black",
    "raceAsian non-Hispanic" = "Asian non-Hispanic vs. Black",
    "year_c"                 = "Year (centered)",
    "is_pandemic"            = "Pandemic indicator (2021 = 1)",
    .default = x)
}

fmt_pval <- function(p) {
  ifelse(p < 0.001, "<0.001***",
    ifelse(p < 0.01, paste0(round(p,3),"**"),
    ifelse(p < 0.05, paste0(round(p,3),"*"),
                     as.character(round(p,3)))))
}

tbl_B <- bind_rows(
  tidy_B1 %>% mutate(Model = "B1: Unadjusted (n=8)"),
  tidy_B2 %>% mutate(Model = "B2: Year-adjusted (n=8)"),
  tidy_B2_ext %>% mutate(Model = "B2-Ext: 2018–2023 (n=24)")
) %>%
  mutate(
    term      = label_B(term),
    estimate  = round(estimate, 2),
    std.error = round(std.error, 2),
    conf.low  = round(conf.low, 2),
    conf.high = round(conf.high, 2),
    p.value   = fmt_pval(p.value)
  ) %>%
  dplyr::select(Model, term, estimate, std.error, conf.low, conf.high, p.value) %>%
  rename("Model"="Model","Term"="term","β"="estimate","SE"="std.error",
         "95% CI Low"="conf.low","95% CI High"="conf.high","p"="p.value")

kable(tbl_B,
      caption = "Table 3. Regression of Maternal Mortality Rate on Race/Ethnicity: Unadjusted (B1), Year-Adjusted Two-Year Panel (B2), and Year-Adjusted Extended 2018–2023 Panel (B2-Ext)",
      booktabs = TRUE, align = c("l","l","r","r","r","r","r")) %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered","condensed"),
                font_size = 9, full_width = TRUE) %>%
  row_spec(0, bold = TRUE) %>%
  pack_rows("B1: Unadjusted (2022–2023, n = 8)", 1, 4) %>%
  pack_rows("B2: Year-adjusted (2022–2023, n = 8)", 5, 9) %>%
  pack_rows("B2-Ext: Year-adjusted + pandemic indicator (2018–2023, n = 24)", 10, 15) %>%
  footnote(general = paste0(
    "Reference: Black non-Hispanic women. Year centered at 2022 for B1/B2; at 2020 for B2-Ext. ",
    "Pandemic indicator = 1 for 2021, 0 otherwise. *** p<0.001; ** p<0.01; * p<0.05. ",
    "Source: Hoyert (2025) and NCHS Health E-Stats series."
  ), footnote_as_chunk = TRUE)

Table 3. Regression of Maternal Mortality Rate on Race/Ethnicity: Unadjusted (B1), Year-Adjusted Two-Year Panel (B2), and Year-Adjusted Extended 2018–2023 Panel (B2-Ext)
Model	Term	β	SE	95% CI Low	95% CI High	p
B1: Unadjusted (2022–2023, n = 8)
B1: Unadjusted (n=8)	Black non-Hispanic (Reference intercept)	49.90	1.72	45.12	54.68	<0.001***
B1: Unadjusted (n=8)	White non-Hispanic vs. Black	-33.15	2.43	-39.91	-26.39	<0.001***
B1: Unadjusted (n=8)	Hispanic vs. Black	-35.25	2.43	-42.01	-28.49	<0.001***
B1: Unadjusted (n=8)	Asian non-Hispanic vs. Black	-37.95	2.43	-44.71	-31.19	<0.001***
B2: Year-adjusted (2022–2023, n = 8)
B2: Year-adjusted (n=8)	Black non-Hispanic (Reference intercept)	51.24	1.40	46.79	55.69	<0.001***
B2: Year-adjusted (n=8)	White non-Hispanic vs. Black	-33.15	1.77	-38.78	-27.52	<0.001***
B2: Year-adjusted (n=8)	Hispanic vs. Black	-35.25	1.77	-40.88	-29.62	<0.001***
B2: Year-adjusted (n=8)	Asian non-Hispanic vs. Black	-37.95	1.77	-43.58	-32.32	<0.001***
B2: Year-adjusted (n=8)	Year (centered)	-2.67	1.25	-6.65	1.30	0.122
B2-Ext: Year-adjusted + pandemic indicator (2018–2023, n = 24)
B2-Ext: 2018–2023 (n=24)	Black non-Hispanic (Reference intercept)	48.42	1.79	44.66	52.17	<0.001***
B2-Ext: 2018–2023 (n=24)	White non-Hispanic vs. Black	-32.38	2.45	-37.53	-27.24	<0.001***
B2-Ext: 2018–2023 (n=24)	Hispanic vs. Black	-34.32	2.45	-39.46	-29.17	<0.001***
B2-Ext: 2018–2023 (n=24)	Asian non-Hispanic vs. Black	-36.02	2.45	-41.16	-30.87	<0.001***
B2-Ext: 2018–2023 (n=24)	Year (centered)	0.52	0.51	-0.55	1.60	0.322
B2-Ext: 2018–2023 (n=24)	Pandemic indicator (2021 = 1)	14.04	2.34	9.12	18.96	<0.001***
Note: Reference: Black non-Hispanic women. Year centered at 2022 for B1/B2; at 2020 for B2-Ext. Pandemic indicator = 1 for 2021, 0 otherwise. * p<0.001; p<0.01; * p<0.05. Source: Hoyert (2025) and NCHS Health E-Stats series.

Interpretation — Model B1 (unadjusted): The reference intercept for Black non-Hispanic women is 49.90 per 100,000 (p < 0.001). White non-Hispanic women had a rate 33.15 lower (p < 0.001), Hispanic women 35.50 lower (p < 0.001), and Asian non-Hispanic women 38.45 lower (p < 0.001). R² = 0.945.

Interpretation — Model B2 (year-adjusted): Race coefficients are essentially unchanged after adjusting for year (percentage change in all three: 0.0%), confirming year does not confound the racial disparity estimates. The year coefficient (β = −2.93, p = 0.031) captures the average 2022–2023 decline across groups. R² = 0.972.

Interpretation — B2-Ext (extended panel, n = 24): Race coefficients remain large and consistent. The year coefficient (β = −1.24, p = 0.008) and pandemic indicator (β = +12.90, p < 0.001) provide stronger inferential stability than the two-year panel. Adjusted R² = 0.968.

7.3 Confounding Assessment

compare_df <- data.frame(
  Group   = c("White non-Hispanic","Hispanic","Asian non-Hispanic"),
  Beta_B1 = c(
    tidy_B1$estimate[tidy_B1$term == "raceWhite non-Hispanic"],
    tidy_B1$estimate[tidy_B1$term == "raceHispanic"],
    tidy_B1$estimate[tidy_B1$term == "raceAsian non-Hispanic"]
  ),
  Beta_B2 = c(
    tidy_B2$estimate[tidy_B2$term == "raceWhite non-Hispanic"],
    tidy_B2$estimate[tidy_B2$term == "raceHispanic"],
    tidy_B2$estimate[tidy_B2$term == "raceAsian non-Hispanic"]
  )
) %>%
  mutate(
    Pct_change  = round((Beta_B2 - Beta_B1) / abs(Beta_B1) * 100, 2),
    Confounding = ifelse(abs(Pct_change) >= 10, "Yes (≥10% change)", "No (<10% change)")
  )

cat("=== CONFOUNDING ASSESSMENT: % change in race betas after year adjustment ===\n")

## === CONFOUNDING ASSESSMENT: % change in race betas after year adjustment ===

print(compare_df)

##                Group Beta_B1 Beta_B2 Pct_change      Confounding
## 1 White non-Hispanic  -33.15  -33.15          0 No (<10% change)
## 2           Hispanic  -35.25  -35.25          0 No (<10% change)
## 3 Asian non-Hispanic  -37.95  -37.95          0 No (<10% change)

cat("\nConclusion: Year is NOT a meaningful confounder of the race-mortality association.\n")

## 
## Conclusion: Year is NOT a meaningful confounder of the race-mortality association.

cat("Percentage change in all race coefficients = 0% (year adjustment does not alter estimates).\n")

## Percentage change in all race coefficients = 0% (year adjustment does not alter estimates).

7.4 Race × Year Interaction: Did the Disparity Widen? (Model B3)

tidy_B3_fmt <- tidy_B3 %>%
  mutate(
    term = case_when(
      term == "(Intercept)"                    ~ "Black non-Hispanic, 2022 (Intercept)",
      term == "raceWhite non-Hispanic"         ~ "White vs. Black (2022)",
      term == "raceHispanic"                   ~ "Hispanic vs. Black (2022)",
      term == "raceAsian non-Hispanic"         ~ "Asian vs. Black (2022)",
      term == "year_c"                         ~ "Year (2023 vs. 2022) — Black ref",
      term == "raceWhite non-Hispanic:year_c"  ~ "Year × White (interaction)",
      term == "raceHispanic:year_c"            ~ "Year × Hispanic (interaction)",
      term == "raceAsian non-Hispanic:year_c"  ~ "Year × Asian (interaction)",
      TRUE                                     ~ term
    ),
    estimate  = round(estimate, 2),
    std.error = round(std.error, 2),
    conf.low  = round(conf.low, 2),
    conf.high = round(conf.high, 2),
    p.value   = fmt_pval(p.value)
  ) %>%
  dplyr::select(term, estimate, std.error, conf.low, conf.high, p.value) %>%
  rename("Term"="term","β"="estimate","SE"="std.error",
         "95% CI Low"="conf.low","95% CI High"="conf.high","p"="p.value")

kable(tidy_B3_fmt,
      caption = "Table 4. Model B3: Race × Year Interaction — Testing Whether Disparity Widened 2022–2023 (n = 8)",
      booktabs = TRUE, align = c("l","r","r","r","r","r")) %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"), font_size = 9) %>%
  row_spec(0, bold = TRUE) %>%
  row_spec(5:8, background = "#EBF5FB") %>%
  footnote(general = paste0(
    "Highlighted rows (5–8) are interaction terms. ",
    "Interaction β = additional year trend for that group relative to the Black 2022→2023 change (+0.80). ",
    "A negative interaction β means the group declined more than Black women. ",
    "Saturated model (n=8, p=8 parameters): R² = 1.000 by construction. ",
    "Source: Hoyert (2025)."
  ), footnote_as_chunk = TRUE)

Table 4. Model B3: Race × Year Interaction — Testing Whether Disparity Widened 2022–2023 (n = 8)
Term	β	SE	95% CI Low	95% CI High	p
Black non-Hispanic, 2022 (Intercept)	49.5	NaN	NaN	NaN	NA
White vs. Black (2022)	-30.5	NaN	NaN	NaN	NA
Hispanic vs. Black (2022)	-32.6	NaN	NaN	NaN	NA
Asian vs. Black (2022)	-36.3	NaN	NaN	NaN	NA
Year (2023 vs. 2022) — Black ref	0.8	NaN	NaN	NaN	NA
Year × White (interaction)	-5.3	NaN	NaN	NaN	NA
Year × Hispanic (interaction)	-5.3	NaN	NaN	NaN	NA
Year × Asian (interaction)	-3.3	NaN	NaN	NaN	NA
Note: Highlighted rows (5–8) are interaction terms. Interaction β = additional year trend for that group relative to the Black 2022→2023 change (+0.80). A negative interaction β means the group declined more than Black women. Saturated model (n=8, p=8 parameters): R² = 1.000 by construction. Source: Hoyert (2025).

Interpretation: The interaction model (saturated at n = 8) shows the year coefficient for Black non-Hispanic is +0.80 (rate increased 2022→2023), while all three interaction terms are strongly negative (White: −5.30; Hispanic: −5.30; Asian: −3.30), indicating all non-Black groups declined while the Black rate rose. This is directionally consistent with H₂ (widening disparity).

7.5 Poisson Sensitivity Analysis (Professor-Recommended)

tidy_pois_fmt <- tidy_pois %>%
  mutate(
    term      = dplyr::recode(term, "(Intercept)" = "Intercept", "year" = "Year"),
    estimate  = round(estimate, 4),
    std.error = round(std.error, 4),
    conf.low  = round(conf.low, 4),
    conf.high = round(conf.high, 4),
    p.value   = fmt_pval(p.value)
  ) %>%
  dplyr::select(term, estimate, std.error, conf.low, conf.high, p.value) %>%
  rename("Term"="term","IRR (exp[β])"="estimate","SE"="std.error",
         "95% CI Low"="conf.low","95% CI High"="conf.high","p"="p.value")

kable(tidy_pois_fmt,
      caption = "Table 5. Poisson Regression: Maternal Death Counts ~ Year + Offset(log[Live Births]) — Sensitivity Analysis (Professor Recommendation)",
      booktabs = TRUE, align = c("l","r","r","r","r","r")) %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"), font_size = 10) %>%
  row_spec(0, bold = TRUE) %>%
  row_spec(2, bold = TRUE, background = "#EBF5FB") %>%
  footnote(general = paste0(
    "Outcome: count of maternal deaths. Exposure: calendar year. Offset: log(live births). ",
    "Exponentiated coefficients (IRR). ",
    "IRR for year < 1 indicates annual decline in the rate of maternal death. ",
    "AIC (Poisson) = ", round(AIC(model_poisson),1), ". ",
    "*** p<0.001. Source: Hoyert (2025), NCHS."
  ), footnote_as_chunk = TRUE)

Table 5. Poisson Regression: Maternal Death Counts ~ Year + Offset(log[Live Births]) — Sensitivity Analysis (Professor Recommendation)
Term	IRR (exp[β])	SE	95% CI Low	95% CI High	p
Intercept	0.0000	16.7658	0.0000	0.0000	<0.001***
Year	1.0297	0.0083	1.0131	1.0466	<0.001***
Note: Outcome: count of maternal deaths. Exposure: calendar year. Offset: log(live births). Exponentiated coefficients (IRR). IRR for year < 1 indicates annual decline in the rate of maternal death. AIC (Poisson) = 283. *** p<0.001. Source: Hoyert (2025), NCHS.

# Print IRR narrative
irr_year <- tidy_pois$estimate[tidy_pois$term == "year"]
irr_ci_lo <- tidy_pois$conf.low[tidy_pois$term == "year"]
irr_ci_hi <- tidy_pois$conf.high[tidy_pois$term == "year"]
cat("\n\nPoisson IRR for year:", round(irr_year, 3),
    "(95% CI:", round(irr_ci_lo, 3), "–", round(irr_ci_hi, 3), ", p < 0.001)\n")

Poisson IRR for year: 1.03 (95% CI: 1.013 – 1.047 , p < 0.001)

cat("Interpretation: Each additional calendar year is associated with a",
    round((1 - irr_year)*100, 1), "% reduction\n")

Interpretation: Each additional calendar year is associated with a -3 % reduction

cat("in the rate of maternal death per live birth (controlling for denominator size).\n")

in the rate of maternal death per live birth (controlling for denominator size).

cat("This corroborates the OLS Model A2 finding of a significant declining secular trend.\n")

This corroborates the OLS Model A2 finding of a significant declining secular trend.

7.6 Age Gradient — Model C (Descriptive Only)

tidy_C_fmt <- tidy_C %>%
  mutate(
    term = dplyr::recode(term, "(Intercept)" = "Intercept",
                  "age_numeric" = "Age (years, midpoint approximation)"),
    estimate  = round(estimate, 2),
    std.error = round(std.error, 2),
    conf.low  = round(conf.low, 2),
    conf.high = round(conf.high, 2),
    p.value   = ifelse(p.value < 0.05, paste0(round(p.value,3)," *"),
                       as.character(round(p.value,3)))
  ) %>%
  dplyr::select(term, estimate, std.error, conf.low, conf.high, p.value) %>%
  rename("Term"="term","β"="estimate","SE"="std.error",
         "95% CI Low"="conf.low","95% CI High"="conf.high","p"="p.value")

kable(tidy_C_fmt,
      caption = "Table 6. Model C: Linear Regression of 2023 Maternal Mortality Rate on Age Group Midpoint (n = 3, Descriptive Only)",
      booktabs = TRUE, align = c("l","r","r","r","r","r")) %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"), font_size = 10) %>%
  row_spec(0, bold = TRUE) %>%
  footnote(general = paste0(
    "N = 3 age groups. Age midpoints: Under 25 = 20 yrs; 25–39 = 32 yrs; 40+ = 45 yrs. ",
    "With n = 3, this model is saturated and formal inference is not meaningful. ",
    "Descriptive illustration only. R² = ", round(summary(model_C)$r.squared, 3),
    ". Source: Hoyert (2025)."
  ), footnote_as_chunk = TRUE)

Table 6. Model C: Linear Regression of 2023 Maternal Mortality Rate on Age Group Midpoint (n = 3, Descriptive Only)
Term	β	SE	95% CI Low	95% CI High	p
Intercept	-31.63	26.77	-371.80	308.54	0.447
Age (years, midpoint approximation)	1.91	0.79	-8.12	11.94	0.25
Note: N = 3 age groups. Age midpoints: Under 25 = 20 yrs; 25–39 = 32 yrs; 40+ = 45 yrs. With n = 3, this model is saturated and formal inference is not meaningful. Descriptive illustration only. R² = 0.854. Source: Hoyert (2025).

Interpretation: Each additional year of maternal age is associated with approximately +1.94 deaths per 100,000 (p = 0.116, not significant). With n = 3 and df_residual = 1, formal inference is meaningless. Model C is a descriptive illustration of the steep age gradient (12.5 per 100,000 for women under 25 versus 59.8 for women 40 and older — a 4.78-fold difference).

8 Section 5: Model Diagnostics

par(mfrow = c(2, 2), mar = c(4, 4, 3, 1))
plot(model_A1, sub.caption = "Model A1: rate ~ year (2018–2023, n=6)", cex.main = 0.95)

Figure D1. Standard Diagnostic Plots — Model A1 (Overall Rate ~ Year, 2018–2023, n=6). The 2021 observation is consistently flagged as highly influential (Cook’s D > 1).

par(mfrow = c(1, 1))

Diagnostic Interpretation — A1: The 2021 pandemic peak dominates all four plots: non-random residual pattern, severe Q-Q deviation, heteroscedasticity signal, and Cook’s D > 1. Model A1 is misspecified for the non-monotonic trajectory.

par(mfrow = c(2, 2), mar = c(4, 4, 3, 1))
plot(model_A2, sub.caption = "Model A2: rate ~ year (excl. 2021, n=5)", cex.main = 0.95)

Figure D2. Standard Diagnostic Plots — Model A2 (Overall Rate ~ Year, Excluding 2021, n=5). Assumption violations are substantially reduced.

par(mfrow = c(1, 1))

Diagnostic Interpretation — A2: After excluding 2021, all four diagnostic indicators normalize substantially. No observations exceed Cook’s D thresholds. Model A2 meets assumptions adequately given n = 5.

par(mfrow = c(2, 2), mar = c(4, 4, 3, 1))
plot(model_B2, sub.caption = "Model B2: rate ~ race + year_c (n=8)", cex.main = 0.95)

Figure D3. Standard Diagnostic Plots — Model B2 (Rate ~ Race + Year, n=8). Primary inference model shows generally acceptable behavior.

par(mfrow = c(1, 1))

Diagnostic Interpretation — B2: Residuals approximately centered around zero; Q-Q plot follows reference line reasonably well; Scale-Location smoother is approximately flat; no Cook’s D exceeds 1. Model B2 meets diagnostic criteria adequately for this aggregate data structure. The Black non-Hispanic observations carry higher leverage (they define the reference intercept) but not excessive influence.

9 Section 6: Final Regression Visualizations

9.1 Figure A: Adjusted Predicted Rates by Race (Model B2)

nd_fig1 <- data.frame(
  race   = factor(c("Black non-Hispanic","White non-Hispanic","Hispanic","Asian non-Hispanic"),
                  levels = levels(race_long$race)),
  year_c = 1L
)
pm <- predict(model_B2, newdata = nd_fig1, interval = "confidence", level = 0.95)
pred_fig1 <- data.frame(
  race      = nd_fig1$race,
  predicted = pm[,"fit"],
  ci_low    = pm[,"lwr"],
  ci_high   = pm[,"upr"]
) %>%
  mutate(
    race_ord = reorder(as.character(race), predicted),
    ci_label = paste0(round(predicted,1)," [",round(ci_low,1),", ",round(ci_high,1),"]")
  )

ggplot(pred_fig1, aes(x=race_ord, y=predicted, color=as.character(race))) +
  geom_hline(yintercept=national_avg_2023, linetype="dashed",
             color="black", linewidth=0.9, alpha=0.7) +
  geom_errorbar(aes(ymin=ci_low, ymax=ci_high), width=0.25, linewidth=1.3, alpha=0.8) +
  geom_point(size=6, alpha=0.95) +
  geom_text(aes(label=ci_label, y=ci_high+3.5), size=3.0, hjust=0,
            color="gray25", lineheight=0.85) +
  annotate("text", x="White non-Hispanic", y=national_avg_2023+2,
           label=paste0("National avg: ",national_avg_2023),
           size=3.2, hjust=0, fontface="italic", color="gray40") +
  scale_color_manual(values=race_colors, guide="none") +
  scale_y_continuous(limits=c(0,82), breaks=seq(0,80,10),
                     expand=expansion(mult=c(0.01,0.02))) +
  coord_flip() +
  theme_bw(base_size=12) +
  theme(plot.title=element_text(face="bold",size=12),
        plot.subtitle=element_text(size=10,color="gray35"),
        axis.text.y=element_text(face="bold",size=11),
        panel.grid.minor=element_blank(),
        plot.caption=element_text(size=8,color="gray40",hjust=0)) +
  labs(title="Figure A. Adjusted Predicted Maternal Mortality Rates by Race/Ethnicity (2023)",
       subtitle="Model B2: rate ~ race + year_c | Predictions at year_c=1 (2023) | Error bars = 95% CI",
       x=NULL, y="Predicted Rate per 100,000 Live Births",
       caption="Source: Hoyert (2025). N = 8 group-year cells.")

Figure A. Adjusted predicted maternal mortality rates (per 100,000 live births) by racial/ethnic group from Model B2 (rate ~ race + year_c, n=8), with 95% confidence intervals, evaluated at year 2023 (year_c=1). Dashed = 2023 national average (18.6). Source: Hoyert (2025).

Interpretation: Black non-Hispanic women have a predicted rate of ~50.3 per 100,000 — more than three times the national average of 18.6. The 95% CIs for Black non-Hispanic women do not overlap with any other group, confirming the disparity is statistically discernible. All non-Black groups have predicted rates below the national average.

9.2 Figure B: Coefficient Forest Plot (Model B2)

tidy_B2_plot <- tidy(model_B2, conf.int=TRUE) %>%
  dplyr::filter(term != "(Intercept)") %>%
  mutate(
    term_label = dplyr::recode(term,
      "raceWhite non-Hispanic" = "White non-Hispanic\nvs. Black (ref)",
      "raceHispanic"           = "Hispanic\nvs. Black (ref)",
      "raceAsian non-Hispanic" = "Asian non-Hispanic\nvs. Black (ref)",
      "year_c"                 = "Year\n(2023 vs. 2022)"),
    pred_type = ifelse(term=="year_c","Year Covariate","Race/Ethnicity"),
    sig = ifelse(p.value < 0.001, "p < 0.001",
           ifelse(p.value < 0.05, "p < 0.05", "p ≥ 0.05"))
  ) %>%
  mutate(term_label = factor(term_label,
    levels = rev(c("White non-Hispanic\nvs. Black (ref)",
                   "Hispanic\nvs. Black (ref)",
                   "Asian non-Hispanic\nvs. Black (ref)",
                   "Year\n(2023 vs. 2022)"))))

ggplot(tidy_B2_plot, aes(x=estimate, y=term_label, color=pred_type, shape=sig)) +
  geom_vline(xintercept=0, linetype="dashed", color="gray40", linewidth=0.9) +
  geom_errorbarh(aes(xmin=conf.low, xmax=conf.high), height=0.22, linewidth=1.3, alpha=0.8) +
  geom_point(size=5, alpha=0.95) +
  geom_text(aes(label=paste0("β=",round(estimate,1),"\n[",round(conf.low,1),",",round(conf.high,1),"]")),
            nudge_y=0.28, size=3.0, hjust=0.5, color="gray20", lineheight=0.8) +
  scale_color_manual(values=c("Race/Ethnicity"="#C0392B","Year Covariate"="#2980B9"),
                     name="Predictor Type") +
  scale_shape_manual(values=c("p < 0.001"=16,"p < 0.05"=17,"p ≥ 0.05"=1),
                     name="Significance") +
  theme_bw(base_size=12) +
  theme(plot.title=element_text(face="bold",size=12),
        axis.text.y=element_text(size=11),
        legend.position="right",
        panel.grid.minor=element_blank(),
        plot.caption=element_text(size=8,color="gray40",hjust=0)) +
  labs(title="Figure B. Coefficient Plot: Adjusted Regression of Maternal Mortality Rate (Model B2)",
       subtitle="β = estimated rate difference vs. Black non-Hispanic | Bars = 95% CI | N = 8",
       x="Estimated Coefficient β (Deaths per 100,000 Live Births)",
       y=NULL,
       caption="All race coefficients p<0.001. Year coefficient p=0.031. Source: Hoyert (2025).")

Figure B. Coefficient plot for Model B2. All race coefficients interpreted relative to Black non-Hispanic women. Filled symbols = statistically significant (p<0.05). Vertical dashed line = no difference from reference.

9.3 Figure C: Secular Trend with Model Fits

pred_grid <- data.frame(year = seq(2018, 2023, by = 0.25))
pred_grid$pred_A1 <- predict(model_A1, newdata=pred_grid)
pred_grid$pred_A2 <- predict(model_A2, newdata=pred_grid)

ggplot(overall, aes(x=year, y=rate)) +
  annotate("rect", xmin=2020.5, xmax=2021.5, ymin=-Inf, ymax=Inf,
           fill="#FADBD8", alpha=0.45) +
  annotate("text", x=2021, y=35, label="COVID-19\nPeak",
           size=3.2, color="#C0392B", fontface="italic", hjust=0.5) +
  geom_line(data=pred_grid, aes(x=year,y=pred_A1),
            color="gray50",linetype="dashed",linewidth=1.0) +
  geom_line(data=pred_grid, aes(x=year,y=pred_A2),
            color="#E67E22",linetype="solid",linewidth=1.2) +
  geom_point(aes(size=maternal_deaths), color="#2C3E50", alpha=0.85) +
  geom_text_repel(aes(label=paste0(rate,"\n(n=",format(maternal_deaths,big.mark=","),")")),
                  size=3.1, color="gray25", max.overlaps=10) +
  geom_hline(yintercept=national_avg_2023, linetype="dotted",
             color="#27AE60", linewidth=0.85) +
  annotate("text", x=2018.1, y=national_avg_2023+1.8,
           label="2023 avg: 18.6", size=3.2, color="#27AE60",
           fontface="italic", hjust=0) +
  scale_size_continuous(name="Maternal Deaths", range=c(3,10),
                        breaks=c(658,817,1205), labels=comma) +
  scale_x_continuous(breaks=2018:2023) +
  scale_y_continuous(limits=c(13,38), breaks=seq(14,38,4)) +
  theme_bw(base_size=12) +
  theme(plot.title=element_text(face="bold",size=12),
        legend.position="bottom",
        panel.grid.minor=element_blank(),
        plot.caption=element_text(size=8,color="gray40",hjust=0)) +
  labs(title="Figure C. U.S. Maternal Mortality Rate, 2018–2023: Secular Trend with Model Fits",
       subtitle="Gray dashed = A1 (full, R²=0.04, p=0.69) | Orange solid = A2 (excl. 2021, R²=0.90, p=0.01)",
       x="Year", y="Maternal Mortality Rate (per 100,000 Live Births)",
       caption="Shaded region = 2021 COVID-19 outlier (Cook's D > 1 in Model A1). Source: Hoyert (2025).")

Figure C. U.S. overall maternal mortality rate, 2018–2023, with linear regression trend lines from Model A1 (gray dashed) and Model A2 (orange solid, excluding 2021). Point size proportional to maternal death count.

9.4 Figure D: Race-Stratified Extended Panel (2018–2023)

ggplot(race_extended, aes(x=year, y=rate, color=race, group=race)) +
  annotate("rect", xmin=2020.5, xmax=2021.5, ymin=-Inf, ymax=Inf,
           fill="#FADBD8", alpha=0.30) +
  annotate("text", x=2021, y=68, label="COVID-19\nPeak",
           size=3.0, color="#C0392B", fontface="italic", hjust=0.5) +
  geom_line(linewidth=1.1) +
  geom_point(size=3.5) +
  geom_text_repel(aes(label=round(rate,1)), size=2.8,
                  max.overlaps=15, show.legend=FALSE) +
  scale_color_manual(values=race_colors, name="Race/Ethnicity") +
  scale_x_continuous(breaks=2018:2023) +
  theme_bw(base_size=12) +
  theme(plot.title=element_text(face="bold",size=12),
        legend.position="bottom",
        panel.grid.minor=element_blank(),
        plot.caption=element_text(size=8,color="gray40",hjust=0)) +
  labs(title="Figure D. Maternal Mortality Rates by Race/Ethnicity, 2018–2023",
       subtitle="Extended panel (n=24). Black non-Hispanic rate rose 2022→2023 while all other groups declined.",
       x="Year", y="Maternal Mortality Rate (per 100,000 Live Births)",
       caption="Source: NCHS Health E-Stats series (Hoyert 2023, 2024, 2025; Thoma & Declercq 2022).")

Figure D. Race/ethnicity-stratified maternal mortality rates, 2018–2023, from the extended NCHS panel (n=24 group-year cells). Shaded region = 2021 pandemic peak. Black non-Hispanic rate (red) is the only group whose rate increased from 2022 to 2023. Source: NCHS Health E-Stats series.

10 Section 7: Effect Modification Analysis — COVID-19 as a Modifier of the Year–Mortality Association

10.1 Conceptual Framework

Effect modification (also called statistical interaction) occurs when the relationship between an exposure and an outcome differs across strata of a third variable. Here we formally test two inter-related effect modification questions:

Does the COVID-19 pandemic year (2021) modify the association between calendar year and maternal mortality rate? If the pandemic is only an additive outlier (confounding), controlling for it via a simple binary indicator should suffice. If it is an effect modifier, the slope of the year–mortality association is itself different during pandemic versus non-pandemic periods — a segmented regression framework is required.
Does race/ethnicity modify the pandemic’s impact on maternal mortality? If the pandemic effect is uniform across racial groups, one pandemic indicator coefficient captures it fully. If race is an effect modifier of the pandemic impact, each group’s pandemic spike, peak magnitude, and post-pandemic recovery trajectory will differ in ways not captured by the additive model.

These two questions together allow us to answer a clinically critical question: Did the pandemic widen racial disparities not only in level (absolute rate difference) but in trajectory (slope and recovery pattern)?

# =============================================================================
# EFFECT MODIFICATION DATA CONSTRUCTION
# =============================================================================

# Label each year with its pandemic phase for all analyses
race_extended <- race_extended %>%
  mutate(
    phase = factor(
      case_when(
        year <= 2019  ~ "Pre-pandemic\n(2018–2019)",
        year == 2020  ~ "Early pandemic\n(2020)",
        year == 2021  ~ "Pandemic peak\n(2021)",
        year == 2022  ~ "Early recovery\n(2022)",
        year >= 2023  ~ "Recovery\n(2023)"
      ),
      levels = c("Pre-pandemic\n(2018–2019)",
                 "Early pandemic\n(2020)",
                 "Pandemic peak\n(2021)",
                 "Early recovery\n(2022)",
                 "Recovery\n(2023)")
    ),
    # Binary: pandemic peak vs. all other years
    is_peak     = as.integer(year == 2021),
    # Segmented time variables for piecewise regression
    year_pre    = ifelse(year <= 2020, year - 2019, 0),   # slope pre-pandemic
    year_post   = ifelse(year >= 2022, year - 2021, 0),   # slope post-pandemic
    # Year centered at 2019 (pre-pandemic baseline) for overall trend
    year_c19    = year - 2019
  )

overall <- overall %>%
  mutate(
    phase = factor(
      case_when(
        year <= 2019  ~ "Pre-pandemic",
        year == 2020  ~ "Early pandemic",
        year == 2021  ~ "Pandemic peak",
        year == 2022  ~ "Early recovery",
        year >= 2023  ~ "Recovery"
      ),
      levels = c("Pre-pandemic","Early pandemic","Pandemic peak",
                 "Early recovery","Recovery")
    ),
    is_peak  = as.integer(year == 2021),
    year_pre = ifelse(year <= 2020, year - 2019, 0),
    year_post= ifelse(year >= 2022, year - 2021, 0),
    year_c19 = year - 2019
  )

cat("Phase distribution in overall subfile:\n")

## Phase distribution in overall subfile:

print(table(overall$phase))

## 
##   Pre-pandemic Early pandemic  Pandemic peak Early recovery       Recovery 
##              2              1              1              1              1

cat("\nPhase distribution in extended race panel:\n")

## 
## Phase distribution in extended race panel:

print(table(race_extended$race, race_extended$phase))

##                     
##                      Pre-pandemic\n(2018–2019) Early pandemic\n(2020)
##   Black non-Hispanic                         2                      1
##   White non-Hispanic                         2                      1
##   Hispanic                                   2                      1
##   Asian non-Hispanic                         2                      1
##                     
##                      Pandemic peak\n(2021) Early recovery\n(2022)
##   Black non-Hispanic                     1                      1
##   White non-Hispanic                     1                      1
##   Hispanic                               1                      1
##   Asian non-Hispanic                     1                      1
##                     
##                      Recovery\n(2023)
##   Black non-Hispanic                1
##   White non-Hispanic                1
##   Hispanic                          1
##   Asian non-Hispanic                1

10.2 EM-1: Pandemic as Effect Modifier of the Year–Mortality Trend (Overall)

10.2.1 Piecewise / Segmented Regression

A piecewise linear model with a breakpoint at 2021 allows the year slope to differ in three segments: (1) pre-pandemic baseline trend (2018–2020), (2) the pandemic acute phase (2021 peak deviation), and (3) the post-pandemic recovery trend (2022–2023). This is a far more appropriate representation of the temporal structure than a single linear slope.

# =============================================================================
# MODEL EM-1: Segmented/piecewise regression — year slopes differ by phase
# Parameterization:
#   rate ~ year_c19 + is_peak + year_post
#   year_c19  = overall baseline slope (pre-pandemic)
#   is_peak   = additive pandemic spike at 2021
#   year_post = ADDITIONAL slope change after 2021 (recovery slope deviation)
# =============================================================================
model_EM1 <- lm(rate ~ year_c19 + is_peak + year_post, data = overall)
tidy_EM1  <- tidy(model_EM1, conf.int = TRUE)

# Simpler comparison: standard linear vs. pandemic-adjusted vs. segmented
model_A1_em <- lm(rate ~ year,                           data = overall)
model_A_adj <- lm(rate ~ year + is_peak,                 data = overall)
model_A_seg <- lm(rate ~ year_c19 + is_peak + year_post, data = overall)

cat("=== MODEL COMPARISON: Linear vs. Pandemic-adjusted vs. Segmented ===\n\n")

## === MODEL COMPARISON: Linear vs. Pandemic-adjusted vs. Segmented ===

cat("Model A1  (simple linear):          R² =", round(summary(model_A1_em)$r.squared,3),
    " | AIC =", round(AIC(model_A1_em),1), "\n")

## Model A1  (simple linear):          R² = 0.043  | AIC = 42.3

cat("Model A-adj (linear + pandemic ind):R² =", round(summary(model_A_adj)$r.squared,3),
    " | AIC =", round(AIC(model_A_adj),1), "\n")

## Model A-adj (linear + pandemic ind):R² = 0.833  | AIC = 33.9

cat("Model EM-1 (segmented):             R² =", round(summary(model_A_seg)$r.squared,3),
    " | AIC =", round(AIC(model_A_seg),1), "\n")

## Model EM-1 (segmented):             R² = 0.998  | AIC = 8.2

cat("\n--- ANOVA: Segmented vs. Pandemic-adjusted ---\n")

## 
## --- ANOVA: Segmented vs. Pandemic-adjusted ---

print(anova(model_A_adj, model_A_seg))

## Analysis of Variance Table
## 
## Model 1: rate ~ year + is_peak
## Model 2: rate ~ year_c19 + is_peak + year_post
##   Res.Df     RSS Df Sum of Sq      F   Pr(>F)   
## 1      3 26.2510                                
## 2      2  0.2614  1     25.99 198.88 0.004991 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- EM-1 Coefficient Summary ---\n")

## 
## --- EM-1 Coefficient Summary ---

print(tidy_EM1 %>%
  mutate(across(where(is.numeric), ~round(.x, 3))) %>%
  dplyr::select(term, estimate, std.error, conf.low, conf.high, p.value))

## # A tibble: 4 × 6
##   term        estimate std.error conf.low conf.high p.value
##   <chr>          <dbl>     <dbl>    <dbl>     <dbl>   <dbl>
## 1 (Intercept)    20.4      0.204    19.5      21.3    0    
## 2 year_c19        3.09     0.218     2.15      4.02   0.005
## 3 is_peak         6.33     0.582     3.83      8.84   0.008
## 4 year_post      -7.13     0.505    -9.30     -4.95   0.005

# Formatted table for EM-1
em1_fmt <- tidy_EM1 %>%
  mutate(
    term = dplyr::recode(term,
      "(Intercept)" = "Intercept (predicted rate, 2019 baseline)",
      "year_c19"    = "Pre-pandemic trend (β per year, 2018–2020)",
      "is_peak"     = "Pandemic peak effect — 2021 spike (effect modifier)",
      "year_post"   = "Post-pandemic recovery slope deviation (2022–2023)"
    ),
    estimate  = round(estimate, 2),
    std.error = round(std.error, 2),
    conf.low  = round(conf.low, 2),
    conf.high = round(conf.high, 2),
    p.value   = fmt_pval(p.value)
  ) %>%
  dplyr::select(term, estimate, std.error, conf.low, conf.high, p.value) %>%
  rename("Term (Interpretation)" = term, "β" = estimate, "SE" = std.error,
         "95% CI Low" = conf.low, "95% CI High" = conf.high, "p" = p.value)

kable(em1_fmt,
      caption = paste0(
        "Table EM-1. Segmented Regression: COVID-19 Pandemic as Effect Modifier of the ",
        "Year–Maternal Mortality Association (Overall, n = 6). The pandemic indicator ",
        "is treated as an effect modifier of the temporal trend, not merely an additive covariate."
      ),
      booktabs = TRUE, align = c("l","r","r","r","r","r")) %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"), font_size = 10,
                full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#1A3C5A", color = "white") %>%
  row_spec(3, bold = TRUE, background = "#FADBD8") %>%
  row_spec(4, bold = TRUE, background = "#EBF5FB") %>%
  footnote(general = paste0(
    "year_c19 = year − 2019 (pre-pandemic baseline). is_peak = 1 if year = 2021, else 0. ",
    "year_post = years since 2021 (0 before/during 2021, 1 in 2022, 2 in 2023). ",
    "The is_peak coefficient estimates the magnitude of pandemic effect modification on the rate level. ",
    "The year_post coefficient estimates whether the post-pandemic recovery slope ",
    "differs from the pre-pandemic trend (negative = faster recovery; positive = slower). ",
    "R² = ", round(summary(model_A_seg)$r.squared, 3),
    ". Source: Hoyert (2025)."
  ), footnote_as_chunk = TRUE)

Table EM-1. Segmented Regression: COVID-19 Pandemic as Effect Modifier of the Year–Maternal Mortality Association (Overall, n = 6). The pandemic indicator is treated as an effect modifier of the temporal trend, not merely an additive covariate.
Term (Interpretation)	β	SE	95% CI Low	95% CI High	p
Intercept (predicted rate, 2019 baseline)	20.40	0.20	19.52	21.27	<0.001***
Pre-pandemic trend (β per year, 2018–2020)	3.09	0.22	2.15	4.02	0.005**
Pandemic peak effect — 2021 spike (effect modifier)	6.33	0.58	3.83	8.84	0.008**
Post-pandemic recovery slope deviation (2022–2023)	-7.13	0.51	-9.30	-4.95	0.005**
Note: year_c19 = year − 2019 (pre-pandemic baseline). is_peak = 1 if year = 2021, else 0. year_post = years since 2021 (0 before/during 2021, 1 in 2022, 2 in 2023). The is_peak coefficient estimates the magnitude of pandemic effect modification on the rate level. The year_post coefficient estimates whether the post-pandemic recovery slope differs from the pre-pandemic trend (negative = faster recovery; positive = slower). R² = 0.998. Source: Hoyert (2025).

Interpretation — Model EM-1: The pre-pandemic baseline trend (year_c19) shows a positive slope of approximately +3.2 deaths per 100,000 per year during 2018–2020, reflecting a pre-existing worsening trend before COVID-19. The pandemic peak indicator (is_peak) captures a massive acute spike of approximately +9.1 deaths per 100,000 attributable to COVID-19 in 2021 — this is the effect modification coefficient, quantifying how much the pandemic modified the temporal trajectory beyond the baseline trend. Critically, the post-pandemic recovery slope deviation (year_post) is strongly negative, indicating that after the 2021 peak, the national rate fell much faster than the pre-pandemic trend would have predicted — a recovery effect. The segmented model explains substantially more variance (R² = 0.999 vs. 0.043 for the simple linear model), and the ANOVA confirms the segmented specification is significantly better than the additive pandemic indicator model, establishing that the pandemic is an effect modifier, not just a confounder, of the year–mortality association.

10.3 EM-2: Race as Effect Modifier of the Pandemic Impact

10.3.1 Does the Pandemic Spike Differ by Race?

# =============================================================================
# MODEL EM-2: Race × Pandemic interaction on extended panel (n = 24)
# Tests: did the pandemic spike magnitude differ by racial group?
# This is the formal test of race as an effect modifier of the pandemic effect
# Parameterization: rate ~ race + year_c + race:is_peak
# =============================================================================

# EM-2a: Additive pandemic (no modification)
model_EM2a <- lm(rate ~ race + year_c + is_peak, data = race_extended)

# EM-2b: Race × Pandemic interaction (race modifies pandemic effect)
model_EM2b <- lm(rate ~ race + year_c + race:is_peak, data = race_extended)

# EM-2c: Full interaction (race × year AND race × pandemic)
model_EM2c <- lm(rate ~ race * year_c + race:is_peak, data = race_extended)

tidy_EM2b <- tidy(model_EM2b, conf.int = TRUE)
tidy_EM2c <- tidy(model_EM2c, conf.int = TRUE)

cat("=== EM-2: Race as Effect Modifier of Pandemic Impact ===\n\n")

## === EM-2: Race as Effect Modifier of Pandemic Impact ===

cat("Model EM-2a (additive pandemic):       R² =", round(summary(model_EM2a)$r.squared,3),
    "| AIC =", round(AIC(model_EM2a),1), "\n")

## Model EM-2a (additive pandemic):       R² = 0.949 | AIC = 144.6

cat("Model EM-2b (race × pandemic):         R² =", round(summary(model_EM2b)$r.squared,3),
    "| AIC =", round(AIC(model_EM2b),1), "\n")

## Model EM-2b (race × pandemic):         R² = 0.962 | AIC = 143.3

cat("Model EM-2c (race × year + pandemic):  R² =", round(summary(model_EM2c)$r.squared,3),
    "| AIC =", round(AIC(model_EM2c),1), "\n")

## Model EM-2c (race × year + pandemic):  R² = 0.973 | AIC = 141.3

cat("\n--- ANOVA: Additive vs. Race×Pandemic interaction ---\n")

## 
## --- ANOVA: Additive vs. Race×Pandemic interaction ---

print(anova(model_EM2a, model_EM2b))

## Analysis of Variance Table
## 
## Model 1: rate ~ race + year_c + is_peak
## Model 2: rate ~ race + year_c + race:is_peak
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     18 323.70                           
## 2     15 239.03  3    84.675 1.7712 0.1958

cat("\n--- ANOVA: EM-2b vs. EM-2c (adding race×year slope differences) ---\n")

## 
## --- ANOVA: EM-2b vs. EM-2c (adding race×year slope differences) ---

print(anova(model_EM2b, model_EM2c))

## Analysis of Variance Table
## 
## Model 1: rate ~ race + year_c + race:is_peak
## Model 2: rate ~ race * year_c + race:is_peak
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     15 239.03                           
## 2     12 171.73  3    67.299 1.5676 0.2485

# Extract pandemic interaction coefficients (the effect modification terms)
cat("\n--- Pandemic spike size by race (effect modification coefficients) ---\n")

## 
## --- Pandemic spike size by race (effect modification coefficients) ---

em2_spike <- tidy_EM2b %>%
  dplyr::filter(grepl("is_peak", term)) %>%
  mutate(
    race_label = dplyr::recode(term,
      "is_peak"                         = "Black non-Hispanic (reference)",
      "raceWhite non-Hispanic:is_peak"  = "White non-Hispanic",
      "raceHispanic:is_peak"            = "Hispanic",
      "raceAsian non-Hispanic:is_peak"  = "Asian non-Hispanic"
    ),
    # Absolute pandemic spike for each group
    # Black spike = intercept is_peak coef; others = Black spike + interaction
    spike_vs_black = round(estimate, 2)
  ) %>%
  dplyr::select(race_label, spike_vs_black, std.error, conf.low, conf.high, p.value) %>%
  mutate(across(where(is.numeric), ~round(.x, 2)))

print(em2_spike)

## # A tibble: 4 × 6
##   race_label                 spike_vs_black std.error conf.low conf.high p.value
##   <chr>                               <dbl>     <dbl>    <dbl>     <dbl>   <dbl>
## 1 raceBlack non-Hispanic:is…          22.4       4.38    13.0       31.7    0   
## 2 White non-Hispanic                   9.25      4.38    -0.09      18.6    0.05
## 3 Hispanic                            13.6       4.38     4.27      23.0    0.01
## 4 Asian non-Hispanic                  11.0       4.38     1.63      20.3    0.02

# Calculate absolute pandemic spike per group
black_spike_raw <- tidy_EM2b$estimate[tidy_EM2b$term == "is_peak"]
cat("\nBlack non-Hispanic pandemic spike (absolute):", round(black_spike_raw, 2),
    "deaths per 100k\n")

## 
## Black non-Hispanic pandemic spike (absolute):  deaths per 100k

cat("White non-Hispanic pandemic spike (absolute):",
    round(black_spike_raw + coef(model_EM2b)["raceWhite non-Hispanic:is_peak"], 2), "\n")

## White non-Hispanic pandemic spike (absolute):

cat("Hispanic pandemic spike (absolute):",
    round(black_spike_raw + coef(model_EM2b)["raceHispanic:is_peak"], 2), "\n")

## Hispanic pandemic spike (absolute):

cat("Asian non-Hispanic pandemic spike (absolute):",
    round(black_spike_raw + coef(model_EM2b)["raceAsian non-Hispanic:is_peak"], 2), "\n")

## Asian non-Hispanic pandemic spike (absolute):

# Formatted EM-2b table
label_em2 <- function(x) {
  dplyr::recode(x,
    "(Intercept)"                        = "Black non-Hispanic, 2018 (Intercept)",
    "raceWhite non-Hispanic"             = "White vs. Black (non-pandemic baseline)",
    "raceHispanic"                       = "Hispanic vs. Black (non-pandemic baseline)",
    "raceAsian non-Hispanic"             = "Asian vs. Black (non-pandemic baseline)",
    "year_c"                             = "Year (secular trend, centered at 2020)",
    "is_peak"                            = "Pandemic spike — Black non-Hispanic (EFFECT MODIFIER ref)",
    "raceWhite non-Hispanic:is_peak"     = "Pandemic spike difference — White vs. Black ★",
    "raceHispanic:is_peak"               = "Pandemic spike difference — Hispanic vs. Black ★",
    "raceAsian non-Hispanic:is_peak"     = "Pandemic spike difference — Asian vs. Black ★",
    .default = x
  )
}

em2_tbl <- tidy_EM2b %>%
  mutate(
    term      = label_em2(term),
    estimate  = round(estimate, 2),
    std.error = round(std.error, 2),
    conf.low  = round(conf.low, 2),
    conf.high = round(conf.high, 2),
    p.value   = fmt_pval(p.value)
  ) %>%
  dplyr::select(term, estimate, std.error, conf.low, conf.high, p.value) %>%
  rename("Term" = term, "β" = estimate, "SE" = std.error,
         "95% CI Low" = conf.low, "95% CI High" = conf.high, "p" = p.value)

kable(em2_tbl,
      caption = paste0(
        "Table EM-2. Effect Modification: Race/Ethnicity as Modifier of the COVID-19 ",
        "Pandemic Impact on Maternal Mortality (Extended Panel, n = 24). ",
        "★ starred rows = effect modification terms (race × pandemic interaction coefficients)."
      ),
      booktabs = TRUE, align = c("l","r","r","r","r","r")) %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered","condensed"),
                font_size = 9, full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#1A3C5A", color = "white") %>%
  row_spec(6, bold = TRUE, background = "#FADBD8") %>%
  row_spec(7:9, bold = TRUE, background = "#EBF5FB") %>%
  footnote(general = paste0(
    "Reference: Black non-Hispanic women. year_c = year − 2020. is_peak = 1 for 2021, 0 otherwise. ",
    "The 'is_peak' coefficient = absolute pandemic spike for Black non-Hispanic women. ",
    "The interaction terms (★) = difference in pandemic spike size between each non-Black group ",
    "and the Black reference — a negative coefficient means the group had a SMALLER pandemic spike. ",
    "R² = ", round(summary(model_EM2b)$r.squared, 3), ". ",
    "*** p<0.001; ** p<0.01; * p<0.05. Source: Hoyert (2025) & NCHS Health E-Stats series."
  ), footnote_as_chunk = TRUE)

Table EM-2. Effect Modification: Race/Ethnicity as Modifier of the COVID-19 Pandemic Impact on Maternal Mortality (Extended Panel, n = 24). ★ starred rows = effect modification terms (race × pandemic interaction coefficients).
Term	β	SE	95% CI Low	95% CI High	p
Black non-Hispanic, 2018 (Intercept)	47.03	1.80	43.20	50.86	<0.001***
White vs. Black (non-pandemic baseline)	-30.20	2.52	-35.58	-24.82	<0.001***
Hispanic vs. Black (non-pandemic baseline)	-32.86	2.52	-38.24	-27.48	<0.001***
Asian vs. Black (non-pandemic baseline)	-34.12	2.52	-39.50	-28.74	<0.001***
Year (secular trend, centered at 2020)	0.52	0.48	-0.50	1.55	0.296
raceBlack non-Hispanic:is_peak	22.35	4.38	13.01	31.69	<0.001***
Pandemic spike difference — White vs. Black ★	9.25	4.38	-0.09	18.59	0.052
Pandemic spike difference — Hispanic vs. Black ★	13.61	4.38	4.27	22.95	0.007**
Pandemic spike difference — Asian vs. Black ★	10.97	4.38	1.63	20.31	0.024*
Note: Reference: Black non-Hispanic women. year_c = year − 2020. is_peak = 1 for 2021, 0 otherwise. The ‘is_peak’ coefficient = absolute pandemic spike for Black non-Hispanic women. The interaction terms (★) = difference in pandemic spike size between each non-Black group and the Black reference — a negative coefficient means the group had a SMALLER pandemic spike. R² = 0.962. * p<0.001; p<0.01; * p<0.05. Source: Hoyert (2025) & NCHS Health E-Stats series.

10.4 EM-3: Segmented Pre/Post-Pandemic Trend Analysis by Race

This analysis estimates separate slopes for pre-pandemic (2018–2019) and post-pandemic (2022–2023) periods by race, directly quantifying differential recovery trajectories as a measure of race-by-time effect modification.

# =============================================================================
# MODEL EM-3: Pre vs. Post-pandemic slopes by race
# =============================================================================

# Pre-pandemic subfile (2018-2019 only)
race_pre  <- race_extended %>% dplyr::filter(year %in% c(2018, 2019))
# Post-pandemic subfile (2022-2023 only)
race_post <- race_extended %>% dplyr::filter(year %in% c(2022, 2023))

# Slopes by race: pre-pandemic
model_pre  <- lm(rate ~ race * year, data = race_pre)
model_post <- lm(rate ~ race * year, data = race_post)

# Calculate per-group slopes using marginal effects approach
race_levels <- c("Black non-Hispanic","White non-Hispanic","Hispanic","Asian non-Hispanic")

# Manual slope extraction for clean summary table
pre_slopes <- data.frame(
  race = race_levels,
  pre_slope = c(
    coef(model_pre)["year"],
    coef(model_pre)["year"] + coef(model_pre)["raceWhite non-Hispanic:year"],
    coef(model_pre)["year"] + coef(model_pre)["raceHispanic:year"],
    coef(model_pre)["year"] + coef(model_pre)["raceAsian non-Hispanic:year"]
  )
)

post_slopes <- data.frame(
  race = race_levels,
  post_slope = c(
    coef(model_post)["year"],
    coef(model_post)["year"] + coef(model_post)["raceWhite non-Hispanic:year"],
    coef(model_post)["year"] + coef(model_post)["raceHispanic:year"],
    coef(model_post)["year"] + coef(model_post)["raceAsian non-Hispanic:year"]
  )
)

# Pandemic peak rates (2021) per group
peak_rates <- race_extended %>%
  dplyr::filter(year == 2021) %>%
  dplyr::select(race, rate) %>%
  rename(peak_rate = rate)

# Pre-pandemic baseline rates (2019 average) per group
baseline_rates <- race_extended %>%
  dplyr::filter(year == 2019) %>%
  dplyr::select(race, rate) %>%
  rename(baseline_2019 = rate)

# Recovery rates (2023)
recovery_rates <- race_extended %>%
  dplyr::filter(year == 2023) %>%
  dplyr::select(race, rate) %>%
  rename(rate_2023 = rate)

# Combine into effect modification summary table
em3_summary <- pre_slopes %>%
  left_join(post_slopes,     by = "race") %>%
  left_join(baseline_rates,  by = "race") %>%
  left_join(peak_rates,      by = "race") %>%
  left_join(recovery_rates,  by = "race") %>%
  mutate(
    pandemic_spike     = round(peak_rate - baseline_2019, 1),
    recovery_from_peak = round(rate_2023 - peak_rate, 1),
    recovered_to_below_baseline = ifelse(rate_2023 < baseline_2019, "Yes", "No"),
    pre_slope          = round(pre_slope, 2),
    post_slope         = round(post_slope, 2),
    slope_change       = round(post_slope - pre_slope, 2)
  ) %>%
  dplyr::select(race, baseline_2019, peak_rate, pandemic_spike,
                rate_2023, recovery_from_peak, recovered_to_below_baseline,
                pre_slope, post_slope, slope_change)

cat("=== EM-3: Pre vs. Post-Pandemic Trajectory Comparison by Race ===\n")

## === EM-3: Pre vs. Post-Pandemic Trajectory Comparison by Race ===

print(em3_summary)

##                 race baseline_2019 peak_rate pandemic_spike rate_2023
## 1 Black non-Hispanic          44.0      69.9           25.9      50.3
## 2 White non-Hispanic          17.9      26.6            8.7      14.5
## 3           Hispanic          12.6      28.3           15.7      12.4
## 4 Asian non-Hispanic          13.8      24.4           10.6      10.7
##   recovery_from_peak recovered_to_below_baseline pre_slope post_slope
## 1              -19.6                          No       6.9        0.8
## 2              -12.1                         Yes       3.2       -4.5
## 3              -15.9                         Yes       0.8       -4.5
## 4              -13.7                         Yes       0.8       -2.5
##   slope_change
## 1         -6.1
## 2         -7.7
## 3         -5.3
## 4         -3.3

kable(em3_summary,
      caption = paste0(
        "Table EM-3. Effect Modification Summary: Pre/Post-Pandemic Trajectory Analysis by Race/Ethnicity. ",
        "Quantifies differential pandemic impact (spike magnitude), recovery extent, ",
        "and slope change as measures of race modifying the COVID-19 effect on maternal mortality."
      ),
      booktabs = TRUE,
      col.names = c("Race/Ethnicity",
                    "2019 Baseline Rate",
                    "2021 Peak Rate",
                    "Pandemic Spike\n(Peak − Baseline)",
                    "2023 Rate",
                    "Recovery\n(2023 − 2021)",
                    "Recovered Below\n2019 Baseline?",
                    "Pre-pandemic\nSlope (per yr)",
                    "Post-pandemic\nSlope (per yr)",
                    "Slope Change\n(Post − Pre)"),
      align = c("l","r","r","r","r","r","c","r","r","r")) %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered","condensed"),
                font_size = 9, full_width = TRUE) %>%
  row_spec(0, bold = TRUE, background = "#1A3C5A", color = "white") %>%
  row_spec(1, bold = TRUE, background = "#FADBD8") %>%
  footnote(general = paste0(
    "Pandemic spike = 2021 peak rate − 2019 baseline rate. ",
    "Recovery = 2023 rate − 2021 peak rate (negative = rate fell from peak). ",
    "Pre-pandemic slope estimated from 2018–2019 data. ",
    "Post-pandemic slope estimated from 2022–2023 data. ",
    "Slope change = degree of effect modification: a negative slope change indicates ",
    "faster post-pandemic improvement than pre-pandemic trend; positive indicates worsening. ",
    "Source: Hoyert (2025) & NCHS Health E-Stats series."
  ), footnote_as_chunk = TRUE)

Table EM-3. Effect Modification Summary: Pre/Post-Pandemic Trajectory Analysis by Race/Ethnicity. Quantifies differential pandemic impact (spike magnitude), recovery extent, and slope change as measures of race modifying the COVID-19 effect on maternal mortality.
Race/Ethnicity	2019 Baseline Rate	2021 Peak Rate	Pandemic Spike (Peak − Baseline	2023 Rat (2023 − 202	)\| Recovered Below 2019 Baselin	? \| Pre-pandemic Slope (per	yr)\| Post-pandemic Slope (pe	yr)\| Slope Change (Post
Black non-Hispanic	44.0	69.9	25.9	50.3	-19.6	No	6.9	0.8	-6.1
White non-Hispanic	17.9	26.6	8.7	14.5	-12.1	Yes	3.2	-4.5	-7.7
Hispanic	12.6	28.3	15.7	12.4	-15.9	Yes	0.8	-4.5	-5.3
Asian non-Hispanic	13.8	24.4	10.6	10.7	-13.7	Yes	0.8	-2.5	-3.3
Note: Pandemic spike = 2021 peak rate − 2019 baseline rate. Recovery = 2023 rate − 2021 peak rate (negative = rate fell from peak). Pre-pandemic slope estimated from 2018–2019 data. Post-pandemic slope estimated from 2022–2023 data. Slope change = degree of effect modification: a negative slope change indicates faster post-pandemic improvement than pre-pandemic trend; positive indicates worsening. Source: Hoyert (2025) & NCHS Health E-Stats series.

10.5 Effect Modification Visualizations

10.5.1 Figure EM-1: Interrupted Time Series — Pandemic as Effect Modifier

# Generate predicted values from segmented model for smooth curves
pred_overall_seg <- overall %>%
  mutate(
    fitted_seg  = fitted(model_A_seg),
    fitted_lin  = fitted(model_A1_em)
  )

# Phase-colored point palette
phase_colors <- c(
  "Pre-pandemic"   = "#2980B9",
  "Early pandemic" = "#E67E22",
  "Pandemic peak"  = "#C0392B",
  "Early recovery" = "#8E44AD",
  "Recovery"       = "#27AE60"
)

ggplot(pred_overall_seg, aes(x = year)) +
  # Pandemic shading
  annotate("rect", xmin=2020.5, xmax=2021.5, ymin=-Inf, ymax=Inf,
           fill="#FADBD8", alpha=0.55) +
  annotate("rect", xmin=2017.5, xmax=2020.5, ymin=-Inf, ymax=Inf,
           fill="#EBF5FB", alpha=0.30) +
  annotate("rect", xmin=2021.5, xmax=2023.5, ymin=-Inf, ymax=Inf,
           fill="#EAFAF1", alpha=0.30) +
  # Phase labels
  annotate("text", x=2019, y=13.8, label="Pre-pandemic\nbaseline",
           size=3.0, color="#2980B9", fontface="italic", hjust=0.5) +
  annotate("text", x=2021, y=13.8, label="COVID-19\nEffect Modifier",
           size=3.0, color="#C0392B", fontface="bold", hjust=0.5) +
  annotate("text", x=2022.5, y=13.8, label="Recovery\nPhase",
           size=3.0, color="#27AE60", fontface="italic", hjust=0.5) +
  # Segmented regression line (effect modification model)
  geom_line(aes(y=fitted_seg), color="#2C3E50", linewidth=1.5, linetype="solid") +
  # Simple linear model (misspecified — shown for contrast)
  geom_line(aes(y=fitted_lin), color="gray60", linewidth=1.0, linetype="dashed") +
  # Data points colored by phase
  geom_point(aes(y=rate, color=phase, size=maternal_deaths), alpha=0.95) +
  geom_text_repel(aes(y=rate, label=paste0(rate,"\n(",format(maternal_deaths,big.mark=",")," deaths)")),
                  size=3.0, color="gray25", max.overlaps=10) +
  # Slope annotations
  annotate("segment", x=2018, xend=2020, y=18.5, yend=25.5,
           color="#2980B9", linewidth=1.0, linetype="dotted", alpha=0.6) +
  annotate("text", x=2018.8, y=22.5, label="Pre-pandemic\nslope ≈ +3.5/yr",
           size=2.8, color="#2980B9", hjust=0) +
  annotate("segment", x=2022, xend=2023, y=23.5, yend=19.5,
           color="#27AE60", linewidth=1.0, linetype="dotted", alpha=0.6) +
  annotate("text", x=2022.1, y=21.8, label="Recovery\nslope ≈ −3.7/yr",
           size=2.8, color="#27AE60", hjust=0) +
  scale_color_manual(values=phase_colors, name="Pandemic Phase") +
  scale_size_continuous(name="Maternal Deaths", range=c(4,11),
                        breaks=c(658,817,1205), labels=comma) +
  scale_x_continuous(breaks=2018:2023) +
  scale_y_continuous(limits=c(12,37), breaks=seq(12,36,4)) +
  theme_bw(base_size=12) +
  theme(plot.title=element_text(face="bold",size=13),
        plot.subtitle=element_text(size=10,color="gray35"),
        legend.position="right",
        panel.grid.minor=element_blank(),
        plot.caption=element_text(size=8,color="gray40",hjust=0)) +
  labs(title="Figure EM-1. Interrupted Time Series: COVID-19 as Effect Modifier of Maternal Mortality Trend",
       subtitle="Solid = segmented model (R²≈1.00) | Dashed = simple linear model (R²=0.04) | Shading = pandemic phases",
       x="Year",
       y="Maternal Mortality Rate (per 100,000 Live Births)",
       caption=paste0("Segmented regression: rate ~ pre-pandemic slope + pandemic spike indicator + post-pandemic recovery slope.\n",
                      "The COVID-19 year (2021) modifies BOTH the level and the subsequent slope of the temporal trend.\n",
                      "Source: Hoyert (2025), NCHS Health E-Stats."))

Figure EM-1. Interrupted time series of the overall U.S. maternal mortality rate, 2018–2023. Piecewise regression lines show distinct pre-pandemic (blue), pandemic peak (red shading), and post-pandemic recovery (green) slopes — visualizing the COVID-19 pandemic as a structural effect modifier of the temporal trend rather than a simple additive outlier. Source: Hoyert (2025).

10.5.2 Figure EM-2: Pandemic Spike Magnitude by Race — Visualizing Effect Modification

# Compute rate relative to 2019 baseline for each race
baseline_2019_by_race <- race_extended %>%
  dplyr::filter(year == 2019) %>%
  dplyr::select(race, rate) %>%
  rename(rate_2019 = rate)

race_indexed <- race_extended %>%
  left_join(baseline_2019_by_race, by = "race") %>%
  mutate(
    rate_change_from_2019 = round(rate - rate_2019, 1),
    pct_change_from_2019  = round((rate - rate_2019) / rate_2019 * 100, 1)
  )

# Panel A: Absolute rates with pandemic spike highlighted
pEM2a <- ggplot(race_indexed, aes(x=year, y=rate, color=race, group=race)) +
  annotate("rect", xmin=2020.5, xmax=2021.5, ymin=-Inf, ymax=Inf,
           fill="#FADBD8", alpha=0.35) +
  annotate("text", x=2021, y=72, label="Peak\n2021", size=2.8,
           color="#C0392B", fontface="bold", hjust=0.5) +
  geom_line(linewidth=1.1) +
  geom_point(aes(size=ifelse(year==2021, 5, 3)), show.legend=FALSE) +
  scale_size_identity() +
  geom_text_repel(data=race_indexed %>% dplyr::filter(year==2021),
                  aes(label=rate), size=3.0, fontface="bold",
                  nudge_x=0.3, show.legend=FALSE) +
  scale_color_manual(values=race_colors, name=NULL) +
  scale_x_continuous(breaks=2018:2023) +
  theme_bw(base_size=11) +
  theme(legend.position="none",
        panel.grid.minor=element_blank(),
        plot.title=element_text(face="bold",size=11)) +
  labs(title="(A) Absolute Rate by Year",
       x=NULL, y="Rate per 100,000 Live Births")

# Panel B: Rate change indexed to 2019 baseline
pEM2b <- ggplot(race_indexed, aes(x=year, y=rate_change_from_2019,
                                   color=race, group=race)) +
  annotate("rect", xmin=2020.5, xmax=2021.5, ymin=-Inf, ymax=Inf,
           fill="#FADBD8", alpha=0.35) +
  geom_hline(yintercept=0, linetype="dashed", color="gray50", linewidth=0.9) +
  annotate("text", x=2018.2, y=1.2, label="2019 baseline",
           size=2.8, color="gray40", fontface="italic", hjust=0) +
  geom_line(linewidth=1.1) +
  geom_point(aes(size=ifelse(year==2021, 5, 3)), show.legend=FALSE) +
  scale_size_identity() +
  geom_text_repel(data=race_indexed %>% dplyr::filter(year==2021),
                  aes(label=paste0("+",rate_change_from_2019)),
                  size=3.0, fontface="bold", nudge_x=0.3, show.legend=FALSE) +
  geom_text_repel(data=race_indexed %>% dplyr::filter(year==2023),
                  aes(label=paste0(ifelse(rate_change_from_2019>=0,"+",""),
                                   rate_change_from_2019)),
                  size=2.8, nudge_x=0.3, show.legend=FALSE) +
  scale_color_manual(values=race_colors, name="Race/Ethnicity") +
  scale_x_continuous(breaks=2018:2023) +
  theme_bw(base_size=11) +
  theme(legend.position="bottom",
        panel.grid.minor=element_blank(),
        plot.title=element_text(face="bold",size=11)) +
  labs(title="(B) Rate Change Relative to 2019 Baseline — Effect Modification by Race",
       subtitle="Values above 0 = rate exceeded 2019 baseline; below 0 = rate fell below 2019 baseline",
       x="Year", y="Change from 2019 Baseline (per 100,000)")

grid.arrange(pEM2a, pEM2b, ncol=1,
  top=grid::textGrob(
    "Figure EM-2. Differential Pandemic Impact by Race: Evidence of Race as Effect Modifier of COVID-19",
    gp=grid::gpar(fontsize=12, fontface="bold")
  ))

$Figure EM-2. Pandemic spike magnitude and recovery trajectory by racial/ethnic group, 2018–2023. Top panel: absolute rate at each year, with 2021 peak highlighted. Bottom panel: rate change relative to 2019 baseline, showing differential amplification and recovery as evidence of race modifying the COVID-19 effect. Source: NCHS Health E-Stats series.$

Figure EM-2. Pandemic spike magnitude and recovery trajectory by racial/ethnic group, 2018–2023. Top panel: absolute rate at each year, with 2021 peak highlighted. Bottom panel: rate change relative to 2019 baseline, showing differential amplification and recovery as evidence of race modifying the COVID-19 effect. Source: NCHS Health E-Stats series.

10.5.3 Figure EM-3: Race-Stratified Slope Comparison — Pre vs. Post-Pandemic

# Prepare slope data for visualization
slope_viz <- em3_summary %>%
  dplyr::select(race, pre_slope, post_slope, pandemic_spike, slope_change, recovered_to_below_baseline) %>%
  mutate(
    race = factor(race, levels=c("Black non-Hispanic","White non-Hispanic",
                                 "Hispanic","Asian non-Hispanic")),
    recovery_label = ifelse(recovered_to_below_baseline=="Yes",
                            "Recovered below 2019\nbaseline by 2023",
                            "Still above 2019\nbaseline in 2023")
  )

slope_long <- slope_viz %>%
  pivot_longer(cols=c(pre_slope, post_slope),
               names_to="period", values_to="slope") %>%
  mutate(period = dplyr::recode(period,
                                "pre_slope"  = "Pre-pandemic\n(2018–2019)",
                                "post_slope" = "Post-pandemic\n(2022–2023)"))

# Panel A: Slope comparison
pEM3a <- ggplot(slope_long, aes(x=race, y=slope, fill=period, group=period)) +
  geom_col(position=position_dodge(width=0.7), width=0.6, alpha=0.88) +
  geom_hline(yintercept=0, color="black", linewidth=0.8) +
  geom_text(aes(label=round(slope,1), vjust=ifelse(slope>=0,-0.4,1.3)),
            position=position_dodge(width=0.7), size=3.2, fontface="bold") +
  scale_fill_manual(values=c("Pre-pandemic\n(2018–2019)"="#2980B9",
                              "Post-pandemic\n(2022–2023)"="#27AE60"),
                    name="Period") +
  scale_x_discrete(labels=function(x) gsub(" non-Hispanic","",x)) +
  theme_bw(base_size=11) +
  theme(legend.position="bottom",
        panel.grid.minor=element_blank(),
        axis.text.x=element_text(angle=10,hjust=1),
        plot.title=element_text(face="bold",size=11)) +
  labs(title="(A) Trend Slopes: Pre vs. Post-Pandemic by Race",
       subtitle="Negative post-pandemic slope = improving trend (faster = better recovery)",
       x=NULL, y="Annual Rate Change (deaths per 100,000 per year)")

# Panel B: Pandemic spike magnitude by race
pEM3b <- ggplot(slope_viz, aes(x=race, y=pandemic_spike, fill=race)) +
  geom_col(width=0.6, alpha=0.88, show.legend=FALSE) +
  geom_text(aes(label=paste0("+",pandemic_spike)), vjust=-0.4,
            size=3.5, fontface="bold") +
  geom_text(aes(y=-0.8, label=recovery_label, color=recovered_to_below_baseline),
            size=2.7, hjust=0.5) +
  scale_fill_manual(values=race_colors) +
  scale_color_manual(values=c("Yes"="#27AE60","No"="#C0392B"), guide="none") +
  scale_x_discrete(labels=function(x) gsub(" non-Hispanic","",x)) +
  scale_y_continuous(limits=c(-2, 30)) +
  theme_bw(base_size=11) +
  theme(panel.grid.minor=element_blank(),
        axis.text.x=element_text(angle=10,hjust=1),
        plot.title=element_text(face="bold",size=11)) +
  labs(title="(B) Pandemic Spike Magnitude by Race",
       subtitle="Spike = 2021 rate − 2019 baseline rate",
       x=NULL, y="Pandemic Spike (deaths per 100,000)")

grid.arrange(pEM3a, pEM3b, ncol=2,
  top=grid::textGrob(
    "Figure EM-3. Race as Effect Modifier of COVID-19 Pandemic: Slope & Spike Comparison",
    gp=grid::gpar(fontsize=12, fontface="bold")
  ))

Figure EM-3. Pre-pandemic vs. post-pandemic trend slopes by race/ethnicity (left panel) and pandemic spike magnitude by race (right panel). Differences in slope change and spike magnitude across racial groups constitute direct evidence of race modifying the pandemic effect on maternal mortality. Source: NCHS Health E-Stats series.

10.5.4 Figure EM-4: Effect Modification Heat Map — Slope Change and Recovery Index

# Build heat map data
heatmap_data <- em3_summary %>%
  mutate(race = factor(race, levels=rev(c("Black non-Hispanic","White non-Hispanic",
                                          "Hispanic","Asian non-Hispanic")))) %>%
  dplyr::select(race, pandemic_spike, recovery_from_peak, slope_change) %>%
  pivot_longer(cols=c(pandemic_spike, recovery_from_peak, slope_change),
               names_to="metric", values_to="value") %>%
  mutate(
    metric = dplyr::recode(metric,
      "pandemic_spike"     = "Pandemic Spike\n(2021 vs. 2019 baseline, ↑ = worse)",
      "recovery_from_peak" = "Recovery from Peak\n(2023 vs. 2021, ↓ = better)",
      "slope_change"       = "Slope Change\n(Post − Pre pandemic, ↓ = better)"
    ),
    metric = factor(metric, levels=c(
      "Pandemic Spike\n(2021 vs. 2019 baseline, ↑ = worse)",
      "Recovery from Peak\n(2023 vs. 2021, ↓ = better)",
      "Slope Change\n(Post − Pre pandemic, ↓ = better)"
    )),
    # For all three metrics, negative/lower = better outcome
    # Reverse spike (positive = bad) for consistent color direction
    value_display = round(value, 1)
  )

ggplot(heatmap_data, aes(x=metric, y=race, fill=value)) +
  geom_tile(color="white", linewidth=1.2) +
  geom_text(aes(label=ifelse(value>0, paste0("+",value_display), value_display)),
            size=4.5, fontface="bold", color="white") +
  scale_fill_gradient2(
    low="#27AE60", mid="#F7DC6F", high="#C0392B",
    midpoint=0, name="Value\n(+ = worse\n− = better)"
  ) +
  scale_x_discrete(position="top") +
  theme_minimal(base_size=12) +
  theme(
    plot.title       = element_text(face="bold", size=13),
    plot.subtitle    = element_text(size=10, color="gray40"),
    axis.text.x      = element_text(face="bold", size=9.5),
    axis.text.y      = element_text(face="bold", size=11),
    panel.grid       = element_blank(),
    legend.position  = "right",
    plot.caption     = element_text(size=8, color="gray50")
  ) +
  labs(
    title    = "Figure EM-4. Effect Modification Heat Map: COVID-19 Pandemic Impact by Race/Ethnicity",
    subtitle = paste0("Red = worse outcome | Green = better outcome | ",
                      "Black non-Hispanic women show the most adverse profile across all metrics"),
    x=NULL, y=NULL,
    caption  = paste0("Pandemic spike = 2021 rate − 2019 baseline. ",
                      "Recovery = 2023 − 2021 rate (negative = fell from peak). ",
                      "Slope change = post-pandemic − pre-pandemic annual trend. ",
                      "Source: NCHS Health E-Stats series (Hoyert 2023, 2024, 2025).")
  )

Figure EM-4. Effect modification heat map showing four metrics of pandemic impact by race/ethnicity: pandemic spike magnitude, 2023 recovery extent, slope change (post minus pre), and whether each group fully recovered below its 2019 baseline by 2023. Darker red = worse outcome; darker green = better outcome. Black non-Hispanic women show the most adverse profile across all metrics — evidence of pronounced effect modification. Source: NCHS Health E-Stats series.

10.5.5 Figure EM-5: Segmented Regression Lines by Race — Full Visual of Effect Modification

# Build predicted values for segmented lines per race
# Pre-pandemic segment: 2018-2020
# Post-pandemic segment: 2022-2023
pre_grid  <- expand.grid(race=race_levels, year=seq(2018,2020,0.1)) %>%
  mutate(race=factor(race, levels=race_levels))
post_grid <- expand.grid(race=race_levels, year=seq(2022,2023,0.1)) %>%
  mutate(race=factor(race, levels=race_levels))

pre_grid$fitted  <- predict(model_pre,  newdata=pre_grid)
post_grid$fitted <- predict(model_post, newdata=post_grid)

ggplot() +
  # Pandemic shading
  annotate("rect", xmin=2020.5, xmax=2021.5, ymin=-Inf, ymax=Inf,
           fill="#FADBD8", alpha=0.40) +
  annotate("text", x=2021, y=5.5, label="COVID-19\nPandemic\n2021",
           size=2.8, color="#C0392B", fontface="bold", hjust=0.5) +
  # Observed data points
  geom_point(data=race_extended, aes(x=year, y=rate, color=race),
             size=3.5, alpha=0.9) +
  geom_text_repel(data=race_extended %>% dplyr::filter(year==2021),
                  aes(x=year, y=rate, label=rate, color=race),
                  size=2.8, fontface="bold", nudge_x=0.25,
                  show.legend=FALSE, max.overlaps=10) +
  # Pre-pandemic trend lines (dashed)
  geom_line(data=pre_grid, aes(x=year, y=fitted, color=race),
            linewidth=1.1, linetype="dashed", alpha=0.85) +
  # Post-pandemic trend lines (solid)
  geom_line(data=post_grid, aes(x=year, y=fitted, color=race),
            linewidth=1.3, linetype="solid", alpha=0.95) +
  # Connect 2020 to 2022 with dotted bridge through 2021 peak
  geom_line(data=race_extended %>% dplyr::filter(year %in% c(2020,2021,2022)),
            aes(x=year, y=rate, color=race, group=race),
            linewidth=0.8, linetype="dotted", alpha=0.6) +
  # Slope annotation for Black group
  annotate("text", x=2018.4, y=40, label="Pre-pandemic\nslope (dashed)",
           size=2.8, color="gray40", fontface="italic", hjust=0) +
  annotate("text", x=2022.1, y=47, label="Post-pandemic\nslope (solid)",
           size=2.8, color="gray40", fontface="italic", hjust=0) +
  scale_color_manual(values=race_colors, name="Race/Ethnicity") +
  scale_x_continuous(breaks=2018:2023) +
  scale_y_continuous(limits=c(5,75), breaks=seq(10,70,10)) +
  theme_bw(base_size=12) +
  theme(
    plot.title      = element_text(face="bold", size=13),
    plot.subtitle   = element_text(size=10, color="gray35"),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    plot.caption    = element_text(size=8, color="gray40", hjust=0)
  ) +
  labs(
    title    = "Figure EM-5. Segmented Regression by Race: Pre vs. Post-Pandemic Trajectories",
    subtitle = paste0("Dashed = pre-pandemic trend (2018–2020) | Dotted = pandemic transition (2020–2022) | ",
                      "Solid = post-pandemic trend (2022–2023)\n",
                      "Black non-Hispanic: only group whose 2023 rate exceeds its 2019 baseline"),
    x        = "Year",
    y        = "Maternal Mortality Rate (per 100,000 Live Births)",
    caption  = paste0("Segmented slopes estimated from race × year interaction models fitted separately ",
                      "on pre- and post-pandemic subfiles.\n",
                      "Source: NCHS Health E-Stats series (Hoyert 2023, 2024, 2025).")
  )

Figure EM-5. Segmented regression trajectories by racial/ethnic group, 2018–2023. Separate pre-pandemic (dashed) and post-pandemic (solid) trend lines fitted per group. The divergence in trajectory — particularly the Black non-Hispanic group’s failure to recover toward its pre-pandemic baseline — is the clearest visual evidence of race as an effect modifier of the COVID-19 pandemic’s impact on maternal mortality. Source: NCHS Health E-Stats series.

10.6 Effect Modification Summary and Interpretation

# Summary of all effect modification findings
em_summary_df <- data.frame(
  Analysis = c(
    "EM-1: Pandemic as modifier of year→rate",
    "EM-1: Pandemic as modifier of year→rate",
    "EM-1: Pandemic as modifier of year→rate",
    "EM-2: Race as modifier of pandemic spike",
    "EM-2: Race as modifier of pandemic spike",
    "EM-3: Pre vs. post-pandemic slopes",
    "EM-3: Pre vs. post-pandemic slopes",
    "EM-3: Pre vs. post-pandemic slopes"
  ),
  Finding = c(
    "Pre-pandemic trend (2018–2020): rate increasing +3.2/yr",
    "Pandemic spike (2021): acute +9.1 excess deaths per 100k above pre-trend",
    "Post-pandemic recovery (2022–2023): rate declining sharply from peak",
    "Black non-Hispanic pandemic spike largest in absolute terms (~+25.9)",
    "All non-Black groups had smaller spikes; interaction terms significant",
    "Black non-Hispanic: ONLY group with positive post-pandemic slope (rate still rising)",
    "All non-Black groups: negative post-pandemic slopes (improving after peak)",
    "Black non-Hispanic: only group NOT below 2019 baseline by 2023"
  ),
  Implication = c(
    "Pre-COVID trajectory was already worsening — pandemic accelerated existing trend",
    "COVID-19 is a structural effect modifier, not just a confounder — it changed the trajectory",
    "National recovery is real but unequally distributed",
    "Black women bore a disproportionately larger pandemic mortality burden",
    "Race significantly modifies the pandemic effect (ANOVA p < 0.05 for interaction)",
    "Black maternal mortality is not recovering — trajectory is diverging from other groups",
    "Three groups are on a downward recovery trajectory toward or below pre-pandemic baseline",
    "National improvement in 2022–2023 does NOT reflect improvement for Black women"
  ),
  stringsAsFactors = FALSE
)

kable(em_summary_df,
      caption = "Table EM-4. Effect Modification Analysis: Summary of All Findings",
      col.names = c("Analysis", "Key Finding", "Epidemiological Implication"),
      align = c("l","l","l"),
      booktabs = TRUE) %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"),
                font_size = 9, full_width = TRUE) %>%
  row_spec(0, bold=TRUE, background="#1A3C5A", color="white") %>%
  row_spec(c(1,2,3), background="#EBF5FB") %>%
  row_spec(c(4,5), background="#FADBD8") %>%
  row_spec(c(6,7,8), background="#FEF9E7") %>%
  column_spec(3, italic=TRUE) %>%
  footnote(general=paste0(
    "Effect modification = the pandemic and race do not simply add to the mortality rate — ",
    "they change the fundamental relationship between year and mortality rate. ",
    "This is a stronger claim than confounding and has direct implications for intervention: ",
    "population-level time-trend improvements will not reach Black women without race-targeted policies."
  ), footnote_as_chunk=TRUE)

Table EM-4. Effect Modification Analysis: Summary of All Findings
Analysis	Key Finding	Epidemiological Implication
EM-1: Pandemic as modifier of year→rate	Pre-pandemic trend (2018–2020): rate increasing +3.2/yr	Pre-COVID trajectory was already worsening — pandemic accelerated existing trend
EM-1: Pandemic as modifier of year→rate	Pandemic spike (2021): acute +9.1 excess deaths per 100k above pre-trend	COVID-19 is a structural effect modifier, not just a confounder — it changed the trajectory
EM-1: Pandemic as modifier of year→rate	Post-pandemic recovery (2022–2023): rate declining sharply from peak	National recovery is real but unequally distributed
EM-2: Race as modifier of pandemic spike	Black non-Hispanic pandemic spike largest in absolute terms (~+25.9)	Black women bore a disproportionately larger pandemic mortality burden
EM-2: Race as modifier of pandemic spike	All non-Black groups had smaller spikes; interaction terms significant	Race significantly modifies the pandemic effect (ANOVA p < 0.05 for interaction)
EM-3: Pre vs. post-pandemic slopes	Black non-Hispanic: ONLY group with positive post-pandemic slope (rate still rising)	Black maternal mortality is not recovering — trajectory is diverging from other groups
EM-3: Pre vs. post-pandemic slopes	All non-Black groups: negative post-pandemic slopes (improving after peak)	Three groups are on a downward recovery trajectory toward or below pre-pandemic baseline
EM-3: Pre vs. post-pandemic slopes	Black non-Hispanic: only group NOT below 2019 baseline by 2023	National improvement in 2022–2023 does NOT reflect improvement for Black women
Note: Effect modification = the pandemic and race do not simply add to the mortality rate — they change the fundamental relationship between year and mortality rate. This is a stronger claim than confounding and has direct implications for intervention: population-level time-trend improvements will not reach Black women without race-targeted policies.

Overall effect modification conclusion: Across all three analyses, the evidence converges on a consistent finding: the COVID-19 pandemic was a structural effect modifier of the year–maternal mortality trajectory, and race/ethnicity significantly modified the magnitude of the pandemic effect. The pandemic did not merely shift the national rate up and down uniformly — it disproportionately amplified mortality for Black non-Hispanic women and then failed to recede for that group while receding for all others. This differential effect modification explains why national aggregate improvement in 2022–2023 is accompanied by a worsening or stagnating trajectory for Black women: the pandemic accelerated a pre-existing structural divergence in racial mortality trajectories rather than creating it de novo.

11 Discussion

In 2023, Black non-Hispanic women in the United States experienced a maternal mortality rate of 50.3 deaths per 100,000 live births — 3.47 times the national average of 18.6 and 3.47 times the rate for White non-Hispanic women (14.5). This disparity constitutes the central finding of this analysis. Given approximately 500,000 live births to Black non-Hispanic mothers annually, the 35.8 additional deaths per 100,000 relative to White women corresponds to roughly 179 excess deaths per year that would not occur if Black women experienced White women’s mortality rate.

These findings are consistent with and extend the prior literature. Petersen et al. (2019) documented Black-to-White maternal mortality ratios of 2.5–3.5 using 2011–2015 NVSS data; the present analysis confirms this ratio has persisted at 3.47 in 2023. Crear-Perry et al. (2021) identified structural racism — operating through differential access to quality prenatal care, chronic physiological stress from discrimination, and implicit bias in clinical settings — as the primary driver of this disparity. The robustness of the racial disparity to year adjustment (0% change in race coefficients from B1 to B2) confirms that the racial gap is not an artifact of the secular trend and cannot be explained by overall improvements in maternal care quality.

The finding most requiring direct attention is the 2022–2023 trajectory divergence for Black non-Hispanic women. While all non-Black groups experienced rate declines (statistically significant for White and Hispanic), the Black non-Hispanic rate increased from 49.5 to 50.3. The race × year interaction analysis (Model B3) confirmed this divergence: all non-Black interaction terms were strongly negative while the Black reference trend was positive — directionally consistent with H₂.

The Poisson sensitivity analysis (IRR = 0.951 per year, p < 0.001) corroborates the OLS finding of a significant declining secular trend. The convergence of two model families on the same substantive conclusion strengthens confidence in the trend result. The extended 24-cell race panel (B2-Ext) provides inferential degrees of freedom not available in the two-year panel and substantially strengthens the evidence for the secular trend.

Confounding and bias: The 10% change criterion confirms that year does not confound the race–mortality association. However, several important confounders remain unavailable in aggregate NCHS data: individual-level socioeconomic status, comorbidity burden, insurance status, geographic region, facility type, and prenatal care adequacy.

Effect modification findings: The effect modification analyses add substantial depth to the central findings. The segmented regression (Model EM-1) establishes that the COVID-19 pandemic was not merely a confounder or additive outlier — it was a structural effect modifier that altered the slope of the temporal trend. The pre-pandemic trajectory showed a worsening trend (+3.2 per 100,000 per year, 2018–2020), the 2021 pandemic peak represented an acute spike of approximately +9.1 deaths per 100,000 above the pre-trend, and the post-pandemic period showed a steep recovery slope. Critically, the segmented model (R² ≈ 1.00) dramatically outperforms the simple linear model (R² = 0.043), confirming that the pandemic-as-effect-modifier specification is the correct temporal framework.

Race was established as a significant effect modifier of the pandemic impact (Model EM-2): the Black non-Hispanic pandemic spike in 2021 was substantially larger than all other groups’ spikes, and the race × pandemic interaction terms were statistically significant (ANOVA p < 0.05 for the improvement from additive to interaction specification). The segmented analysis by race (Model EM-3) revealed that Black non-Hispanic women are the only group whose 2023 rate (50.3) remains above their 2019 baseline (44.0), while all three non-Black groups have fully recovered to below their pre-pandemic baselines. Furthermore, the post-pandemic slope for Black non-Hispanic women is positive — the rate is still rising — while all other groups show negative post-pandemic slopes (improving). This divergence in recovery trajectories, visualized in Figures EM-2 through EM-5, is the most compelling evidence that aggregate national improvement does not constitute progress on racial equity in maternal mortality.

Limitations: (1) Small analytic samples (n = 6, 8, or 24) limit statistical power; (2) race-stratified data across 2018–2023 reconstructed from multiple publications may have inconsistencies; (3) American Indian/Alaska Native and Native Hawaiian/Pacific Islander groups are suppressed due to small cell counts; (4) ICD-10-based definition may undercount late indirect causes; (5) ecological design precludes individual-level causal inference; (6) the segmented regression models (EM-1, EM-3) are near-saturated given the small sample sizes, so coefficients should be interpreted as descriptive approximations of trajectory rather than as precise inferential estimates.

Implications: Aggregate national declines in maternal mortality do not constitute progress on equity. The effect modification analyses demonstrate that pandemic-era policy responses — which drove national rate declines in 2022–2023 — did not reach Black women. This means targeted structural interventions are not merely desirable but empirically necessary: without race-specific policy, secular improvements will continue to bypass the group with the greatest need. Recommended actions include expansion of implicit bias training in clinical settings, investment in community-based maternal health programs including doula access, elimination of structural barriers to quality prenatal care in majority-Black communities, and strengthening of Maternal Mortality Review Committees with explicit racial equity mandates.

12 References

Centers for Disease Control and Prevention. (2023). Pregnancy-related deaths: Data from maternal mortality review committees in 36 U.S. states, 2017–2019. https://www.cdc.gov/maternal-mortality/php/data-research/index.html

Crear-Perry, J., Maybank, A., Keeys, M., Mitchell, N., & Godbolt, D. (2021). Moving towards anti-racist praxis in medicine. The Lancet, 397(10283), 1457–1459. https://doi.org/10.1016/S0140-6736(21)00596-8

Hardeman, R. R., Burgess, D., Murphy, K., Satin, D., Nielsen, J., Potter, T. M., & Karbeah, J. (2020). Developing a medical school curriculum on racism: Multiyear design process and a critical race theory framework. Academic Medicine, 95(1), 101–105. https://doi.org/10.1097/ACM.0000000000002998

Hoyert, D. L. (2024). Maternal mortality rates in the United States, 2022. NCHS Health E-Stats. https://dx.doi.org/10.15620/cdc/149541

Hoyert, D. L. (2025). Maternal mortality rates in the United States, 2023. NCHS Health E-Stats. https://dx.doi.org/10.15620/cdc/174577

Petersen, E. E., Davis, N. L., Goodman, D., Cox, S., Syverson, C., Seed, K., Shapiro-Mendoza, C., Callaghan, W. M., & Barfield, W. (2019). Vital signs: Pregnancy-related deaths, United States, 2011–2015, and strategies for prevention, 13 states, 2013–2017. MMWR Morbidity and Mortality Weekly Report, 68(18), 423–429. https://doi.org/10.15585/mmwr.mm6818e1

Thoma, M. E., & Declercq, E. (2022). Trends in U.S. maternal mortality rates by sociodemographic group: National Vital Statistics System data, 2009–2019. Obstetrics & Gynecology, 139(5), 853–862. https://doi.org/10.1097/AOG.0000000000004735

World Health Organization. (2019). Maternal mortality. https://www.who.int/news-room/fact-sheets/detail/maternal-mortality

13 Appendix: AI Interaction Log

Per EPI 553 AI policy: Web search was used to verify citation details. All analytical decisions, model specifications, interpretations, and written content were developed independently by me, Emmanuel Nana Arko. AI use was limited to verifying APA citation formatting for NCHS Health E-Stats publications. References were managed in Zotero and verified before submission.

Racial and Ethnic Disparities in Maternal Mortality in the United States, 2018–2023

A Secondary Data Analysis of NCHS National Vital Statistics System Data EPI 553: Principles of Statistical Inference II — Final Report

Emmanuel Nana Arko

May 5, 2026