Gender as Moderator: Income × Health Regression

Interaction Term Analysis in R

Author

Timothy

Published

May 27, 2026

Code
library(tidyverse)
library(broom)
library(effectsize)
library(emmeans)
library(ggeffects)
library(modelsummary)
library(patchwork)

df <- read_csv("health_data.csv") |>
  mutate(gender = factor(gender, levels = c("Male", "Female")),
         income_k = income / 1000)   # income in $000s for readability

1. Data Overview

Code
df |>
  group_by(gender) |>
  summarise(
    n          = n(),
    income_mean = round(mean(income_k), 1),
    health_mean = round(mean(health_score), 1),
    health_sd   = round(sd(health_score), 1)
  ) |>
  knitr::kable(col.names = c("Gender","N","Mean Income ($k)","Mean Health","SD Health"))
Gender N Mean Income ($k) Mean Health SD Health
Male 30 36.7 53.3 6.5
Female 30 71.1 83.0 6.3

2. Models

We fit three nested models to isolate the interaction effect:

Code
m1 <- lm(health_score ~ income_k,                    data = df)   # income only
m2 <- lm(health_score ~ income_k + gender,            data = df)   # additive
m3 <- lm(health_score ~ income_k * gender,            data = df)   # interaction

modelsummary(
  list("Income only" = m1, "Additive" = m2, "Interaction" = m3),
  statistic = "({p.value})",
  stars     = TRUE,
  gof_map   = c("nobs","r.squared","adj.r.squared","AIC"),
  note      = "p-values in parentheses. * p<0.05, ** p<0.01, *** p<0.001"
)
Income only Additive Interaction
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
p-values in parentheses. * p<0.05, ** p<0.01, *** p<0.001
(Intercept) 26.231*** 33.028*** 20.388***
(<0.001) (<0.001) (<0.001)
income_k 0.777*** 0.551*** 0.896***
(<0.001) (<0.001) (<0.001)
genderFemale 10.778*** 30.106***
(<0.001) (<0.001)
income_k × genderFemale -0.438***
(<0.001)
Num.Obs. 60 60 60
R2 0.949 0.980 0.994
R2 Adj. 0.948 0.979 0.993

3. Is the Interaction Significant?

Code
anova(m2, m3) |> tidy() |> knitr::kable(digits = 3)
term df.residual rss df sumsq statistic p.value
health_score ~ income_k + gender 57 320.870 NA NA NA NA
health_score ~ income_k * gender 56 99.171 1 221.699 125.19 0

Interpretation: The ANOVA F-test compares the additive model (M2) vs. the interaction model (M3). A significant result (p < .05) confirms that gender moderates the income→health relationship — the slopes differ between males and females.

4. Effect Sizes

Code
# Partial eta-squared for each term
eta_squared(m3, partial = TRUE) |> as_tibble() |>
  knitr::kable(digits = 3,
               caption = "Partial η² (proportion of variance explained by each term)")
Partial η² (proportion of variance explained by each term)
Parameter Eta2_partial CI CI_low CI_high
income_k 0.993 0.95 0.991 1
gender 0.830 0.95 0.764 1
income_k:gender 0.691 0.95 0.578 1
η² value Interpretation
0.01–0.06 Small
0.06–0.14 Medium
> 0.14 Large

5. Visualisations

5a. Regression lines by gender (interaction plot)

Code
pred <- ggpredict(m3, terms = c("income_k [all]", "gender"))

# Instead of ggplot(df, aes(...))
# Use an empty ggplot object and specify data for each layer
p1 <- ggplot() +
  # Map variables specifically for the points layer
  geom_point(data = df, aes(x = income_k, y = health_score, colour = gender), 
             alpha = 0.5, size = 2) +
  # Map variables for the prediction line
  geom_line(data = as.data.frame(pred),
            aes(x = x, y = predicted, colour = group), linewidth = 1.2) +
  # Map variables for the ribbon
  geom_ribbon(data = as.data.frame(pred),
              aes(x = x, ymin = conf.low, ymax = conf.high,
                  fill = group), alpha = 0.15) +
  scale_colour_manual(values = c("Male" = "#2166ac", "Female" = "#d6604d")) +
  scale_fill_manual(values   = c("Male" = "#2166ac", "Female" = "#d6604d")) +
  labs(title    = "Income → Health: Gender Moderation",
       subtitle = "Diverging slopes indicate an interaction effect",
       x = "Income ($000s)", y = "Health Score",
       colour = "Gender", fill = "Gender") +
  theme_minimal(base_size = 13)
p1

5b. Simple slopes & 95% CIs

Code
emm <- emtrends(m3, ~ gender, var = "income_k")
slopes_df <- as.data.frame(emm)

ggplot(slopes_df, aes(x = gender, y = income_k.trend, colour = gender)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.15, linewidth = 1) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey50") +
  scale_colour_manual(values = c("Male" = "#2166ac", "Female" = "#d6604d")) +
  labs(title    = "Simple Slopes: Effect of Income on Health by Gender",
       subtitle = "Points = slope estimate; bars = 95% CI",
       x = NULL, y = "Slope (health units per $1k income)") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

5c. Partial η² bar chart

Code
eta_df <- eta_squared(m3, partial = TRUE) |> as_tibble()

ggplot(eta_df, aes(x = reorder(Parameter, Eta2_partial),
                   y = Eta2_partial, fill = Parameter)) +
  geom_col(width = 0.5) +
  geom_errorbar(aes(ymin = CI_low, ymax = CI_high), width = 0.15) +
  coord_flip() +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Partial η² by Predictor",
       x = NULL, y = "Partial η²") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

6. Simple Slopes Table

Code
as.data.frame(emm) |>
  mutate(across(where(is.numeric), \(x) round(x, 3))) |>
  knitr::kable(col.names = c("Gender","Slope","SE","df","Lower CI","Upper CI"))
Gender Slope SE df Lower CI Upper CI
Male 0.896 0.035 56 0.826 0.965
Female 0.457 0.018 56 0.421 0.494

7. Results Summary

Code
coefs  <- tidy(m3)
int_p  <- filter(coefs, str_detect(term, ":")) |> pull(p.value)
eta_i  <- eta_squared(m3, partial = TRUE) |> as_tibble() |>
            filter(str_detect(Parameter, ":")) |> pull(Eta2_partial)

slopes <- as.data.frame(emm)
s_m    <- round(slopes$income_k.trend[slopes$gender == "Male"],   3)
s_f    <- round(slopes$income_k.trend[slopes$gender == "Female"], 3)

cat(glue::glue(
"**Key findings:**

- The interaction term (income × gender) was **{ifelse(int_p < .05, 'statistically significant', 'not significant')}**
  (*p* = {round(int_p, 3)}).
- **Female slope:** {s_f} health units per $1k income increase.
- **Male slope:** {s_m} health units per $1k income increase.
- The interaction explained **{round(eta_i * 100, 1)}%** of variance in health scores (partial η²).
- **Interpretation:** Among females, each additional $1,000 in income is associated with a
  {s_f}-unit gain in health score. Among males, the same $1k increase yields only a
  {s_m}-unit gain — indicating that **income benefits health more for females than males**
  in this sample. Gender therefore **moderates** the income–health relationship.
"))

Key findings:

  • The interaction term (income × gender) was statistically significant (p = 0).
  • Female slope: 0.457 health units per $1k income increase.
  • Male slope: 0.896 health units per $1k income increase.
  • The interaction explained 69.1% of variance in health scores (partial η²).
  • Interpretation: Among females, each additional $1,000 in income is associated with a 0.457-unit gain in health score. Among males, the same $1k increase yields only a 0.896-unit gain — indicating that income benefits health more for females than males in this sample. Gender therefore moderates the income–health relationship.