Gender as Moderator: Income × Health Regression

Interaction Term Analysis in R

Author

Timothy

Published

May 27, 2026

Code

library(tidyverse)
library(broom)
library(effectsize)
library(emmeans)
library(ggeffects)
library(modelsummary)
library(patchwork)

df <- read_csv("health_data.csv") |>
  mutate(gender = factor(gender, levels = c("Male", "Female")),
         income_k = income / 1000)   # income in $000s for readability

1. Data Overview

Code

df |>
  group_by(gender) |>
  summarise(
    n          = n(),
    income_mean = round(mean(income_k), 1),
    health_mean = round(mean(health_score), 1),
    health_sd   = round(sd(health_score), 1)
  ) |>
  knitr::kable(col.names = c("Gender","N","Mean Income ($k)","Mean Health","SD Health"))

Gender	N	Mean Income ($k)	Mean Health	SD Health
Male	30	36.7	53.3	6.5
Female	30	71.1	83.0	6.3

2. Models

We fit three nested models to isolate the interaction effect:

Code

m1 <- lm(health_score ~ income_k,                    data = df)   # income only
m2 <- lm(health_score ~ income_k + gender,            data = df)   # additive
m3 <- lm(health_score ~ income_k * gender,            data = df)   # interaction

modelsummary(
  list("Income only" = m1, "Additive" = m2, "Interaction" = m3),
  statistic = "({p.value})",
  stars     = TRUE,
  gof_map   = c("nobs","r.squared","adj.r.squared","AIC"),
  note      = "p-values in parentheses. * p<0.05, ** p<0.01, *** p<0.001"
)

	Income only	Additive	Interaction
+ p < 0.1, * p < 0.05, p < 0.01, * p < 0.001
p-values in parentheses. * p<0.05, p<0.01, * p<0.001
(Intercept)	26.231***	33.028***	20.388***
	(<0.001)	(<0.001)	(<0.001)
income_k	0.777***	0.551***	0.896***
	(<0.001)	(<0.001)	(<0.001)
genderFemale		10.778***	30.106***
		(<0.001)	(<0.001)
income_k × genderFemale			-0.438***
			(<0.001)
Num.Obs.	60	60	60
R2	0.949	0.980	0.994
R2 Adj.	0.948	0.979	0.993

3. Is the Interaction Significant?

Code

anova(m2, m3) |> tidy() |> knitr::kable(digits = 3)

term	df.residual	rss	df	sumsq	statistic	p.value
health_score ~ income_k + gender	57	320.870	NA	NA	NA	NA
health_score ~ income_k * gender	56	99.171	1	221.699	125.19	0

Interpretation: The ANOVA F-test compares the additive model (M2) vs. the interaction model (M3). A significant result (p < .05) confirms that gender moderates the income→health relationship — the slopes differ between males and females.

4. Effect Sizes

Code

# Partial eta-squared for each term
eta_squared(m3, partial = TRUE) |> as_tibble() |>
  knitr::kable(digits = 3,
               caption = "Partial η² (proportion of variance explained by each term)")

Partial η² (proportion of variance explained by each term)
Parameter	Eta2_partial	CI	CI_low	CI_high
income_k	0.993	0.95	0.991	1
gender	0.830	0.95	0.764	1
income_k:gender	0.691	0.95	0.578	1

η² value	Interpretation
0.01–0.06	Small
0.06–0.14	Medium
> 0.14	Large

5. Visualisations

5a. Regression lines by gender (interaction plot)

Code

pred <- ggpredict(m3, terms = c("income_k [all]", "gender"))

# Instead of ggplot(df, aes(...))
# Use an empty ggplot object and specify data for each layer
p1 <- ggplot() +
  # Map variables specifically for the points layer
  geom_point(data = df, aes(x = income_k, y = health_score, colour = gender), 
             alpha = 0.5, size = 2) +
  # Map variables for the prediction line
  geom_line(data = as.data.frame(pred),
            aes(x = x, y = predicted, colour = group), linewidth = 1.2) +
  # Map variables for the ribbon
  geom_ribbon(data = as.data.frame(pred),
              aes(x = x, ymin = conf.low, ymax = conf.high,
                  fill = group), alpha = 0.15) +
  scale_colour_manual(values = c("Male" = "#2166ac", "Female" = "#d6604d")) +
  scale_fill_manual(values   = c("Male" = "#2166ac", "Female" = "#d6604d")) +
  labs(title    = "Income → Health: Gender Moderation",
       subtitle = "Diverging slopes indicate an interaction effect",
       x = "Income ($000s)", y = "Health Score",
       colour = "Gender", fill = "Gender") +
  theme_minimal(base_size = 13)
p1

5b. Simple slopes & 95% CIs

Code

emm <- emtrends(m3, ~ gender, var = "income_k")
slopes_df <- as.data.frame(emm)

ggplot(slopes_df, aes(x = gender, y = income_k.trend, colour = gender)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.15, linewidth = 1) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey50") +
  scale_colour_manual(values = c("Male" = "#2166ac", "Female" = "#d6604d")) +
  labs(title    = "Simple Slopes: Effect of Income on Health by Gender",
       subtitle = "Points = slope estimate; bars = 95% CI",
       x = NULL, y = "Slope (health units per $1k income)") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

5c. Partial η² bar chart

Code

eta_df <- eta_squared(m3, partial = TRUE) |> as_tibble()

ggplot(eta_df, aes(x = reorder(Parameter, Eta2_partial),
                   y = Eta2_partial, fill = Parameter)) +
  geom_col(width = 0.5) +
  geom_errorbar(aes(ymin = CI_low, ymax = CI_high), width = 0.15) +
  coord_flip() +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Partial η² by Predictor",
       x = NULL, y = "Partial η²") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

6. Simple Slopes Table

Code

as.data.frame(emm) |>
  mutate(across(where(is.numeric), \(x) round(x, 3))) |>
  knitr::kable(col.names = c("Gender","Slope","SE","df","Lower CI","Upper CI"))

Gender	Slope	SE	df	Lower CI	Upper CI
Male	0.896	0.035	56	0.826	0.965
Female	0.457	0.018	56	0.421	0.494

7. Results Summary

Code

coefs  <- tidy(m3)
int_p  <- filter(coefs, str_detect(term, ":")) |> pull(p.value)
eta_i  <- eta_squared(m3, partial = TRUE) |> as_tibble() |>
            filter(str_detect(Parameter, ":")) |> pull(Eta2_partial)

slopes <- as.data.frame(emm)
s_m    <- round(slopes$income_k.trend[slopes$gender == "Male"],   3)
s_f    <- round(slopes$income_k.trend[slopes$gender == "Female"], 3)

cat(glue::glue(
"**Key findings:**

- The interaction term (income × gender) was **{ifelse(int_p < .05, 'statistically significant', 'not significant')}**
  (*p* = {round(int_p, 3)}).
- **Female slope:** {s_f} health units per $1k income increase.
- **Male slope:** {s_m} health units per $1k income increase.
- The interaction explained **{round(eta_i * 100, 1)}%** of variance in health scores (partial η²).
- **Interpretation:** Among females, each additional $1,000 in income is associated with a
  {s_f}-unit gain in health score. Among males, the same $1k increase yields only a
  {s_m}-unit gain — indicating that **income benefits health more for females than males**
  in this sample. Gender therefore **moderates** the income–health relationship.
"))

Key findings:

The interaction term (income × gender) was statistically significant (p = 0).
Female slope: 0.457 health units per $1k income increase.
Male slope: 0.896 health units per $1k income increase.
The interaction explained 69.1% of variance in health scores (partial η²).
Interpretation: Among females, each additional $1,000 in income is associated with a 0.457-unit gain in health score. Among males, the same $1k increase yields only a 0.896-unit gain — indicating that income benefits health more for females than males in this sample. Gender therefore moderates the income–health relationship.