library(tidyverse)library(broom)library(effectsize)library(emmeans)library(ggeffects)library(modelsummary)library(patchwork)df <-read_csv("health_data.csv") |>mutate(gender =factor(gender, levels =c("Male", "Female")),income_k = income /1000) # income in $000s for readability
We fit three nested models to isolate the interaction effect:
Code
m1 <-lm(health_score ~ income_k, data = df) # income onlym2 <-lm(health_score ~ income_k + gender, data = df) # additivem3 <-lm(health_score ~ income_k * gender, data = df) # interactionmodelsummary(list("Income only"= m1, "Additive"= m2, "Interaction"= m3),statistic ="({p.value})",stars =TRUE,gof_map =c("nobs","r.squared","adj.r.squared","AIC"),note ="p-values in parentheses. * p<0.05, ** p<0.01, *** p<0.001")
Income only
Additive
Interaction
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
p-values in parentheses. * p<0.05, ** p<0.01, *** p<0.001
(Intercept)
26.231***
33.028***
20.388***
(<0.001)
(<0.001)
(<0.001)
income_k
0.777***
0.551***
0.896***
(<0.001)
(<0.001)
(<0.001)
genderFemale
10.778***
30.106***
(<0.001)
(<0.001)
income_k × genderFemale
-0.438***
(<0.001)
Num.Obs.
60
60
60
R2
0.949
0.980
0.994
R2 Adj.
0.948
0.979
0.993
3. Is the Interaction Significant?
Code
anova(m2, m3) |>tidy() |> knitr::kable(digits =3)
term
df.residual
rss
df
sumsq
statistic
p.value
health_score ~ income_k + gender
57
320.870
NA
NA
NA
NA
health_score ~ income_k * gender
56
99.171
1
221.699
125.19
0
Interpretation: The ANOVA F-test compares the additive model (M2) vs. the interaction model (M3). A significant result (p < .05) confirms that gender moderates the income→health relationship — the slopes differ between males and females.
4. Effect Sizes
Code
# Partial eta-squared for each termeta_squared(m3, partial =TRUE) |>as_tibble() |> knitr::kable(digits =3,caption ="Partial η² (proportion of variance explained by each term)")
Partial η² (proportion of variance explained by each term)
Parameter
Eta2_partial
CI
CI_low
CI_high
income_k
0.993
0.95
0.991
1
gender
0.830
0.95
0.764
1
income_k:gender
0.691
0.95
0.578
1
η² value
Interpretation
0.01–0.06
Small
0.06–0.14
Medium
> 0.14
Large
5. Visualisations
5a. Regression lines by gender (interaction plot)
Code
pred <-ggpredict(m3, terms =c("income_k [all]", "gender"))# Instead of ggplot(df, aes(...))# Use an empty ggplot object and specify data for each layerp1 <-ggplot() +# Map variables specifically for the points layergeom_point(data = df, aes(x = income_k, y = health_score, colour = gender), alpha =0.5, size =2) +# Map variables for the prediction linegeom_line(data =as.data.frame(pred),aes(x = x, y = predicted, colour = group), linewidth =1.2) +# Map variables for the ribbongeom_ribbon(data =as.data.frame(pred),aes(x = x, ymin = conf.low, ymax = conf.high,fill = group), alpha =0.15) +scale_colour_manual(values =c("Male"="#2166ac", "Female"="#d6604d")) +scale_fill_manual(values =c("Male"="#2166ac", "Female"="#d6604d")) +labs(title ="Income → Health: Gender Moderation",subtitle ="Diverging slopes indicate an interaction effect",x ="Income ($000s)", y ="Health Score",colour ="Gender", fill ="Gender") +theme_minimal(base_size =13)p1
5b. Simple slopes & 95% CIs
Code
emm <-emtrends(m3, ~ gender, var ="income_k")slopes_df <-as.data.frame(emm)ggplot(slopes_df, aes(x = gender, y = income_k.trend, colour = gender)) +geom_point(size =4) +geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width =0.15, linewidth =1) +geom_hline(yintercept =0, linetype ="dashed", colour ="grey50") +scale_colour_manual(values =c("Male"="#2166ac", "Female"="#d6604d")) +labs(title ="Simple Slopes: Effect of Income on Health by Gender",subtitle ="Points = slope estimate; bars = 95% CI",x =NULL, y ="Slope (health units per $1k income)") +theme_minimal(base_size =13) +theme(legend.position ="none")
coefs <-tidy(m3)int_p <-filter(coefs, str_detect(term, ":")) |>pull(p.value)eta_i <-eta_squared(m3, partial =TRUE) |>as_tibble() |>filter(str_detect(Parameter, ":")) |>pull(Eta2_partial)slopes <-as.data.frame(emm)s_m <-round(slopes$income_k.trend[slopes$gender =="Male"], 3)s_f <-round(slopes$income_k.trend[slopes$gender =="Female"], 3)cat(glue::glue("**Key findings:**- The interaction term (income × gender) was **{ifelse(int_p < .05, 'statistically significant', 'not significant')}** (*p* = {round(int_p, 3)}).- **Female slope:** {s_f} health units per $1k income increase.- **Male slope:** {s_m} health units per $1k income increase.- The interaction explained **{round(eta_i * 100, 1)}%** of variance in health scores (partial η²).- **Interpretation:** Among females, each additional $1,000 in income is associated with a {s_f}-unit gain in health score. Among males, the same $1k increase yields only a {s_m}-unit gain — indicating that **income benefits health more for females than males** in this sample. Gender therefore **moderates** the income–health relationship."))
Key findings:
The interaction term (income × gender) was statistically significant (p = 0).
Female slope: 0.457 health units per $1k income increase.
Male slope: 0.896 health units per $1k income increase.
The interaction explained 69.1% of variance in health scores (partial η²).
Interpretation: Among females, each additional $1,000 in income is associated with a 0.457-unit gain in health score. Among males, the same $1k increase yields only a 0.896-unit gain — indicating that income benefits health more for females than males in this sample. Gender therefore moderates the income–health relationship.