Set Up
# Define required packages
packages <- c("readr", "dplyr", "tidyr", "grid", "gridExtra",
"ggplot2", "tidyverse", "lmerTest", "sjPlot",
"ggeffects", "multcomp", "effectsize")
# Install missing packages
new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Attaching package: 'lmerTest'
##
## The following object is masked from 'package:lme4':
##
## lmer
##
## The following object is masked from 'package:stats':
##
## step
library(sjPlot)
##
## Attaching package: 'sjPlot'
##
## The following object is masked from 'package:ggplot2':
##
## set_theme
library(ggeffects)
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
## Attaching package: 'TH.data'
##
## The following object is masked from 'package:MASS':
##
## geyser
library(effectsize)
##
## Attaching package: 'effectsize'
##
## The following object is masked from 'package:mvtnorm':
##
## standardize
Data Preparation
all_visits <- readRDS("all_visits.rds")
analysis_data <- all_visits %>%
dplyr::select(study_id, v1_total, v2_total, v3_total) %>%
rename(outcome1 = v1_total,
outcome2 = v2_total,
outcome3 = v3_total) %>%
pivot_longer(cols = starts_with("outcome"),
names_to = "timepoint",
values_to = "outcome") %>%
mutate(timepoint = stringr::str_remove(timepoint, "outcome"),
timepoint = factor(timepoint, levels = c("1", "2", "3")),
id = study_id)
head(analysis_data)
## # A tibble: 6 × 4
## study_id timepoint outcome id
## <dbl> <fct> <dbl> <dbl>
## 1 1 1 13 1
## 2 1 2 18 1
## 3 1 3 15 1
## 4 3 1 16 3
## 5 3 2 21 3
## 6 3 3 21 3
dplyr::count(analysis_data, timepoint)
## # A tibble: 3 × 2
## timepoint n
## <fct> <int>
## 1 1 30
## 2 2 30
## 3 3 30
summary(analysis_data$outcome)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 9.00 16.00 18.00 17.81 20.00 21.00 5
Fit Linear Mixed Model
fit <- lmer(outcome ~ timepoint + (1 | id), data = analysis_data)
summary(fit)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: outcome ~ timepoint + (1 | id)
## Data: analysis_data
##
## REML criterion at convergence: 392.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.5928 -0.1919 0.1608 0.5564 2.1747
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 1.599 1.265
## Residual 4.922 2.219
## Number of obs: 85, groups: id, 30
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 15.4333 0.4662 73.9103 33.103 < 2e-16 ***
## timepoint2 3.7953 0.5789 54.7837 6.556 2.04e-08 ***
## timepoint3 3.6289 0.5988 56.1857 6.060 1.20e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) tmpnt2
## timepoint2 -0.608
## timepoint3 -0.588 0.477
# Pretty summary table
tab_model(fit)
|
Â
|
outcome
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
15.43
|
14.51 – 16.36
|
<0.001
|
|
timepoint: timepoint2
|
3.80
|
2.64 – 4.95
|
<0.001
|
|
timepoint: timepoint3
|
3.63
|
2.44 – 4.82
|
<0.001
|
|
Random Effects
|
|
σ2
|
4.92
|
|
τ00 id
|
1.60
|
|
ICC
|
0.25
|
|
N id
|
30
|
|
Observations
|
85
|
|
Marginal R2 / Conditional R2
|
0.329 / 0.494
|
Visualize Model Predictions
plot_data <- ggpredict(fit, terms = "timepoint")
# Basic
plot_data %>%
mutate(x = ordered(x, levels = c("1", "2", "3"))) %>%
plot() +
ggtitle("Outcome by Timepoint") +
ylab("Predicted Outcome")
## Ignoring unknown labels:
## • linetype : "NA"
## • shape : "NA"

# Error bars only
ggplot(plot_data, aes(x = x, y = predicted)) +
geom_point(size = 3, color = "steelblue") +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
width = 0.15, linewidth = 1, color = "steelblue") +
theme_minimal(base_size = 14) +
labs(title = "Predicted Outcome by Timepoint",
x = "Timepoint", y = "Predicted Outcome")

Pairwise Comparisons (Tukey)
fit_tukey <- glht(fit, linfct = mcp(timepoint = "Tukey"))
tukey_res <- summary(fit_tukey)
tukey_res
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: lmer(formula = outcome ~ timepoint + (1 | id), data = analysis_data)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## 2 - 1 == 0 3.7953 0.5789 6.556 <1e-05 ***
## 3 - 1 == 0 3.6289 0.5988 6.060 <1e-05 ***
## 3 - 2 == 0 -0.1664 0.6024 -0.276 0.959
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
Option 1 — Classical Cohen’s d
t1 <- analysis_data %>% filter(timepoint == "1") %>% pull(outcome)
t2 <- analysis_data %>% filter(timepoint == "2") %>% pull(outcome)
t3 <- analysis_data %>% filter(timepoint == "3") %>% pull(outcome)
cohens_d <- function(x, y) {
lx <- length(x) - 1
ly <- length(y) - 1
md <- mean(x, na.rm = TRUE) - mean(y, na.rm = TRUE)
csd <- lx * stats::var(x, na.rm = TRUE) + ly * stats::var(y, na.rm = TRUE)
csd <- csd / (lx + ly)
csd <- sqrt(csd)
md / csd
}
d_2_1 <- cohens_d(t2, t1)
d_3_1 <- cohens_d(t3, t1)
d_3_2 <- cohens_d(t3, t2)
effect_data_cohen <- tibble(
Comparison = c("2 - 1", "3 - 1", "3 - 2"),
`Effect Size (Cohen)` = round(c(d_2_1, d_3_1, d_3_2), 2)
)
effect_data_cohen
## # A tibble: 3 × 2
## Comparison `Effect Size (Cohen)`
## <chr> <dbl>
## 1 2 - 1 1.4
## 2 3 - 1 1.4
## 3 3 - 2 -0.09
Option 2 — Standardized Effect Size (Residual SD)
md_vec <- unname(tukey_res$test[-(1:2)]$coefficients)
names_vec <- names(tukey_res$test[-(1:2)]$coefficients)
resid_sd <- summary(fit)$sigma
effect_data_residual <- tibble(
Comparison = names_vec,
`Effect Size (Residual)` = round(md_vec / resid_sd, 2)
)
effect_data_residual
## # A tibble: 3 × 2
## Comparison `Effect Size (Residual)`
## <chr> <dbl>
## 1 2 - 1 1.71
## 2 3 - 1 1.64
## 3 3 - 2 -0.08
Option 3 — Standardized Effect Size (All Variance Components)
vc <- VarCorr(fit)
pooled_sd_all <- sqrt(as.data.frame(vc)$vcov[1] + as.data.frame(vc)$vcov[2])
effect_data_allvar <- tibble(
Comparison = names_vec,
`Effect Size (All Variance)` = round(md_vec / pooled_sd_all, 2)
)
effect_data_allvar
## # A tibble: 3 × 2
## Comparison `Effect Size (All Variance)`
## <chr> <dbl>
## 1 2 - 1 1.49
## 2 3 - 1 1.42
## 3 3 - 2 -0.07
Combine Effect Sizes
effect_table <- effect_data_cohen %>%
left_join(effect_data_residual, by = "Comparison") %>%
left_join(effect_data_allvar, by = "Comparison") %>%
mutate(across(where(is.numeric), ~ round(.x, 3)))
effect_table
## # A tibble: 3 × 4
## Comparison `Effect Size (Cohen)` Effect Size (Residua…¹ Effect Size (All Var…²
## <chr> <dbl> <dbl> <dbl>
## 1 2 - 1 1.4 1.71 1.49
## 2 3 - 1 1.4 1.64 1.42
## 3 3 - 2 -0.09 -0.08 -0.07
## # ℹ abbreviated names: ¹​`Effect Size (Residual)`, ²​`Effect Size (All Variance)`
Interpretation
effect_interpretation <- effect_data_cohen %>%
mutate(
`Hopkins Interpretation` = case_when(
abs(`Effect Size (Cohen)`) <= 0.2 ~ "Trivial",
abs(`Effect Size (Cohen)`) <= 0.6 ~ "Small",
abs(`Effect Size (Cohen)`) <= 1.2 ~ "Moderate",
abs(`Effect Size (Cohen)`) <= 2.0 ~ "Large",
abs(`Effect Size (Cohen)`) <= 4.0 ~ "Very Large",
TRUE ~ "Nearly Perfect"
),
`Cohen Interpretation` = case_when(
abs(`Effect Size (Cohen)`) < 0.5 ~ "Small",
abs(`Effect Size (Cohen)`) < 0.8 ~ "Medium",
TRUE ~ "Large"
)
)
effect_interpretation
## # A tibble: 3 × 4
## Comparison `Effect Size (Cohen)` Hopkins Interpretati…¹ `Cohen Interpretation`
## <chr> <dbl> <chr> <chr>
## 1 2 - 1 1.4 Large Large
## 2 3 - 1 1.4 Large Large
## 3 3 - 2 -0.09 Trivial Small
## # ℹ abbreviated name: ¹​`Hopkins Interpretation`
Spaghetti Plot (All Participants)
analysis_data <- analysis_data %>%
mutate(timepoint = factor(timepoint, levels = c("1", "2", "3"), ordered = TRUE))
p_spaghetti <- ggplot(analysis_data, aes(x = timepoint, y = outcome, group = id)) +
geom_line(color = "grey75", linewidth = 0.7, alpha = 0.6) +
geom_point(color = "grey65", size = 1.8, alpha = 0.7) +
stat_summary(aes(group = 1), fun = mean, geom = "line",
color = "#045a8d", linewidth = 1.6) +
stat_summary(aes(group = 1), fun = mean, geom = "point",
color = "#045a8d", fill = "white", size = 3.5,
shape = 21, stroke = 1) +
stat_summary(aes(group = 1), fun.data = mean_cl_normal,
geom = "errorbar", width = 0.12,
color = "#045a8d", linewidth = 1) +
labs(title = "Individual Trajectories of Total Scores",
subtitle = "Gray = individual participants, Blue = group mean ± 95% CI",
x = "Study Visit", y = "Total Score") +
scale_x_discrete(labels = c("1" = "Visit 1\n(Baseline)",
"2" = "Visit 2\n(Week 2)",
"3" = "Visit 3\n(Week 6)")) +
theme_minimal(base_size = 15) +
theme(plot.title = element_text(face = "bold", size = 19, hjust = 0.5),
plot.subtitle = element_text(size = 13, hjust = 0.5, margin = margin(b = 10)),
axis.title = element_text(face = "bold", size = 14),
axis.text = element_text(size = 12, color = "#333333"),
panel.grid.major.y = element_line(color = "grey85", linetype = "dotted"),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank(),
plot.margin = margin(20, 20, 20, 20))
p_spaghetti
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Removed 5 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Removed 5 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

Spaghetti Plot Colored by Decline (Baseline → Visit 3)
decline_flags <- analysis_data %>%
dplyr::select(id, timepoint, outcome) %>%
pivot_wider(names_from = timepoint, values_from = outcome, names_prefix = "t") %>%
mutate(
decline_1_2 = ifelse(!is.na(t1) & !is.na(t2) & t2 < t1, "Decline V1→V2", "No Decline V1→V2"),
decline_1_3 = ifelse(!is.na(t1) & !is.na(t3) & t3 < t1, "Decline V1→V3", "No Decline V1→V3"),
decline_2_3 = ifelse(!is.na(t2) & !is.na(t3) & t3 < t2, "Decline V2→V3", "No Decline V2→V3")
) %>%
dplyr::select(id, decline_1_2, decline_1_3, decline_2_3)
dat_flagged <- analysis_data %>% left_join(decline_flags, by = "id")
p_line_by_1to3 <- ggplot(dat_flagged,
aes(x = timepoint, y = outcome, group = id,
color = decline_1_3)) +
geom_line(linewidth = 0.9, alpha = 0.9) +
geom_point(size = 2) +
stat_summary(aes(group = 1), fun = mean, geom = "line",
color = "black", linewidth = 1.2) +
stat_summary(aes(group = 1), fun = mean, geom = "point",
color = "black", size = 3) +
scale_color_manual(values = c("Decline V1→V3" = "#d73027",
"No Decline V1→V3" = "#4575b4")) +
theme_minimal(base_size = 15) +
labs(title = "Spaghetti Plot Colored by Decline (Baseline → Visit 3)",
x = "Timepoint", y = "Total Score", color = "Change")
p_line_by_1to3
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Removed 5 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).
