Set Up

# Define required packages
packages <- c("readr", "dplyr", "tidyr", "grid", "gridExtra",
              "ggplot2", "tidyverse", "lmerTest", "sjPlot",
              "ggeffects", "multcomp", "effectsize")

# Install missing packages
new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Attaching package: 'lmerTest'
## 
## The following object is masked from 'package:lme4':
## 
##     lmer
## 
## The following object is masked from 'package:stats':
## 
##     step
library(sjPlot)
## 
## Attaching package: 'sjPlot'
## 
## The following object is masked from 'package:ggplot2':
## 
##     set_theme
library(ggeffects)
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## Attaching package: 'TH.data'
## 
## The following object is masked from 'package:MASS':
## 
##     geyser
library(effectsize)
## 
## Attaching package: 'effectsize'
## 
## The following object is masked from 'package:mvtnorm':
## 
##     standardize

Data Preparation

all_visits <- readRDS("all_visits.rds")

analysis_data <- all_visits %>%
  dplyr::select(study_id, v1_total, v2_total, v3_total) %>%
  rename(outcome1 = v1_total,
         outcome2 = v2_total,
         outcome3 = v3_total) %>%
  pivot_longer(cols = starts_with("outcome"),
               names_to  = "timepoint",
               values_to = "outcome") %>%
  mutate(timepoint = stringr::str_remove(timepoint, "outcome"),
         timepoint = factor(timepoint, levels = c("1", "2", "3")),
         id = study_id)

head(analysis_data)
## # A tibble: 6 × 4
##   study_id timepoint outcome    id
##      <dbl> <fct>       <dbl> <dbl>
## 1        1 1              13     1
## 2        1 2              18     1
## 3        1 3              15     1
## 4        3 1              16     3
## 5        3 2              21     3
## 6        3 3              21     3
dplyr::count(analysis_data, timepoint)
## # A tibble: 3 × 2
##   timepoint     n
##   <fct>     <int>
## 1 1            30
## 2 2            30
## 3 3            30
summary(analysis_data$outcome)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    9.00   16.00   18.00   17.81   20.00   21.00       5

Fit Linear Mixed Model

fit <- lmer(outcome ~ timepoint + (1 | id), data = analysis_data)
summary(fit)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: outcome ~ timepoint + (1 | id)
##    Data: analysis_data
## 
## REML criterion at convergence: 392.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5928 -0.1919  0.1608  0.5564  2.1747 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  id       (Intercept) 1.599    1.265   
##  Residual             4.922    2.219   
## Number of obs: 85, groups:  id, 30
## 
## Fixed effects:
##             Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)  15.4333     0.4662 73.9103  33.103  < 2e-16 ***
## timepoint2    3.7953     0.5789 54.7837   6.556 2.04e-08 ***
## timepoint3    3.6289     0.5988 56.1857   6.060 1.20e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr) tmpnt2
## timepoint2 -0.608       
## timepoint3 -0.588  0.477
# Pretty summary table
tab_model(fit)
  outcome
Predictors Estimates CI p
(Intercept) 15.43 14.51 – 16.36 <0.001
timepoint: timepoint2 3.80 2.64 – 4.95 <0.001
timepoint: timepoint3 3.63 2.44 – 4.82 <0.001
Random Effects
σ2 4.92
τ00 id 1.60
ICC 0.25
N id 30
Observations 85
Marginal R2 / Conditional R2 0.329 / 0.494

Visualize Model Predictions

plot_data <- ggpredict(fit, terms = "timepoint")

# Basic
plot_data %>%
  mutate(x = ordered(x, levels = c("1", "2", "3"))) %>%
  plot() +
  ggtitle("Outcome by Timepoint") +
  ylab("Predicted Outcome")
## Ignoring unknown labels:
## • linetype : "NA"
## • shape : "NA"

# Error bars only
ggplot(plot_data, aes(x = x, y = predicted)) +
  geom_point(size = 3, color = "steelblue") +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                width = 0.15, linewidth = 1, color = "steelblue") +
  theme_minimal(base_size = 14) +
  labs(title = "Predicted Outcome by Timepoint",
       x = "Timepoint", y = "Predicted Outcome")

Pairwise Comparisons (Tukey)

fit_tukey <- glht(fit, linfct = mcp(timepoint = "Tukey"))
tukey_res <- summary(fit_tukey)
tukey_res
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lmer(formula = outcome ~ timepoint + (1 | id), data = analysis_data)
## 
## Linear Hypotheses:
##            Estimate Std. Error z value Pr(>|z|)    
## 2 - 1 == 0   3.7953     0.5789   6.556   <1e-05 ***
## 3 - 1 == 0   3.6289     0.5988   6.060   <1e-05 ***
## 3 - 2 == 0  -0.1664     0.6024  -0.276    0.959    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Option 1 — Classical Cohen’s d

t1 <- analysis_data %>% filter(timepoint == "1") %>% pull(outcome)
t2 <- analysis_data %>% filter(timepoint == "2") %>% pull(outcome)
t3 <- analysis_data %>% filter(timepoint == "3") %>% pull(outcome)

cohens_d <- function(x, y) {
  lx <- length(x) - 1
  ly <- length(y) - 1
  md <- mean(x, na.rm = TRUE) - mean(y, na.rm = TRUE)
  csd <- lx * stats::var(x, na.rm = TRUE) + ly * stats::var(y, na.rm = TRUE)
  csd <- csd / (lx + ly)
  csd <- sqrt(csd)
  md / csd
}

d_2_1 <- cohens_d(t2, t1)
d_3_1 <- cohens_d(t3, t1)
d_3_2 <- cohens_d(t3, t2)

effect_data_cohen <- tibble(
  Comparison = c("2 - 1", "3 - 1", "3 - 2"),
  `Effect Size (Cohen)` = round(c(d_2_1, d_3_1, d_3_2), 2)
)
effect_data_cohen
## # A tibble: 3 × 2
##   Comparison `Effect Size (Cohen)`
##   <chr>                      <dbl>
## 1 2 - 1                       1.4 
## 2 3 - 1                       1.4 
## 3 3 - 2                      -0.09

Option 2 — Standardized Effect Size (Residual SD)

md_vec    <- unname(tukey_res$test[-(1:2)]$coefficients)
names_vec <- names(tukey_res$test[-(1:2)]$coefficients)

resid_sd <- summary(fit)$sigma

effect_data_residual <- tibble(
  Comparison = names_vec,
  `Effect Size (Residual)` = round(md_vec / resid_sd, 2)
)
effect_data_residual
## # A tibble: 3 × 2
##   Comparison `Effect Size (Residual)`
##   <chr>                         <dbl>
## 1 2 - 1                          1.71
## 2 3 - 1                          1.64
## 3 3 - 2                         -0.08

Option 3 — Standardized Effect Size (All Variance Components)

vc <- VarCorr(fit)
pooled_sd_all <- sqrt(as.data.frame(vc)$vcov[1] + as.data.frame(vc)$vcov[2])

effect_data_allvar <- tibble(
  Comparison = names_vec,
  `Effect Size (All Variance)` = round(md_vec / pooled_sd_all, 2)
)
effect_data_allvar
## # A tibble: 3 × 2
##   Comparison `Effect Size (All Variance)`
##   <chr>                             <dbl>
## 1 2 - 1                              1.49
## 2 3 - 1                              1.42
## 3 3 - 2                             -0.07

Combine Effect Sizes

effect_table <- effect_data_cohen %>%
  left_join(effect_data_residual, by = "Comparison") %>%
  left_join(effect_data_allvar,  by = "Comparison") %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))

effect_table
## # A tibble: 3 × 4
##   Comparison `Effect Size (Cohen)` Effect Size (Residua…¹ Effect Size (All Var…²
##   <chr>                      <dbl>                  <dbl>                  <dbl>
## 1 2 - 1                       1.4                    1.71                   1.49
## 2 3 - 1                       1.4                    1.64                   1.42
## 3 3 - 2                      -0.09                  -0.08                  -0.07
## # ℹ abbreviated names: ¹​`Effect Size (Residual)`, ²​`Effect Size (All Variance)`

Interpretation

effect_interpretation <- effect_data_cohen %>%
  mutate(
    `Hopkins Interpretation` = case_when(
      abs(`Effect Size (Cohen)`) <= 0.2 ~ "Trivial",
      abs(`Effect Size (Cohen)`) <= 0.6 ~ "Small",
      abs(`Effect Size (Cohen)`) <= 1.2 ~ "Moderate",
      abs(`Effect Size (Cohen)`) <= 2.0 ~ "Large",
      abs(`Effect Size (Cohen)`) <= 4.0 ~ "Very Large",
      TRUE ~ "Nearly Perfect"
    ),
    `Cohen Interpretation` = case_when(
      abs(`Effect Size (Cohen)`) < 0.5 ~ "Small",
      abs(`Effect Size (Cohen)`) < 0.8 ~ "Medium",
      TRUE ~ "Large"
    )
  )
effect_interpretation
## # A tibble: 3 × 4
##   Comparison `Effect Size (Cohen)` Hopkins Interpretati…¹ `Cohen Interpretation`
##   <chr>                      <dbl> <chr>                  <chr>                 
## 1 2 - 1                       1.4  Large                  Large                 
## 2 3 - 1                       1.4  Large                  Large                 
## 3 3 - 2                      -0.09 Trivial                Small                 
## # ℹ abbreviated name: ¹​`Hopkins Interpretation`

Spaghetti Plot (All Participants)

analysis_data <- analysis_data %>%
  mutate(timepoint = factor(timepoint, levels = c("1", "2", "3"), ordered = TRUE))

p_spaghetti <- ggplot(analysis_data, aes(x = timepoint, y = outcome, group = id)) +
  geom_line(color = "grey75", linewidth = 0.7, alpha = 0.6) +
  geom_point(color = "grey65", size = 1.8, alpha = 0.7) +
  stat_summary(aes(group = 1), fun = mean, geom = "line",
               color = "#045a8d", linewidth = 1.6) +
  stat_summary(aes(group = 1), fun = mean, geom = "point",
               color = "#045a8d", fill = "white", size = 3.5,
               shape = 21, stroke = 1) +
  stat_summary(aes(group = 1), fun.data = mean_cl_normal,
               geom = "errorbar", width = 0.12,
               color = "#045a8d", linewidth = 1) +
  labs(title = "Individual Trajectories of Total Scores",
       subtitle = "Gray = individual participants, Blue = group mean ± 95% CI",
       x = "Study Visit", y = "Total Score") +
  scale_x_discrete(labels = c("1" = "Visit 1\n(Baseline)",
                              "2" = "Visit 2\n(Week 2)",
                              "3" = "Visit 3\n(Week 6)")) +
  theme_minimal(base_size = 15) +
  theme(plot.title = element_text(face = "bold", size = 19, hjust = 0.5),
        plot.subtitle = element_text(size = 13, hjust = 0.5, margin = margin(b = 10)),
        axis.title = element_text(face = "bold", size = 14),
        axis.text = element_text(size = 12, color = "#333333"),
        panel.grid.major.y = element_line(color = "grey85", linetype = "dotted"),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        plot.margin = margin(20, 20, 20, 20))
p_spaghetti
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Removed 5 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Removed 5 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

Spaghetti Plot Colored by Decline (Baseline → Visit 3)

decline_flags <- analysis_data %>%
  dplyr::select(id, timepoint, outcome) %>%
  pivot_wider(names_from = timepoint, values_from = outcome, names_prefix = "t") %>%
  mutate(
    decline_1_2 = ifelse(!is.na(t1) & !is.na(t2) & t2 < t1, "Decline V1→V2", "No Decline V1→V2"),
    decline_1_3 = ifelse(!is.na(t1) & !is.na(t3) & t3 < t1, "Decline V1→V3", "No Decline V1→V3"),
    decline_2_3 = ifelse(!is.na(t2) & !is.na(t3) & t3 < t2, "Decline V2→V3", "No Decline V2→V3")
  ) %>%
  dplyr::select(id, decline_1_2, decline_1_3, decline_2_3)

dat_flagged <- analysis_data %>% left_join(decline_flags, by = "id")

p_line_by_1to3 <- ggplot(dat_flagged,
                         aes(x = timepoint, y = outcome, group = id,
                             color = decline_1_3)) +
  geom_line(linewidth = 0.9, alpha = 0.9) +
  geom_point(size = 2) +
  stat_summary(aes(group = 1), fun = mean, geom = "line",
               color = "black", linewidth = 1.2) +
  stat_summary(aes(group = 1), fun = mean, geom = "point",
               color = "black", size = 3) +
  scale_color_manual(values = c("Decline V1→V3" = "#d73027",
                                "No Decline V1→V3" = "#4575b4")) +
  theme_minimal(base_size = 15) +
  labs(title = "Spaghetti Plot Colored by Decline (Baseline → Visit 3)",
       x = "Timepoint", y = "Total Score", color = "Change")

p_line_by_1to3
## Warning: Removed 5 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Removed 5 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).