# ==============================================================================
# Project: Yoga for Chronic Stroke - Pilot RCT Re-analysis
# Analysis: Linear Mixed-Effects Models (LMER) & Trajectory Visualization
# Date: February 2026
# Author: [Your Name]
# ==============================================================================

# 1. Setup & Libraries ---------------------------------------------------------
if(!require(tidyverse)) install.packages("tidyverse")
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.2
## Warning: package 'purrr' was built under R version 4.2.2
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.2
## Warning: package 'forcats' was built under R version 4.2.2
## Warning: package 'lubridate' was built under R version 4.2.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.1
## ✔ purrr     1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
if(!require(lme4)) install.packages("lme4")
## Loading required package: lme4
## Warning: package 'lme4' was built under R version 4.2.2
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 4.2.2
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
if(!require(lmerTest)) install.packages("lmerTest") # For p-values in lmer
## Loading required package: lmerTest
## Warning: package 'lmerTest' was built under R version 4.2.3
## 
## Attaching package: 'lmerTest'
## 
## The following object is masked from 'package:lme4':
## 
##     lmer
## 
## The following object is masked from 'package:stats':
## 
##     step
if(!require(emmeans)) install.packages("emmeans")   # For marginal means
## Loading required package: emmeans
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
if(!require(gridExtra)) install.packages("gridExtra")
## Loading required package: gridExtra
## Warning: package 'gridExtra' was built under R version 4.2.2
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(tidyverse)
library(lme4)
library(lmerTest)
library(emmeans)
library(gridExtra)
# 2. Load and Clean Data -------------------------------------------------------
# Replace with your actual filename
data <- read.csv("PFXOnly(RAW)_ExcDropouts.csv", stringsAsFactors = FALSE)

# Standardize column names (remove whitespace)
names(data) <- trimws(names(data))

# Define Dropouts to Exclude (IDs 4, 21, 26)
# Note: ID 24 is kept as a completer per protocol
dropouts <- c(4, 21, 26)
df <- data %>% 
  filter(!ID %in% dropouts) %>%
  mutate(
    # Ensure Factors are set correctly for Modeling
    # Reference Levels: Timepoint = Pre, Group = Control
    ID = as.factor(ID),
    Group = factor(trimws(Group), levels = c("Control", "Intervention")),
    Timepoint = factor(trimws(Timepoint), levels = c("Pre", "Post")),
    
    # Ensure Outcomes are Numeric
    TUG = as.numeric(TUG),
    Walk = as.numeric(Walk),
    MAS = as.numeric(MAS),
    BBS = as.numeric(BBS),
    SISPhys = as.numeric(SISPhys),      # Stroke Impact Scale (Physical Composite)
    SISRecovery = as.numeric(SISRecovery), # Perceived Recovery (0-100)
    SISEmotional = as.numeric(SISEmotional),
    STAIY1 = as.numeric(STAIY1),        # State Anxiety
    GDS = as.numeric(GDS)               # Depression
  )
# 3. Handling Outliers & Transformations ---------------------------------------
# SENSITIVITY: TUG has an outlier (ID 2 = 120s fixed). 
# We create a Log-Transformed version for robust analysis.
df$logTUG <- log(df$TUG)

# 4. Statistical Function: LMER Loop -------------------------------------------
# This function runs the model, prints key results, and checks residuals
run_lmer_model <- function(outcome_name, dataset) {
  
  cat(paste0("\n==========================================================\n"))
  cat(paste0(" MODELING OUTCOME: ", outcome_name, "\n"))
  cat(paste0("==========================================================\n"))
  
  # Formula: Outcome ~ Group * Time + (Random Intercept per ID)
  f <- as.formula(paste(outcome_name, "~ Group * Timepoint + (1|ID)"))
  
  # Run Model
  model <- lmer(f, data = dataset)
  
  # Print Coefficients (Look for GroupIntervention:TimepointPost)
  print(coef(summary(model)))
  
  # CHECK ASSUMPTIONS: Normality of Residuals
  residuals <- resid(model)
  shapiro <- shapiro.test(residuals)
  
  cat("\n--- Normality Check (Shapiro-Wilk) ---\n")
  cat(paste("W =", round(shapiro$statistic, 4), ", p-value =", round(shapiro$p.value, 4), "\n"))
  if(shapiro$p.value < 0.05) {
    cat("WARNING: Residuals are NOT normal. Consider transformation or non-parametric check.\n")
  } else {
    cat("PASS: Residuals appear normal.\n")
  }
  
  # Diagnostic Plot (Q-Q Plot)
  qqnorm(residuals, main = paste("QQ Plot:", outcome_name))
  qqline(residuals, col = "red")
  
  return(model)
}
# 5. Run Analysis for Key Outcomes ---------------------------------------------

# --- PHYSICAL OUTCOMES ---
model_TUG     <- run_lmer_model("TUG", df)       # Original TUG (Sig w/ outlier included)
## 
## ==========================================================
##  MODELING OUTCOME: TUG
## ==========================================================
##                                  Estimate Std. Error       df    t value
## (Intercept)                     15.936667 12.1517586 12.03542  1.3114700
## GroupIntervention               11.269583 16.0752656 12.03542  0.7010511
## TimepointPost                    2.918333  0.9329737 12.00000  3.1279911
## GroupIntervention:TimepointPost -2.472083  1.2342082 12.00000 -2.0029712
##                                    Pr(>|t|)
## (Intercept)                     0.214167019
## GroupIntervention               0.496602463
## TimepointPost                   0.008724351
## GroupIntervention:TimepointPost 0.068298006
## 
## --- Normality Check (Shapiro-Wilk) ---
## W = 0.9718 , p-value = 0.6299 
## PASS: Residuals appear normal.

model_logTUG  <- run_lmer_model("logTUG", df)    # Robust TUG (Recommended)
## 
## ==========================================================
##  MODELING OUTCOME: logTUG
## ==========================================================
##                                   Estimate Std. Error       df    t value
## (Intercept)                      2.6860924 0.30354710 12.23348  8.8490135
## GroupIntervention                0.1192143 0.40155507 12.23348  0.2968815
## TimepointPost                    0.1821377 0.05959282 12.00000  3.0563694
## GroupIntervention:TimepointPost -0.1624319 0.07883390 12.00000 -2.0604322
##                                     Pr(>|t|)
## (Intercept)                     1.145464e-06
## GroupIntervention               7.715375e-01
## TimepointPost                   9.966055e-03
## GroupIntervention:TimepointPost 6.172369e-02
## 
## --- Normality Check (Shapiro-Wilk) ---
## W = 0.9826 , p-value = 0.908 
## PASS: Residuals appear normal.

model_Walk    <- run_lmer_model("Walk", df)
## 
## ==========================================================
##  MODELING OUTCOME: Walk
## ==========================================================
##                                  Estimate Std. Error       df   t value
## (Intercept)                     74.766667  18.234674 13.48773 4.1002470
## GroupIntervention                7.508333  24.122206 13.48773 0.3112623
## TimepointPost                    8.700000   8.827042 12.00000 0.9856077
## GroupIntervention:TimepointPost 13.000000  11.677078 12.00000 1.1132922
##                                    Pr(>|t|)
## (Intercept)                     0.001164091
## GroupIntervention               0.760359946
## TimepointPost                   0.343781861
## GroupIntervention:TimepointPost 0.287386708
## 
## --- Normality Check (Shapiro-Wilk) ---
## W = 0.9858 , p-value = 0.9589 
## PASS: Residuals appear normal.

model_MAS     <- run_lmer_model("MAS", df)
## 
## ==========================================================
##  MODELING OUTCOME: MAS
## ==========================================================
##                                    Estimate Std. Error       df    t value
## (Intercept)                     34.00000000  3.5815969 12.24415  9.4929723
## GroupIntervention                0.37500000  4.7380073 12.24415  0.0791472
## TimepointPost                    0.66666667  0.7188758 12.00000  0.9273739
## GroupIntervention:TimepointPost -0.04166667  0.9509833 12.00000 -0.0438143
##                                     Pr(>|t|)
## (Intercept)                     5.332322e-07
## GroupIntervention               9.381944e-01
## TimepointPost                   3.720094e-01
## GroupIntervention:TimepointPost 9.657731e-01
## 
## --- Normality Check (Shapiro-Wilk) ---
## W = 0.9711 , p-value = 0.6093 
## PASS: Residuals appear normal.

model_SISPhys <- run_lmer_model("SISPhys", df)   # Key Sig Finding (p=0.007)
## 
## ==========================================================
##  MODELING OUTCOME: SISPhys
## ==========================================================
##                                   Estimate Std. Error       df   t value
## (Intercept)                      66.122685   8.608848 13.51299  7.680782
## GroupIntervention               -15.818866  11.388436 13.51299 -1.389029
## TimepointPost                    -9.438657   4.200769 12.00000 -2.246888
## GroupIntervention:TimepointPost  15.028935   5.557095 12.00000  2.704459
##                                     Pr(>|t|)
## (Intercept)                     2.737516e-06
## GroupIntervention               1.872915e-01
## TimepointPost                   4.424464e-02
## GroupIntervention:TimepointPost 1.915073e-02
## 
## --- Normality Check (Shapiro-Wilk) ---
## W = 0.975 , p-value = 0.7178 
## PASS: Residuals appear normal.

# --- MENTAL / QoL OUTCOMES ---
model_GDS     <- run_lmer_model("GDS", df)
## 
## ==========================================================
##  MODELING OUTCOME: GDS
## ==========================================================
##                                   Estimate Std. Error      df    t value
## (Intercept)                      3.5000000  1.5016388 12.9279  2.3307869
## GroupIntervention                1.3750000  1.9864814 12.9279  0.6921786
## TimepointPost                   -0.3333333  0.5798507 12.0000 -0.5748606
## GroupIntervention:TimepointPost -0.7916667  0.7670704 12.0000 -1.0320652
##                                   Pr(>|t|)
## (Intercept)                     0.03660978
## GroupIntervention               0.50106907
## TimepointPost                   0.57600500
## GroupIntervention:TimepointPost 0.32239436
## 
## --- Normality Check (Shapiro-Wilk) ---
## W = 0.9676 , p-value = 0.5167 
## PASS: Residuals appear normal.

model_STAI    <- run_lmer_model("STAIY1", df)    # State Anxiety
## 
## ==========================================================
##  MODELING OUTCOME: STAIY1
## ==========================================================
##                                  Estimate Std. Error       df     t value
## (Intercept)                     37.000000   3.996273 13.92279  9.25862695
## GroupIntervention               -0.125000   5.286572 13.92279 -0.02364481
## TimepointPost                   -1.333333   2.183296 12.00000 -0.61069741
## GroupIntervention:TimepointPost -3.291667   2.888229 12.00000 -1.13968325
##                                     Pr(>|t|)
## (Intercept)                     2.514922e-07
## GroupIntervention               9.814715e-01
## TimepointPost                   5.527913e-01
## GroupIntervention:TimepointPost 2.766567e-01
## 
## --- Normality Check (Shapiro-Wilk) ---
## W = 0.9547 , p-value = 0.2597 
## PASS: Residuals appear normal.

model_Recov   <- run_lmer_model("SISRecovery", df)
## 
## ==========================================================
##  MODELING OUTCOME: SISRecovery
## ==========================================================
##                                 Estimate Std. Error       df    t value
## (Intercept)                       65.000   9.413117 13.83434  6.9052579
## GroupIntervention                -10.625  12.452383 13.83434 -0.8532503
## TimepointPost                     -5.000   5.030290 12.00000 -0.9939784
## GroupIntervention:TimepointPost    4.375   6.654448 12.00000  0.6574549
##                                     Pr(>|t|)
## (Intercept)                     7.739243e-06
## GroupIntervention               4.080514e-01
## TimepointPost                   3.398543e-01
## GroupIntervention:TimepointPost 5.232994e-01
## 
## --- Normality Check (Shapiro-Wilk) ---
## W = 0.9778 , p-value = 0.7954 
## PASS: Residuals appear normal.

model_Emot    <- run_lmer_model("SISEmotional", df)
## 
## ==========================================================
##  MODELING OUTCOME: SISEmotional
## ==========================================================
##                                   Estimate Std. Error       df     t value
## (Intercept)                     68.8078704   6.920633 13.60524  9.94242493
## GroupIntervention               -1.3599537   9.155136 13.60524 -0.14854543
## TimepointPost                    4.3402778   3.472945 12.00000  1.24973968
## GroupIntervention:TimepointPost -0.2604167   4.594275 12.00000 -0.05668286
##                                     Pr(>|t|)
## (Intercept)                     1.292542e-07
## GroupIntervention               8.840915e-01
## TimepointPost                   2.352197e-01
## GroupIntervention:TimepointPost 9.557307e-01
## 
## --- Normality Check (Shapiro-Wilk) ---
## W = 0.974 , p-value = 0.6904 
## PASS: Residuals appear normal.

# 6. Visualization: "Spaghetti Plot" with Group Means --------------------------
# This creates the modern style: Individual Lines (Grey) + Group Mean (Bold Color)

plot_trajectory <- function(data, y_var, y_label, title) {
  
  # Calculate Summary Statistics (Mean + SE for Error Bars)
  summary_data <- data %>%
    group_by(Group, Timepoint) %>%
    summarise(
      Mean = mean(.data[[y_var]], na.rm = TRUE),
      SE = sd(.data[[y_var]], na.rm = TRUE) / sqrt(n()),
      .groups = 'drop'
    )
  
  ggplot(data, aes(x = Timepoint, y = .data[[y_var]], group = ID)) +
    # 1. Individual Trajectories (Faint Grey Lines)
    geom_line(color = "grey80", alpha = 0.6) +
    geom_point(color = "grey80", size = 1, alpha = 0.6) +
    
    # 2. Group Means (Bold Colored Lines) - Note: group = Group to connect means
    geom_line(data = summary_data, 
              aes(x = Timepoint, y = Mean, group = Group, color = Group), 
              linewidth = 1.5) +
    
    # 3. Error Bars (95% CI or SE) - Here using Mean +/- SE for cleaner look
    geom_errorbar(data = summary_data,
                  aes(x = Timepoint, y = Mean, ymin = Mean - SE, ymax = Mean + SE, group = Group, color = Group),
                  width = 0.1, linewidth = 1) +
    
    # 4. Group Mean Points
    geom_point(data = summary_data, 
               aes(x = Timepoint, y = Mean, group = Group, color = Group), 
               size = 3) +
    
    # Styling
    scale_color_manual(values = c("Control" = "#E69F00", "Intervention" = "#56B4E9")) +
    labs(title = title, y = y_label, x = NULL) +
    theme_classic() +
    theme(
      legend.position = "bottom",
      plot.title = element_text(face = "bold", size = 12),
      axis.text = element_text(size = 10)
    )
}
# Generate Plots
p1 <- plot_trajectory(df, "TUG", "Time (sec)", "Timed Up & Go (TUG)")
p2 <- plot_trajectory(df, "Walk", "Distance (m)", "2-Minute Walk")
p3 <- plot_trajectory(df, "SISPhys", "Score (0-100)", "SIS Physical Domain")
p4 <- plot_trajectory(df, "STAIY1", "Score (20-80)", "Anxiety (STAI-State)")

# Combine into one grid
grid.arrange(p1, p2, p3, p4, ncol = 2)

# Save Plot
ggsave("Results_Interaction_Panel.png", arrangeGrob(p1, p2, p3, p4, ncol = 2), width = 10, height = 8, dpi = 300)

print("Analysis Complete. Plots saved to working directory.")
## [1] "Analysis Complete. Plots saved to working directory."