1 Setup and Data Preparation

1.1 Load Libraries

library(tidyverse)
library(lme4)
library(lmerTest)
library(broom.mixed)
library(MuMIn)
library(knitr)
library(kableExtra)
library(car)
library(moments)

1.2 Load and Clean Data

input_path <- '/Users/ab4233/Library/CloudStorage/OneDrive-YaleUniversity/NAPLS-2 Psychological Constructs/NAPLS2_CDSS_Merged.csv'
output_dir <- '/Users/ab4233/Library/CloudStorage/OneDrive-YaleUniversity/NAPLS-2 Psychological Constructs/'

df <- read_csv(input_path, show_col_types = FALSE)
df_clean <- df %>%
  filter(subject_type_3 != "Control") %>%
  mutate(
    VisitNum = as.numeric(gsub("BL", "1", visit_number)),
    ID = paste(site_number, subject_number, sep = "_"),
    
    # Symptom Domain Totals
    Pos_Total = as.numeric(c_sops_positive),
    Neg_Total = as.numeric(c_sops_negative),
    Gen_Total = as.numeric(c_sops_general),
    Dis_Total = as.numeric(c_sops_disorganization),
    
    # Individual P-Items
    P1 = as.numeric(p1_sops),
    P2 = as.numeric(p2_sops),
    P3 = as.numeric(p3_sops),
    P4 = as.numeric(p4_sops),
    P5 = as.numeric(p5_sops),
    
    # Functioning Measures
    GAF = as.numeric(gaf_current_score),
    GF_Role = as.numeric(gfrole_gfr_current),
    GF_Soc = as.numeric(gfsoc_gfs_current),
    
    # Schemas
    Self_Neg = as.numeric(bcss_total_self_negative),
    Other_Neg = as.numeric(bcss_total_other_negative),
    Self_Pos = as.numeric(bcss_total_self_positive),
    Other_Pos = as.numeric(bcss_total_other_positive),
    
    # Depression Control
    Depression = as.numeric(c_cdstotal),
    Negative = as.numeric(c_sops_negative)
  ) %>%
  filter(VisitNum != 6)

# Safety check
if (nrow(df_clean) == 0) {
  stop("CRITICAL ERROR: 'df_clean' is empty. Check filtering logic.")
}

1.3 Define Common Variables

# Schema predictors
schemas <- c("Self_Neg", "Other_Neg", "Self_Pos", "Other_Pos")
schemas_full <- c("Self_Negative", "Other_Negative", "Self_Positive", "Other_Positive")

# Symptom domains
domains <- c("Positive", "Negative", "General", "Disorganized")

# Individual P-items
p_items <- c("P1", "P2", "P3", "P4", "P5")
p_labels <- c(
  "P1" = "P1: Unusual Thoughts",
  "P2" = "P2: Suspiciousness",
  "P3" = "P3: Grandiosity",
  "P4" = "P4: Perceptual Abn.",
  "P5" = "P5: Disorg. Comm."
)

# Functioning outcomes
func_items <- c("GAF", "GF_Role", "GF_Soc")

# Color palette for plots
schema_cols <- c(
  "Self Negative" = "#D55E00", "Other Negative" = "#E69F00",
  "Self Positive" = "#009E73", "Other Positive" = "#56B4E9"
)

2 Analysis 1: Time-Lagged Models (Symptoms & Functioning with Depression Control)

2.1 Create Lagged Dataset

df_lagged <- df_clean %>%
  arrange(ID, VisitNum) %>%
  group_by(ID) %>%
  mutate(
    # Lagged P-Items
    Prev_P1 = lag(P1), Prev_P2 = lag(P2), Prev_P3 = lag(P3),
    Prev_P4 = lag(P4), Prev_P5 = lag(P5), Prev_Negative = lag(Negative),
    
    # Lagged Functioning
    Prev_GAF = lag(GAF), Prev_GF_Role = lag(GF_Role), Prev_GF_Soc = lag(GF_Soc),
    
    # Lagged Schemas
    Prev_Self_Neg = lag(Self_Neg), Prev_Other_Neg = lag(Other_Neg),
    Prev_Self_Pos = lag(Self_Pos), Prev_Other_Pos = lag(Other_Pos),
    
    # Lagged Control
    Prev_Dep = lag(Depression)
  ) %>%
  ungroup() %>%
  filter(VisitNum > 1)

# Safety check
if (nrow(df_lagged) == 0) {
  stop("CRITICAL ERROR: 'df_lagged' is empty. No subjects have VisitNum > 1.")
}

# Standardize
df_lagged_z <- df_lagged %>%
  mutate(across(where(is.numeric) & !VisitNum, ~ scale(.)[, 1]))

2.2 Run Time-Lagged Models

all_outcomes <- c(p_items, "Negative", func_items)
results_lagged <- list()

cat("\n--- RUNNING LAGGED MODELS ---\n")
## 
## --- RUNNING LAGGED MODELS ---
cat(sprintf("Outcomes: %d | Schemas: %d | Total pairs: %d\n", 
            length(all_outcomes), length(schemas), length(all_outcomes) * length(schemas)))
## Outcomes: 9 | Schemas: 4 | Total pairs: 36
for (outcome in all_outcomes) {
  for (schema in schemas) {
    prev_outcome <- paste0("Prev_", outcome)
    prev_schema <- paste0("Prev_", schema)
    outcome_type <- ifelse(outcome %in% c(p_items, "Negative"), "Symptom", "Functioning")
    
    # MODEL A: Schema(t-1) -> Outcome(t)
    f_a <- paste(outcome, "~", prev_outcome, "+", prev_schema, "+ Prev_Dep + VisitNum + (1|ID)")
    mod_a <- tryCatch(lmer(as.formula(f_a), data = df_lagged_z), error = function(e) NULL)
    
    # MODEL B: Outcome(t-1) -> Schema(t)
    f_b <- paste(schema, "~", prev_schema, "+", prev_outcome, "+ Prev_Dep + VisitNum + (1|ID)")
    mod_b <- tryCatch(lmer(as.formula(f_b), data = df_lagged_z), error = function(e) NULL)
    
    if (!is.null(mod_a) & !is.null(mod_b)) {
      res_a <- tidy(mod_a, conf.int = TRUE) %>% filter(term == prev_schema)
      res_b <- tidy(mod_b, conf.int = TRUE) %>% filter(term == prev_outcome)
      
      results_lagged[[paste(outcome, schema)]] <- tibble(
        Outcome = outcome, Outcome_Type = outcome_type, Schema = schema,
        Beta_Fwd = res_a$estimate, CI_Low_Fwd = res_a$conf.low, CI_High_Fwd = res_a$conf.high, P_Fwd = res_a$p.value,
        Beta_Rev = res_b$estimate, CI_Low_Rev = res_b$conf.low, CI_High_Rev = res_b$conf.high, P_Rev = res_b$p.value
      )
    }
  }
}

# Safety check
if (length(results_lagged) == 0) {
  stop("CRITICAL ERROR: No models converged successfully.")
}

2.3 FDR Correction and Summary

final_lagged <- bind_rows(results_lagged) %>%
  mutate(
    P_Fwd_FDR = p.adjust(P_Fwd, method = "fdr"),
    P_Rev_FDR = p.adjust(P_Rev, method = "fdr"),
    Conclusion = case_when(
      P_Fwd_FDR < 0.05 & P_Rev_FDR >= 0.05 ~ "Schema Drives Outcome",
      P_Fwd_FDR >= 0.05 & P_Rev_FDR < 0.05 ~ "Outcome Drives Schema",
      P_Fwd_FDR < 0.05 & P_Rev_FDR < 0.05  ~ "Bidirectional",
      TRUE ~ "No Link"
    )
  )

# Summary by outcome type
cat("\n--- RESULTS SUMMARY BY OUTCOME TYPE ---\n")
## 
## --- RESULTS SUMMARY BY OUTCOME TYPE ---
final_lagged %>%
  group_by(Outcome_Type, Conclusion) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(names_from = Conclusion, values_from = n, values_fill = 0) %>%
  print()
## # A tibble: 2 × 4
##   Outcome_Type `No Link` `Schema Drives Outcome` Bidirectional
##   <chr>            <int>                   <int>         <int>
## 1 Functioning          4                       8             0
## 2 Symptom             12                       8             4

2.4 Symptom Results Table

final_lagged %>%
  filter(Outcome_Type == "Symptom") %>%
  select(Outcome, Schema, Conclusion, Beta_Fwd, P_Fwd, P_Fwd_FDR, Beta_Rev, P_Rev, P_Rev_FDR) %>%
  arrange(P_Fwd_FDR) %>%
  kable(digits = 3, caption = "Directionality Analysis: Symptoms (Time-Lagged Mixed Models, FDR Corrected)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Directionality Analysis: Symptoms (Time-Lagged Mixed Models, FDR Corrected)
Outcome Schema Conclusion Beta_Fwd P_Fwd P_Fwd_FDR Beta_Rev P_Rev P_Rev_FDR
P2 Other_Neg Bidirectional 0.101 0.000 0.000 0.097 0.000 0.003
P3 Self_Pos Schema Drives Outcome 0.113 0.000 0.000 0.053 0.016 0.097
P3 Other_Neg Schema Drives Outcome 0.095 0.000 0.002 0.017 0.454 0.659
Negative Self_Pos Schema Drives Outcome -0.085 0.000 0.002 -0.055 0.025 0.114
P2 Self_Neg Bidirectional 0.094 0.001 0.005 0.073 0.004 0.039
P4 Other_Neg Schema Drives Outcome 0.082 0.002 0.007 0.008 0.728 0.899
P2 Other_Pos Bidirectional -0.069 0.002 0.007 -0.114 0.000 0.001
P1 Other_Neg Schema Drives Outcome 0.068 0.006 0.016 0.016 0.535 0.741
Negative Other_Pos Schema Drives Outcome -0.057 0.006 0.016 -0.006 0.821 0.899
P2 Self_Pos Bidirectional -0.065 0.007 0.017 -0.068 0.004 0.039
P1 Self_Neg Schema Drives Outcome 0.075 0.012 0.026 0.035 0.183 0.463
Negative Other_Neg Schema Drives Outcome 0.050 0.020 0.035 -0.007 0.779 0.899
Negative Self_Neg No Link 0.056 0.035 0.060 0.024 0.352 0.659
P5 Other_Pos No Link 0.042 0.045 0.070 -0.009 0.725 0.899
P4 Other_Pos No Link -0.044 0.076 0.110 -0.059 0.024 0.114
P5 Other_Neg No Link 0.037 0.093 0.128 -0.005 0.822 0.899
P1 Other_Pos No Link -0.031 0.178 0.238 -0.040 0.141 0.390
P3 Self_Neg No Link -0.034 0.270 0.335 -0.005 0.824 0.899
P3 Other_Pos No Link 0.018 0.454 0.511 0.001 0.955 0.973
P5 Self_Pos No Link 0.018 0.443 0.511 -0.023 0.306 0.648
P4 Self_Neg No Link 0.020 0.528 0.576 0.002 0.930 0.973
P5 Self_Neg No Link 0.010 0.728 0.771 -0.018 0.457 0.659
P1 Self_Pos No Link 0.007 0.775 0.797 -0.001 0.973 0.973
P4 Self_Pos No Link 0.004 0.889 0.889 0.020 0.384 0.659

2.5 Functioning Results Table

final_lagged %>%
  filter(Outcome_Type == "Functioning") %>%
  select(Outcome, Schema, Conclusion, Beta_Fwd, P_Fwd, P_Fwd_FDR, Beta_Rev, P_Rev, P_Rev_FDR) %>%
  arrange(P_Fwd_FDR) %>%
  kable(digits = 3, caption = "Directionality Analysis: Functioning (Time-Lagged Mixed Models, FDR Corrected)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Directionality Analysis: Functioning (Time-Lagged Mixed Models, FDR Corrected)
Outcome Schema Conclusion Beta_Fwd P_Fwd P_Fwd_FDR Beta_Rev P_Rev P_Rev_FDR
GAF Self_Pos Schema Drives Outcome 0.096 0.000 0.002 0.033 0.193 0.463
GAF Other_Neg Schema Drives Outcome -0.085 0.001 0.005 -0.045 0.081 0.266
GAF Other_Pos Schema Drives Outcome 0.077 0.002 0.007 -0.010 0.718 0.899
GF_Soc Self_Pos Schema Drives Outcome 0.064 0.003 0.011 0.038 0.093 0.278
GF_Role Other_Neg Schema Drives Outcome -0.061 0.006 0.017 -0.022 0.324 0.648
GAF Self_Neg Schema Drives Outcome -0.079 0.010 0.022 -0.051 0.059 0.212
GF_Soc Other_Pos Schema Drives Outcome 0.049 0.013 0.026 0.023 0.373 0.659
GF_Soc Other_Neg Schema Drives Outcome -0.050 0.015 0.028 -0.051 0.031 0.123
GF_Soc Self_Neg No Link -0.053 0.037 0.060 -0.064 0.008 0.061
GF_Role Self_Pos No Link 0.045 0.055 0.082 0.022 0.322 0.648
GF_Role Self_Neg No Link -0.033 0.232 0.298 -0.018 0.439 0.659
GF_Role Other_Pos No Link 0.020 0.344 0.413 -0.020 0.416 0.659

2.6 Forest Plots

plot_lagged <- final_lagged %>%
  transmute(
    Outcome, Outcome_Type, Schema,
    Direction = "Schema (t-1) \u2192 Outcome (t)",
    Beta = Beta_Fwd, CI_Low = CI_Low_Fwd, CI_High = CI_High_Fwd, P_FDR = P_Fwd_FDR
  ) %>%
  bind_rows(
    final_lagged %>%
      transmute(
        Outcome, Outcome_Type, Schema,
        Direction = "Outcome (t-1) \u2192 Schema (t)",
        Beta = Beta_Rev, CI_Low = CI_Low_Rev, CI_High = CI_High_Rev, P_FDR = P_Rev_FDR
      )
  ) %>%
  filter(P_FDR < 0.05) %>%
  mutate(
    Pair = paste0(Outcome, " \u00d7 ", Schema),
    Pair = forcats::fct_reorder(Pair, Beta)
  )

if (nrow(plot_lagged) > 0) {
  # Combined forest plot
  ggplot(plot_lagged, aes(x = Beta, y = Pair, group = Direction)) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    geom_pointrange(
      aes(xmin = CI_Low, xmax = CI_High, shape = Direction, color = Outcome_Type),
      position = position_dodge(width = 0.4)
    ) +
    facet_wrap(~ Direction, ncol = 1, scales = "free_y") +
    scale_color_manual(values = c("Symptom" = "#E64B35", "Functioning" = "#4DBBD5")) +
    labs(
      x = "Standardized lagged effect (β)", y = NULL, color = "Outcome Type",
      title = "Time-Lagged Associations Between Schemas and Clinical Outcomes",
      subtitle = "Paths significant after FDR correction (q < .05)"
    ) +
    theme_bw() +
    theme(
      panel.grid.minor = element_blank(), legend.position = "bottom",
      strip.background = element_rect(fill = "grey90"), strip.text = element_text(face = "bold")
    )
} else {
  message("No FDR-significant lagged effects to plot.")
}

2.7 Trajectory Plots for Significant Forward Effects

# Helper function
plot_schema_trajectory <- function(outcome, schema, df_clean, df_lagged, outcome_type = "Symptom") {
  prev_outcome <- paste0("Prev_", outcome)
  prev_schema <- paste0("Prev_", schema)
  
  dat_pair <- df_lagged %>%
    select(ID, VisitNum, all_of(c(outcome, prev_outcome, prev_schema, "Prev_Dep"))) %>%
    drop_na()
  
  if (nrow(dat_pair) == 0) {
    warning(paste("No usable rows for", outcome, "x", schema, "— skipping."))
    return(NULL)
  }
  
  form_plot <- as.formula(paste0(outcome, " ~ ", prev_outcome, " + ", prev_schema, " + Prev_Dep + VisitNum + (1|ID)"))
  mod_plot <- lmer(form_plot, data = dat_pair)
  
  schema_mean <- mean(dat_pair[[prev_schema]], na.rm = TRUE)
  schema_sd <- sd(dat_pair[[prev_schema]], na.rm = TRUE)
  low_schema <- schema_mean - schema_sd
  high_schema <- schema_mean + schema_sd
  dep_mean <- mean(dat_pair$Prev_Dep, na.rm = TRUE)
  
  out_start <- df_clean %>%
    filter(VisitNum == 1) %>%
    summarise(m = mean(.data[[outcome]], na.rm = TRUE)) %>%
    pull(m)
  
  visit_vals <- sort(unique(df_clean$VisitNum))
  
  simulate_traj <- function(schema_value, label) {
    prev_out <- out_start
    out <- tibble(VisitNum = visit_vals, Outcome_hat = NA_real_, Schema_level = label)
    
    for (i in seq_along(visit_vals)) {
      v <- visit_vals[i]
      if (v == min(visit_vals)) {
        out$Outcome_hat[i] <- prev_out
      } else {
        newdata <- tibble(ID = NA, VisitNum = v, Prev_Dep = dep_mean)
        newdata[[prev_outcome]] <- prev_out
        newdata[[prev_schema]] <- schema_value
        pred <- predict(mod_plot, newdata = newdata, re.form = NA)
        out$Outcome_hat[i] <- pred
        prev_out <- pred
      }
    }
    out
  }
  
  traj_all <- bind_rows(
    simulate_traj(low_schema, "Low schema (-1 SD)"),
    simulate_traj(high_schema, "High schema (+1 SD)")
  )
  
  y_label <- if (outcome_type == "Functioning") paste("Predicted", outcome, "score") else paste("Predicted", outcome, "severity")
  
  ggplot(traj_all, aes(x = VisitNum, y = Outcome_hat, color = Schema_level)) +
    geom_line(linewidth = 1) +
    geom_point(size = 2) +
    scale_x_continuous(breaks = visit_vals) +
    labs(
      x = "Visit", y = y_label, color = "Schema level",
      title = paste0("Predicted trajectories of ", outcome, " by ", schema, " (t-1)"),
      subtitle = "Model-based predictions controlling for prior outcome, prior depression, and time"
    ) +
    theme_bw() +
    theme(legend.position = "bottom", plot.title = element_text(face = "bold"))
}

# Identify significant forward effects and plot
sig_pairs <- final_lagged %>%
  filter(P_Fwd_FDR < 0.05) %>%
  select(Outcome, Outcome_Type, Schema) %>%
  distinct()

cat("\n--- SIGNIFICANT FORWARD EFFECTS (Schema -> Outcome) ---\n")
## 
## --- SIGNIFICANT FORWARD EFFECTS (Schema -> Outcome) ---
print(sig_pairs)
## # A tibble: 20 × 3
##    Outcome  Outcome_Type Schema   
##    <chr>    <chr>        <chr>    
##  1 P1       Symptom      Self_Neg 
##  2 P1       Symptom      Other_Neg
##  3 P2       Symptom      Self_Neg 
##  4 P2       Symptom      Other_Neg
##  5 P2       Symptom      Self_Pos 
##  6 P2       Symptom      Other_Pos
##  7 P3       Symptom      Other_Neg
##  8 P3       Symptom      Self_Pos 
##  9 P4       Symptom      Other_Neg
## 10 Negative Symptom      Other_Neg
## 11 Negative Symptom      Self_Pos 
## 12 Negative Symptom      Other_Pos
## 13 GAF      Functioning  Self_Neg 
## 14 GAF      Functioning  Other_Neg
## 15 GAF      Functioning  Self_Pos 
## 16 GAF      Functioning  Other_Pos
## 17 GF_Role  Functioning  Other_Neg
## 18 GF_Soc   Functioning  Other_Neg
## 19 GF_Soc   Functioning  Self_Pos 
## 20 GF_Soc   Functioning  Other_Pos
for (i in seq_len(nrow(sig_pairs))) {
  p_i <- plot_schema_trajectory(
    outcome = sig_pairs$Outcome[i],
    schema = sig_pairs$Schema[i],
    df_clean = df_clean,
    df_lagged = df_lagged,
    outcome_type = sig_pairs$Outcome_Type[i]
  )
  
  if (!is.null(p_i)) {
    if (sig_pairs$Outcome_Type[i] == "Symptom") {
      p_i <- p_i + scale_y_continuous(limits = c(0, 5))
    } else if (sig_pairs$Outcome[i] == "GAF") {
      p_i <- p_i + scale_y_continuous(limits = c(0, 100))
    } else {
      p_i <- p_i + scale_y_continuous(limits = c(1, 10))
    }
    print(p_i)
  }
}


3 Analysis 4: Time-Lagged Models (Without Depression Control)

3.1 Run Models

results_lagged_nodep <- list()

for (item in p_items) {
  for (schema in schemas) {
    prev_item <- paste0("Prev_", item)
    prev_schema <- paste0("Prev_", schema)
    
    # MODEL A: Schema(t-1) -> Symptom(t) - No depression control
    f_a <- paste(item, "~", prev_item, "+", prev_schema, "+ VisitNum + (1|ID)")
    mod_a <- tryCatch(lmer(as.formula(f_a), data = df_lagged_z), error = function(e) NULL)
    
    # MODEL B: Symptom(t-1) -> Schema(t) - No depression control
    f_b <- paste(schema, "~", prev_schema, "+", prev_item, "+ VisitNum + (1|ID)")
    mod_b <- tryCatch(lmer(as.formula(f_b), data = df_lagged_z), error = function(e) NULL)
    
    if (!is.null(mod_a) & !is.null(mod_b)) {
      res_a <- tidy(mod_a, conf.int = TRUE) %>% filter(term == prev_schema)
      res_b <- tidy(mod_b, conf.int = TRUE) %>% filter(term == prev_item)
      
      results_lagged_nodep[[paste(item, schema)]] <- tibble(
        Symptom = item, Schema = schema,
        Beta_Fwd = res_a$estimate, CI_Low_Fwd = res_a$conf.low, CI_High_Fwd = res_a$conf.high, P_Fwd = res_a$p.value,
        Beta_Rev = res_b$estimate, CI_Low_Rev = res_b$conf.low, CI_High_Rev = res_b$conf.high, P_Rev = res_b$p.value
      )
    }
  }
}

if (length(results_lagged_nodep) == 0) {
  stop("CRITICAL ERROR: No models converged successfully.")
}

3.2 FDR Correction and Results

final_lagged_nodep <- bind_rows(results_lagged_nodep) %>%
  mutate(
    P_Fwd_FDR = p.adjust(P_Fwd, method = "fdr"),
    P_Rev_FDR = p.adjust(P_Rev, method = "fdr"),
    Conclusion = case_when(
      P_Fwd_FDR < 0.05 & P_Rev_FDR >= 0.05 ~ "Schema Drives Symptom",
      P_Fwd_FDR >= 0.05 & P_Rev_FDR < 0.05 ~ "Symptom Drives Schema",
      P_Fwd_FDR < 0.05 & P_Rev_FDR < 0.05  ~ "Bidirectional",
      TRUE ~ "No Link"
    )
  )

final_lagged_nodep %>%
  select(Symptom, Schema, Conclusion, Beta_Fwd, P_Fwd, P_Fwd_FDR, Beta_Rev, P_Rev, P_Rev_FDR) %>%
  arrange(P_Fwd_FDR) %>%
  kable(digits = 3, caption = "Directionality Analysis: Time-Lagged Mixed Models (FDR Corrected, No Depression Control)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Directionality Analysis: Time-Lagged Mixed Models (FDR Corrected, No Depression Control)
Symptom Schema Conclusion Beta_Fwd P_Fwd P_Fwd_FDR Beta_Rev P_Rev P_Rev_FDR
P2 Self_Neg Bidirectional 0.121 0.000 0.000 0.082 0.001 0.005
P2 Other_Neg Bidirectional 0.116 0.000 0.000 0.100 0.000 0.001
P2 Self_Pos Bidirectional -0.090 0.000 0.000 -0.077 0.001 0.005
P3 Self_Pos Schema Drives Symptom 0.098 0.000 0.000 0.049 0.028 0.093
P2 Other_Pos Bidirectional -0.079 0.000 0.002 -0.126 0.000 0.000
P3 Other_Neg Schema Drives Symptom 0.082 0.001 0.002 0.014 0.536 0.786
P4 Other_Neg Schema Drives Symptom 0.082 0.001 0.003 0.013 0.602 0.786
P1 Other_Neg Schema Drives Symptom 0.074 0.002 0.004 0.024 0.332 0.664
P1 Self_Neg Schema Drives Symptom 0.074 0.002 0.005 0.041 0.111 0.278
P5 Other_Pos No Link 0.044 0.034 0.069 -0.015 0.551 0.786
P4 Other_Pos Symptom Drives Schema -0.044 0.068 0.124 -0.067 0.009 0.037
P1 Other_Pos No Link -0.034 0.138 0.231 -0.052 0.052 0.149
P5 Other_Neg No Link 0.028 0.184 0.283 -0.004 0.880 0.910
P4 Self_Neg No Link 0.030 0.243 0.348 0.008 0.756 0.890
P3 Self_Neg No Link -0.024 0.319 0.399 -0.005 0.844 0.910
P5 Self_Pos No Link 0.021 0.316 0.399 -0.029 0.194 0.432
P3 Other_Pos No Link 0.019 0.426 0.501 0.003 0.910 0.910
P1 Self_Pos No Link -0.006 0.814 0.882 -0.013 0.580 0.786
P4 Self_Pos No Link -0.004 0.875 0.882 0.011 0.629 0.786
P5 Self_Neg No Link -0.003 0.882 0.882 -0.015 0.556 0.786

3.3 Forest Plot

plot_lagged_nodep <- final_lagged_nodep %>%
  transmute(
    Symptom, Schema,
    Direction = "Schema (t-1) \u2192 Symptom (t)",
    Beta = Beta_Fwd, CI_Low = CI_Low_Fwd, CI_High = CI_High_Fwd, P_FDR = P_Fwd_FDR
  ) %>%
  bind_rows(
    final_lagged_nodep %>%
      transmute(
        Symptom, Schema,
        Direction = "Symptom (t-1) \u2192 Schema (t)",
        Beta = Beta_Rev, CI_Low = CI_Low_Rev, CI_High = CI_High_Rev, P_FDR = P_Rev_FDR
      )
  ) %>%
  filter(P_FDR < 0.05) %>%
  mutate(
    Pair = paste0(Symptom, " \u00d7 ", Schema),
    Pair = forcats::fct_reorder(Pair, Beta)
  )

if (nrow(plot_lagged_nodep) > 0) {
  ggplot(plot_lagged_nodep, aes(x = Beta, y = Pair, group = Direction)) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    geom_pointrange(
      aes(xmin = CI_Low, xmax = CI_High, shape = Direction),
      position = position_dodge(width = 0.4)
    ) +
    facet_wrap(~ Direction, ncol = 1, scales = "free_y") +
    labs(
      x = "Standardized lagged effect (β)", y = NULL,
      title = "Time-Lagged Associations Between Schemas and Positive Symptoms",
      subtitle = "Paths significant after FDR correction (q < .05)"
    ) +
    theme_bw() +
    theme(
      panel.grid.minor = element_blank(), legend.position = "bottom",
      strip.background = element_rect(fill = "grey90"), strip.text = element_text(face = "bold")
    )
} else {
  message("No FDR-significant lagged effects to plot.")
}


4 Supplemental Analyses

4.1 S1: Model Diagnostics

4.1.1 Residual Diagnostics for Key Models

# Select a representative model for diagnostics: P2 ~ Other_Neg (strongest bidirectional effect)
diag_model <- lmer(P2 ~ Prev_P2 + Prev_Other_Neg + Prev_Dep + VisitNum + (1|ID), data = df_lagged_z)

# Extract residuals and fitted values
diag_df <- data.frame(
  Fitted = fitted(diag_model),
  Residuals = residuals(diag_model),
  Std_Residuals = residuals(diag_model, scaled = TRUE)
)

# Create diagnostic plots
par(mfrow = c(2, 2))

# 1. Residuals vs Fitted (Homoscedasticity)
plot(diag_df$Fitted, diag_df$Residuals, 
     xlab = "Fitted Values", ylab = "Residuals",
     main = "Residuals vs Fitted", pch = 20, col = rgb(0,0,0,0.3))
abline(h = 0, col = "red", lty = 2)
lines(lowess(diag_df$Fitted, diag_df$Residuals), col = "blue")

# 2. Q-Q Plot (Normality)
qqnorm(diag_df$Std_Residuals, main = "Q-Q Plot of Residuals", pch = 20, col = rgb(0,0,0,0.3))
qqline(diag_df$Std_Residuals, col = "red")

# 3. Scale-Location Plot
plot(diag_df$Fitted, sqrt(abs(diag_df$Std_Residuals)),
     xlab = "Fitted Values", ylab = "√|Standardized Residuals|",
     main = "Scale-Location Plot", pch = 20, col = rgb(0,0,0,0.3))
lines(lowess(diag_df$Fitted, sqrt(abs(diag_df$Std_Residuals))), col = "blue")

# 4. Histogram of Residuals
hist(diag_df$Residuals, breaks = 30, main = "Distribution of Residuals", 
     xlab = "Residuals", col = "lightblue", border = "white")

par(mfrow = c(1, 1))

# Shapiro-Wilk test (on sample if N > 5000)
set.seed(123)
resid_sample <- if(length(diag_df$Residuals) > 5000) sample(diag_df$Residuals, 5000) else diag_df$Residuals
shapiro_test <- shapiro.test(resid_sample)

cat("\n=== RESIDUAL DIAGNOSTICS (Model: P2 ~ Prev_Other_Neg) ===\n")
## 
## === RESIDUAL DIAGNOSTICS (Model: P2 ~ Prev_Other_Neg) ===
cat("Shapiro-Wilk Test for Normality: W =", round(shapiro_test$statistic, 4), 
    ", p =", format.pval(shapiro_test$p.value, digits = 3), "\n")
## Shapiro-Wilk Test for Normality: W = 0.9885 , p = 1.67e-08
cat("Note: With large samples, minor deviations from normality are expected.\n")
## Note: With large samples, minor deviations from normality are expected.
cat("Visual inspection of Q-Q plot is more informative.\n")
## Visual inspection of Q-Q plot is more informative.

4.1.2 Diagnostics Across All Significant Models

# Run diagnostics for all significant forward-path models
sig_models <- final_lagged %>% filter(P_Fwd_FDR < 0.05)

diagnostics_list <- list()

for (i in 1:nrow(sig_models)) {
  outcome <- sig_models$Outcome[i]
  schema <- sig_models$Schema[i]
  prev_outcome <- paste0("Prev_", outcome)
  prev_schema <- paste0("Prev_", schema)
  
  f_text <- paste(outcome, "~", prev_outcome, "+", prev_schema, "+ Prev_Dep + VisitNum + (1|ID)")
  mod <- lmer(as.formula(f_text), data = df_lagged_z)
  
  resids <- residuals(mod)
  fitted_vals <- fitted(mod)
  
  # Compute diagnostic statistics
  resid_sample <- if(length(resids) > 5000) sample(resids, 5000) else resids
  sw_test <- shapiro.test(resid_sample)
  
  diagnostics_list[[i]] <- tibble(
    Outcome = outcome,
    Schema = schema,
    Resid_Mean = round(mean(resids), 4),
    Resid_SD = round(sd(resids), 3),
    Resid_Skew = round(moments::skewness(resids), 3),
    Resid_Kurtosis = round(moments::kurtosis(resids), 3),
    Shapiro_W = round(sw_test$statistic, 4),
    Shapiro_P = sw_test$p.value
  )
}

diagnostics_table <- bind_rows(diagnostics_list)

cat("\n=== RESIDUAL DIAGNOSTICS FOR ALL SIGNIFICANT MODELS ===\n")
## 
## === RESIDUAL DIAGNOSTICS FOR ALL SIGNIFICANT MODELS ===
diagnostics_table %>%
  kable(digits = 3, caption = "Residual Diagnostics for Significant Forward-Path Models") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Residual Diagnostics for Significant Forward-Path Models
Outcome Schema Resid_Mean Resid_SD Resid_Skew Resid_Kurtosis Shapiro_W Shapiro_P
P1 Self_Neg 0 0.653 -0.128 3.316 0.995 0
P1 Other_Neg 0 0.661 -0.102 3.300 0.995 0
P2 Self_Neg 0 0.758 0.162 4.049 0.988 0
P2 Other_Neg 0 0.756 0.168 4.090 0.989 0
P2 Self_Pos 0 0.758 0.154 4.057 0.988 0
P2 Other_Pos 0 0.758 0.173 4.089 0.988 0
P3 Other_Neg 0 0.687 0.831 5.066 0.908 0
P3 Self_Pos 0 0.675 0.850 5.106 0.911 0
P4 Other_Neg 0 0.696 -0.093 2.920 0.994 0
Negative Other_Neg 0 0.706 0.195 4.138 0.988 0
Negative Self_Pos 0 0.701 0.174 4.135 0.987 0
Negative Other_Pos 0 0.703 0.183 4.149 0.988 0
GAF Self_Neg 0 0.666 -0.009 4.890 0.978 0
GAF Other_Neg 0 0.684 -0.034 4.923 0.978 0
GAF Self_Pos 0 0.668 -0.001 4.811 0.978 0
GAF Other_Pos 0 0.665 -0.026 4.854 0.978 0
GF_Role Other_Neg 0 0.748 -0.550 4.697 0.964 0
GF_Soc Other_Neg 0 0.685 -0.176 4.414 0.987 0
GF_Soc Self_Pos 0 0.682 -0.140 4.423 0.986 0
GF_Soc Other_Pos 0 0.684 -0.145 4.390 0.987 0

4.2 S2: Multicollinearity Assessment

library(car)

# Check VIF for a representative model
vif_model <- lm(P2 ~ Prev_P2 + Prev_Self_Neg + Prev_Other_Neg + Prev_Self_Pos + Prev_Other_Pos + Prev_Dep + VisitNum, 
                data = df_lagged_z)

vif_values <- vif(vif_model)

cat("\n=== VARIANCE INFLATION FACTORS ===\n")
## 
## === VARIANCE INFLATION FACTORS ===
cat("Model: P2 ~ All Schemas + Controls\n")
## Model: P2 ~ All Schemas + Controls
cat("VIF > 5 indicates problematic multicollinearity\n\n")
## VIF > 5 indicates problematic multicollinearity
vif_df <- data.frame(
  Variable = names(vif_values),
  VIF = round(vif_values, 2)
)

vif_df %>%
  kable(caption = "Variance Inflation Factors") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Variance Inflation Factors
Variable VIF
Prev_P2 Prev_P2 1.40
Prev_Self_Neg Prev_Self_Neg 2.24
Prev_Other_Neg Prev_Other_Neg 1.49
Prev_Self_Pos Prev_Self_Pos 1.80
Prev_Other_Pos Prev_Other_Pos 1.47
Prev_Dep Prev_Dep 1.80
VisitNum VisitNum 1.10
# Schema intercorrelations
cat("\n=== SCHEMA INTERCORRELATIONS (Baseline) ===\n")
## 
## === SCHEMA INTERCORRELATIONS (Baseline) ===
schema_cors <- df_clean %>%
  filter(VisitNum == 1) %>%
  select(Self_Neg, Other_Neg, Self_Pos, Other_Pos) %>%
  cor(use = "pairwise.complete.obs")

schema_cors %>%
  round(3) %>%
  kable(caption = "Correlations Among Baseline Schemas") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Correlations Among Baseline Schemas
Self_Neg Other_Neg Self_Pos Other_Pos
Self_Neg 1.000 0.478 -0.482 -0.214
Other_Neg 0.478 1.000 -0.110 -0.122
Self_Pos -0.482 -0.110 1.000 0.491
Other_Pos -0.214 -0.122 0.491 1.000

4.3 S4: Sensitivity Analysis - Random Slopes

# Compare random intercept vs random slope models for key findings
cat("\n=== RANDOM SLOPES SENSITIVITY ANALYSIS ===\n")
## 
## === RANDOM SLOPES SENSITIVITY ANALYSIS ===
cat("Testing whether allowing random slopes for time improves model fit\n\n")
## Testing whether allowing random slopes for time improves model fit
# Test on the strongest effect: P2 ~ Other_Neg
mod_ri <- lmer(P2 ~ Prev_P2 + Prev_Other_Neg + Prev_Dep + VisitNum + (1|ID), 
               data = df_lagged_z, REML = FALSE)

mod_rs <- tryCatch({
  lmer(P2 ~ Prev_P2 + Prev_Other_Neg + Prev_Dep + VisitNum + (1 + VisitNum|ID), 
       data = df_lagged_z, REML = FALSE)
}, error = function(e) NULL)

if (!is.null(mod_rs)) {
  # Likelihood ratio test
  lr_test <- anova(mod_ri, mod_rs)
  
  cat("Model Comparison: P2 ~ Prev_Other_Neg\n")
  cat("Random Intercept AIC:", round(AIC(mod_ri), 2), "\n")
  cat("Random Slope AIC:", round(AIC(mod_rs), 2), "\n")
  cat("LRT Chi-sq:", round(lr_test$Chisq[2], 2), ", df =", lr_test$Df[2], 
      ", p =", format.pval(lr_test$`Pr(>Chisq)`[2], digits = 3), "\n\n")
  
  # Compare fixed effects
  cat("Fixed Effect Comparison (Prev_Other_Neg):\n")
  ri_coef <- fixef(mod_ri)["Prev_Other_Neg"]
  rs_coef <- fixef(mod_rs)["Prev_Other_Neg"]
  cat("  Random Intercept β:", round(ri_coef, 4), "\n")
  cat("  Random Slope β:", round(rs_coef, 4), "\n")
  cat("  Difference:", round(abs(ri_coef - rs_coef), 4), "\n")
} else {
  cat("Random slopes model did not converge - results from random intercept model are robust.\n")
}
## Model Comparison: P2 ~ Prev_Other_Neg
## Random Intercept AIC: 2942.86 
## Random Slope AIC: 2943.72 
## LRT Chi-sq: 3.14 , df = 2 , p = 0.208 
## 
## Fixed Effect Comparison (Prev_Other_Neg):
##   Random Intercept β: 0.1009 
##   Random Slope β: 0.0984 
##   Difference: 0.0025
# Run for all significant models
rs_comparison <- list()

for (i in 1:min(5, nrow(sig_models))) {  # Test top 5 significant models
  outcome <- sig_models$Outcome[i]
  schema <- sig_models$Schema[i]
  prev_outcome <- paste0("Prev_", outcome)
  prev_schema <- paste0("Prev_", schema)
  
  f_ri <- paste(outcome, "~", prev_outcome, "+", prev_schema, "+ Prev_Dep + VisitNum + (1|ID)")
  f_rs <- paste(outcome, "~", prev_outcome, "+", prev_schema, "+ Prev_Dep + VisitNum + (1 + VisitNum|ID)")
  
  mod_ri <- lmer(as.formula(f_ri), data = df_lagged_z, REML = FALSE)
  mod_rs <- tryCatch(lmer(as.formula(f_rs), data = df_lagged_z, REML = FALSE), error = function(e) NULL)
  
  if (!is.null(mod_rs)) {
    rs_comparison[[i]] <- tibble(
      Outcome = outcome,
      Schema = schema,
      AIC_RI = AIC(mod_ri),
      AIC_RS = AIC(mod_rs),
      Beta_RI = fixef(mod_ri)[prev_schema],
      Beta_RS = fixef(mod_rs)[prev_schema],
      Converged = TRUE
    )
  } else {
    rs_comparison[[i]] <- tibble(
      Outcome = outcome,
      Schema = schema,
      AIC_RI = AIC(mod_ri),
      AIC_RS = NA,
      Beta_RI = fixef(mod_ri)[prev_schema],
      Beta_RS = NA,
      Converged = FALSE
    )
  }
}

bind_rows(rs_comparison) %>%
  mutate(
    AIC_Diff = AIC_RS - AIC_RI,
    Beta_Diff = abs(Beta_RS - Beta_RI)
  ) %>%
  kable(digits = 3, caption = "Random Intercept vs Random Slope Model Comparison") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Random Intercept vs Random Slope Model Comparison
Outcome Schema AIC_RI AIC_RS Beta_RI Beta_RS Converged AIC_Diff Beta_Diff
P1 Self_Neg 2962.176 2952.892 0.075 0.072 TRUE -9.284 0.002
P1 Other_Neg 2963.367 2955.832 0.068 0.062 TRUE -7.535 0.006
P2 Self_Neg 2953.521 2954.265 0.094 0.091 TRUE 0.743 0.003
P2 Other_Neg 2942.861 2943.720 0.101 0.098 TRUE 0.859 0.003
P2 Self_Pos 2961.433 2960.313 -0.065 -0.069 TRUE -1.120 0.004

4.4 S5: Sensitivity Analysis - Non-Linear Time

cat("\n=== NON-LINEAR TIME SENSITIVITY ANALYSIS ===\n")
## 
## === NON-LINEAR TIME SENSITIVITY ANALYSIS ===
cat("Testing whether quadratic time improves model fit\n\n")
## Testing whether quadratic time improves model fit
# Add quadratic time term
df_lagged_z <- df_lagged_z %>%
  mutate(VisitNum_sq = VisitNum^2)

# Test on P2 ~ Other_Neg
mod_linear <- lmer(P2 ~ Prev_P2 + Prev_Other_Neg + Prev_Dep + VisitNum + (1|ID), 
                   data = df_lagged_z, REML = FALSE)

mod_quad <- lmer(P2 ~ Prev_P2 + Prev_Other_Neg + Prev_Dep + VisitNum + VisitNum_sq + (1|ID), 
                 data = df_lagged_z, REML = FALSE)

lr_test_time <- anova(mod_linear, mod_quad)

cat("Model: P2 ~ Prev_Other_Neg\n")
## Model: P2 ~ Prev_Other_Neg
cat("Linear Time AIC:", round(AIC(mod_linear), 2), "\n")
## Linear Time AIC: 2942.86
cat("Quadratic Time AIC:", round(AIC(mod_quad), 2), "\n")
## Quadratic Time AIC: 2942.98
cat("LRT Chi-sq:", round(lr_test_time$Chisq[2], 2), 
    ", p =", format.pval(lr_test_time$`Pr(>Chisq)`[2], digits = 3), "\n\n")
## LRT Chi-sq: 1.88 , p = 0.17
# Compare schema effects
cat("Schema Effect (Prev_Other_Neg):\n")
## Schema Effect (Prev_Other_Neg):
cat("  Linear model β:", round(fixef(mod_linear)["Prev_Other_Neg"], 4), "\n")
##   Linear model β: 0.1009
cat("  Quadratic model β:", round(fixef(mod_quad)["Prev_Other_Neg"], 4), "\n")
##   Quadratic model β: 0.0999

4.5 S6: Sensitivity Analysis - Without Depression Control

cat("\n=== SENSITIVITY: EFFECTS WITHOUT DEPRESSION CONTROL ===\n")
## 
## === SENSITIVITY: EFFECTS WITHOUT DEPRESSION CONTROL ===
cat("Comparing models with vs. without depression covariate\n\n")
## Comparing models with vs. without depression covariate
# Run models without depression for significant findings
nodep_comparison <- list()

for (i in 1:nrow(sig_models)) {
  outcome <- sig_models$Outcome[i]
  schema <- sig_models$Schema[i]
  prev_outcome <- paste0("Prev_", outcome)
  prev_schema <- paste0("Prev_", schema)
  
  # With depression
  f_dep <- paste(outcome, "~", prev_outcome, "+", prev_schema, "+ Prev_Dep + VisitNum + (1|ID)")
  mod_dep <- lmer(as.formula(f_dep), data = df_lagged_z)
  
  # Without depression
  f_nodep <- paste(outcome, "~", prev_outcome, "+", prev_schema, "+ VisitNum + (1|ID)")
  mod_nodep <- lmer(as.formula(f_nodep), data = df_lagged_z)
  
  coef_dep <- tidy(mod_dep) %>% filter(term == prev_schema)
  coef_nodep <- tidy(mod_nodep) %>% filter(term == prev_schema)
  
  nodep_comparison[[i]] <- tibble(
    Outcome = outcome,
    Schema = schema,
    Beta_WithDep = coef_dep$estimate,
    P_WithDep = coef_dep$p.value,
    Beta_NoDep = coef_nodep$estimate,
    P_NoDep = coef_nodep$p.value,
    Beta_Change_Pct = ((coef_nodep$estimate - coef_dep$estimate) / coef_dep$estimate) * 100
  )
}

nodep_table <- bind_rows(nodep_comparison)

nodep_table %>%
  mutate(
    Interpretation = case_when(
      Beta_Change_Pct > 20 ~ "Depression partially mediates",
      Beta_Change_Pct < -20 ~ "Depression suppresses effect",
      TRUE ~ "Effect robust to depression control"
    )
  ) %>%
  kable(digits = 3, caption = "Schema Effects With vs. Without Depression Control") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Schema Effects With vs. Without Depression Control
Outcome Schema Beta_WithDep P_WithDep Beta_NoDep P_NoDep Beta_Change_Pct Interpretation
P1 Self_Neg 0.075 0.012 0.074 0.002 -0.453 Effect robust to depression control
P1 Other_Neg 0.068 0.006 0.074 0.002 8.039 Effect robust to depression control
P2 Self_Neg 0.094 0.001 0.121 0.000 28.887 Depression partially mediates
P2 Other_Neg 0.101 0.000 0.116 0.000 15.213 Effect robust to depression control
P2 Self_Pos -0.065 0.007 -0.090 0.000 39.191 Depression partially mediates
P2 Other_Pos -0.069 0.002 -0.079 0.000 13.679 Effect robust to depression control
P3 Other_Neg 0.095 0.000 0.082 0.001 -13.594 Effect robust to depression control
P3 Self_Pos 0.113 0.000 0.098 0.000 -13.557 Effect robust to depression control
P4 Other_Neg 0.082 0.002 0.082 0.001 0.129 Effect robust to depression control
Negative Other_Neg 0.050 0.020 0.050 0.014 -0.111 Effect robust to depression control
Negative Self_Pos -0.085 0.000 -0.076 0.000 -9.915 Effect robust to depression control
Negative Other_Pos -0.057 0.006 -0.054 0.008 -5.388 Effect robust to depression control
GAF Self_Neg -0.079 0.010 -0.082 0.002 2.709 Effect robust to depression control
GAF Other_Neg -0.085 0.001 -0.090 0.000 5.458 Effect robust to depression control
GAF Self_Pos 0.096 0.000 0.099 0.000 3.085 Effect robust to depression control
GAF Other_Pos 0.077 0.002 0.079 0.001 3.128 Effect robust to depression control
GF_Role Other_Neg -0.061 0.006 -0.062 0.003 2.378 Effect robust to depression control
GF_Soc Other_Neg -0.050 0.015 -0.059 0.003 18.045 Effect robust to depression control
GF_Soc Self_Pos 0.064 0.003 0.069 0.001 8.424 Effect robust to depression control
GF_Soc Other_Pos 0.049 0.013 0.051 0.009 4.477 Effect robust to depression control
# Summary statistics
cat("\nSummary:\n")
## 
## Summary:
cat("Mean % change in beta when removing depression:", round(mean(nodep_table$Beta_Change_Pct), 1), "%\n")
## Mean % change in beta when removing depression: 5.5 %
cat("Effects that remain significant without depression:", 
    sum(nodep_table$P_NoDep < 0.05), "of", nrow(nodep_table), "\n")
## Effects that remain significant without depression: 20 of 20

4.6 S7: Cumulative Effect Illustration

cat("\n=== CUMULATIVE EFFECT ILLUSTRATION ===\n")
## 
## === CUMULATIVE EFFECT ILLUSTRATION ===
cat("Demonstrating how small per-wave effects accumulate over time\n\n")
## Demonstrating how small per-wave effects accumulate over time
# Use P2 ~ Other_Neg as example (β = 0.101)
beta_fwd <- 0.101
auto_reg <- 0.5  # Approximate autoregressive coefficient

# Simulate trajectories for +1 SD vs -1 SD on Other_Neg schema
n_waves <- 5
start_symptom <- 0  # Starting at mean (z-scored)

# High schema group (+1 SD)
traj_high <- numeric(n_waves)
traj_high[1] <- start_symptom
for (t in 2:n_waves) {
  traj_high[t] <- auto_reg * traj_high[t-1] + beta_fwd * 1  # +1 SD schema
}

# Low schema group (-1 SD)
traj_low <- numeric(n_waves)
traj_low[1] <- start_symptom
for (t in 2:n_waves) {
  traj_low[t] <- auto_reg * traj_low[t-1] + beta_fwd * (-1)  # -1 SD schema
}

# Plot
traj_df <- tibble(
  Wave = rep(1:n_waves, 2),
  Symptom = c(traj_high, traj_low),
  Group = rep(c("High Other-Negative (+1 SD)", "Low Other-Negative (-1 SD)"), each = n_waves)
)

ggplot(traj_df, aes(x = Wave, y = Symptom, color = Group)) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 3) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
  scale_color_manual(values = c("High Other-Negative (+1 SD)" = "#D55E00", 
                                 "Low Other-Negative (-1 SD)" = "#56B4E9")) +
  labs(
    title = "Cumulative Effect of Other-Negative Schema on P2 (Suspiciousness)",
    subtitle = paste0("Based on β = ", beta_fwd, " per wave (standardized)"),
    x = "Assessment Wave",
    y = "Predicted P2 Severity (z-score)",
    color = "Schema Group"
  ) +
  theme_bw(base_size = 12) +
  theme(legend.position = "bottom")

# Calculate final difference
final_diff <- traj_high[n_waves] - traj_low[n_waves]
cat("\nAfter", n_waves, "waves:\n")
## 
## After 5 waves:
cat("High schema group predicted P2:", round(traj_high[n_waves], 3), "SD\n")
## High schema group predicted P2: 0.189 SD
cat("Low schema group predicted P2:", round(traj_low[n_waves], 3), "SD\n")
## Low schema group predicted P2: -0.189 SD
cat("Cumulative difference:", round(final_diff, 3), "SD\n")
## Cumulative difference: 0.379 SD
cat("\nThis represents a", round(final_diff, 2), "SD difference in suspiciousness symptoms\n")
## 
## This represents a 0.38 SD difference in suspiciousness symptoms
cat("between individuals differing by 2 SD on Other-Negative schemas.\n")
## between individuals differing by 2 SD on Other-Negative schemas.

4.7 S8: Effect Size Context

cat("\n=== EFFECT SIZE CONTEXT ===\n")
## 
## === EFFECT SIZE CONTEXT ===
cat("Comparing effect sizes to published longitudinal studies in CHR/psychosis\n\n")
## Comparing effect sizes to published longitudinal studies in CHR/psychosis
# Summary of our significant effects
our_effects <- final_lagged %>%
  filter(P_Fwd_FDR < 0.05) %>%
  summarise(
    N_Significant = n(),
    Mean_Beta = mean(abs(Beta_Fwd)),
    Median_Beta = median(abs(Beta_Fwd)),
    Range_Beta = paste0(round(min(abs(Beta_Fwd)), 3), " - ", round(max(abs(Beta_Fwd)), 3))
  )

cat("Current Study - Significant Forward Effects:\n")
## Current Study - Significant Forward Effects:
cat("  N significant paths:", our_effects$N_Significant, "\n")
##   N significant paths: 20
cat("  Mean |β|:", round(our_effects$Mean_Beta, 3), "\n")
##   Mean |β|: 0.076
cat("  Median |β|:", round(our_effects$Median_Beta, 3), "\n")
##   Median |β|: 0.076
cat("  Range:", our_effects$Range_Beta, "\n\n")
##   Range: 0.049 - 0.113
# Context from literature (approximate values from similar studies)
context_df <- tibble(
  Study_Type = c(
    "Current Study (Schema → Symptom)",
    "Typical Cross-Lagged Effects (Hamaker et al.)",
    "Cognitive Models in Depression (Alloy et al.)",
    "Within-Person RI-CLPM Effects (Mulder & Hamaker)",
    "Social Cognition → Symptoms in CHR"
  ),
  Typical_Beta = c(
    round(our_effects$Mean_Beta, 3),
    "0.05 - 0.15",
    "0.08 - 0.12",
    "0.03 - 0.10",
    "0.05 - 0.10"
  ),
  Notes = c(
    "Controlled for depression, autoregressive effects",
    "Review of cross-lagged panel models",
    "Cognitive vulnerability studies",
    "After separating between/within-person variance",
    "Meta-analytic estimates"
  )
)

context_df %>%
  kable(caption = "Effect Size Comparison with Published Literature") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Effect Size Comparison with Published Literature
Study_Type Typical_Beta Notes
Current Study (Schema → Symptom) 0.076 Controlled for depression, autoregressive effects
Typical Cross-Lagged Effects (Hamaker et al.) 0.05 - 0.15 Review of cross-lagged panel models
Cognitive Models in Depression (Alloy et al.) 0.08 - 0.12 Cognitive vulnerability studies
Within-Person RI-CLPM Effects (Mulder & Hamaker) 0.03 - 0.10 After separating between/within-person variance
Social Cognition → Symptoms in CHR 0.05 - 0.10 Meta-analytic estimates
cat("\nInterpretation: Our effect sizes are consistent with the literature on\n")
## 
## Interpretation: Our effect sizes are consistent with the literature on
cat("cross-lagged psychological processes. Effects of β = 0.05-0.10 are typical\n")
## cross-lagged psychological processes. Effects of β = 0.05-0.10 are typical
cat("and considered meaningful when (a) controlling for autoregressive effects,\n")
## and considered meaningful when (a) controlling for autoregressive effects,
cat("(b) using rigorous longitudinal designs, and (c) studying clinical populations.\n")
## (b) using rigorous longitudinal designs, and (c) studying clinical populations.

4.8 S9: Sample Characteristics and Descriptive Statistics

cat("\n=== SAMPLE CHARACTERISTICS ===\n")
## 
## === SAMPLE CHARACTERISTICS ===
# Baseline descriptives
baseline_desc <- df_clean %>%
  filter(VisitNum == 1) %>%
  select(Pos_Total, Neg_Total, Gen_Total, Dis_Total,
         Self_Neg, Other_Neg, Self_Pos, Other_Pos,
         Depression, GAF, GF_Role, GF_Soc) %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "Value") %>%
  group_by(Variable) %>%
  summarise(
    N = sum(!is.na(Value)),
    Mean = mean(Value, na.rm = TRUE),
    SD = sd(Value, na.rm = TRUE),
    Min = min(Value, na.rm = TRUE),
    Max = max(Value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(Variable = factor(Variable, levels = c(
    "Pos_Total", "Neg_Total", "Gen_Total", "Dis_Total",
    "Self_Neg", "Other_Neg", "Self_Pos", "Other_Pos",
    "Depression", "GAF", "GF_Role", "GF_Soc"
  )))

baseline_desc %>%
  arrange(Variable) %>%
  kable(digits = 2, caption = "Baseline Descriptive Statistics") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Baseline Descriptive Statistics
Variable N Mean SD Min Max
Pos_Total 763 11.91 3.81 0 24
Neg_Total 738 11.89 6.07 0 30
Gen_Total 737 9.16 4.27 0 22
Dis_Total 739 5.16 3.16 0 20
Self_Neg 695 6.46 5.60 0 24
Other_Neg 694 7.87 6.10 0 24
Self_Pos 694 11.12 5.79 0 24
Other_Pos 694 9.73 5.39 0 24
Depression 722 5.82 4.75 0 21
GAF 759 48.39 10.75 18 88
GF_Role 753 5.95 2.15 1 10
GF_Soc 755 6.18 1.57 2 10
# Visit-level N
visit_n <- df_clean %>%
  group_by(VisitNum) %>%
  summarise(N = n_distinct(ID), .groups = "drop")

cat("\nSample Size by Visit:\n")
## 
## Sample Size by Visit:
visit_n %>%
  kable(caption = "N Participants per Visit") %>%
  kable_styling(bootstrap_options = "striped")
N Participants per Visit
VisitNum N
1 764
2 764
3 764
4 764
5 764
# Total observations used in lagged models
cat("\nTime-Lagged Model Sample:\n")
## 
## Time-Lagged Model Sample:
cat("Total observations (visits 2-5):", nrow(df_lagged), "\n")
## Total observations (visits 2-5): 3056
cat("Unique participants:", n_distinct(df_lagged$ID), "\n")
## Unique participants: 764

5 Export Results

# Save full lagged results
write_csv(final_lagged, paste0(output_dir, "Lagged_Models_Results_Full.csv"))
cat("Full results saved to:", paste0(output_dir, "Lagged_Models_Results_Full.csv"), "\n")
## Full results saved to: /Users/ab4233/Library/CloudStorage/OneDrive-YaleUniversity/NAPLS-2 Psychological Constructs/Lagged_Models_Results_Full.csv
# Save significant results only
sig_results <- final_lagged %>%
  filter(P_Fwd_FDR < 0.05 | P_Rev_FDR < 0.05)

write_csv(sig_results, paste0(output_dir, "Lagged_Models_Results_Significant.csv"))
cat("Significant results saved to:", paste0(output_dir, "Lagged_Models_Results_Significant.csv"), "\n")
## Significant results saved to: /Users/ab4233/Library/CloudStorage/OneDrive-YaleUniversity/NAPLS-2 Psychological Constructs/Lagged_Models_Results_Significant.csv
# Save diagnostics
write_csv(diagnostics_table, paste0(output_dir, "Model_Diagnostics.csv"))
cat("Diagnostics saved to:", paste0(output_dir, "Model_Diagnostics.csv"), "\n")
## Diagnostics saved to: /Users/ab4233/Library/CloudStorage/OneDrive-YaleUniversity/NAPLS-2 Psychological Constructs/Model_Diagnostics.csv