1 Setup and Data Preparation

1.1 Load Libraries

library(tidyverse)
library(lme4)
library(lmerTest)
library(broom.mixed)
library(MuMIn)
library(knitr)
library(kableExtra)
library(car)
library(moments)

1.2 Load and Clean Data

input_path <- '/Users/ab4233/Library/CloudStorage/OneDrive-YaleUniversity/NAPLS-2 Psychological Constructs/NAPLS2_CDSS_Merged.csv'
output_dir <- '/Users/ab4233/Library/CloudStorage/OneDrive-YaleUniversity/NAPLS-2 Psychological Constructs/'

df <- read_csv(input_path, show_col_types = FALSE)

df_clean <- df %>%
  filter(subject_type_3 != "Control") %>%
  mutate(
    VisitNum = as.numeric(gsub("BL", "1", visit_number)),
    ID = paste(site_number, subject_number, sep = "_"),
    
    # Symptom Domain Totals
    Pos_Total = as.numeric(c_sops_positive),
    Neg_Total = as.numeric(c_sops_negative),
    Gen_Total = as.numeric(c_sops_general),
    Dis_Total = as.numeric(c_sops_disorganization),
    
    # Individual P-Items
    P1 = as.numeric(p1_sops),
    P2 = as.numeric(p2_sops),
    P3 = as.numeric(p3_sops),
    P4 = as.numeric(p4_sops),
    P5 = as.numeric(p5_sops),
    
    # Functioning Measures
    GAF = as.numeric(gaf_current_score),
    GF_Role = as.numeric(gfrole_gfr_current),
    GF_Soc = as.numeric(gfsoc_gfs_current),
    
    # Schemas
    Self_Neg = as.numeric(bcss_total_self_negative),
    Other_Neg = as.numeric(bcss_total_other_negative),
    Self_Pos = as.numeric(bcss_total_self_positive),
    Other_Pos = as.numeric(bcss_total_other_positive),
    
    # Depression Control
    Depression = as.numeric(c_cdstotal),
    Negative = as.numeric(c_sops_negative)
  ) %>%
  filter(VisitNum != 6)

# Safety check
if (nrow(df_clean) == 0) {
  stop("CRITICAL ERROR: 'df_clean' is empty. Check filtering logic.")
}

1.3 Define Common Variables

# Schema predictors
schemas <- c("Self_Neg", "Other_Neg", "Self_Pos", "Other_Pos")
schemas_full <- c("Self_Negative", "Other_Negative", "Self_Positive", "Other_Positive")

# Symptom domains
domains <- c("Positive", "Negative", "General", "Disorganized")

# Individual P-items
p_items <- c("P1", "P2", "P3", "P4", "P5")
p_labels <- c(
  "P1" = "P1: Unusual Thoughts",
  "P2" = "P2: Suspiciousness",
  "P3" = "P3: Grandiosity",
  "P4" = "P4: Perceptual Abn.",
  "P5" = "P5: Disorg. Comm."
)

# Functioning outcomes
func_items <- c("GAF", "GF_Role", "GF_Soc")

# Color palette for plots
schema_cols <- c(
  "Self Negative" = "#D55E00", "Other Negative" = "#E69F00",
  "Self Positive" = "#009E73", "Other Positive" = "#56B4E9"
)

2 Analysis 1: Time-Lagged Models (Symptoms & Functioning with Depression Control)

2.1 Create Lagged Dataset

df_lagged <- df_clean %>%
  arrange(ID, VisitNum) %>%
  group_by(ID) %>%
  mutate(
    # Lagged P-Items
    Prev_P1 = lag(P1), Prev_P2 = lag(P2), Prev_P3 = lag(P3),
    Prev_P4 = lag(P4), Prev_P5 = lag(P5), Prev_Negative = lag(Negative),
    
    # Lagged Functioning
    Prev_GAF = lag(GAF), Prev_GF_Role = lag(GF_Role), Prev_GF_Soc = lag(GF_Soc),
    
    # Lagged Schemas
    Prev_Self_Neg = lag(Self_Neg), Prev_Other_Neg = lag(Other_Neg),
    Prev_Self_Pos = lag(Self_Pos), Prev_Other_Pos = lag(Other_Pos),
    
    # Lagged Control
    Prev_Dep = lag(Depression)
  ) %>%
  ungroup() %>%
  filter(VisitNum > 1)

# Safety check
if (nrow(df_lagged) == 0) {
  stop("CRITICAL ERROR: 'df_lagged' is empty. No subjects have VisitNum > 1.")
}

# Standardize
df_lagged_z <- df_lagged %>%
  mutate(across(where(is.numeric) & !VisitNum, ~ scale(.)[, 1]))

2.2 Run Time-Lagged Models

all_outcomes <- c(p_items, "Negative", func_items)
results_lagged <- list()

cat("\n--- RUNNING LAGGED MODELS ---\n")

## 
## --- RUNNING LAGGED MODELS ---

cat(sprintf("Outcomes: %d | Schemas: %d | Total pairs: %d\n", 
            length(all_outcomes), length(schemas), length(all_outcomes) * length(schemas)))

## Outcomes: 9 | Schemas: 4 | Total pairs: 36

for (outcome in all_outcomes) {
  for (schema in schemas) {
    prev_outcome <- paste0("Prev_", outcome)
    prev_schema <- paste0("Prev_", schema)
    outcome_type <- ifelse(outcome %in% c(p_items, "Negative"), "Symptom", "Functioning")
    
    # MODEL A: Schema(t-1) -> Outcome(t)
    f_a <- paste(outcome, "~", prev_outcome, "+", prev_schema, "+ Prev_Dep + VisitNum + (1|ID)")
    mod_a <- tryCatch(lmer(as.formula(f_a), data = df_lagged_z), error = function(e) NULL)
    
    # MODEL B: Outcome(t-1) -> Schema(t)
    f_b <- paste(schema, "~", prev_schema, "+", prev_outcome, "+ Prev_Dep + VisitNum + (1|ID)")
    mod_b <- tryCatch(lmer(as.formula(f_b), data = df_lagged_z), error = function(e) NULL)
    
    if (!is.null(mod_a) & !is.null(mod_b)) {
      res_a <- tidy(mod_a, conf.int = TRUE) %>% filter(term == prev_schema)
      res_b <- tidy(mod_b, conf.int = TRUE) %>% filter(term == prev_outcome)
      
      results_lagged[[paste(outcome, schema)]] <- tibble(
        Outcome = outcome, Outcome_Type = outcome_type, Schema = schema,
        Beta_Fwd = res_a$estimate, CI_Low_Fwd = res_a$conf.low, CI_High_Fwd = res_a$conf.high, P_Fwd = res_a$p.value,
        Beta_Rev = res_b$estimate, CI_Low_Rev = res_b$conf.low, CI_High_Rev = res_b$conf.high, P_Rev = res_b$p.value
      )
    }
  }
}

# Safety check
if (length(results_lagged) == 0) {
  stop("CRITICAL ERROR: No models converged successfully.")
}

2.3 FDR Correction and Summary

final_lagged <- bind_rows(results_lagged) %>%
  mutate(
    P_Fwd_FDR = p.adjust(P_Fwd, method = "fdr"),
    P_Rev_FDR = p.adjust(P_Rev, method = "fdr"),
    Conclusion = case_when(
      P_Fwd_FDR < 0.05 & P_Rev_FDR >= 0.05 ~ "Schema Drives Outcome",
      P_Fwd_FDR >= 0.05 & P_Rev_FDR < 0.05 ~ "Outcome Drives Schema",
      P_Fwd_FDR < 0.05 & P_Rev_FDR < 0.05  ~ "Bidirectional",
      TRUE ~ "No Link"
    )
  )

# Summary by outcome type
cat("\n--- RESULTS SUMMARY BY OUTCOME TYPE ---\n")

## 
## --- RESULTS SUMMARY BY OUTCOME TYPE ---

final_lagged %>%
  group_by(Outcome_Type, Conclusion) %>%
  summarise(n = n(), .groups = "drop") %>%
  pivot_wider(names_from = Conclusion, values_from = n, values_fill = 0) %>%
  print()

## # A tibble: 2 × 4
##   Outcome_Type `No Link` `Schema Drives Outcome` Bidirectional
##   <chr>            <int>                   <int>         <int>
## 1 Functioning          4                       8             0
## 2 Symptom             12                       8             4

2.4 Symptom Results Table

final_lagged %>%
  filter(Outcome_Type == "Symptom") %>%
  select(Outcome, Schema, Conclusion, Beta_Fwd, P_Fwd, P_Fwd_FDR, Beta_Rev, P_Rev, P_Rev_FDR) %>%
  arrange(P_Fwd_FDR) %>%
  kable(digits = 3, caption = "Directionality Analysis: Symptoms (Time-Lagged Mixed Models, FDR Corrected)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Directionality Analysis: Symptoms (Time-Lagged Mixed Models, FDR Corrected)
Outcome	Schema	Conclusion	Beta_Fwd	P_Fwd	P_Fwd_FDR	Beta_Rev	P_Rev	P_Rev_FDR
P2	Other_Neg	Bidirectional	0.101	0.000	0.000	0.097	0.000	0.003
P3	Self_Pos	Schema Drives Outcome	0.113	0.000	0.000	0.053	0.016	0.097
P3	Other_Neg	Schema Drives Outcome	0.095	0.000	0.002	0.017	0.454	0.659
Negative	Self_Pos	Schema Drives Outcome	-0.085	0.000	0.002	-0.055	0.025	0.114
P2	Self_Neg	Bidirectional	0.094	0.001	0.005	0.073	0.004	0.039
P4	Other_Neg	Schema Drives Outcome	0.082	0.002	0.007	0.008	0.728	0.899
P2	Other_Pos	Bidirectional	-0.069	0.002	0.007	-0.114	0.000	0.001
P1	Other_Neg	Schema Drives Outcome	0.068	0.006	0.016	0.016	0.535	0.741
Negative	Other_Pos	Schema Drives Outcome	-0.057	0.006	0.016	-0.006	0.821	0.899
P2	Self_Pos	Bidirectional	-0.065	0.007	0.017	-0.068	0.004	0.039
P1	Self_Neg	Schema Drives Outcome	0.075	0.012	0.026	0.035	0.183	0.463
Negative	Other_Neg	Schema Drives Outcome	0.050	0.020	0.035	-0.007	0.779	0.899
Negative	Self_Neg	No Link	0.056	0.035	0.060	0.024	0.352	0.659
P5	Other_Pos	No Link	0.042	0.045	0.070	-0.009	0.725	0.899
P4	Other_Pos	No Link	-0.044	0.076	0.110	-0.059	0.024	0.114
P5	Other_Neg	No Link	0.037	0.093	0.128	-0.005	0.822	0.899
P1	Other_Pos	No Link	-0.031	0.178	0.238	-0.040	0.141	0.390
P3	Self_Neg	No Link	-0.034	0.270	0.335	-0.005	0.824	0.899
P3	Other_Pos	No Link	0.018	0.454	0.511	0.001	0.955	0.973
P5	Self_Pos	No Link	0.018	0.443	0.511	-0.023	0.306	0.648
P4	Self_Neg	No Link	0.020	0.528	0.576	0.002	0.930	0.973
P5	Self_Neg	No Link	0.010	0.728	0.771	-0.018	0.457	0.659
P1	Self_Pos	No Link	0.007	0.775	0.797	-0.001	0.973	0.973
P4	Self_Pos	No Link	0.004	0.889	0.889	0.020	0.384	0.659

2.5 Functioning Results Table

final_lagged %>%
  filter(Outcome_Type == "Functioning") %>%
  select(Outcome, Schema, Conclusion, Beta_Fwd, P_Fwd, P_Fwd_FDR, Beta_Rev, P_Rev, P_Rev_FDR) %>%
  arrange(P_Fwd_FDR) %>%
  kable(digits = 3, caption = "Directionality Analysis: Functioning (Time-Lagged Mixed Models, FDR Corrected)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Directionality Analysis: Functioning (Time-Lagged Mixed Models, FDR Corrected)
Outcome	Schema	Conclusion	Beta_Fwd	P_Fwd	P_Fwd_FDR	Beta_Rev	P_Rev	P_Rev_FDR
GAF	Self_Pos	Schema Drives Outcome	0.096	0.000	0.002	0.033	0.193	0.463
GAF	Other_Neg	Schema Drives Outcome	-0.085	0.001	0.005	-0.045	0.081	0.266
GAF	Other_Pos	Schema Drives Outcome	0.077	0.002	0.007	-0.010	0.718	0.899
GF_Soc	Self_Pos	Schema Drives Outcome	0.064	0.003	0.011	0.038	0.093	0.278
GF_Role	Other_Neg	Schema Drives Outcome	-0.061	0.006	0.017	-0.022	0.324	0.648
GAF	Self_Neg	Schema Drives Outcome	-0.079	0.010	0.022	-0.051	0.059	0.212
GF_Soc	Other_Pos	Schema Drives Outcome	0.049	0.013	0.026	0.023	0.373	0.659
GF_Soc	Other_Neg	Schema Drives Outcome	-0.050	0.015	0.028	-0.051	0.031	0.123
GF_Soc	Self_Neg	No Link	-0.053	0.037	0.060	-0.064	0.008	0.061
GF_Role	Self_Pos	No Link	0.045	0.055	0.082	0.022	0.322	0.648
GF_Role	Self_Neg	No Link	-0.033	0.232	0.298	-0.018	0.439	0.659
GF_Role	Other_Pos	No Link	0.020	0.344	0.413	-0.020	0.416	0.659

2.6 Forest Plots

plot_lagged <- final_lagged %>%
  transmute(
    Outcome, Outcome_Type, Schema,
    Direction = "Schema (t-1) \u2192 Outcome (t)",
    Beta = Beta_Fwd, CI_Low = CI_Low_Fwd, CI_High = CI_High_Fwd, P_FDR = P_Fwd_FDR
  ) %>%
  bind_rows(
    final_lagged %>%
      transmute(
        Outcome, Outcome_Type, Schema,
        Direction = "Outcome (t-1) \u2192 Schema (t)",
        Beta = Beta_Rev, CI_Low = CI_Low_Rev, CI_High = CI_High_Rev, P_FDR = P_Rev_FDR
      )
  ) %>%
  filter(P_FDR < 0.05) %>%
  mutate(
    Pair = paste0(Outcome, " \u00d7 ", Schema),
    Pair = forcats::fct_reorder(Pair, Beta)
  )

if (nrow(plot_lagged) > 0) {
  # Combined forest plot
  ggplot(plot_lagged, aes(x = Beta, y = Pair, group = Direction)) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    geom_pointrange(
      aes(xmin = CI_Low, xmax = CI_High, shape = Direction, color = Outcome_Type),
      position = position_dodge(width = 0.4)
    ) +
    facet_wrap(~ Direction, ncol = 1, scales = "free_y") +
    scale_color_manual(values = c("Symptom" = "#E64B35", "Functioning" = "#4DBBD5")) +
    labs(
      x = "Standardized lagged effect (β)", y = NULL, color = "Outcome Type",
      title = "Time-Lagged Associations Between Schemas and Clinical Outcomes",
      subtitle = "Paths significant after FDR correction (q < .05)"
    ) +
    theme_bw() +
    theme(
      panel.grid.minor = element_blank(), legend.position = "bottom",
      strip.background = element_rect(fill = "grey90"), strip.text = element_text(face = "bold")
    )
} else {
  message("No FDR-significant lagged effects to plot.")
}

2.7 Trajectory Plots for Significant Forward Effects

# Helper function
plot_schema_trajectory <- function(outcome, schema, df_clean, df_lagged, outcome_type = "Symptom") {
  prev_outcome <- paste0("Prev_", outcome)
  prev_schema <- paste0("Prev_", schema)
  
  dat_pair <- df_lagged %>%
    select(ID, VisitNum, all_of(c(outcome, prev_outcome, prev_schema, "Prev_Dep"))) %>%
    drop_na()
  
  if (nrow(dat_pair) == 0) {
    warning(paste("No usable rows for", outcome, "x", schema, "— skipping."))
    return(NULL)
  }
  
  form_plot <- as.formula(paste0(outcome, " ~ ", prev_outcome, " + ", prev_schema, " + Prev_Dep + VisitNum + (1|ID)"))
  mod_plot <- lmer(form_plot, data = dat_pair)
  
  schema_mean <- mean(dat_pair[[prev_schema]], na.rm = TRUE)
  schema_sd <- sd(dat_pair[[prev_schema]], na.rm = TRUE)
  low_schema <- schema_mean - schema_sd
  high_schema <- schema_mean + schema_sd
  dep_mean <- mean(dat_pair$Prev_Dep, na.rm = TRUE)
  
  out_start <- df_clean %>%
    filter(VisitNum == 1) %>%
    summarise(m = mean(.data[[outcome]], na.rm = TRUE)) %>%
    pull(m)
  
  visit_vals <- sort(unique(df_clean$VisitNum))
  
  simulate_traj <- function(schema_value, label) {
    prev_out <- out_start
    out <- tibble(VisitNum = visit_vals, Outcome_hat = NA_real_, Schema_level = label)
    
    for (i in seq_along(visit_vals)) {
      v <- visit_vals[i]
      if (v == min(visit_vals)) {
        out$Outcome_hat[i] <- prev_out
      } else {
        newdata <- tibble(ID = NA, VisitNum = v, Prev_Dep = dep_mean)
        newdata[[prev_outcome]] <- prev_out
        newdata[[prev_schema]] <- schema_value
        pred <- predict(mod_plot, newdata = newdata, re.form = NA)
        out$Outcome_hat[i] <- pred
        prev_out <- pred
      }
    }
    out
  }
  
  traj_all <- bind_rows(
    simulate_traj(low_schema, "Low schema (-1 SD)"),
    simulate_traj(high_schema, "High schema (+1 SD)")
  )
  
  y_label <- if (outcome_type == "Functioning") paste("Predicted", outcome, "score") else paste("Predicted", outcome, "severity")
  
  ggplot(traj_all, aes(x = VisitNum, y = Outcome_hat, color = Schema_level)) +
    geom_line(linewidth = 1) +
    geom_point(size = 2) +
    scale_x_continuous(breaks = visit_vals) +
    labs(
      x = "Visit", y = y_label, color = "Schema level",
      title = paste0("Predicted trajectories of ", outcome, " by ", schema, " (t-1)"),
      subtitle = "Model-based predictions controlling for prior outcome, prior depression, and time"
    ) +
    theme_bw() +
    theme(legend.position = "bottom", plot.title = element_text(face = "bold"))
}

# Identify significant forward effects and plot
sig_pairs <- final_lagged %>%
  filter(P_Fwd_FDR < 0.05) %>%
  select(Outcome, Outcome_Type, Schema) %>%
  distinct()

cat("\n--- SIGNIFICANT FORWARD EFFECTS (Schema -> Outcome) ---\n")

## 
## --- SIGNIFICANT FORWARD EFFECTS (Schema -> Outcome) ---

print(sig_pairs)

## # A tibble: 20 × 3
##    Outcome  Outcome_Type Schema   
##    <chr>    <chr>        <chr>    
##  1 P1       Symptom      Self_Neg 
##  2 P1       Symptom      Other_Neg
##  3 P2       Symptom      Self_Neg 
##  4 P2       Symptom      Other_Neg
##  5 P2       Symptom      Self_Pos 
##  6 P2       Symptom      Other_Pos
##  7 P3       Symptom      Other_Neg
##  8 P3       Symptom      Self_Pos 
##  9 P4       Symptom      Other_Neg
## 10 Negative Symptom      Other_Neg
## 11 Negative Symptom      Self_Pos 
## 12 Negative Symptom      Other_Pos
## 13 GAF      Functioning  Self_Neg 
## 14 GAF      Functioning  Other_Neg
## 15 GAF      Functioning  Self_Pos 
## 16 GAF      Functioning  Other_Pos
## 17 GF_Role  Functioning  Other_Neg
## 18 GF_Soc   Functioning  Other_Neg
## 19 GF_Soc   Functioning  Self_Pos 
## 20 GF_Soc   Functioning  Other_Pos

for (i in seq_len(nrow(sig_pairs))) {
  p_i <- plot_schema_trajectory(
    outcome = sig_pairs$Outcome[i],
    schema = sig_pairs$Schema[i],
    df_clean = df_clean,
    df_lagged = df_lagged,
    outcome_type = sig_pairs$Outcome_Type[i]
  )
  
  if (!is.null(p_i)) {
    if (sig_pairs$Outcome_Type[i] == "Symptom") {
      p_i <- p_i + scale_y_continuous(limits = c(0, 5))
    } else if (sig_pairs$Outcome[i] == "GAF") {
      p_i <- p_i + scale_y_continuous(limits = c(0, 100))
    } else {
      p_i <- p_i + scale_y_continuous(limits = c(1, 10))
    }
    print(p_i)
  }
}

3 Analysis 4: Time-Lagged Models (Without Depression Control)

3.1 Run Models

results_lagged_nodep <- list()

for (item in p_items) {
  for (schema in schemas) {
    prev_item <- paste0("Prev_", item)
    prev_schema <- paste0("Prev_", schema)
    
    # MODEL A: Schema(t-1) -> Symptom(t) - No depression control
    f_a <- paste(item, "~", prev_item, "+", prev_schema, "+ VisitNum + (1|ID)")
    mod_a <- tryCatch(lmer(as.formula(f_a), data = df_lagged_z), error = function(e) NULL)
    
    # MODEL B: Symptom(t-1) -> Schema(t) - No depression control
    f_b <- paste(schema, "~", prev_schema, "+", prev_item, "+ VisitNum + (1|ID)")
    mod_b <- tryCatch(lmer(as.formula(f_b), data = df_lagged_z), error = function(e) NULL)
    
    if (!is.null(mod_a) & !is.null(mod_b)) {
      res_a <- tidy(mod_a, conf.int = TRUE) %>% filter(term == prev_schema)
      res_b <- tidy(mod_b, conf.int = TRUE) %>% filter(term == prev_item)
      
      results_lagged_nodep[[paste(item, schema)]] <- tibble(
        Symptom = item, Schema = schema,
        Beta_Fwd = res_a$estimate, CI_Low_Fwd = res_a$conf.low, CI_High_Fwd = res_a$conf.high, P_Fwd = res_a$p.value,
        Beta_Rev = res_b$estimate, CI_Low_Rev = res_b$conf.low, CI_High_Rev = res_b$conf.high, P_Rev = res_b$p.value
      )
    }
  }
}

if (length(results_lagged_nodep) == 0) {
  stop("CRITICAL ERROR: No models converged successfully.")
}

3.2 FDR Correction and Results

final_lagged_nodep <- bind_rows(results_lagged_nodep) %>%
  mutate(
    P_Fwd_FDR = p.adjust(P_Fwd, method = "fdr"),
    P_Rev_FDR = p.adjust(P_Rev, method = "fdr"),
    Conclusion = case_when(
      P_Fwd_FDR < 0.05 & P_Rev_FDR >= 0.05 ~ "Schema Drives Symptom",
      P_Fwd_FDR >= 0.05 & P_Rev_FDR < 0.05 ~ "Symptom Drives Schema",
      P_Fwd_FDR < 0.05 & P_Rev_FDR < 0.05  ~ "Bidirectional",
      TRUE ~ "No Link"
    )
  )

final_lagged_nodep %>%
  select(Symptom, Schema, Conclusion, Beta_Fwd, P_Fwd, P_Fwd_FDR, Beta_Rev, P_Rev, P_Rev_FDR) %>%
  arrange(P_Fwd_FDR) %>%
  kable(digits = 3, caption = "Directionality Analysis: Time-Lagged Mixed Models (FDR Corrected, No Depression Control)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Directionality Analysis: Time-Lagged Mixed Models (FDR Corrected, No Depression Control)
Symptom	Schema	Conclusion	Beta_Fwd	P_Fwd	P_Fwd_FDR	Beta_Rev	P_Rev	P_Rev_FDR
P2	Self_Neg	Bidirectional	0.121	0.000	0.000	0.082	0.001	0.005
P2	Other_Neg	Bidirectional	0.116	0.000	0.000	0.100	0.000	0.001
P2	Self_Pos	Bidirectional	-0.090	0.000	0.000	-0.077	0.001	0.005
P3	Self_Pos	Schema Drives Symptom	0.098	0.000	0.000	0.049	0.028	0.093
P2	Other_Pos	Bidirectional	-0.079	0.000	0.002	-0.126	0.000	0.000
P3	Other_Neg	Schema Drives Symptom	0.082	0.001	0.002	0.014	0.536	0.786
P4	Other_Neg	Schema Drives Symptom	0.082	0.001	0.003	0.013	0.602	0.786
P1	Other_Neg	Schema Drives Symptom	0.074	0.002	0.004	0.024	0.332	0.664
P1	Self_Neg	Schema Drives Symptom	0.074	0.002	0.005	0.041	0.111	0.278
P5	Other_Pos	No Link	0.044	0.034	0.069	-0.015	0.551	0.786
P4	Other_Pos	Symptom Drives Schema	-0.044	0.068	0.124	-0.067	0.009	0.037
P1	Other_Pos	No Link	-0.034	0.138	0.231	-0.052	0.052	0.149
P5	Other_Neg	No Link	0.028	0.184	0.283	-0.004	0.880	0.910
P4	Self_Neg	No Link	0.030	0.243	0.348	0.008	0.756	0.890
P3	Self_Neg	No Link	-0.024	0.319	0.399	-0.005	0.844	0.910
P5	Self_Pos	No Link	0.021	0.316	0.399	-0.029	0.194	0.432
P3	Other_Pos	No Link	0.019	0.426	0.501	0.003	0.910	0.910
P1	Self_Pos	No Link	-0.006	0.814	0.882	-0.013	0.580	0.786
P4	Self_Pos	No Link	-0.004	0.875	0.882	0.011	0.629	0.786
P5	Self_Neg	No Link	-0.003	0.882	0.882	-0.015	0.556	0.786

3.3 Forest Plot

plot_lagged_nodep <- final_lagged_nodep %>%
  transmute(
    Symptom, Schema,
    Direction = "Schema (t-1) \u2192 Symptom (t)",
    Beta = Beta_Fwd, CI_Low = CI_Low_Fwd, CI_High = CI_High_Fwd, P_FDR = P_Fwd_FDR
  ) %>%
  bind_rows(
    final_lagged_nodep %>%
      transmute(
        Symptom, Schema,
        Direction = "Symptom (t-1) \u2192 Schema (t)",
        Beta = Beta_Rev, CI_Low = CI_Low_Rev, CI_High = CI_High_Rev, P_FDR = P_Rev_FDR
      )
  ) %>%
  filter(P_FDR < 0.05) %>%
  mutate(
    Pair = paste0(Symptom, " \u00d7 ", Schema),
    Pair = forcats::fct_reorder(Pair, Beta)
  )

if (nrow(plot_lagged_nodep) > 0) {
  ggplot(plot_lagged_nodep, aes(x = Beta, y = Pair, group = Direction)) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    geom_pointrange(
      aes(xmin = CI_Low, xmax = CI_High, shape = Direction),
      position = position_dodge(width = 0.4)
    ) +
    facet_wrap(~ Direction, ncol = 1, scales = "free_y") +
    labs(
      x = "Standardized lagged effect (β)", y = NULL,
      title = "Time-Lagged Associations Between Schemas and Positive Symptoms",
      subtitle = "Paths significant after FDR correction (q < .05)"
    ) +
    theme_bw() +
    theme(
      panel.grid.minor = element_blank(), legend.position = "bottom",
      strip.background = element_rect(fill = "grey90"), strip.text = element_text(face = "bold")
    )
} else {
  message("No FDR-significant lagged effects to plot.")
}

4 Supplemental Analyses

4.1 S1: Model Diagnostics

4.1.1 Residual Diagnostics for Key Models

# Select a representative model for diagnostics: P2 ~ Other_Neg (strongest bidirectional effect)
diag_model <- lmer(P2 ~ Prev_P2 + Prev_Other_Neg + Prev_Dep + VisitNum + (1|ID), data = df_lagged_z)

# Extract residuals and fitted values
diag_df <- data.frame(
  Fitted = fitted(diag_model),
  Residuals = residuals(diag_model),
  Std_Residuals = residuals(diag_model, scaled = TRUE)
)

# Create diagnostic plots
par(mfrow = c(2, 2))

# 1. Residuals vs Fitted (Homoscedasticity)
plot(diag_df$Fitted, diag_df$Residuals, 
     xlab = "Fitted Values", ylab = "Residuals",
     main = "Residuals vs Fitted", pch = 20, col = rgb(0,0,0,0.3))
abline(h = 0, col = "red", lty = 2)
lines(lowess(diag_df$Fitted, diag_df$Residuals), col = "blue")

# 2. Q-Q Plot (Normality)
qqnorm(diag_df$Std_Residuals, main = "Q-Q Plot of Residuals", pch = 20, col = rgb(0,0,0,0.3))
qqline(diag_df$Std_Residuals, col = "red")

# 3. Scale-Location Plot
plot(diag_df$Fitted, sqrt(abs(diag_df$Std_Residuals)),
     xlab = "Fitted Values", ylab = "√|Standardized Residuals|",
     main = "Scale-Location Plot", pch = 20, col = rgb(0,0,0,0.3))
lines(lowess(diag_df$Fitted, sqrt(abs(diag_df$Std_Residuals))), col = "blue")

# 4. Histogram of Residuals
hist(diag_df$Residuals, breaks = 30, main = "Distribution of Residuals", 
     xlab = "Residuals", col = "lightblue", border = "white")

par(mfrow = c(1, 1))

# Shapiro-Wilk test (on sample if N > 5000)
set.seed(123)
resid_sample <- if(length(diag_df$Residuals) > 5000) sample(diag_df$Residuals, 5000) else diag_df$Residuals
shapiro_test <- shapiro.test(resid_sample)

cat("\n=== RESIDUAL DIAGNOSTICS (Model: P2 ~ Prev_Other_Neg) ===\n")

## 
## === RESIDUAL DIAGNOSTICS (Model: P2 ~ Prev_Other_Neg) ===

cat("Shapiro-Wilk Test for Normality: W =", round(shapiro_test$statistic, 4), 
    ", p =", format.pval(shapiro_test$p.value, digits = 3), "\n")

## Shapiro-Wilk Test for Normality: W = 0.9885 , p = 1.67e-08

cat("Note: With large samples, minor deviations from normality are expected.\n")

## Note: With large samples, minor deviations from normality are expected.

cat("Visual inspection of Q-Q plot is more informative.\n")

## Visual inspection of Q-Q plot is more informative.

4.1.2 Diagnostics Across All Significant Models

# Run diagnostics for all significant forward-path models
sig_models <- final_lagged %>% filter(P_Fwd_FDR < 0.05)

diagnostics_list <- list()

for (i in 1:nrow(sig_models)) {
  outcome <- sig_models$Outcome[i]
  schema <- sig_models$Schema[i]
  prev_outcome <- paste0("Prev_", outcome)
  prev_schema <- paste0("Prev_", schema)
  
  f_text <- paste(outcome, "~", prev_outcome, "+", prev_schema, "+ Prev_Dep + VisitNum + (1|ID)")
  mod <- lmer(as.formula(f_text), data = df_lagged_z)
  
  resids <- residuals(mod)
  fitted_vals <- fitted(mod)
  
  # Compute diagnostic statistics
  resid_sample <- if(length(resids) > 5000) sample(resids, 5000) else resids
  sw_test <- shapiro.test(resid_sample)
  
  diagnostics_list[[i]] <- tibble(
    Outcome = outcome,
    Schema = schema,
    Resid_Mean = round(mean(resids), 4),
    Resid_SD = round(sd(resids), 3),
    Resid_Skew = round(moments::skewness(resids), 3),
    Resid_Kurtosis = round(moments::kurtosis(resids), 3),
    Shapiro_W = round(sw_test$statistic, 4),
    Shapiro_P = sw_test$p.value
  )
}

diagnostics_table <- bind_rows(diagnostics_list)

cat("\n=== RESIDUAL DIAGNOSTICS FOR ALL SIGNIFICANT MODELS ===\n")

## 
## === RESIDUAL DIAGNOSTICS FOR ALL SIGNIFICANT MODELS ===

diagnostics_table %>%
  kable(digits = 3, caption = "Residual Diagnostics for Significant Forward-Path Models") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Residual Diagnostics for Significant Forward-Path Models
Outcome	Schema	Resid_SD	Resid_Skew	Resid_Kurtosis	Shapiro_W
P1	Self_Neg	0.653	-0.128	3.316	0.995
P1	Other_Neg	0.661	-0.102	3.300	0.995
P2	Self_Neg	0.758	0.162	4.049	0.988
P2	Other_Neg	0.756	0.168	4.090	0.989
P2	Self_Pos	0.758	0.154	4.057	0.988
P2	Other_Pos	0.758	0.173	4.089	0.988
P3	Other_Neg	0.687	0.831	5.066	0.908
P3	Self_Pos	0.675	0.850	5.106	0.911
P4	Other_Neg	0.696	-0.093	2.920	0.994
Negative	Other_Neg	0.706	0.195	4.138	0.988
Negative	Self_Pos	0.701	0.174	4.135	0.987
Negative	Other_Pos	0.703	0.183	4.149	0.988
GAF	Self_Neg	0.666	-0.009	4.890	0.978
GAF	Other_Neg	0.684	-0.034	4.923	0.978
GAF	Self_Pos	0.668	-0.001	4.811	0.978
GAF	Other_Pos	0.665	-0.026	4.854	0.978
GF_Role	Other_Neg	0.748	-0.550	4.697	0.964
GF_Soc	Other_Neg	0.685	-0.176	4.414	0.987
GF_Soc	Self_Pos	0.682	-0.140	4.423	0.986
GF_Soc	Other_Pos	0.684	-0.145	4.390	0.987

4.2 S2: Multicollinearity Assessment

library(car)

# Check VIF for a representative model
vif_model <- lm(P2 ~ Prev_P2 + Prev_Self_Neg + Prev_Other_Neg + Prev_Self_Pos + Prev_Other_Pos + Prev_Dep + VisitNum, 
                data = df_lagged_z)

vif_values <- vif(vif_model)

cat("\n=== VARIANCE INFLATION FACTORS ===\n")

## 
## === VARIANCE INFLATION FACTORS ===

cat("Model: P2 ~ All Schemas + Controls\n")

## Model: P2 ~ All Schemas + Controls

cat("VIF > 5 indicates problematic multicollinearity\n\n")

## VIF > 5 indicates problematic multicollinearity

vif_df <- data.frame(
  Variable = names(vif_values),
  VIF = round(vif_values, 2)
)

vif_df %>%
  kable(caption = "Variance Inflation Factors") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Variance Inflation Factors
	Variable	VIF
Prev_P2	Prev_P2	1.40
Prev_Self_Neg	Prev_Self_Neg	2.24
Prev_Other_Neg	Prev_Other_Neg	1.49
Prev_Self_Pos	Prev_Self_Pos	1.80
Prev_Other_Pos	Prev_Other_Pos	1.47
Prev_Dep	Prev_Dep	1.80
VisitNum	VisitNum	1.10

# Schema intercorrelations
cat("\n=== SCHEMA INTERCORRELATIONS (Baseline) ===\n")

## 
## === SCHEMA INTERCORRELATIONS (Baseline) ===

schema_cors <- df_clean %>%
  filter(VisitNum == 1) %>%
  select(Self_Neg, Other_Neg, Self_Pos, Other_Pos) %>%
  cor(use = "pairwise.complete.obs")

schema_cors %>%
  round(3) %>%
  kable(caption = "Correlations Among Baseline Schemas") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Correlations Among Baseline Schemas
	Self_Neg	Other_Neg	Self_Pos	Other_Pos
Self_Neg	1.000	0.478	-0.482	-0.214
Other_Neg	0.478	1.000	-0.110	-0.122
Self_Pos	-0.482	-0.110	1.000	0.491
Other_Pos	-0.214	-0.122	0.491	1.000

4.3 S4: Sensitivity Analysis - Random Slopes

# Compare random intercept vs random slope models for key findings
cat("\n=== RANDOM SLOPES SENSITIVITY ANALYSIS ===\n")

## 
## === RANDOM SLOPES SENSITIVITY ANALYSIS ===

cat("Testing whether allowing random slopes for time improves model fit\n\n")

## Testing whether allowing random slopes for time improves model fit

# Test on the strongest effect: P2 ~ Other_Neg
mod_ri <- lmer(P2 ~ Prev_P2 + Prev_Other_Neg + Prev_Dep + VisitNum + (1|ID), 
               data = df_lagged_z, REML = FALSE)

mod_rs <- tryCatch({
  lmer(P2 ~ Prev_P2 + Prev_Other_Neg + Prev_Dep + VisitNum + (1 + VisitNum|ID), 
       data = df_lagged_z, REML = FALSE)
}, error = function(e) NULL)

if (!is.null(mod_rs)) {
  # Likelihood ratio test
  lr_test <- anova(mod_ri, mod_rs)
  
  cat("Model Comparison: P2 ~ Prev_Other_Neg\n")
  cat("Random Intercept AIC:", round(AIC(mod_ri), 2), "\n")
  cat("Random Slope AIC:", round(AIC(mod_rs), 2), "\n")
  cat("LRT Chi-sq:", round(lr_test$Chisq[2], 2), ", df =", lr_test$Df[2], 
      ", p =", format.pval(lr_test$`Pr(>Chisq)`[2], digits = 3), "\n\n")
  
  # Compare fixed effects
  cat("Fixed Effect Comparison (Prev_Other_Neg):\n")
  ri_coef <- fixef(mod_ri)["Prev_Other_Neg"]
  rs_coef <- fixef(mod_rs)["Prev_Other_Neg"]
  cat("  Random Intercept β:", round(ri_coef, 4), "\n")
  cat("  Random Slope β:", round(rs_coef, 4), "\n")
  cat("  Difference:", round(abs(ri_coef - rs_coef), 4), "\n")
} else {
  cat("Random slopes model did not converge - results from random intercept model are robust.\n")
}

## Model Comparison: P2 ~ Prev_Other_Neg
## Random Intercept AIC: 2942.86 
## Random Slope AIC: 2943.72 
## LRT Chi-sq: 3.14 , df = 2 , p = 0.208 
## 
## Fixed Effect Comparison (Prev_Other_Neg):
##   Random Intercept β: 0.1009 
##   Random Slope β: 0.0984 
##   Difference: 0.0025

# Run for all significant models
rs_comparison <- list()

for (i in 1:min(5, nrow(sig_models))) {  # Test top 5 significant models
  outcome <- sig_models$Outcome[i]
  schema <- sig_models$Schema[i]
  prev_outcome <- paste0("Prev_", outcome)
  prev_schema <- paste0("Prev_", schema)
  
  f_ri <- paste(outcome, "~", prev_outcome, "+", prev_schema, "+ Prev_Dep + VisitNum + (1|ID)")
  f_rs <- paste(outcome, "~", prev_outcome, "+", prev_schema, "+ Prev_Dep + VisitNum + (1 + VisitNum|ID)")
  
  mod_ri <- lmer(as.formula(f_ri), data = df_lagged_z, REML = FALSE)
  mod_rs <- tryCatch(lmer(as.formula(f_rs), data = df_lagged_z, REML = FALSE), error = function(e) NULL)
  
  if (!is.null(mod_rs)) {
    rs_comparison[[i]] <- tibble(
      Outcome = outcome,
      Schema = schema,
      AIC_RI = AIC(mod_ri),
      AIC_RS = AIC(mod_rs),
      Beta_RI = fixef(mod_ri)[prev_schema],
      Beta_RS = fixef(mod_rs)[prev_schema],
      Converged = TRUE
    )
  } else {
    rs_comparison[[i]] <- tibble(
      Outcome = outcome,
      Schema = schema,
      AIC_RI = AIC(mod_ri),
      AIC_RS = NA,
      Beta_RI = fixef(mod_ri)[prev_schema],
      Beta_RS = NA,
      Converged = FALSE
    )
  }
}

bind_rows(rs_comparison) %>%
  mutate(
    AIC_Diff = AIC_RS - AIC_RI,
    Beta_Diff = abs(Beta_RS - Beta_RI)
  ) %>%
  kable(digits = 3, caption = "Random Intercept vs Random Slope Model Comparison") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Random Intercept vs Random Slope Model Comparison
Outcome	Schema	AIC_RI	AIC_RS	Beta_RI	Beta_RS	Converged	AIC_Diff	Beta_Diff
P1	Self_Neg	2962.176	2952.892	0.075	0.072	TRUE	-9.284	0.002
P1	Other_Neg	2963.367	2955.832	0.068	0.062	TRUE	-7.535	0.006
P2	Self_Neg	2953.521	2954.265	0.094	0.091	TRUE	0.743	0.003
P2	Other_Neg	2942.861	2943.720	0.101	0.098	TRUE	0.859	0.003
P2	Self_Pos	2961.433	2960.313	-0.065	-0.069	TRUE	-1.120	0.004

4.4 S5: Sensitivity Analysis - Non-Linear Time

cat("\n=== NON-LINEAR TIME SENSITIVITY ANALYSIS ===\n")

## 
## === NON-LINEAR TIME SENSITIVITY ANALYSIS ===

cat("Testing whether quadratic time improves model fit\n\n")

## Testing whether quadratic time improves model fit

# Add quadratic time term
df_lagged_z <- df_lagged_z %>%
  mutate(VisitNum_sq = VisitNum^2)

# Test on P2 ~ Other_Neg
mod_linear <- lmer(P2 ~ Prev_P2 + Prev_Other_Neg + Prev_Dep + VisitNum + (1|ID), 
                   data = df_lagged_z, REML = FALSE)

mod_quad <- lmer(P2 ~ Prev_P2 + Prev_Other_Neg + Prev_Dep + VisitNum + VisitNum_sq + (1|ID), 
                 data = df_lagged_z, REML = FALSE)

lr_test_time <- anova(mod_linear, mod_quad)

cat("Model: P2 ~ Prev_Other_Neg\n")

## Model: P2 ~ Prev_Other_Neg

cat("Linear Time AIC:", round(AIC(mod_linear), 2), "\n")

## Linear Time AIC: 2942.86

cat("Quadratic Time AIC:", round(AIC(mod_quad), 2), "\n")

## Quadratic Time AIC: 2942.98

cat("LRT Chi-sq:", round(lr_test_time$Chisq[2], 2), 
    ", p =", format.pval(lr_test_time$`Pr(>Chisq)`[2], digits = 3), "\n\n")

## LRT Chi-sq: 1.88 , p = 0.17

# Compare schema effects
cat("Schema Effect (Prev_Other_Neg):\n")

## Schema Effect (Prev_Other_Neg):

cat("  Linear model β:", round(fixef(mod_linear)["Prev_Other_Neg"], 4), "\n")

##   Linear model β: 0.1009

cat("  Quadratic model β:", round(fixef(mod_quad)["Prev_Other_Neg"], 4), "\n")

##   Quadratic model β: 0.0999

4.5 S6: Sensitivity Analysis - Without Depression Control

cat("\n=== SENSITIVITY: EFFECTS WITHOUT DEPRESSION CONTROL ===\n")

## 
## === SENSITIVITY: EFFECTS WITHOUT DEPRESSION CONTROL ===

cat("Comparing models with vs. without depression covariate\n\n")

## Comparing models with vs. without depression covariate

# Run models without depression for significant findings
nodep_comparison <- list()

for (i in 1:nrow(sig_models)) {
  outcome <- sig_models$Outcome[i]
  schema <- sig_models$Schema[i]
  prev_outcome <- paste0("Prev_", outcome)
  prev_schema <- paste0("Prev_", schema)
  
  # With depression
  f_dep <- paste(outcome, "~", prev_outcome, "+", prev_schema, "+ Prev_Dep + VisitNum + (1|ID)")
  mod_dep <- lmer(as.formula(f_dep), data = df_lagged_z)
  
  # Without depression
  f_nodep <- paste(outcome, "~", prev_outcome, "+", prev_schema, "+ VisitNum + (1|ID)")
  mod_nodep <- lmer(as.formula(f_nodep), data = df_lagged_z)
  
  coef_dep <- tidy(mod_dep) %>% filter(term == prev_schema)
  coef_nodep <- tidy(mod_nodep) %>% filter(term == prev_schema)
  
  nodep_comparison[[i]] <- tibble(
    Outcome = outcome,
    Schema = schema,
    Beta_WithDep = coef_dep$estimate,
    P_WithDep = coef_dep$p.value,
    Beta_NoDep = coef_nodep$estimate,
    P_NoDep = coef_nodep$p.value,
    Beta_Change_Pct = ((coef_nodep$estimate - coef_dep$estimate) / coef_dep$estimate) * 100
  )
}

nodep_table <- bind_rows(nodep_comparison)

nodep_table %>%
  mutate(
    Interpretation = case_when(
      Beta_Change_Pct > 20 ~ "Depression partially mediates",
      Beta_Change_Pct < -20 ~ "Depression suppresses effect",
      TRUE ~ "Effect robust to depression control"
    )
  ) %>%
  kable(digits = 3, caption = "Schema Effects With vs. Without Depression Control") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Schema Effects With vs. Without Depression Control
Outcome	Schema	Beta_WithDep	P_WithDep	Beta_NoDep	P_NoDep	Beta_Change_Pct	Interpretation
P1	Self_Neg	0.075	0.012	0.074	0.002	-0.453	Effect robust to depression control
P1	Other_Neg	0.068	0.006	0.074	0.002	8.039	Effect robust to depression control
P2	Self_Neg	0.094	0.001	0.121	0.000	28.887	Depression partially mediates
P2	Other_Neg	0.101	0.000	0.116	0.000	15.213	Effect robust to depression control
P2	Self_Pos	-0.065	0.007	-0.090	0.000	39.191	Depression partially mediates
P2	Other_Pos	-0.069	0.002	-0.079	0.000	13.679	Effect robust to depression control
P3	Other_Neg	0.095	0.000	0.082	0.001	-13.594	Effect robust to depression control
P3	Self_Pos	0.113	0.000	0.098	0.000	-13.557	Effect robust to depression control
P4	Other_Neg	0.082	0.002	0.082	0.001	0.129	Effect robust to depression control
Negative	Other_Neg	0.050	0.020	0.050	0.014	-0.111	Effect robust to depression control
Negative	Self_Pos	-0.085	0.000	-0.076	0.000	-9.915	Effect robust to depression control
Negative	Other_Pos	-0.057	0.006	-0.054	0.008	-5.388	Effect robust to depression control
GAF	Self_Neg	-0.079	0.010	-0.082	0.002	2.709	Effect robust to depression control
GAF	Other_Neg	-0.085	0.001	-0.090	0.000	5.458	Effect robust to depression control
GAF	Self_Pos	0.096	0.000	0.099	0.000	3.085	Effect robust to depression control
GAF	Other_Pos	0.077	0.002	0.079	0.001	3.128	Effect robust to depression control
GF_Role	Other_Neg	-0.061	0.006	-0.062	0.003	2.378	Effect robust to depression control
GF_Soc	Other_Neg	-0.050	0.015	-0.059	0.003	18.045	Effect robust to depression control
GF_Soc	Self_Pos	0.064	0.003	0.069	0.001	8.424	Effect robust to depression control
GF_Soc	Other_Pos	0.049	0.013	0.051	0.009	4.477	Effect robust to depression control

# Summary statistics
cat("\nSummary:\n")

## 
## Summary:

cat("Mean % change in beta when removing depression:", round(mean(nodep_table$Beta_Change_Pct), 1), "%\n")

## Mean % change in beta when removing depression: 5.5 %

cat("Effects that remain significant without depression:", 
    sum(nodep_table$P_NoDep < 0.05), "of", nrow(nodep_table), "\n")

## Effects that remain significant without depression: 20 of 20

4.6 S7: Cumulative Effect Illustration

cat("\n=== CUMULATIVE EFFECT ILLUSTRATION ===\n")

## 
## === CUMULATIVE EFFECT ILLUSTRATION ===

cat("Demonstrating how small per-wave effects accumulate over time\n\n")

## Demonstrating how small per-wave effects accumulate over time

# Use P2 ~ Other_Neg as example (β = 0.101)
beta_fwd <- 0.101
auto_reg <- 0.5  # Approximate autoregressive coefficient

# Simulate trajectories for +1 SD vs -1 SD on Other_Neg schema
n_waves <- 5
start_symptom <- 0  # Starting at mean (z-scored)

# High schema group (+1 SD)
traj_high <- numeric(n_waves)
traj_high[1] <- start_symptom
for (t in 2:n_waves) {
  traj_high[t] <- auto_reg * traj_high[t-1] + beta_fwd * 1  # +1 SD schema
}

# Low schema group (-1 SD)
traj_low <- numeric(n_waves)
traj_low[1] <- start_symptom
for (t in 2:n_waves) {
  traj_low[t] <- auto_reg * traj_low[t-1] + beta_fwd * (-1)  # -1 SD schema
}

# Plot
traj_df <- tibble(
  Wave = rep(1:n_waves, 2),
  Symptom = c(traj_high, traj_low),
  Group = rep(c("High Other-Negative (+1 SD)", "Low Other-Negative (-1 SD)"), each = n_waves)
)

ggplot(traj_df, aes(x = Wave, y = Symptom, color = Group)) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 3) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
  scale_color_manual(values = c("High Other-Negative (+1 SD)" = "#D55E00", 
                                 "Low Other-Negative (-1 SD)" = "#56B4E9")) +
  labs(
    title = "Cumulative Effect of Other-Negative Schema on P2 (Suspiciousness)",
    subtitle = paste0("Based on β = ", beta_fwd, " per wave (standardized)"),
    x = "Assessment Wave",
    y = "Predicted P2 Severity (z-score)",
    color = "Schema Group"
  ) +
  theme_bw(base_size = 12) +
  theme(legend.position = "bottom")

# Calculate final difference
final_diff <- traj_high[n_waves] - traj_low[n_waves]
cat("\nAfter", n_waves, "waves:\n")

## 
## After 5 waves:

cat("High schema group predicted P2:", round(traj_high[n_waves], 3), "SD\n")

## High schema group predicted P2: 0.189 SD

cat("Low schema group predicted P2:", round(traj_low[n_waves], 3), "SD\n")

## Low schema group predicted P2: -0.189 SD

cat("Cumulative difference:", round(final_diff, 3), "SD\n")

## Cumulative difference: 0.379 SD

cat("\nThis represents a", round(final_diff, 2), "SD difference in suspiciousness symptoms\n")

## 
## This represents a 0.38 SD difference in suspiciousness symptoms

cat("between individuals differing by 2 SD on Other-Negative schemas.\n")

## between individuals differing by 2 SD on Other-Negative schemas.

4.7 S8: Effect Size Context

cat("\n=== EFFECT SIZE CONTEXT ===\n")

## 
## === EFFECT SIZE CONTEXT ===

cat("Comparing effect sizes to published longitudinal studies in CHR/psychosis\n\n")

## Comparing effect sizes to published longitudinal studies in CHR/psychosis

# Summary of our significant effects
our_effects <- final_lagged %>%
  filter(P_Fwd_FDR < 0.05) %>%
  summarise(
    N_Significant = n(),
    Mean_Beta = mean(abs(Beta_Fwd)),
    Median_Beta = median(abs(Beta_Fwd)),
    Range_Beta = paste0(round(min(abs(Beta_Fwd)), 3), " - ", round(max(abs(Beta_Fwd)), 3))
  )

cat("Current Study - Significant Forward Effects:\n")

## Current Study - Significant Forward Effects:

cat("  N significant paths:", our_effects$N_Significant, "\n")

##   N significant paths: 20

cat("  Mean |β|:", round(our_effects$Mean_Beta, 3), "\n")

##   Mean |β|: 0.076

cat("  Median |β|:", round(our_effects$Median_Beta, 3), "\n")

##   Median |β|: 0.076

cat("  Range:", our_effects$Range_Beta, "\n\n")

##   Range: 0.049 - 0.113

# Context from literature (approximate values from similar studies)
context_df <- tibble(
  Study_Type = c(
    "Current Study (Schema → Symptom)",
    "Typical Cross-Lagged Effects (Hamaker et al.)",
    "Cognitive Models in Depression (Alloy et al.)",
    "Within-Person RI-CLPM Effects (Mulder & Hamaker)",
    "Social Cognition → Symptoms in CHR"
  ),
  Typical_Beta = c(
    round(our_effects$Mean_Beta, 3),
    "0.05 - 0.15",
    "0.08 - 0.12",
    "0.03 - 0.10",
    "0.05 - 0.10"
  ),
  Notes = c(
    "Controlled for depression, autoregressive effects",
    "Review of cross-lagged panel models",
    "Cognitive vulnerability studies",
    "After separating between/within-person variance",
    "Meta-analytic estimates"
  )
)

context_df %>%
  kable(caption = "Effect Size Comparison with Published Literature") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Effect Size Comparison with Published Literature
Study_Type	Typical_Beta	Notes
Current Study (Schema → Symptom)	0.076	Controlled for depression, autoregressive effects
Typical Cross-Lagged Effects (Hamaker et al.)	0.05 - 0.15	Review of cross-lagged panel models
Cognitive Models in Depression (Alloy et al.)	0.08 - 0.12	Cognitive vulnerability studies
Within-Person RI-CLPM Effects (Mulder & Hamaker)	0.03 - 0.10	After separating between/within-person variance
Social Cognition → Symptoms in CHR	0.05 - 0.10	Meta-analytic estimates

cat("\nInterpretation: Our effect sizes are consistent with the literature on\n")

## 
## Interpretation: Our effect sizes are consistent with the literature on

cat("cross-lagged psychological processes. Effects of β = 0.05-0.10 are typical\n")

## cross-lagged psychological processes. Effects of β = 0.05-0.10 are typical

cat("and considered meaningful when (a) controlling for autoregressive effects,\n")

## and considered meaningful when (a) controlling for autoregressive effects,

cat("(b) using rigorous longitudinal designs, and (c) studying clinical populations.\n")

## (b) using rigorous longitudinal designs, and (c) studying clinical populations.

4.8 S9: Sample Characteristics and Descriptive Statistics

cat("\n=== SAMPLE CHARACTERISTICS ===\n")

## 
## === SAMPLE CHARACTERISTICS ===

# Baseline descriptives
baseline_desc <- df_clean %>%
  filter(VisitNum == 1) %>%
  select(Pos_Total, Neg_Total, Gen_Total, Dis_Total,
         Self_Neg, Other_Neg, Self_Pos, Other_Pos,
         Depression, GAF, GF_Role, GF_Soc) %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "Value") %>%
  group_by(Variable) %>%
  summarise(
    N = sum(!is.na(Value)),
    Mean = mean(Value, na.rm = TRUE),
    SD = sd(Value, na.rm = TRUE),
    Min = min(Value, na.rm = TRUE),
    Max = max(Value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(Variable = factor(Variable, levels = c(
    "Pos_Total", "Neg_Total", "Gen_Total", "Dis_Total",
    "Self_Neg", "Other_Neg", "Self_Pos", "Other_Pos",
    "Depression", "GAF", "GF_Role", "GF_Soc"
  )))

baseline_desc %>%
  arrange(Variable) %>%
  kable(digits = 2, caption = "Baseline Descriptive Statistics") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Baseline Descriptive Statistics
Variable	N	Mean	SD	Min	Max
Pos_Total	763	11.91	3.81	0	24
Neg_Total	738	11.89	6.07	0	30
Gen_Total	737	9.16	4.27	0	22
Dis_Total	739	5.16	3.16	0	20
Self_Neg	695	6.46	5.60	0	24
Other_Neg	694	7.87	6.10	0	24
Self_Pos	694	11.12	5.79	0	24
Other_Pos	694	9.73	5.39	0	24
Depression	722	5.82	4.75	0	21
GAF	759	48.39	10.75	18	88
GF_Role	753	5.95	2.15	1	10
GF_Soc	755	6.18	1.57	2	10

# Visit-level N
visit_n <- df_clean %>%
  group_by(VisitNum) %>%
  summarise(N = n_distinct(ID), .groups = "drop")

cat("\nSample Size by Visit:\n")

## 
## Sample Size by Visit:

visit_n %>%
  kable(caption = "N Participants per Visit") %>%
  kable_styling(bootstrap_options = "striped")

N Participants per Visit
VisitNum	N
1	764
2	764
3	764
4	764
5	764

# Total observations used in lagged models
cat("\nTime-Lagged Model Sample:\n")

## 
## Time-Lagged Model Sample:

cat("Total observations (visits 2-5):", nrow(df_lagged), "\n")

## Total observations (visits 2-5): 3056

cat("Unique participants:", n_distinct(df_lagged$ID), "\n")

## Unique participants: 764

5 Export Results

# Save full lagged results
write_csv(final_lagged, paste0(output_dir, "Lagged_Models_Results_Full.csv"))
cat("Full results saved to:", paste0(output_dir, "Lagged_Models_Results_Full.csv"), "\n")

## Full results saved to: /Users/ab4233/Library/CloudStorage/OneDrive-YaleUniversity/NAPLS-2 Psychological Constructs/Lagged_Models_Results_Full.csv

# Save significant results only
sig_results <- final_lagged %>%
  filter(P_Fwd_FDR < 0.05 | P_Rev_FDR < 0.05)

write_csv(sig_results, paste0(output_dir, "Lagged_Models_Results_Significant.csv"))
cat("Significant results saved to:", paste0(output_dir, "Lagged_Models_Results_Significant.csv"), "\n")

## Significant results saved to: /Users/ab4233/Library/CloudStorage/OneDrive-YaleUniversity/NAPLS-2 Psychological Constructs/Lagged_Models_Results_Significant.csv

# Save diagnostics
write_csv(diagnostics_table, paste0(output_dir, "Model_Diagnostics.csv"))
cat("Diagnostics saved to:", paste0(output_dir, "Model_Diagnostics.csv"), "\n")

## Diagnostics saved to: /Users/ab4233/Library/CloudStorage/OneDrive-YaleUniversity/NAPLS-2 Psychological Constructs/Model_Diagnostics.csv

Longitudinal Impact of Core Schemas on Psychosis Symptom Domains

Alex Broekhuijse

2025-12-15

1 Setup and Data Preparation

1.1 Load Libraries

1.2 Load and Clean Data

1.3 Define Common Variables

2 Analysis 1: Time-Lagged Models (Symptoms & Functioning with Depression Control)

2.1 Create Lagged Dataset

2.2 Run Time-Lagged Models

2.3 FDR Correction and Summary

2.4 Symptom Results Table

2.5 Functioning Results Table

2.6 Forest Plots

2.7 Trajectory Plots for Significant Forward Effects

3 Analysis 4: Time-Lagged Models (Without Depression Control)

3.1 Run Models

3.2 FDR Correction and Results

3.3 Forest Plot

4 Supplemental Analyses

4.1 S1: Model Diagnostics

4.1.1 Residual Diagnostics for Key Models

4.1.2 Diagnostics Across All Significant Models

4.2 S2: Multicollinearity Assessment

4.3 S4: Sensitivity Analysis - Random Slopes

4.4 S5: Sensitivity Analysis - Non-Linear Time

4.5 S6: Sensitivity Analysis - Without Depression Control

4.6 S7: Cumulative Effect Illustration

4.7 S8: Effect Size Context

4.8 S9: Sample Characteristics and Descriptive Statistics

5 Export Results