This file contains the analyses needed for the depression/anxiety RCT manuscript and supplement. It is organized to follow the manuscript first, then supplement sections.

Load libraries

library(tidyverse)
library(lme4)
library(lmerTest)
library(emmeans)
library(parameters)
library(effectsize)
library(psych)
library(haven)
library(lubridate)
library(knitr)
library(broom)
library(grid)

Load data

Wave0 <- read_csv("./data/raw/1a.+Psych+-+SONA_November+26,+2025_13.32_deidentified.csv") |> slice(-c(1:2))
Wave1 <- read_csv("./data/raw/2.+Psych+-+SONA+-+Biweekly_November+26,+2025_13.54_deidentified.csv") |> slice(-c(1:2))
Wave2 <- read_csv("./data/raw/3.+Psych+-+SONA+-+Biweekly_November+26,+2025_13.55_deidentified.csv") |> slice(-c(1:2))
Wave3 <- read_csv("./data/raw/4.+Pysch+SONA+-+Post_December+5,+2025_14.51_deidentified.csv") |> slice(-c(1:2))

Helper functions

fmt_mean_sd <- function(x) {
  sprintf("%.2f (%.2f)", mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
}

fmt_n_pct <- function(n, denom) {
  sprintf("%d (%.1f)", n, 100 * n / denom)
}

format_p <- function(p) {
  case_when(
    is.na(p) ~ NA_character_,
    p < .001 ~ "< .001",
    TRUE ~ sprintf("%.3f", p)
  )
}

extract_lmer_term <- function(model, term_name) {
  
  coef_table <- as.data.frame(summary(model)$coefficients)
  coef_table$term <- rownames(coef_table)
  
  conf <- as.data.frame(confint(model, parm = "beta_", method = "Wald"))
  conf$term <- rownames(conf)
  names(conf)[1:2] <- c("LL", "UL")
  
  coef_table |>
    dplyr::filter(term == term_name) |>
    dplyr::left_join(conf, by = "term") |>
    dplyr::transmute(
      term,
      b = Estimate,
      SE = `Std. Error`,
      df = df,
      t = `t value`,
      p = `Pr(>|t|)`,
      LL,
      UL
    )
}

extract_lmer_terms_matching <- function(model, pattern) {
  
  coef_table <- as.data.frame(summary(model)$coefficients)
  coef_table$term <- rownames(coef_table)
  
  conf <- as.data.frame(confint(model, parm = "beta_", method = "Wald"))
  conf$term <- rownames(conf)
  names(conf)[1:2] <- c("LL", "UL")
  
  coef_table |>
    dplyr::filter(stringr::str_detect(term, pattern)) |>
    dplyr::left_join(conf, by = "term") |>
    dplyr::transmute(
      term,
      b = Estimate,
      SE = `Std. Error`,
      df = df,
      t = `t value`,
      p = `Pr(>|t|)`,
      LL,
      UL
    )
}

extract_standardized_term <- function(model, term_name) {
  parameters::standardize_parameters(model) |>
    as_tibble() |>
    filter(Parameter == term_name) |>
    transmute(
      term = Parameter,
      beta = Std_Coefficient,
      beta_LL = CI_low,
      beta_UL = CI_high
    )
}

monotone_cum <- function(x) {
  x2 <- x
  x2[is.na(x2)] <- -Inf
  out <- cummax(x2)
  out[is.infinite(out)] <- NA_real_
  out
}

Clean and merge data

clean_baseline <- function(df) {
  df |>
    dplyr::select(
      EndDate, Finished, ResponseId,
      unique_ID = new_id,
      PHQ_4_1:cond
    ) |>
    rename_with(~ gsub(" _", "_", .x), starts_with("importance_factors")) |>
    rename_with(~ gsub("-", "_", .x)) |>
    rename_with(~ gsub(" ", "_", .x)) |>
    mutate(
      term = case_when(
        month(EndDate) %in% 5:8  ~ "Summer",
        month(EndDate) %in% 9:12 ~ "Fall",
        TRUE ~ NA_character_
      )
    ) |>
    filter(!is.na(unique_ID)) |>
    distinct(unique_ID, .keep_all = TRUE) |>
    rename_with(~ paste0(., "_T1"), -unique_ID)
}

clean_followup <- function(df, wave_suffix) {
  df |>
    dplyr::select(
      EndDate, ResponseId, Finished,
      unique_ID = new_id,
      PHQ_4_1:cond
    ) |>
    rename_with(~ gsub("-", "_", .x)) |>
    rename_with(~ gsub(" ", "_", .x)) |>
    mutate(
      term = case_when(
        month(EndDate) %in% 5:8  ~ "Summer",
        month(EndDate) %in% 9:12 ~ "Fall",
        TRUE ~ NA_character_
      )
    ) |>
    filter(!is.na(unique_ID)) |>
    distinct(unique_ID, .keep_all = TRUE) |>
    rename_with(~ paste0(., "_", wave_suffix), -unique_ID)
}

Wave0_clean <- clean_baseline(Wave0)
Wave1_clean <- clean_followup(Wave1, "T2")
Wave2_clean <- clean_followup(Wave2, "T3")
Wave3_clean <- Wave3 |>
  dplyr::select(-c(starts_with("SWEMWBS_"))) |>
  clean_followup("T4")

merged_data <- Wave0_clean |>
  full_join(Wave1_clean, by = "unique_ID") |>
  full_join(Wave2_clean, by = "unique_ID") |>
  full_join(Wave3_clean, by = "unique_ID") |>
  mutate(across(
    -c(unique_ID, starts_with("ResponseId"), starts_with("EndDate"), starts_with("term"), starts_with("RaceEthnicity"), starts_with("cond")),
    ~ suppressWarnings(as.numeric(.x))
  )) |>
  filter(is.na(Age_T1) | Age_T1 != 17)

Compute manuscript variables

merged_data <- merged_data |>
  mutate(across(starts_with("PHQ_4_"), ~ .x - 1)) |>
  mutate(
    depression_T1 = PHQ_4_1_T1 + PHQ_4_2_T1,
    depression_T2 = PHQ_4_1_T2 + PHQ_4_2_T2,
    depression_T3 = PHQ_4_1_T3 + PHQ_4_2_T3,
    depression_T4 = PHQ_4_1_T4 + PHQ_4_2_T4,
    anxiety_T1    = PHQ_4_3_T1 + PHQ_4_4_T1,
    anxiety_T2    = PHQ_4_3_T2 + PHQ_4_4_T2,
    anxiety_T3    = PHQ_4_3_T3 + PHQ_4_4_T3,
    anxiety_T4    = PHQ_4_3_T4 + PHQ_4_4_T4
  ) |>
  mutate(
    Sex_T1 = factor(Sex_T1, levels = c(1, 2, 3), labels = c("Male", "Female", "Intersex")),
    int_student_T1 = factor(int_student_T1, levels = c(1, 2), labels = c("Yes", "No")),
    SES_num = as.numeric(SES_T1),
    SES_T1 = factor(
      SES_T1,
      levels = c(1, 2, 3, 4, 5),
      labels = c("Always stressful (1)", "Often stressful (2)", "Sometimes stressful (3)", "Rarely stressful (4)", "Never stressful (5)")
    )
  ) |>
  mutate(
    RaceEthnicity_T1 = as.character(RaceEthnicity_T1),
    Ethnicity_Mixed = ifelse(grepl(",", RaceEthnicity_T1), 1, 0),
    Ethnicity_White = ifelse(Ethnicity_Mixed == 0 & grepl("10", RaceEthnicity_T1), 1, 0),
    Ethnicity_Hispanic = ifelse(Ethnicity_Mixed == 0 & grepl("9", RaceEthnicity_T1), 1, 0),
    Ethnicity_Black = ifelse(Ethnicity_Mixed == 0 & grepl("3", RaceEthnicity_T1), 1, 0),
    Ethnicity_East_Asian = ifelse(Ethnicity_Mixed == 0 & grepl("4", RaceEthnicity_T1), 1, 0),
    Ethnicity_South_Asian = ifelse(Ethnicity_Mixed == 0 & grepl("5", RaceEthnicity_T1), 1, 0),
    Ethnicity_Pacific_Islander = ifelse(Ethnicity_Mixed == 0 & grepl("7", RaceEthnicity_T1), 1, 0),
    Ethnicity_Middle_Eastern = ifelse(Ethnicity_Mixed == 0 & grepl("6", RaceEthnicity_T1), 1, 0),
    Ethnicity_American_Indian = ifelse(Ethnicity_Mixed == 0 & grepl("(^|,)1(,|$)", RaceEthnicity_T1), 1, 0),
    Ethnicity_Self_Identify = ifelse(Ethnicity_Mixed == 0 & grepl("8", RaceEthnicity_T1), 1, 0),
    Ethnicity_categ_T1 = case_when(
      is.na(RaceEthnicity_T1) ~ NA_character_,
      Ethnicity_Mixed == 1 ~ "Multiracial",
      Ethnicity_White == 1 ~ "White",
      Ethnicity_Hispanic == 1 ~ "Hispanic",
      Ethnicity_Black == 1 ~ "Black or African American",
      Ethnicity_East_Asian == 1 | Ethnicity_South_Asian == 1 ~ "Asian",
      Ethnicity_Pacific_Islander == 1 ~ "Pacific Islander",
      Ethnicity_Middle_Eastern == 1 ~ "Middle Eastern",
      Ethnicity_American_Indian == 1 ~ "American Indian/Indigenous",
      Ethnicity_Self_Identify == 1 ~ "Other/Self-identified",
      TRUE ~ NA_character_
    ),
    Ethnicity_categ_T1 = factor(
      Ethnicity_categ_T1,
      levels = c(
        "American Indian/Indigenous", "Asian", "Black or African American",
        "Hispanic", "Middle Eastern", "Pacific Islander", "White",
        "Multiracial", "Other/Self-identified"
      )
    )
  )

Internal consistency / Cronbach’s alpha

alpha_for_wave <- function(df, items) {
  psych::alpha(df |> dplyr::select(all_of(items)), warnings = FALSE, check.keys = FALSE)$total$raw_alpha
}

internal_consistency <- tibble(
  wave = c("Baseline", "Week 2", "Week 4", "Week 6"),
  PHQ_2_alpha = c(
    alpha_for_wave(merged_data, c("PHQ_4_1_T1", "PHQ_4_2_T1")),
    alpha_for_wave(merged_data, c("PHQ_4_1_T2", "PHQ_4_2_T2")),
    alpha_for_wave(merged_data, c("PHQ_4_1_T3", "PHQ_4_2_T3")),
    alpha_for_wave(merged_data, c("PHQ_4_1_T4", "PHQ_4_2_T4"))
  ),
  GAD_2_alpha = c(
    alpha_for_wave(merged_data, c("PHQ_4_3_T1", "PHQ_4_4_T1")),
    alpha_for_wave(merged_data, c("PHQ_4_3_T2", "PHQ_4_4_T2")),
    alpha_for_wave(merged_data, c("PHQ_4_3_T3", "PHQ_4_4_T3")),
    alpha_for_wave(merged_data, c("PHQ_4_3_T4", "PHQ_4_4_T4"))
  )
)

internal_consistency |>
  mutate(across(where(is.numeric), ~ round(.x, 2))) |>
  kable(caption = "Internal consistency across assessment waves")
Internal consistency across assessment waves
wave PHQ_2_alpha GAD_2_alpha
Baseline 0.66 0.82
Week 2 0.72 0.81
Week 4 0.75 0.83
Week 6 0.73 0.86

Prepare long dataset

merged_data_long <- merged_data |>
  pivot_longer(
    cols = matches("_T[1234]$"),
    names_to = c(".value", "time"),
    names_pattern = "(.*)_T(1|2|3|4)"
  ) |>
  mutate(
    time = as.numeric(time),
    week = case_when(time == 1 ~ 0, time == 2 ~ 2, time == 3 ~ 4, time == 4 ~ 6),
    time_c = time - 2.5,
    time_f = factor(time, levels = c(1, 2, 3, 4), labels = c("Baseline", "Week 2", "Week 4", "Week 6"))
  ) |>
  group_by(unique_ID) |>
  fill(cond, Sex, Age, starts_with("Ethnicity"), int_student, int_student_country, SES, SES_num, term, .direction = "downup") |>
  mutate(
    depression_baseline = depression[time == 1][1],
    anxiety_baseline = anxiety[time == 1][1],
    depression_baseline_cat = case_when(
      depression_baseline >= 3 ~ "Elevated baseline (PHQ-2 ≥ 3)",
      !is.na(depression_baseline) ~ "Low baseline (PHQ-2 < 3)",
      TRUE ~ NA_character_
    ),
    anxiety_baseline_cat = case_when(
      anxiety_baseline >= 3 ~ "Elevated baseline (GAD-2 ≥ 3)",
      !is.na(anxiety_baseline) ~ "Low baseline (GAD-2 < 3)",
      TRUE ~ NA_character_
    )
  ) |>
  ungroup() |>
  mutate(
    cond = case_when(
      str_to_lower(as.character(cond)) == "control" ~ "Control",
      str_to_lower(as.character(cond)) == "flourish" ~ "Flourish",
      TRUE ~ as.character(cond)
    ),
    cond = factor(cond, levels = c("Control", "Flourish")),
    term = factor(term),
    depression_baseline_cat = factor(depression_baseline_cat, levels = c("Low baseline (PHQ-2 < 3)", "Elevated baseline (PHQ-2 ≥ 3)")),
    anxiety_baseline_cat = factor(anxiety_baseline_cat, levels = c("Low baseline (GAD-2 < 3)", "Elevated baseline (GAD-2 ≥ 3)")),
    Ethnicity_WhitePOC = factor(if_else(Ethnicity_categ == "White", "White", "Non-White", missing = NA_character_))
  )

contrasts(merged_data_long$cond) <- cbind(flourish_vs_control = c(-1, 1))

write.csv(merged_data_long, "./data/merged/merged_data_manuscript.csv", row.names = FALSE)

data_baseline <- merged_data_long |>
  filter(time == 1) |>
  distinct(unique_ID, .keep_all = TRUE) |>
  mutate(
    depression_baseline_severity = case_when(
      depression >= 3 ~ "Depression Elevated (PHQ-2 ≥ 3)",
      depression < 3 ~ "Depression Low (PHQ-2 < 3)",
      TRUE ~ NA_character_
    ),
    anxiety_baseline_severity = case_when(
      anxiety >= 3 ~ "Anxiety Elevated (GAD-2 ≥ 3)",
      anxiety < 3 ~ "Anxiety Low (GAD-2 < 3)",
      TRUE ~ NA_character_
    )
  )

Table 1. Participant Characteristics

make_cat_rows <- function(data, var, label, levels_to_show = NULL, include_missing = TRUE) {
  if (is.null(levels_to_show)) levels_to_show <- sort(unique(na.omit(data[[var]])))

  out <- map_dfr(levels_to_show, function(lvl) {
    control_n <- sum(data$cond == "Control" & data[[var]] == lvl, na.rm = TRUE)
    flourish_n <- sum(data$cond == "Flourish" & data[[var]] == lvl, na.rm = TRUE)
    total_n <- sum(data[[var]] == lvl, na.rm = TRUE)

    tibble(
      Characteristic = paste0("     ", lvl),
      Control = fmt_n_pct(control_n, sum(data$cond == "Control")),
      Flourish = fmt_n_pct(flourish_n, sum(data$cond == "Flourish")),
      Total = fmt_n_pct(total_n, nrow(data))
    )
  })

  if (include_missing) {
    out <- bind_rows(
      out,
      tibble(
        Characteristic = "     Missing",
        Control = fmt_n_pct(sum(data$cond == "Control" & is.na(data[[var]])), sum(data$cond == "Control")),
        Flourish = fmt_n_pct(sum(data$cond == "Flourish" & is.na(data[[var]])), sum(data$cond == "Flourish")),
        Total = fmt_n_pct(sum(is.na(data[[var]])), nrow(data))
      )
    )
  }

  if (!is.null(label)) {
    bind_rows(tibble(Characteristic = label, Control = "", Flourish = "", Total = ""), out)
  } else {
    out
  }
}

n_control <- sum(data_baseline$cond == "Control")
n_flourish <- sum(data_baseline$cond == "Flourish")
n_total <- nrow(data_baseline)

table1 <- bind_rows(
  tibble(
    Characteristic = "Age, mean (SD), y",
    Control = fmt_mean_sd(data_baseline$Age[data_baseline$cond == "Control"]),
    Flourish = fmt_mean_sd(data_baseline$Age[data_baseline$cond == "Flourish"]),
    Total = fmt_mean_sd(data_baseline$Age)
  ),
  make_cat_rows(data_baseline, "Sex", "Sex", c("Female", "Male"), include_missing = TRUE),
  make_cat_rows(data_baseline, "Ethnicity_categ", "Race/Ethnicity, No. (%)", levels(data_baseline$Ethnicity_categ), include_missing = TRUE),
  tibble(
    Characteristic = "International student, %",
    Control = fmt_n_pct(sum(data_baseline$cond == "Control" & data_baseline$int_student == "Yes", na.rm = TRUE), n_control),
    Flourish = fmt_n_pct(sum(data_baseline$cond == "Flourish" & data_baseline$int_student == "Yes", na.rm = TRUE), n_flourish),
    Total = fmt_n_pct(sum(data_baseline$int_student == "Yes", na.rm = TRUE), n_total)
  ),
  make_cat_rows(data_baseline, "SES", "Subjective socioeconomic status", c("Always stressful (1)", "Often stressful (2)", "Sometimes stressful (3)", "Rarely stressful (4)", "Never stressful (5)"), include_missing = TRUE),
  tibble(Characteristic = "Baseline symptom scores", Control = "", Flourish = "", Total = ""),
  tibble(
    Characteristic = "     PHQ-2 depression, mean (SD)",
    Control = fmt_mean_sd(data_baseline$depression[data_baseline$cond == "Control"]),
    Flourish = fmt_mean_sd(data_baseline$depression[data_baseline$cond == "Flourish"]),
    Total = fmt_mean_sd(data_baseline$depression)
  ),
  tibble(
    Characteristic = "     GAD-2 anxiety, mean (SD)",
    Control = fmt_mean_sd(data_baseline$anxiety[data_baseline$cond == "Control"]),
    Flourish = fmt_mean_sd(data_baseline$anxiety[data_baseline$cond == "Flourish"]),
    Total = fmt_mean_sd(data_baseline$anxiety)
  ),
  tibble(Characteristic = "Baseline symptom severity group", Control = "", Flourish = "", Total = ""),
  make_cat_rows(data_baseline, "depression_baseline_severity", NULL, c("Depression Low (PHQ-2 < 3)", "Depression Elevated (PHQ-2 ≥ 3)"), include_missing = FALSE),
  make_cat_rows(data_baseline, "anxiety_baseline_severity", NULL, c("Anxiety Low (GAD-2 < 3)", "Anxiety Elevated (GAD-2 ≥ 3)"), include_missing = FALSE)
)

table1 |>
  kable(
    col.names = c("Characteristic", paste0("Control<br>(n = ", n_control, ")"), paste0("Flourish<br>(n = ", n_flourish, ")"), paste0("Total<br>(N = ", n_total, ")")),
    escape = FALSE,
    align = c("l", "c", "c", "c"),
    caption = "Table 1. Participant Characteristics"
  )
Table 1. Participant Characteristics
Characteristic Control
(n = 581)
Flourish
(n = 556)
Total
(N = 1137)
Age, mean (SD), y 20.63 (5.11) 20.91 (5.58) 20.77 (5.34)
Sex
Female 434 (74.7) 419 (75.4) 853 (75.0)
Male 147 (25.3) 136 (24.5) 283 (24.9)
Missing 0 (0.0) 1 (0.2) 1 (0.1)
Race/Ethnicity, No. (%)
American Indian/Indigenous 5 (0.9) 11 (2.0) 16 (1.4)
Asian 133 (22.9) 145 (26.1) 278 (24.5)
Black or African American 74 (12.7) 65 (11.7) 139 (12.2)
Hispanic 8 (1.4) 4 (0.7) 12 (1.1)
Middle Eastern 20 (3.4) 19 (3.4) 39 (3.4)
Pacific Islander 5 (0.9) 1 (0.2) 6 (0.5)
White 222 (38.2) 213 (38.3) 435 (38.3)
Multiracial 50 (8.6) 52 (9.4) 102 (9.0)
Other/Self-identified 64 (11.0) 45 (8.1) 109 (9.6)
Missing 0 (0.0) 1 (0.2) 1 (0.1)
International student, % 51 (8.8) 62 (11.2) 113 (9.9)
Subjective socioeconomic status
Always stressful (1) 63 (10.8) 29 (5.2) 92 (8.1)
Often stressful (2) 86 (14.8) 117 (21.0) 203 (17.9)
Sometimes stressful (3) 234 (40.3) 186 (33.5) 420 (36.9)
Rarely stressful (4) 131 (22.5) 150 (27.0) 281 (24.7)
Never stressful (5) 67 (11.5) 73 (13.1) 140 (12.3)
Missing 0 (0.0) 1 (0.2) 1 (0.1)
Baseline symptom scores
PHQ-2 depression, mean (SD) 1.71 (1.48) 1.63 (1.47) 1.67 (1.47)
GAD-2 anxiety, mean (SD) 2.55 (1.83) 2.54 (1.83) 2.55 (1.83)
Baseline symptom severity group
Depression Low (PHQ-2 < 3) 428 (73.7) 433 (77.9) 861 (75.7)
Depression Elevated (PHQ-2 ≥ 3) 153 (26.3) 123 (22.1) 276 (24.3)
Anxiety Low (GAD-2 < 3) 342 (58.9) 312 (56.1) 654 (57.5)
Anxiety Elevated (GAD-2 ≥ 3) 239 (41.1) 244 (43.9) 483 (42.5)

Randomization checks

# Randomization-check approach:
# linear models for continuous/ordinal variables and chi-square tests for categorical variables.

data_baseline_check <- merged_data_long %>%
  filter(time == 1) %>%
  dplyr::select(unique_ID, cond, Age, Sex, SES_num, depression, anxiety, int_student) %>%
  distinct()

rand_age <- lm(Age ~ cond, data = data_baseline_check)
rand_ses <- lm(SES_num ~ cond, data = data_baseline_check)
rand_dep <- lm(depression ~ cond, data = data_baseline_check)
rand_anx <- lm(anxiety ~ cond, data = data_baseline_check)

sex_table <- table(data_baseline_check$Sex, data_baseline_check$cond)
sex_table <- sex_table[rowSums(sex_table) != 0, ]
rand_sex <- chisq.test(sex_table)

int_table <- table(data_baseline_check$int_student, data_baseline_check$cond)
int_table <- int_table[rowSums(int_table) != 0, ]
rand_int <- chisq.test(int_table)

randomization_checks <- tibble(
  characteristic = c(
    "Age",
    "Sex",
    "Subjective socioeconomic status",
    "Depressive symptoms (PHQ-2)",
    "Anxiety symptoms (GAD-2)",
    "International student status"
  ),
  original_test = c(
    "lm(Age ~ cond)",
    "chisq.test(table(Sex, cond))",
    "lm(SES_num ~ cond)",
    "lm(depression ~ cond)",
    "lm(anxiety ~ cond)",
    "chisq.test(table(int_student, cond))"
  ),
  p_value = c(
    coef(summary(rand_age))[2, "Pr(>|t|)"],
    rand_sex$p.value,
    coef(summary(rand_ses))[2, "Pr(>|t|)"],
    coef(summary(rand_dep))[2, "Pr(>|t|)"],
    coef(summary(rand_anx))[2, "Pr(>|t|)"],
    rand_int$p.value
  )
) |>
  mutate(p = format_p(p_value), significant = p_value < .05)

randomization_checks |>
  kable(caption = "Randomization checks using original analysis approach")
Randomization checks using original analysis approach
characteristic original_test p_value p significant
Age lm(Age ~ cond) 0.3888117 0.389 FALSE
Sex chisq.test(table(Sex, cond)) 0.8089879 0.809 FALSE
Subjective socioeconomic status lm(SES_num ~ cond) 0.0531220 0.053 FALSE
Depressive symptoms (PHQ-2) lm(depression ~ cond) 0.3225544 0.323 FALSE
Anxiety symptoms (GAD-2) lm(anxiety ~ cond) 0.9053494 0.905 FALSE
International student status chisq.test(table(int_student, cond)) 0.2120223 0.212 FALSE
all(randomization_checks$p_value > .05, na.rm = TRUE)
## [1] TRUE

Survey completion / attrition and Figure 1

# Completion is based on Finished == 1,
# rather than non-missing PHQ/GAD outcome scores.

randomized_n <- data_baseline |> count(cond, name = "n_randomized")

completion_long <- merged_data_long |>
  filter(time %in% c(2, 3, 4)) |>
  group_by(unique_ID, cond, time) |>
  summarise(
    completed = as.integer(any(Finished == 1, na.rm = TRUE)),
    .groups = "drop"
  ) |>
  mutate(
    wave = factor(time, levels = c(2, 3, 4), labels = c("Week 2 (T2)", "Week 4 (T3)", "Week 6 (T4)"))
  )

completion_summary_raw <- completion_long |>
  group_by(cond, wave) |>
  summarise(n_completed = sum(completed == 1, na.rm = TRUE), .groups = "drop") |>
  left_join(randomized_n, by = "cond") |>
  mutate(
    pct_completed = 100 * n_completed / n_randomized,
    completion = sprintf("%d (%.1f%%)", n_completed, pct_completed)
  )

completion_summary <- completion_summary_raw |>
  select(wave, cond, completion) |>
  pivot_wider(names_from = cond, values_from = completion)

completion_summary |>
  kable(caption = "Survey completion by condition across follow-up waves")
Survey completion by condition across follow-up waves
wave Control Flourish
Week 2 (T2) 516 (88.8%) 489 (87.9%)
Week 4 (T3) 475 (81.8%) 454 (81.7%)
Week 6 (T4) 433 (74.5%) 417 (75.0%)
# chi-square tests by wave.
completion_chisq_tests <- completion_long |>
  group_by(wave) |>
  group_modify(~{
    tab <- table(.x$completed, .x$cond)
    test <- chisq.test(tab)
    tibble(
      chi_square = unname(test$statistic),
      df = unname(test$parameter),
      p_value = test$p.value
    )
  }) |>
  ungroup() |>
  mutate(
    chi_square = round(chi_square, 2),
    p_formatted = format_p(p_value)
  )

completion_chisq_tests |>
  kable(caption = "Chi-square tests of survey completion by condition")
Chi-square tests of survey completion by condition
wave chi_square df p_value p_formatted
Week 2 (T2) 0.13 1 0.7178278 0.718
Week 4 (T3) 0.00 1 1.0000000 1.000
Week 6 (T4) 0.01 1 0.9081500 0.908

Number of surveys completed

# Count completed timepoints using Finished == 1.

surveys_completed <- merged_data_long %>%
  group_by(unique_ID) %>%
  summarise(
    n_surveys_completed = sum(Finished == 1, na.rm = TRUE),
    .groups = "drop"
  )

surveys_completed_summary <- surveys_completed %>%
  count(n_surveys_completed, name = "n") %>%
  mutate(pct = 100 * n / sum(n))

surveys_completed_summary |>
  kable(digits = 1, caption = "Number of surveys completed across both conditions")
Number of surveys completed across both conditions
n_surveys_completed n pct
1 129 11.3
2 80 7.0
3 80 7.0
4 848 74.6

Week 6 cumulative engagement

Clean engagement variables

monotone_observed <- function(x) {
  out <- x
  running_max <- -Inf

  for (i in seq_along(x)) {
    if (!is.na(x[i])) {
      running_max <- max(running_max, x[i])
      out[i] <- running_max
    }
  }

  out
}

max_or_na <- function(x) {
  if (all(is.na(x))) {
    NA_real_
  } else {
    max(x, na.rm = TRUE)
  }
}

df_fl <- merged_data_long |>
  filter(
    cond == "Flourish",
    time %in% c(2, 3, 4)
  ) |>
  select(
    unique_ID,
    time,
    Engagement_1,
    Engagement_3
  ) |>
  mutate(
    max_possible_days = case_when(
      time == 2 ~ 14,
      time == 3 ~ 28,
      time == 4 ~ 42
    ),

    impossible_active_days =
      !is.na(Engagement_1) &
      (Engagement_1 < 0 | Engagement_1 > max_possible_days),

    impossible_activities =
      !is.na(Engagement_3) &
      Engagement_3 < 0,

    Engagement_1_clean = case_when(
      is.na(Engagement_1) ~ NA_real_,
      Engagement_1 < 0 ~ 0,
      Engagement_1 > max_possible_days ~ max_possible_days,
      TRUE ~ Engagement_1
    ),

    Engagement_3_clean = case_when(
      is.na(Engagement_3) ~ NA_real_,
      Engagement_3 < 0 ~ 0,
      TRUE ~ Engagement_3
    )
  ) |>
  arrange(unique_ID, time)

df_fl_fix <- df_fl |>
  group_by(unique_ID) |>
  mutate(
    Engagement_1_fix = monotone_observed(Engagement_1_clean),
    Engagement_3_fix = monotone_observed(Engagement_3_clean)
  ) |>
  ungroup()

engagement_week6 <- df_fl_fix |>
  filter(time == 4) |>
  transmute(
    unique_ID,
    active_days_week6 = Engagement_1_fix,
    activities_completed_week6 = Engagement_3_fix
  )

engagement_week6_summary <- engagement_week6 |>
  summarise(
    n_active_days = sum(!is.na(active_days_week6)),
    mean_active_days = mean(active_days_week6, na.rm = TRUE),
    sd_active_days = sd(active_days_week6, na.rm = TRUE),
    n_activities = sum(!is.na(activities_completed_week6)),
    mean_activities = mean(activities_completed_week6, na.rm = TRUE),
    sd_activities = sd(activities_completed_week6, na.rm = TRUE)
  )

engagement_end_clean <- df_fl_fix |>
  group_by(unique_ID) |>
  summarise(
    active_days_end = max_or_na(Engagement_1_fix),
    activities_completed_end = max_or_na(Engagement_3_fix),
    .groups = "drop"
  )

Output summary

engagement_week6_summary |>
  mutate(across(where(is.numeric), ~ round(.x, 1))) |>
  kable(
    caption = "Week 6 cumulative app engagement among participants assigned to Flourish"
  )
Week 6 cumulative app engagement among participants assigned to Flourish
n_active_days mean_active_days sd_active_days n_activities mean_activities sd_activities
410 18.9 10.2 410 11.7 14

Primary Outcomes

Descriptive statistics for Supplement Section 6

depression_descriptives <- merged_data_long |>
  group_by(cond, time_f, depression_baseline_cat) |>
  summarise(n = sum(!is.na(depression)), mean = mean(depression, na.rm = TRUE), sd = sd(depression, na.rm = TRUE), se = sd / sqrt(n), .groups = "drop")

anxiety_descriptives <- merged_data_long |>
  group_by(cond, time_f, anxiety_baseline_cat) |>
  summarise(n = sum(!is.na(anxiety)), mean = mean(anxiety, na.rm = TRUE), sd = sd(anxiety, na.rm = TRUE), se = sd / sqrt(n), .groups = "drop")

depression_descriptives |>
  mutate(across(c(mean, sd, se), ~ round(.x, 2))) |>
  kable(caption = "Depression descriptive statistics by condition, time point, and baseline severity")
Depression descriptive statistics by condition, time point, and baseline severity
cond time_f depression_baseline_cat n mean sd se
Control Baseline Low baseline (PHQ-2 < 3) 428 0.99 0.81 0.04
Control Baseline Elevated baseline (PHQ-2 ≥ 3) 153 3.75 0.90 0.07
Control Week 2 Low baseline (PHQ-2 < 3) 390 1.50 1.36 0.07
Control Week 2 Elevated baseline (PHQ-2 ≥ 3) 131 2.89 1.53 0.13
Control Week 4 Low baseline (PHQ-2 < 3) 363 1.62 1.47 0.08
Control Week 4 Elevated baseline (PHQ-2 ≥ 3) 116 2.86 1.57 0.15
Control Week 6 Low baseline (PHQ-2 < 3) 337 1.78 1.43 0.08
Control Week 6 Elevated baseline (PHQ-2 ≥ 3) 101 2.94 1.70 0.17
Flourish Baseline Low baseline (PHQ-2 < 3) 433 0.99 0.83 0.04
Flourish Baseline Elevated baseline (PHQ-2 ≥ 3) 123 3.88 0.93 0.08
Flourish Week 2 Low baseline (PHQ-2 < 3) 384 1.47 1.31 0.07
Flourish Week 2 Elevated baseline (PHQ-2 ≥ 3) 108 2.92 1.54 0.15
Flourish Week 4 Low baseline (PHQ-2 < 3) 360 1.46 1.31 0.07
Flourish Week 4 Elevated baseline (PHQ-2 ≥ 3) 97 2.97 1.67 0.17
Flourish Week 6 Low baseline (PHQ-2 < 3) 329 1.52 1.38 0.08
Flourish Week 6 Elevated baseline (PHQ-2 ≥ 3) 88 2.49 1.62 0.17
anxiety_descriptives |>
  mutate(across(c(mean, sd, se), ~ round(.x, 2))) |>
  kable(caption = "Anxiety descriptive statistics by condition, time point, and baseline severity")
Anxiety descriptive statistics by condition, time point, and baseline severity
cond time_f anxiety_baseline_cat n mean sd se
Control Baseline Low baseline (GAD-2 < 3) 342 1.24 0.79 0.04
Control Baseline Elevated baseline (GAD-2 ≥ 3) 239 4.44 1.10 0.07
Control Week 2 Low baseline (GAD-2 < 3) 308 1.86 1.41 0.08
Control Week 2 Elevated baseline (GAD-2 ≥ 3) 213 3.45 1.63 0.11
Control Week 4 Low baseline (GAD-2 < 3) 284 2.03 1.59 0.09
Control Week 4 Elevated baseline (GAD-2 ≥ 3) 195 3.50 1.77 0.13
Control Week 6 Low baseline (GAD-2 < 3) 261 2.11 1.63 0.10
Control Week 6 Elevated baseline (GAD-2 ≥ 3) 176 3.49 1.78 0.13
Flourish Baseline Low baseline (GAD-2 < 3) 312 1.16 0.83 0.05
Flourish Baseline Elevated baseline (GAD-2 ≥ 3) 244 4.30 1.12 0.07
Flourish Week 2 Low baseline (GAD-2 < 3) 277 1.79 1.45 0.09
Flourish Week 2 Elevated baseline (GAD-2 ≥ 3) 215 3.42 1.72 0.12
Flourish Week 4 Low baseline (GAD-2 < 3) 259 1.77 1.56 0.10
Flourish Week 4 Elevated baseline (GAD-2 ≥ 3) 198 3.32 1.78 0.13
Flourish Week 6 Low baseline (GAD-2 < 3) 237 1.82 1.58 0.10
Flourish Week 6 Elevated baseline (GAD-2 ≥ 3) 180 3.06 1.66 0.12

Primary mixed-effects models

# outcome ~ cond * I(time - 2.5) + (1 | unique_ID) + (1 | term)

model_depression_primary <- lmer(depression ~ cond * I(time - 2.5) + (1 | unique_ID) + (1 | term), data = merged_data_long)
model_anxiety_primary <- lmer(anxiety ~ cond * I(time - 2.5) + (1 | unique_ID) + (1 | term), data = merged_data_long)

summary(model_depression_primary)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: depression ~ cond * I(time - 2.5) + (1 | unique_ID) + (1 | term)
##    Data: merged_data_long
## 
## REML criterion at convergence: 13437.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1857 -0.5576 -0.1220  0.5044  3.6705 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  unique_ID (Intercept) 1.137170 1.06638 
##  term      (Intercept) 0.001054 0.03246 
##  Residual              1.162928 1.07839 
## Number of obs: 3941, groups:  unique_ID, 1137; term, 2
## 
## Fixed effects:
##                                         Estimate Std. Error         df t value
## (Intercept)                              1.82847    0.04587    0.20956  39.859
## condflourish_vs_control                 -0.08380    0.03665 1113.47083  -2.287
## I(time - 2.5)                            0.07964    0.01582 2971.40465   5.033
## condflourish_vs_control:I(time - 2.5)   -0.04326    0.01582 2971.65187  -2.734
##                                       Pr(>|t|)    
## (Intercept)                            0.34451    
## condflourish_vs_control                0.02241 *  
## I(time - 2.5)                         5.11e-07 ***
## condflourish_vs_control:I(time - 2.5)  0.00629 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndf__ I(-2.5
## cndflrsh_v_ 0.019               
## I(time-2.5) 0.071  0.003        
## c__:I(-2.5) 0.003  0.087  0.024
summary(model_anxiety_primary)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: anxiety ~ cond * I(time - 2.5) + (1 | unique_ID) + (1 | term)
##    Data: merged_data_long
## 
## REML criterion at convergence: 14577.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2730 -0.6017 -0.1059  0.5631  3.4196 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  unique_ID (Intercept) 1.6582   1.2877  
##  term      (Intercept) 0.1314   0.3625  
##  Residual              1.5161   1.2313  
## Number of obs: 3940, groups:  unique_ID, 1137; term, 2
## 
## Fixed effects:
##                                         Estimate Std. Error         df t value
## (Intercept)                            2.343e+00  2.638e-01  9.938e-01   8.882
## condflourish_vs_control               -7.224e-02  4.369e-02  1.129e+03  -1.654
## I(time - 2.5)                         -9.772e-03  1.809e-02  2.972e+03  -0.540
## condflourish_vs_control:I(time - 2.5) -5.825e-02  1.809e-02  2.973e+03  -3.220
##                                       Pr(>|t|)   
## (Intercept)                             0.0723 . 
## condflourish_vs_control                 0.0985 . 
## I(time - 2.5)                           0.5892   
## condflourish_vs_control:I(time - 2.5)   0.0013 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndf__ I(-2.5
## cndflrsh_v_ 0.004               
## I(time-2.5) 0.016  0.003        
## c__:I(-2.5) 0.001  0.086  0.023
standardize_parameters(model_depression_primary)
## # Standardization method: refit
## 
## Parameter                            | Std. Coef. |         95% CI
## ------------------------------------------------------------------
## (Intercept)                          |       0.16 | [ 0.08,  0.24]
## condflourish vs control              |      -0.13 | [-0.21, -0.06]
## time - 2 5                           |       0.06 | [ 0.04,  0.08]
## condflourish vs control × time - 2 5 |      -0.03 | [-0.05, -0.01]
standardize_parameters(model_anxiety_primary)
## # Standardization method: refit
## 
## Parameter                            | Std. Coef. |         95% CI
## ------------------------------------------------------------------
## (Intercept)                          |      -0.12 | [-0.41,  0.18]
## condflourish vs control              |      -0.13 | [-0.20, -0.05]
## time - 2 5                           |  -6.09e-03 | [-0.03,  0.02]
## condflourish vs control × time - 2 5 |      -0.04 | [-0.06, -0.01]
depression_interaction <- extract_lmer_term(model_depression_primary, "condflourish_vs_control:I(time - 2.5)") |>
  left_join(extract_standardized_term(model_depression_primary, "condflourish_vs_control:I(time - 2.5)"), by = "term") |>
  mutate(across(c(b, SE, t, beta, beta_LL, beta_UL), ~ round(.x, 2)), p_formatted = format_p(p))

anxiety_interaction <- extract_lmer_term(model_anxiety_primary, "condflourish_vs_control:I(time - 2.5)") |>
  left_join(extract_standardized_term(model_anxiety_primary, "condflourish_vs_control:I(time - 2.5)"), by = "term") |>
  mutate(across(c(b, SE, t, beta, beta_LL, beta_UL), ~ round(.x, 2)), p_formatted = format_p(p))

depression_interaction |> kable(caption = "Depression primary condition × time interaction")
Depression primary condition × time interaction
term b SE df t p LL UL beta beta_LL beta_UL p_formatted
condflourish_vs_control:I(time - 2.5) -0.04 0.02 2971.652 -2.73 0.0062907 -0.0742716 -0.0122497 -0.03 -0.05 -0.01 0.006
anxiety_interaction |> kable(caption = "Anxiety primary condition × time interaction")
Anxiety primary condition × time interaction
term b SE df t p LL UL beta beta_LL beta_UL p_formatted
condflourish_vs_control:I(time - 2.5) -0.06 0.02 2972.615 -3.22 0.0012972 -0.0937162 -0.022792 -0.04 -0.06 -0.01 0.001

Within-condition slopes

# Probe slopes by fitting separate models within each condition.

get_slopes_with_beta <- function(data, outcome) {
  
  map_dfr(c("Control", "Flourish"), function(condition_label) {
    
    d <- data |>
      filter(cond == condition_label) |>
      filter(
        !is.na(.data[[outcome]]),
        !is.na(time),
        !is.na(unique_ID),
        !is.na(term)
      ) |>
      mutate(
        outcome_z = as.numeric(scale(.data[[outcome]])),
        time_z = as.numeric(scale(time - 2.5))
      )
    
    # Unstandardized model
    raw_fit <- lmer(
      reformulate(
        termlabels = c("I(time - 2.5)", "(1 | unique_ID)", "(1 | term)"),
        response = outcome
      ),
      data = d
    )
    
    raw <- extract_lmer_term(raw_fit, "I(time - 2.5)") |>
      mutate(cond = condition_label)
    
    # Manually standardized model
    std_fit <- lmer(
      outcome_z ~ time_z + (1 | unique_ID) + (1 | term),
      data = d
    )
    
    std <- extract_lmer_term(std_fit, "time_z") |>
      transmute(
        cond = condition_label,
        beta = b,
        beta_LL = LL,
        beta_UL = UL
      )
    
    raw |>
      left_join(std, by = "cond")
  }) |>
    mutate(
      across(
        c(b, SE, t, LL, UL, beta, beta_LL, beta_UL),
        ~ round(.x, 2)
      ),
      p_formatted = format_p(p)
    )
}

depression_slopes <- get_slopes_with_beta(merged_data_long, "depression")
anxiety_slopes <- get_slopes_with_beta(merged_data_long, "anxiety")

depression_slopes |> kable(caption = "Depression slopes over time by condition")
Depression slopes over time by condition
term b SE df t p LL UL cond beta beta_LL beta_UL p_formatted
I(time - 2.5) 0.12 0.02 1519.161 5.42 0.0000001 0.08 0.17 Control 0.09 0.06 0.12 < .001
I(time - 2.5) 0.04 0.02 1452.427 1.65 0.0984877 -0.01 0.08 Flourish 0.03 -0.01 0.06 0.098
anxiety_slopes |> kable(caption = "Anxiety slopes over time by condition")
Anxiety slopes over time by condition
term b SE df t p LL UL cond beta beta_LL beta_UL p_formatted
I(time - 2.5) 0.05 0.03 1514.723 1.95 0.0516803 0.00 0.10 Control 0.03 0.00 0.06 0.052
I(time - 2.5) -0.07 0.03 1457.327 -2.60 0.0094189 -0.12 -0.02 Flourish -0.04 -0.07 -0.01 0.009

Between-condition differences by time point

# Probe timepoint differences using separate lmer models at each timepoint:
# outcome ~ cond + (1 | term)

get_condition_diffs_original <- function(data, outcome) {
  map_dfr(1:4, function(tt) {
    fit <- lmer(
      as.formula(paste0(outcome, " ~ cond + (1 | term)")),
      data = data |> filter(time == tt)
    )
    extract_lmer_term(fit, "condflourish_vs_control") |>
      transmute(
        time = tt,
        time_f = factor(tt, levels = c(1, 2, 3, 4), labels = c("Baseline", "Week 2", "Week 4", "Week 6")),
        b, SE, df, t, p, LL, UL
      )
  }) |>
    mutate(p_formatted = format_p(p))
}

depression_condition_diffs <- get_condition_diffs_original(merged_data_long, "depression")
anxiety_condition_diffs <- get_condition_diffs_original(merged_data_long, "anxiety")

depression_condition_diffs |>
  mutate(across(c(b, SE, t, LL, UL), ~ round(.x, 2))) |>
  kable(caption = "Depression between-condition differences by time point using original per-timepoint models")
Depression between-condition differences by time point using original per-timepoint models
time time_f b SE df t p LL UL p_formatted
1 Baseline -0.04 0.04 1134.0038 -1.00 0.3163058 -0.13 0.04 0.316
2 Week 2 -0.03 0.05 1011.0000 -0.69 0.4880585 -0.13 0.06 0.488
3 Week 4 -0.07 0.05 933.0146 -1.37 0.1708381 -0.17 0.03 0.171
4 Week 6 -0.16 0.05 853.0000 -3.09 0.0020416 -0.26 -0.06 0.002
anxiety_condition_diffs |>
  mutate(across(c(b, SE, t, LL, UL), ~ round(.x, 2))) |>
  kable(caption = "Anxiety between-condition differences by time point using original per-timepoint models")
Anxiety between-condition differences by time point using original per-timepoint models
time time_f b SE df t p LL UL p_formatted
1 Baseline -0.01 0.05 1134.0267 -0.12 0.9058530 -0.11 0.10 0.906
2 Week 2 0.00 0.05 1010.0170 -0.07 0.9477384 -0.11 0.10 0.948
3 Week 4 -0.10 0.06 933.0016 -1.63 0.1035781 -0.21 0.02 0.104
4 Week 6 -0.16 0.06 851.0042 -2.61 0.0091744 -0.28 -0.04 0.009

Week 6 effect sizes

# Use effectsize::cohens_d() directly on the observed Week 6 data.

depression_week6_d <- cohens_d(depression ~ cond, data = subset(merged_data_long, time == 4)) |>
  as_tibble() |>
  transmute(d = abs(Cohens_d), d_LL = min(abs(CI_low), abs(CI_high)), d_UL = max(abs(CI_low), abs(CI_high)))

anxiety_week6_d <- cohens_d(anxiety ~ cond, data = subset(merged_data_long, time == 4)) |>
  as_tibble() |>
  transmute(d = abs(Cohens_d), d_LL = min(abs(CI_low), abs(CI_high)), d_UL = max(abs(CI_low), abs(CI_high)))

depression_week6_d |> kable(digits = 2, caption = "Week 6 depression effect size")
Week 6 depression effect size
d d_LL d_UL
0.21 0.08 0.35
anxiety_week6_d |> kable(digits = 2, caption = "Week 6 anxiety effect size")
Week 6 anxiety effect size
d d_LL d_UL
0.18 0.04 0.31

Baseline Symptom Severity as a Moderator

model_depression_moderation <- lmer(depression ~ cond * I(time - 2.5) * depression_baseline_cat + (1 | unique_ID) + (1 | term), data = merged_data_long, REML = TRUE)
model_anxiety_moderation <- lmer(anxiety ~ cond * I(time - 2.5) * anxiety_baseline_cat + (1 | unique_ID) + (1 | term), data = merged_data_long, REML = TRUE)

depression_moderation_term <- extract_lmer_term(model_depression_moderation, "condflourish_vs_control:I(time - 2.5):depression_baseline_catElevated baseline (PHQ-2 ≥ 3)") |>
  mutate(across(c(b, SE, t, LL, UL), ~ round(.x, 2)), p_formatted = format_p(p))

anxiety_moderation_term <- extract_lmer_term(model_anxiety_moderation, "condflourish_vs_control:I(time - 2.5):anxiety_baseline_catElevated baseline (GAD-2 ≥ 3)") |>
  mutate(across(c(b, SE, t, LL, UL), ~ round(.x, 2)), p_formatted = format_p(p))

depression_moderation_term |> kable(caption = "Depression condition × time × baseline symptom severity interaction")
Depression condition × time × baseline symptom severity interaction
term b SE df t p LL UL p_formatted
condflourish_vs_control:I(time - 2.5):depression_baseline_catElevated baseline (PHQ-2 ≥ 3) -0.03 0.04 3101.121 -0.91 0.3630022 -0.1 0.04 0.363
anxiety_moderation_term |> kable(caption = "Anxiety condition × time × baseline symptom severity interaction")
Anxiety condition × time × baseline symptom severity interaction
term b SE df t p LL UL p_formatted
condflourish_vs_control:I(time - 2.5):anxiety_baseline_catElevated baseline (GAD-2 ≥ 3) -0.01 0.03 3070.956 -0.25 0.7989186 -0.08 0.06 0.799

Subgroup-specific mixed-effects models

format_ci_bound <- function(x) {
  case_when(
    is.na(x) ~ NA_character_,
    x != 0 & abs(x) < .01 ~ sprintf("%.3f", x),
    TRUE ~ sprintf("%.2f", x)
  )
}

run_subgroup_lmm <- function(data, outcome, baseline_cat_var) {
  
  groups <- levels(data[[baseline_cat_var]])
  
  map_dfr(groups, function(group_label) {
    
    d <- data |>
      filter(.data[[baseline_cat_var]] == group_label) |>
      filter(
        !is.na(.data[[outcome]]),
        !is.na(cond),
        !is.na(time),
        !is.na(unique_ID),
        !is.na(term)
      ) |>
      mutate(
        cond = factor(cond, levels = c("Control", "Flourish")),
        term = factor(term),
        outcome_z = as.numeric(scale(.data[[outcome]])),
        time_z = as.numeric(scale(time - 2.5))
      )
    
    contrasts(d$cond) <- cbind(
      flourish_vs_control = c(-1, 1)
    )
    
    # Unstandardized model
    raw_formula <- as.formula(
      paste0(
        outcome,
        " ~ cond * I(time - 2.5) + ",
        "(1 | unique_ID) + (1 | term)"
      )
    )
    
    raw_fit <- lmer(
      raw_formula,
      data = d,
      REML = TRUE
    )
    
    raw <- extract_lmer_term(
      raw_fit,
      "condflourish_vs_control:I(time - 2.5)"
    )
    
    # Manually standardized model
    std_fit <- lmer(
      outcome_z ~ cond * time_z +
        (1 | unique_ID) +
        (1 | term),
      data = d,
      REML = TRUE
    )
    
    std <- extract_lmer_term(
      std_fit,
      "condflourish_vs_control:time_z"
    ) |>
      transmute(
        beta = b,
        beta_LL = LL,
        beta_UL = UL
      )
    
    bind_cols(raw, std) |>
      mutate(baseline_group = group_label)
  })
}

depression_subgroup_models <- run_subgroup_lmm(
  merged_data_long,
  "depression",
  "depression_baseline_cat"
) |>
  mutate(
    outcome = "Depression",
    across(c(b, SE, t, LL, UL, beta), ~ round(.x, 2)),
    p_formatted = format_p(p)
  )

anxiety_subgroup_models <- run_subgroup_lmm(
  merged_data_long,
  "anxiety",
  "anxiety_baseline_cat"
) |>
  mutate(
    outcome = "Anxiety",
    across(c(b, SE, t, LL, UL, beta), ~ round(.x, 2)),
    p_formatted = format_p(p)
  )

bind_rows(depression_subgroup_models, anxiety_subgroup_models) |>
  mutate(
    beta_LL = format_ci_bound(beta_LL),
    beta_UL = format_ci_bound(beta_UL)
  ) |>
  dplyr::select(outcome, baseline_group, b, SE, t, p_formatted, beta, beta_LL, beta_UL) |>
  kable(
    col.names = c("Outcome", "Baseline group", "b", "SE", "t", "p", "β", "95% CI LL", "95% CI UL"),
    caption = "Subgroup-specific condition × time mixed-effects model results"
  )
Subgroup-specific condition × time mixed-effects model results
Outcome Baseline group b SE t p β 95% CI LL 95% CI UL
Depression Low baseline (PHQ-2 < 3) -0.05 0.02 -2.83 0.005 -0.04 -0.07 -0.01
Depression Elevated baseline (PHQ-2 ≥ 3) -0.08 0.04 -2.08 0.037 -0.06 -0.11 -0.003
Anxiety Low baseline (GAD-2 < 3) -0.05 0.02 -2.18 0.030 -0.04 -0.07 -0.004
Anxiety Elevated baseline (GAD-2 ≥ 3) -0.05 0.03 -1.88 0.060 -0.04 -0.08 0.002

Table 2. Change from baseline to Week 6 by baseline symptom severity

# - completers with both T1 and T4
# - change_raw = post - pre
# - change_improve = pre - post
# - between-arm tests use t.test(change_raw ~ cond, var.equal = TRUE)
# - effect size is delta_improve divided by the pooled SD of raw change.

make_change_table <- function(df, outcome_var, baseline_cat_var,
                              pre_time = 1, post_time = 4) {

  prepost_long <- df |>
    filter(time %in% c(pre_time, post_time), !is.na({{ baseline_cat_var }})) |>
    dplyr::select(unique_ID, cond, time, baseline_cat = {{ baseline_cat_var }}, value = {{ outcome_var }}) |>
    mutate(timepoint = if_else(time == pre_time, "pre", "post"))

  prepost_wide <- prepost_long |>
    dplyr::select(unique_ID, cond, baseline_cat, timepoint, value) |>
    pivot_wider(names_from = timepoint, values_from = value) |>
    mutate(
      complete = !is.na(pre) & !is.na(post),
      change_raw = post - pre,
      change_improve = pre - post
    ) |>
    filter(complete)

  within_summary <- prepost_wide |>
    group_by(baseline_cat, cond) |>
    summarise(
      n = n(),
      mean_pre = mean(pre, na.rm = TRUE),
      sd_pre = sd(pre, na.rm = TRUE),
      mean_post = mean(post, na.rm = TRUE),
      sd_post = sd(post, na.rm = TRUE),
      avg_change_improvement = mean(change_improve, na.rm = TRUE),
      sd_change_raw = sd(change_raw, na.rm = TRUE),
      .groups = "drop"
    )

  between_tests <- prepost_wide |>
    group_by(baseline_cat) |>
    group_modify(~{
      fit <- t.test(change_raw ~ cond, data = .x, var.equal = TRUE)

      x_f <- .x |> filter(cond == "Flourish") |> pull(change_raw)
      x_c <- .x |> filter(cond == "Control") |> pull(change_raw)

      n_f <- sum(!is.na(x_f))
      n_c <- sum(!is.na(x_c))
      sd_f <- sd(x_f, na.rm = TRUE)
      sd_c <- sd(x_c, na.rm = TRUE)

      pooled_sd <- sqrt(((n_f - 1) * sd_f^2 + (n_c - 1) * sd_c^2) / (n_f + n_c - 2))

      mean_imp_f <- mean(.x$change_improve[.x$cond == "Flourish"], na.rm = TRUE)
      mean_imp_c <- mean(.x$change_improve[.x$cond == "Control"], na.rm = TRUE)
      delta_improve <- mean_imp_f - mean_imp_c

      raw_ci <- fit$conf.int

      tibble(
        delta = round(delta_improve, 2),
        p = fit$p.value,
        LL = -raw_ci[2],
        UL = -raw_ci[1],
        ES = delta_improve / pooled_sd,
        ES_LL = (-raw_ci[2]) / pooled_sd,
        ES_UL = (-raw_ci[1]) / pooled_sd
      )
    }) |>
    ungroup() |>
    mutate(
      p = ifelse(p < .001, "< .001", sprintf("%.3f", p)),
      LL = round(LL, 2),
      UL = round(UL, 2),
      ES = round(ES, 2),
      ES_LL = round(ES_LL, 2),
      ES_UL = round(ES_UL, 2)
    )

  table_out <- within_summary |>
    dplyr::select(baseline_cat, cond, avg_change_improvement) |>
    pivot_wider(names_from = cond, values_from = avg_change_improvement) |>
    left_join(between_tests, by = "baseline_cat") |>
    mutate(
      Flourish = round(Flourish, 2),
      Control = round(Control, 2)
    ) |>
    dplyr::select(
      baseline_cat,
      Flourish, Control,
      delta, p, LL, UL,
      ES, ES_LL, ES_UL
    )

  return(table_out)
}

table2_dep <- make_change_table(
  df = merged_data_long,
  outcome_var = depression,
  baseline_cat_var = depression_baseline_cat
)

table2_anx <- make_change_table(
  df = merged_data_long,
  outcome_var = anxiety,
  baseline_cat_var = anxiety_baseline_cat
)

table2_combined <- bind_rows(
  table2_dep |> mutate(Outcome = "Depression (PHQ-2)"),
  table2_anx |> mutate(Outcome = "Anxiety (GAD-2)")
) |>
  dplyr::select(Outcome, everything())

table2_combined |>
  kable(
    col.names = c("Outcome", "Baseline symptom severity", "Flourish", "Control", "Δ", "p", "LL", "UL", "ES", "LL", "UL"),
    caption = "Table 2. Change in depressive and anxiety symptoms from baseline to Week 6 by baseline symptom severity group"
  )
Table 2. Change in depressive and anxiety symptoms from baseline to Week 6 by baseline symptom severity group
Outcome Baseline symptom severity Flourish Control Δ p LL UL ES LL UL
Depression (PHQ-2) Low baseline (PHQ-2 < 3) -0.53 -0.80 0.27 0.012 -0.47 -0.06 0.20 -0.35 -0.04
Depression (PHQ-2) Elevated baseline (PHQ-2 ≥ 3) 1.38 0.75 0.62 0.018 -1.14 -0.11 0.35 -0.64 -0.06
Anxiety (GAD-2) Low baseline (GAD-2 < 3) -0.65 -0.86 0.21 0.135 -0.49 0.07 0.13 -0.31 0.04
Anxiety (GAD-2) Elevated baseline (GAD-2 ≥ 3) 1.30 0.89 0.41 0.031 -0.78 -0.04 0.23 -0.44 -0.02
# Keep a wide change dataset for the meaningful-change analyses below.
change_data <- merged_data_long |>
  filter(time %in% c(1, 4)) |>
  select(unique_ID, cond, time, depression, anxiety, depression_baseline_cat, anxiety_baseline_cat) |>
  pivot_wider(names_from = time, values_from = c(depression, anxiety), names_prefix = "T") |>
  mutate(
    depression_change = depression_T1 - depression_T4,
    anxiety_change = anxiety_T1 - anxiety_T4
  )

Meaningful Symptom Improvement Among Participants With Elevated Baseline Symptoms

run_meaningful_change <- function(
    data,
    outcome_change_var,
    baseline_cat_var,
    elevated_label,
    missing_nonresponse = FALSE
) {
  
  d <- data |>
    filter(.data[[baseline_cat_var]] == elevated_label) |>
    mutate(cond = factor(cond, levels = c("Control", "Flourish")))

  if (!missing_nonresponse) {
    d <- d |>
      filter(!is.na(.data[[outcome_change_var]])) |>
      mutate(responder = .data[[outcome_change_var]] >= 2)
  } else {
    d <- d |>
      mutate(
        responder = !is.na(.data[[outcome_change_var]]) &
          .data[[outcome_change_var]] >= 2
      )
  }

  responders <- d |>
    group_by(cond) |>
    summarise(
      n = n(),
      n_responder = sum(responder),
      pct_responder = 100 * mean(responder),
      .groups = "drop"
    )

  fit <- glm(
    responder ~ cond,
    data = d,
    family = binomial
  )

  fit_null <- glm(
    responder ~ 1,
    data = d,
    family = binomial
  )

  or <- exp(coef(fit)["condFlourish"])

  ci <- suppressMessages(
    exp(confint(fit, parm = "condFlourish"))
  )

  p <- anova(
    fit_null,
    fit,
    test = "LRT"
  )$`Pr(>Chi)`[2]

  flourish_rate <- responders |>
    filter(cond == "Flourish") |>
    pull(pct_responder) / 100

  control_rate <- responders |>
    filter(cond == "Control") |>
    pull(pct_responder) / 100

  arr <- flourish_rate - control_rate

  list(
    responders = responders,
    OR = unname(or),
    CI_low = unname(ci[1]),
    CI_high = unname(ci[2]),
    p = p,
    ARR = arr,
    NNT = 1 / arr
  )
}

depression_meaningful <- run_meaningful_change(
  change_data,
  "depression_change",
  "depression_baseline_cat",
  "Elevated baseline (PHQ-2 ≥ 3)"
)

anxiety_meaningful <- run_meaningful_change(
  change_data,
  "anxiety_change",
  "anxiety_baseline_cat",
  "Elevated baseline (GAD-2 ≥ 3)"
)

depression_meaningful_missing_nonresponse <- run_meaningful_change(
  change_data,
  "depression_change",
  "depression_baseline_cat",
  "Elevated baseline (PHQ-2 ≥ 3)",
  missing_nonresponse = TRUE
)

anxiety_meaningful_missing_nonresponse <- run_meaningful_change(
  change_data,
  "anxiety_change",
  "anxiety_baseline_cat",
  "Elevated baseline (GAD-2 ≥ 3)",
  missing_nonresponse = TRUE
)

format_meaningful <- function(x, outcome, analysis) {
  
  r <- x$responders

  tibble(
    Outcome = outcome,
    Analysis = analysis,
    Flourish = sprintf(
      "%d/%d (%.1f%%)",
      r$n_responder[r$cond == "Flourish"],
      r$n[r$cond == "Flourish"],
      r$pct_responder[r$cond == "Flourish"]
    ),
    Control = sprintf(
      "%d/%d (%.1f%%)",
      r$n_responder[r$cond == "Control"],
      r$n[r$cond == "Control"],
      r$pct_responder[r$cond == "Control"]
    ),
    OR = sprintf("%.2f", x$OR),
    `95% CI` = sprintf(
      "[%.2f, %.2f]",
      x$CI_low,
      x$CI_high
    ),
    p = format_p(x$p),
    NNT = sprintf("%.1f", abs(x$NNT))
  )
}

meaningful_change_results <- bind_rows(
  format_meaningful(
    depression_meaningful,
    "Depression",
    "Complete cases"
  ),
  format_meaningful(
    anxiety_meaningful,
    "Anxiety",
    "Complete cases"
  ),
  format_meaningful(
    depression_meaningful_missing_nonresponse,
    "Depression",
    "Missing = nonresponse"
  ),
  format_meaningful(
    anxiety_meaningful_missing_nonresponse,
    "Anxiety",
    "Missing = nonresponse"
  )
)

meaningful_change_results |>
  kable(
    caption = "Meaningful symptom improvement analyses"
  )
Meaningful symptom improvement analyses
Outcome Analysis Flourish Control OR 95% CI p NNT
Depression Complete cases 46/88 (52.3%) 32/101 (31.7%) 2.36 [1.31, 4.30] 0.004 4.9
Anxiety Complete cases 91/180 (50.6%) 67/176 (38.1%) 1.66 [1.09, 2.54] 0.018 8.0
Depression Missing = nonresponse 46/123 (37.4%) 32/153 (20.9%) 2.26 [1.33, 3.88] 0.003 6.1
Anxiety Missing = nonresponse 91/244 (37.3%) 67/239 (28.0%) 1.53 [1.04, 2.25] 0.030 10.8

Figures 2 and 3: symptom trajectories by baseline symptom severity

plot_trajectory <- function(
    df,
    outcome,
    baseline_cat,
    y_label,
    output_file
) {

  plot_df <- df |>
    filter(
      !is.na(.data[[outcome]]),
      !is.na(.data[[baseline_cat]]),
      !is.na(cond),
      !is.na(week)
    ) |>
    mutate(
      baseline_cat_plot = case_when(
        as.character(.data[[baseline_cat]]) %in%
          c(
            "Low baseline (PHQ-2 < 3)",
            "Low baseline (GAD-2 < 3)"
          ) ~ "Low Baseline",

        as.character(.data[[baseline_cat]]) %in%
          c(
            "Elevated baseline (PHQ-2 ≥ 3)",
            "Elevated baseline (GAD-2 ≥ 3)"
          ) ~ "High Baseline",

        TRUE ~ as.character(.data[[baseline_cat]])
      ),
      baseline_cat_plot = factor(
        baseline_cat_plot,
        levels = c("Low Baseline", "High Baseline")
      ),
      cond = factor(
        cond,
        levels = c("Control", "Flourish")
      )
    )

  p <- ggplot(
    plot_df,
    aes(
      x = week,
      y = .data[[outcome]],
      color = cond,
      group = cond
    )
  ) +
    geom_errorbar(
      stat = "summary",
      fun.data = mean_se,
      width = 0.12,
      linewidth = 0.45,
      na.rm = TRUE
    ) +
    geom_line(
      stat = "summary",
      fun = mean,
      linewidth = 0.85,
      na.rm = TRUE
    ) +
    geom_point(
      stat = "summary",
      fun = mean,
      size = 2,
      na.rm = TRUE
    ) +
    facet_grid(. ~ baseline_cat_plot) +
    labs(
      x = "Week",
      y = y_label,
      color = "Condition"
    ) +
    scale_x_continuous(
      breaks = c(0, 2, 4, 6),
      labels = c("0", "2", "4", "6"),
      expand = expansion(add = c(0.15, 0.15))
    ) +
    scale_y_continuous(
      breaks = 1:4,
      expand = expansion(mult = c(0.02, 0.04))
    ) +
    coord_cartesian(
      ylim = c(0.8, 4.7)
    ) +
    scale_color_manual(
      values = c(
        "Control" = "#E9B12E",
        "Flourish" = "#3A828E"
      ),
      breaks = c("Control", "Flourish"),
      labels = c(
        "Control" = "Control",
        "Flourish" = "Flourish"
      ),
      drop = FALSE
    ) +
    theme(
      panel.background = element_rect(
        fill = "white",
        color = NA
      ),
      plot.background = element_rect(
        fill = "white",
        color = NA
      ),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_rect(
        color = "black",
        fill = NA,
        linewidth = 0.4
      ),
      axis.line = element_blank(),
      axis.text = element_text(
        size = 10.5,
        color = "black"
      ),
      axis.text.x = element_text(
        margin = margin(t = 4)
      ),
      axis.text.y = element_text(
        margin = margin(r = 4)
      ),
      axis.title = element_text(
        size = 12,
        face = "plain",
        color = "black"
      ),
      axis.title.x = element_text(
        margin = margin(t = 6)
      ),
      axis.title.y = element_text(
        margin = margin(r = 6)
      ),
      legend.position = "right",
      legend.title = element_text(
        size = 11.5,
        face = "plain",
        color = "black"
      ),
      legend.text = element_text(
        size = 11,
        color = "black"
      ),
      strip.background = element_rect(
        fill = "white",
        color = NA
      ),
      strip.text = element_text(
        size = 11.5,
        face = "plain",
        color = "black"
      ),
      panel.spacing = grid::unit(0.7, "lines"),
      plot.margin = margin(
        t = 6,
        r = 8,
        b = 6,
        l = 8
      )
    )

  print(p)

  ggsave(
    filename = output_file,
    plot = p,
    width = 6.5,
    height = 4.5
  )

  p
}

figure_depression <- plot_trajectory(
  df = merged_data_long,
  outcome = "depression",
  baseline_cat = "depression_baseline_cat",
  y_label = "Depression (0–6)",
  output_file = "./figure_depression.svg"
)

figure_anxiety <- plot_trajectory(
  df = merged_data_long,
  outcome = "anxiety",
  baseline_cat = "anxiety_baseline_cat",
  y_label = "Anxiety (0–6)",
  output_file = "./figure_anxiety.svg"
)

SUPPLEMENT

Section 1: Sensitivity Analyses Applying Additional Exclusion Criteria

Apply exclusions

participant_completion <- merged_data_long |>
  group_by(unique_ID) |>
  summarise(
    n_completed_timepoints = sum(Finished == 1, na.rm = TRUE),
    .groups = "drop"
  )

engagement_end <- engagement_end_clean |> 
  dplyr::select(unique_ID, active_days_end)

control_exposure_data <- merged_data_long |>
  group_by(unique_ID) |>
  summarise(
    control_exposed_to_flourish = any(
      contamination == 1,
      na.rm = TRUE
    ),
    .groups = "drop"
  )

preregistered_exclusions <- data_baseline |>
  left_join(participant_completion, by = "unique_ID") |>
  left_join(engagement_end, by = "unique_ID") |>
  left_join(control_exposure_data, by = "unique_ID") |>
  mutate(
    fewer_than_two_timepoints =
      n_completed_timepoints < 2,

    treatment_zero_active_days =
      cond == "Flourish" &
      !is.na(active_days_end) &
      active_days_end == 0,

    control_exposed_to_flourish =
      cond == "Control" &
      coalesce(control_exposed_to_flourish, FALSE),

    exclude_preregistered =
      fewer_than_two_timepoints |
      treatment_zero_active_days |
      control_exposed_to_flourish
  )

preregistered_exclusion_counts <- preregistered_exclusions |>
  summarise(
    n_itt = n(),
    n_fewer_than_two_timepoints =
      sum(fewer_than_two_timepoints, na.rm = TRUE),
    n_treatment_zero_active_days =
      sum(treatment_zero_active_days, na.rm = TRUE),
    n_control_exposed_to_flourish =
      sum(control_exposed_to_flourish, na.rm = TRUE),
    n_excluded_total =
      sum(exclude_preregistered, na.rm = TRUE),
    n_preregistered_exclusion_sample =
      n() - sum(exclude_preregistered, na.rm = TRUE)
  )

preregistered_exclusion_counts |>
  kable(
    caption = "Section 1. Preregistered exclusion criteria"
  )
Section 1. Preregistered exclusion criteria
n_itt n_fewer_than_two_timepoints n_treatment_zero_active_days n_control_exposed_to_flourish n_excluded_total n_preregistered_exclusion_sample
1137 129 4 27 160 977
merged_data_long_prereg <- merged_data_long |>
  left_join(
    preregistered_exclusions |>
      select(unique_ID, exclude_preregistered),
    by = "unique_ID"
  ) |>
  filter(!exclude_preregistered) |>
  mutate(
    cond = factor(cond, levels = c("Control", "Flourish")),
    term = factor(term)
  )

contrasts(merged_data_long_prereg$cond) <- cbind(
  flourish_vs_control = c(-1, 1)
)

n_distinct(merged_data_long_prereg$unique_ID)
## [1] 977

Refit models

model_depression_prereg <- lmer(
  depression ~ cond * I(time - 2.5) +
    (1 | unique_ID) +
    (1 | term),
  data = merged_data_long_prereg
)

model_anxiety_prereg <- lmer(
  anxiety ~ cond * I(time - 2.5) +
    (1 | unique_ID) +
    (1 | term),
  data = merged_data_long_prereg
)

depression_prereg_interaction <- extract_lmer_term(
  model_depression_prereg,
  "condflourish_vs_control:I(time - 2.5)"
) |>
  left_join(
    extract_standardized_term(
      model_depression_prereg,
      "condflourish_vs_control:I(time - 2.5)"
    ),
    by = "term"
  ) |>
  mutate(
    across(c(b, SE, t, beta), ~ round(.x, 2)),
    beta_LL = format_ci_bound(beta_LL),
    beta_UL = format_ci_bound(beta_UL),
    p_formatted = format_p(p)
  )

anxiety_prereg_interaction <- extract_lmer_term(
  model_anxiety_prereg,
  "condflourish_vs_control:I(time - 2.5)"
) |>
  left_join(
    extract_standardized_term(
      model_anxiety_prereg,
      "condflourish_vs_control:I(time - 2.5)"
    ),
    by = "term"
  ) |>
  mutate(
    across(c(b, SE, t, beta), ~ round(.x, 2)),
    beta_LL = format_ci_bound(beta_LL),
    beta_UL = format_ci_bound(beta_UL),
    p_formatted = format_p(p)
  )

depression_prereg_interaction |>
  kable(
    caption = paste(
      "Preregistered-exclusion depression",
      "condition × time interaction"
    )
  )
Preregistered-exclusion depression condition × time interaction
term b SE df t p LL UL beta beta_LL beta_UL p_formatted
condflourish_vs_control:I(time - 2.5) -0.05 0.02 2769.741 -2.79 0.0053157 -0.0775047 -0.0135363 -0.03 -0.06 -0.010 0.005
anxiety_prereg_interaction |>
  kable(
    caption = paste(
      "Preregistered-exclusion anxiety",
      "condition × time interaction"
    )
  )
Preregistered-exclusion anxiety condition × time interaction
term b SE df t p LL UL beta beta_LL beta_UL p_formatted
condflourish_vs_control:I(time - 2.5) -0.06 0.02 2763.964 -3.47 0.0005233 -0.1014139 -0.0282378 -0.04 -0.06 -0.02 < .001
model_depression_moderation_prereg <- lmer(
  depression ~
    cond * I(time - 2.5) * depression_baseline_cat +
    (1 | unique_ID) +
    (1 | term),
  data = merged_data_long_prereg,
  REML = TRUE
)

model_anxiety_moderation_prereg <- lmer(
  anxiety ~
    cond * I(time - 2.5) * anxiety_baseline_cat +
    (1 | unique_ID) +
    (1 | term),
  data = merged_data_long_prereg,
  REML = TRUE
)

extract_lmer_term(
  model_depression_moderation_prereg,
  paste0(
    "condflourish_vs_control:I(time - 2.5):",
    "depression_baseline_catElevated baseline (PHQ-2 ≥ 3)"
  )
) |>
  kable(
    caption = "Preregistered-exclusion depression moderation"
  )
Preregistered-exclusion depression moderation
term b SE df t p LL UL
condflourish_vs_control:I(time - 2.5):depression_baseline_catElevated baseline (PHQ-2 ≥ 3) -0.02434 0.0373074 2822.325 -0.6524163 0.5141858 -0.0974612 0.0487812
extract_lmer_term(
  model_anxiety_moderation_prereg,
  paste0(
    "condflourish_vs_control:I(time - 2.5):",
    "anxiety_baseline_catElevated baseline (GAD-2 ≥ 3)"
  )
) |>
  kable(
    caption = "Preregistered-exclusion anxiety moderation"
  )
Preregistered-exclusion anxiety moderation
term b SE df t p LL UL
condflourish_vs_control:I(time - 2.5):anxiety_baseline_catElevated baseline (GAD-2 ≥ 3) 0.002438 0.0359188 2798.36 0.0678748 0.9458902 -0.0679616 0.0728376
prereg_subgroup_results <- bind_rows(
  run_subgroup_lmm(
    merged_data_long_prereg,
    "depression",
    "depression_baseline_cat"
  ) |>
    mutate(outcome = "Depression"),

  run_subgroup_lmm(
    merged_data_long_prereg,
    "anxiety",
    "anxiety_baseline_cat"
  ) |>
    mutate(outcome = "Anxiety")
) |>
  mutate(
    across(c(b, SE, t, LL, UL, beta), ~ round(.x, 2)),
    p_formatted = format_p(p)
  )

prereg_subgroup_results |>
  mutate(
    beta_LL = format_ci_bound(beta_LL),
    beta_UL = format_ci_bound(beta_UL)
  ) |>
  select(
    outcome,
    baseline_group,
    b,
    SE,
    t,
    p_formatted,
    beta,
    beta_LL,
    beta_UL
  ) |>
  kable(
    col.names = c(
      "Outcome",
      "Baseline group",
      "b",
      "SE",
      "t",
      "p",
      "β",
      "95% CI LL",
      "95% CI UL"
    ),
    caption = paste(
      "Preregistered-exclusion subgroup-specific",
      "condition × time effects"
    )
  )
Preregistered-exclusion subgroup-specific condition × time effects
Outcome Baseline group b SE t p β 95% CI LL 95% CI UL
Depression Low baseline (PHQ-2 < 3) -0.05 0.02 -2.95 0.003 -0.04 -0.07 -0.01
Depression Elevated baseline (PHQ-2 ≥ 3) -0.07 0.04 -1.89 0.059 -0.05 -0.11 0.002
Anxiety Low baseline (GAD-2 < 3) -0.05 0.02 -2.54 0.011 -0.04 -0.08 -0.010
Anxiety Elevated baseline (GAD-2 ≥ 3) -0.05 0.03 -1.76 0.078 -0.04 -0.07 0.004

Test Week 6 effects

run_week6_prereg <- function(data, outcome) {
  d <- data |>
    filter(time == 4, !is.na(.data[[outcome]])) |>
    mutate(
      cond = factor(cond, levels = c("Control", "Flourish")),
      term = factor(term)
    )

  contrasts(d$cond) <- cbind(flourish_vs_control = c(-1, 1))

  fit <- lmer(
    reformulate(c("cond", "(1 | term)"), response = outcome),
    data = d
  )

  model_result <- extract_lmer_term(
    fit,
    "condflourish_vs_control"
  )

  effect_result <- effectsize::cohens_d(
    reformulate("cond", response = outcome),
    data = d
  ) |>
    as_tibble() |>
    transmute(
      d = abs(Cohens_d),
      d_LL = pmin(abs(CI_low), abs(CI_high)),
      d_UL = pmax(abs(CI_low), abs(CI_high))
    )

  bind_cols(model_result, effect_result) |>
    mutate(outcome = str_to_title(outcome))
}

week6_prereg_effects <- bind_rows(
  run_week6_prereg(merged_data_long_prereg, "depression"),
  run_week6_prereg(merged_data_long_prereg, "anxiety")
) |>
  mutate(
    across(c(b, SE, t, d, d_LL, d_UL), ~ round(.x, 2)),
    p_formatted = format_p(p),
    d_CI = sprintf("[%.2f, %.2f]", d_LL, d_UL)
  )

week6_prereg_effects |>
  select(outcome, b, SE, t, p_formatted, d, d_CI) |>
  kable(
    col.names = c("Outcome", "b", "SE", "t", "p", "d", "95% CI for d"),
    caption = "Week 6 between-condition effects after preregistered exclusions"
  )
Week 6 between-condition effects after preregistered exclusions
Outcome b SE t p d 95% CI for d
Depression -0.16 0.05 -3.09 0.002 0.21 [0.08, 0.35]
Anxiety -0.16 0.06 -2.59 0.010 0.18 [0.04, 0.31]

Section 2: Intervention and Safety Details

This section does not require additional analysis code.

Section 3: Attrition and Missingness Checks

# Completion is defined using Finished == 1.

attrition_check <- merged_data_long %>%
  group_by(unique_ID) %>%
  dplyr::summarise(
    cond = first(na.omit(cond)),
    term = first(na.omit(term)),
    Age = first(na.omit(Age)),
    Sex = first(na.omit(Sex)),
    SES_num = first(na.omit(SES_num)),
    int_student = first(na.omit(int_student)),
    baseline_depression = depression[time == 1][1],
    baseline_anxiety = anxiety[time == 1][1],
    completed_t4 = as.integer(any(time == 4 & Finished == 1, na.rm = TRUE)),
    num_timepoints_completed = sum(Finished == 1, na.rm = TRUE),
    .groups = "drop"
  )

completion_by_time <- merged_data_long %>%
  group_by(unique_ID, cond, time) %>%
  summarise(
    completed = as.integer(any(Finished == 1, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  count(time, cond, completed, name = "n") %>%
  group_by(time, cond) %>%
  mutate(percent = round(100 * n / sum(n), 1)) %>%
  ungroup()

completion_by_time %>%
  knitr::kable(caption = "Section 3. Completion by condition at each timepoint")
Section 3. Completion by condition at each timepoint
time cond completed n percent
1 Control 1 581 100.0
1 Flourish 1 556 100.0
2 Control 0 65 11.2
2 Control 1 516 88.8
2 Flourish 0 67 12.1
2 Flourish 1 489 87.9
3 Control 0 106 18.2
3 Control 1 475 81.8
3 Flourish 0 102 18.3
3 Flourish 1 454 81.7
4 Control 0 148 25.5
4 Control 1 433 74.5
4 Flourish 0 139 25.0
4 Flourish 1 417 75.0
completion_tests <- merged_data_long %>%
  group_by(unique_ID, cond, time) %>%
  summarise(
    completed = as.integer(any(Finished == 1, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  filter(time > 1) %>%
  group_by(time) %>%
  group_modify(~ {
    tab <- table(.x$completed, .x$cond)
    test <- chisq.test(tab)
    tibble(
      chisq = unname(test$statistic),
      df = unname(test$parameter),
      p = test$p.value
    )
  }) %>%
  ungroup() %>%
  mutate(
    wave = factor(time, levels = c(2, 3, 4), labels = c("Week 2", "Week 4", "Week 6")),
    p_formatted = format_p(p)
  )

completion_tests %>%
  knitr::kable(digits = 3, caption = "Section 3. Chi-square tests of completion by condition")
Section 3. Chi-square tests of completion by condition
time chisq df p wave p_formatted
2 0.131 1 0.718 Week 2 0.718
3 0.000 1 1.000 Week 4 1.000
4 0.013 1 0.908 Week 6 0.908
attrition_model <- glm(
  completed_t4 ~ cond + baseline_depression + baseline_anxiety + Age + Sex + SES_num + int_student + term,
  data = attrition_check,
  family = binomial
)

summary(attrition_model)
## 
## Call:
## glm(formula = completed_t4 ~ cond + baseline_depression + baseline_anxiety + 
##     Age + Sex + SES_num + int_student + term, family = binomial, 
##     data = attrition_check)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0467  -1.2727   0.7080   0.7761   1.1275  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)  
## (Intercept)          0.809638   0.464098   1.745   0.0811 .
## condFlourish         0.003509   0.138270   0.025   0.9798  
## baseline_depression -0.142577   0.055439  -2.572   0.0101 *
## baseline_anxiety     0.034293   0.045695   0.750   0.4530  
## Age                  0.015654   0.014388   1.088   0.2766  
## SexFemale            0.215516   0.158083   1.363   0.1728  
## SES_num              0.019714   0.065586   0.301   0.7637  
## int_studentNo       -0.046349   0.234976  -0.197   0.8436  
## termSummer          -0.470714   0.200650  -2.346   0.0190 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1283.6  on 1134  degrees of freedom
## Residual deviance: 1266.3  on 1126  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 1284.3
## 
## Number of Fisher Scoring iterations: 4
attrition_or <- exp(cbind(OR = coef(attrition_model), confint(attrition_model)))
attrition_or
##                            OR     2.5 %    97.5 %
## (Intercept)         2.2470940 0.8977712 5.5600827
## condFlourish        1.0035149 0.7652429 1.3162800
## baseline_depression 0.8671208 0.7777436 0.9667502
## baseline_anxiety    1.0348875 0.9466539 1.1325429
## Age                 1.0157768 0.9886632 1.0463286
## SexFemale           1.2405013 0.9071668 1.6869090
## SES_num             1.0199091 0.8967332 1.1598666
## int_studentNo       0.9547089 0.5947692 1.4979675
## termSummer          0.6245564 0.4226779 0.9294044
attrition_model_interactions <- glm(
  completed_t4 ~ cond * baseline_depression + cond * baseline_anxiety + Age + Sex + SES_num + int_student + term,
  data = attrition_check,
  family = binomial
)

summary(attrition_model_interactions)
## 
## Call:
## glm(formula = completed_t4 ~ cond * baseline_depression + cond * 
##     baseline_anxiety + Age + Sex + SES_num + int_student + term, 
##     family = binomial, data = attrition_check)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9909  -1.2206   0.7068   0.7761   1.1973  
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                       0.927904   0.477826   1.942  0.05215 . 
## condFlourish                     -0.205232   0.247861  -0.828  0.40766   
## baseline_depression              -0.199126   0.075619  -2.633  0.00846 **
## baseline_anxiety                  0.032758   0.061928   0.529  0.59683   
## Age                               0.015083   0.014367   1.050  0.29379   
## SexFemale                         0.221457   0.158257   1.399  0.16171   
## SES_num                           0.018181   0.065652   0.277  0.78184   
## int_studentNo                    -0.045847   0.235126  -0.195  0.84540   
## termSummer                       -0.448524   0.201531  -2.226  0.02604 * 
## condFlourish:baseline_depression  0.117777   0.109789   1.073  0.28338   
## condFlourish:baseline_anxiety    -0.001106   0.089768  -0.012  0.99017   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1283.6  on 1134  degrees of freedom
## Residual deviance: 1264.7  on 1124  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 1286.7
## 
## Number of Fisher Scoring iterations: 4
attrition_interaction_or <- exp(cbind(OR = coef(attrition_model_interactions), confint(attrition_model_interactions)))
attrition_interaction_or
##                                         OR     2.5 %    97.5 %
## (Intercept)                      2.5292020 0.9846154 6.4344787
## condFlourish                     0.8144583 0.5005492 1.3238368
## baseline_depression              0.8194465 0.7059996 0.9501225
## baseline_anxiety                 1.0333003 0.9159955 1.1681494
## Age                              1.0151971 0.9881333 1.0456799
## SexFemale                        1.2478930 0.9122828 1.6975827
## SES_num                          1.0183471 0.8952371 1.1582317
## int_studentNo                    0.9551879 0.5948979 1.4991716
## termSummer                       0.6385699 0.4314358 0.9519390
## condFlourish:baseline_depression 1.1249928 0.9073569 1.3959580
## condFlourish:baseline_anxiety    0.9988949 0.8376879 1.1913940
# Tidy versions for the supplement table.
week6_completion_results <- broom::tidy(attrition_model, conf.int = TRUE, exponentiate = TRUE) |>
  mutate(OR = estimate, LL = conf.low, UL = conf.high, p_formatted = format_p(p.value)) |>
  select(term, OR, LL, UL, p.value, p_formatted)

week6_completion_results |>
  mutate(across(c(OR, LL, UL), ~ round(.x, 2))) |>
  kable(caption = "Section 3. Adjusted logistic regression predicting Week 6 completion")
Section 3. Adjusted logistic regression predicting Week 6 completion
term OR LL UL p.value p_formatted
(Intercept) 2.25 0.90 5.56 0.0810652 0.081
condFlourish 1.00 0.77 1.32 0.9797554 0.980
baseline_depression 0.87 0.78 0.97 0.0101178 0.010
baseline_anxiety 1.03 0.95 1.13 0.4529756 0.453
Age 1.02 0.99 1.05 0.2766119 0.277
SexFemale 1.24 0.91 1.69 0.1727848 0.173
SES_num 1.02 0.90 1.16 0.7637378 0.764
int_studentNo 0.95 0.59 1.50 0.8436330 0.844
termSummer 0.62 0.42 0.93 0.0189789 0.019
attrition_interaction_tests <- broom::tidy(attrition_model_interactions, conf.int = TRUE, exponentiate = TRUE) |>
  filter(str_detect(term, "cond.*baseline_depression|cond.*baseline_anxiety")) |>
  mutate(OR = estimate, LL = conf.low, UL = conf.high, p_formatted = format_p(p.value)) |>
  select(term, OR, LL, UL, p.value, p_formatted)

attrition_interaction_tests |>
  mutate(across(c(OR, LL, UL), ~ round(.x, 2))) |>
  kable(caption = "Section 3. Condition × baseline symptom severity interactions predicting Week 6 completion")
Section 3. Condition × baseline symptom severity interactions predicting Week 6 completion
term OR LL UL p.value p_formatted
condFlourish:baseline_depression 1.12 0.91 1.40 0.2833826 0.283
condFlourish:baseline_anxiety 1.00 0.84 1.19 0.9901727 0.990

Section 4: App Engagement

# Overall engagement

engagement_overall <- df_fl_fix |>
  filter(time == 4) |>
  summarise(
    n_week6_total = n(),
    n_week6_observed = sum(!is.na(Engagement_1_fix)),
    days_mean = mean(Engagement_1_fix, na.rm = TRUE),
    days_sd = sd(Engagement_1_fix, na.rm = TRUE),
    activities_mean = mean(Engagement_3_fix, na.rm = TRUE),
    activities_sd = sd(Engagement_3_fix, na.rm = TRUE),
    days_per_week = days_mean / 6,
    activities_per_week = activities_mean / 6,
    pct_at_least_12_days_observed =
      100 * mean(Engagement_1_fix >= 12, na.rm = TRUE),
    zero_engagement = sum(Engagement_1_fix == 0, na.rm = TRUE),
    pct_zero_all_assigned =
      100 * zero_engagement / n_week6_total,
    pct_zero_observed =
      100 * zero_engagement / n_week6_observed
  )

engagement_overall |>
  mutate(across(where(is.numeric), ~ round(.x, 2))) |>
  knitr::kable(
    caption = "Section 4. Overall cumulative engagement at Week 6"
  )
Section 4. Overall cumulative engagement at Week 6
n_week6_total n_week6_observed days_mean days_sd activities_mean activities_sd days_per_week activities_per_week pct_at_least_12_days_observed zero_engagement pct_zero_all_assigned pct_zero_observed
556 410 18.9 10.18 11.72 14.03 3.15 1.95 76.1 3 0.54 0.73
# Engagement by interval

by_window_clean <- df_fl_fix |>
  group_by(unique_ID) |>
  arrange(time, .by_group = TRUE) |>
  mutate(
    previous_days = if_else(
      time == 2,
      0,
      lag(Engagement_1_fix)
    ),
    previous_activities = if_else(
      time == 2,
      0,
      lag(Engagement_3_fix)
    ),
    days_window_raw = if_else(
      is.na(Engagement_1_fix) | is.na(previous_days),
      NA_real_,
      Engagement_1_fix - previous_days
    ),
    activities_window_raw = if_else(
      is.na(Engagement_3_fix) | is.na(previous_activities),
      NA_real_,
      Engagement_3_fix - previous_activities
    ),
    days_window = case_when(
      is.na(days_window_raw) ~ NA_real_,
      days_window_raw < 0 ~ 0,
      days_window_raw > 14 ~ 14,
      TRUE ~ days_window_raw
    ),
    activities_window = case_when(
      is.na(activities_window_raw) ~ NA_real_,
      activities_window_raw < 0 ~ 0,
      TRUE ~ activities_window_raw
    ),
    days_per_week = days_window / 2,
    activities_per_week = activities_window / 2,
    window = factor(
      case_when(
        time == 2 ~ "Week 0–2",
        time == 3 ~ "Week 2–4",
        time == 4 ~ "Week 4–6"
      ),
      levels = c(
        "Week 0–2",
        "Week 2–4",
        "Week 4–6"
      )
    )
  ) |>
  ungroup() |>
  select(
    unique_ID,
    time,
    window,
    Engagement_1_fix,
    Engagement_3_fix,
    days_window,
    days_per_week,
    activities_window,
    activities_per_week
  )

summary_by_window <- by_window_clean |>
  group_by(window) |>
  summarise(
    n_total = n(),
    n_days_observed = sum(!is.na(days_window)),
    n_activities_observed = sum(!is.na(activities_window)),
    days_mean = mean(days_window, na.rm = TRUE),
    days_sd = sd(days_window, na.rm = TRUE),
    activities_mean = mean(activities_window, na.rm = TRUE),
    activities_sd = sd(activities_window, na.rm = TRUE),
    pct_adherent_2x_per_week =
      100 * mean(days_window >= 4, na.rm = TRUE),
    pct_any_use =
      100 * mean(days_window > 0, na.rm = TRUE),
    .groups = "drop"
  )

summary_by_window |>
  mutate(across(where(is.numeric), ~ round(.x, 2))) |>
  knitr::kable(
    caption = "Section 4. Engagement by two-week interval"
  )
Section 4. Engagement by two-week interval
window n_total n_days_observed n_activities_observed days_mean days_sd activities_mean activities_sd pct_adherent_2x_per_week pct_any_use
Week 0–2 556 480 482 6.43 3.61 3.78 5.77 81.04 95.42
Week 2–4 556 446 446 5.37 4.11 3.71 5.61 65.92 82.96
Week 4–6 556 410 409 6.15 4.50 4.46 8.35 67.56 89.51
# Sustained engagement

adherence_flags <- by_window_clean |>
  group_by(unique_ID) |>
  summarise(
    windows_reported = sum(!is.na(days_window)),
    complete_all_windows = all(!is.na(days_window)),
    used_all_windows = if (
      all(!is.na(days_window))
    ) {
      all(days_window > 0)
    } else {
      NA
    },
    adhered_all_windows = if (
      all(!is.na(days_window))
    ) {
      all(days_window >= 4)
    } else {
      NA
    },
    .groups = "drop"
  )

sustained_engagement <- adherence_flags |>
  summarise(
    n_flourish = n(),
    n_complete_all_windows =
      sum(complete_all_windows),
    n_missing_at_least_one_window =
      n_flourish - n_complete_all_windows,
    n_used_all_windows =
      sum(used_all_windows, na.rm = TRUE),
    pct_used_all_windows =
      100 * n_used_all_windows / n_complete_all_windows,
    n_adhered_all_windows =
      sum(adhered_all_windows, na.rm = TRUE),
    pct_adhered_all_windows =
      100 * n_adhered_all_windows / n_complete_all_windows
  )

sustained_engagement |>
  mutate(across(where(is.numeric), ~ round(.x, 2))) |>
  knitr::kable(
    caption = "Section 4. Sustained engagement across intervals"
  )
Section 4. Sustained engagement across intervals
n_flourish n_complete_all_windows n_missing_at_least_one_window n_used_all_windows pct_used_all_windows n_adhered_all_windows pct_adhered_all_windows
556 405 151 300 74.07 181 44.69
# Weekly engagement rate

overall_weekly_rate <- df_fl_fix |>
  filter(time == 4) |>
  summarise(
    n_days_observed = sum(!is.na(Engagement_1_fix)),
    n_activities_observed = sum(!is.na(Engagement_3_fix)),
    mean_days_per_week =
      mean(Engagement_1_fix / 6, na.rm = TRUE),
    sd_days_per_week =
      sd(Engagement_1_fix / 6, na.rm = TRUE),
    mean_activities_per_week =
      mean(Engagement_3_fix / 6, na.rm = TRUE),
    sd_activities_per_week =
      sd(Engagement_3_fix / 6, na.rm = TRUE)
  )

overall_weekly_rate |>
  mutate(across(where(is.numeric), ~ round(.x, 2))) |>
  knitr::kable(
    caption = "Section 4. Overall weekly engagement rate"
  )
Section 4. Overall weekly engagement rate
n_days_observed n_activities_observed mean_days_per_week sd_days_per_week mean_activities_per_week sd_activities_per_week
410 410 3.15 1.7 1.95 2.34

Section 5: Exploratory Moderation Analyses

merged_data_long <- merged_data_long |>
  mutate(
    Age_c = Age - mean(Age, na.rm = TRUE),
    SES_c = SES_num - mean(SES_num, na.rm = TRUE),
    time_c = time - 2.5,
    Sex = factor(Sex),
    int_student = factor(int_student),
    Ethnicity_categ = factor(Ethnicity_categ),
    cond = factor(cond, levels = c("Control", "Flourish")),
    term = factor(term)
  )

demographic_moderators <- tibble::tribble(
  ~moderator,                ~moderator_var,
  "Age",                     "Age_c",
  "Sex",                     "Sex",
  "International student",   "int_student",
  "SES",                     "SES_c",
  "Ethnicity",               "Ethnicity_categ"
)

run_demographic_moderation <- function(
    data,
    outcome,
    moderator,
    moderator_var
) {

  model_data <- data |>
    select(
      unique_ID,
      term,
      cond,
      time_c,
      all_of(moderator_var),
      all_of(outcome)
    ) |>
    filter(
      complete.cases(
        across(
          c(
            unique_ID,
            term,
            cond,
            time_c,
            all_of(moderator_var),
            all_of(outcome)
          )
        )
      )
    ) |>
    droplevels()

  contrasts(model_data$cond) <-
    contr.sum(nlevels(model_data$cond))

  if (is.factor(model_data[[moderator_var]])) {
    contrasts(model_data[[moderator_var]]) <-
      contr.sum(nlevels(model_data[[moderator_var]]))
  }

  fit <- lmer(
    as.formula(
      paste0(
        outcome,
        " ~ cond * time_c * ",
        moderator_var,
        " + (1 | unique_ID) + (1 | term)"
      )
    ),
    data = model_data,
    REML = FALSE
  )

  omnibus <- car::Anova(fit, type = 3) |>
    as.data.frame() |>
    tibble::rownames_to_column("effect") |>
    filter(
      str_detect(effect, "cond"),
      str_detect(effect, "time_c"),
      str_detect(effect, fixed(moderator_var))
    )

  if (nrow(omnibus) != 1) {
    stop(
      paste(
        "Could not uniquely identify interaction for",
        outcome,
        "and",
        moderator_var
      )
    )
  }

  omnibus |>
    transmute(
      outcome = str_to_title(outcome),
      moderator = moderator,
      effect = "Condition × Time × Moderator",
      chi_square = Chisq,
      df = Df,
      p = `Pr(>Chisq)`,
      n = nobs(fit)
    )
}

demographic_moderation_results <- tidyr::crossing(
  outcome = c("depression", "anxiety"),
  demographic_moderators
) |>
  pmap_dfr(
    function(outcome, moderator, moderator_var) {
      run_demographic_moderation(
        merged_data_long,
        outcome,
        moderator,
        moderator_var
      )
    }
  ) |>
  mutate(
    p_bonferroni = p.adjust(p, method = "bonferroni"),
    significant_uncorrected = p < .05,
    significant_bonferroni = p_bonferroni < .05,
    p_formatted = format_p(p),
    p_bonferroni_formatted = format_p(p_bonferroni)
  )

demographic_moderation_results |>
  mutate(
    chi_square = round(chi_square, 2)
  ) |>
  dplyr::select(
    outcome,
    moderator,
    n,
    chi_square,
    df,
    p_formatted,
    p_bonferroni_formatted,
    significant_uncorrected,
    significant_bonferroni
  ) |>
  kable(
    col.names = c(
      "Outcome",
      "Moderator",
      "n",
      "χ²",
      "df",
      "p",
      "Bonferroni-adjusted p",
      "Significant",
      "Significant after correction"
    ),
    caption = "Section 5. Exploratory demographic moderation analyses"
  )
Section 5. Exploratory demographic moderation analyses
Outcome Moderator n χ² df p Bonferroni-adjusted p Significant Significant after correction
Anxiety Age 3932 0.48 1 0.489 1.000 FALSE FALSE
Anxiety Ethnicity 3936 7.98 8 0.435 1.000 FALSE FALSE
Anxiety International student 3936 0.05 1 0.826 1.000 FALSE FALSE
Anxiety SES 3936 0.75 1 0.387 1.000 FALSE FALSE
Anxiety Sex 3936 0.16 1 0.689 1.000 FALSE FALSE
Depression Age 3933 0.13 1 0.714 1.000 FALSE FALSE
Depression Ethnicity 3937 6.56 8 0.585 1.000 FALSE FALSE
Depression International student 3937 0.55 1 0.458 1.000 FALSE FALSE
Depression SES 3937 1.20 1 0.273 1.000 FALSE FALSE
Depression Sex 3937 0.20 1 0.657 1.000 FALSE FALSE
# Engagement predictors

engagement_t4 <- df_fl_fix |>
  filter(time == 4) |>
  transmute(
    unique_ID,
    active_days_T4 = Engagement_1_fix,
    activities_T4 = Engagement_3_fix
  ) |>
  distinct(unique_ID, .keep_all = TRUE)

engagement_outcome_data <- change_data |>
  select(
    unique_ID,
    cond,
    depression_T1,
    depression_T4,
    anxiety_T1,
    anxiety_T4
  ) |>
  left_join(
    engagement_t4,
    by = "unique_ID"
  ) |>
  left_join(
    data_baseline |>
      select(unique_ID, term),
    by = "unique_ID"
  ) |>
  filter(cond == "Flourish") |>
  mutate(term = factor(term))

run_engagement_model <- function(
    data,
    outcome_T4,
    outcome_T1,
    engagement_var
) {

  fit <- lm(
    reformulate(
      c(
        outcome_T1,
        engagement_var,
        "term"
      ),
      response = outcome_T4
    ),
    data = data
  )

  broom::tidy(
    fit,
    conf.int = TRUE
  ) |>
    filter(
      .data$term == .env$engagement_var
    ) |>
    transmute(
      outcome = outcome_T4,
      baseline_covariate = outcome_T1,
      engagement_predictor = engagement_var,
      n = nobs(fit),
      b = estimate,
      SE = std.error,
      t = statistic,
      p = p.value,
      LL = conf.low,
      UL = conf.high
    )
}

engagement_model_results <- bind_rows(
  run_engagement_model(
    engagement_outcome_data,
    "depression_T4",
    "depression_T1",
    "active_days_T4"
  ),
  run_engagement_model(
    engagement_outcome_data,
    "depression_T4",
    "depression_T1",
    "activities_T4"
  ),
  run_engagement_model(
    engagement_outcome_data,
    "anxiety_T4",
    "anxiety_T1",
    "active_days_T4"
  ),
  run_engagement_model(
    engagement_outcome_data,
    "anxiety_T4",
    "anxiety_T1",
    "activities_T4"
  )
) |>
  mutate(
    p_bonferroni = p.adjust(
      p,
      method = "bonferroni"
    ),
    significant_uncorrected = p < .05,
    significant_bonferroni = p_bonferroni < .05,
    p_formatted = format_p(p),
    p_bonferroni_formatted =
      format_p(p_bonferroni)
  )

engagement_model_results |>
  mutate(
    across(
      c(b, SE, t, LL, UL),
      ~ round(.x, 2)
    )
  ) |>
  select(
    outcome,
    engagement_predictor,
    n,
    b,
    SE,
    t,
    p_formatted,
    p_bonferroni_formatted,
    significant_uncorrected,
    significant_bonferroni
  ) |>
  kable(
    col.names = c(
      "Outcome",
      "Engagement predictor",
      "n",
      "b",
      "SE",
      "t",
      "p",
      "Bonferroni-adjusted p",
      "Significant",
      "Significant after correction"
    ),
    caption = paste(
      "Section 5. Exploratory engagement predictors",
      "of Week 6 outcomes among Flourish participants"
    )
  )
Section 5. Exploratory engagement predictors of Week 6 outcomes among Flourish participants
Outcome Engagement predictor n b SE t p Bonferroni-adjusted p Significant Significant after correction
depression_T4 active_days_T4 410 0.01 0.01 1.40 0.162 0.650 FALSE FALSE
depression_T4 activities_T4 410 0.00 0.00 0.08 0.934 1.000 FALSE FALSE
anxiety_T4 active_days_T4 410 0.00 0.01 0.55 0.580 1.000 FALSE FALSE
anxiety_T4 activities_T4 410 0.00 0.01 -0.54 0.593 1.000 FALSE FALSE

Section 6: Subgroup-Specific Symptom Trajectories by Baseline Symptom Severity

eTable 1

make_etable1_block <- function(data, outcome_var, outcome_label, baseline_cat_var) {
  full_sample <- data |>
    group_by(cond, time_f) |>
    summarise(mean_sd = fmt_mean_sd(.data[[outcome_var]]), .groups = "drop") |>
    mutate(Outcome = outcome_label, `Baseline symptom severity` = "Full sample")

  subgroup_sample <- data |>
    filter(!is.na(.data[[baseline_cat_var]])) |>
    group_by(.data[[baseline_cat_var]], cond, time_f) |>
    summarise(mean_sd = fmt_mean_sd(.data[[outcome_var]]), .groups = "drop") |>
    rename(`Baseline symptom severity` = .data[[baseline_cat_var]]) |>
    mutate(
      Outcome = outcome_label,
      `Baseline symptom severity` = case_when(
        str_detect(`Baseline symptom severity`, "Low") ~ "Low severity (<3)",
        str_detect(`Baseline symptom severity`, "Elevated") ~ "Elevated severity (≥3)",
        TRUE ~ as.character(`Baseline symptom severity`)
      )
    )

  bind_rows(full_sample, subgroup_sample) |>
    mutate(
      Outcome = factor(Outcome, levels = c("Depression (PHQ-2)", "Anxiety (GAD-2)")),
      `Baseline symptom severity` = factor(`Baseline symptom severity`, levels = c("Full sample", "Low severity (<3)", "Elevated severity (≥3)")),
      cond = factor(cond, levels = c("Control", "Flourish"))
    ) |>
    select(Outcome, `Baseline symptom severity`, Condition = cond, time_f, mean_sd) |>
    pivot_wider(names_from = time_f, values_from = mean_sd) |>
    arrange(Outcome, `Baseline symptom severity`, Condition)
}

etable1_display <- bind_rows(
  make_etable1_block(merged_data_long, "depression", "Depression (PHQ-2)", "depression_baseline_cat"),
  make_etable1_block(merged_data_long, "anxiety", "Anxiety (GAD-2)", "anxiety_baseline_cat")
)

etable1_display |>
  kable(
    col.names = c("Outcome", "Baseline symptom severity", "Condition", "T1 (Baseline),<br>M (SD)", "T2 (Week 2),<br>M (SD)", "T3 (Week 4),<br>M (SD)", "T4 (Week 6),<br>M (SD)"),
    escape = FALSE,
    align = c("l", "l", "l", "c", "c", "c", "c"),
    caption = "eTable 1. Descriptive statistics for depression and anxiety by condition, timepoint, and baseline symptom severity subgroup"
  )
eTable 1. Descriptive statistics for depression and anxiety by condition, timepoint, and baseline symptom severity subgroup
Outcome Baseline symptom severity Condition T1 (Baseline),
M (SD)
T2 (Week 2),
M (SD)
T3 (Week 4),
M (SD)
T4 (Week 6),
M (SD)
Depression (PHQ-2) Full sample Control 1.71 (1.48) 1.85 (1.53) 1.92 (1.58) 2.05 (1.57)
Depression (PHQ-2) Full sample Flourish 1.63 (1.47) 1.78 (1.49) 1.78 (1.52) 1.72 (1.48)
Depression (PHQ-2) Low severity (<3) Control 0.99 (0.81) 1.50 (1.36) 1.62 (1.47) 1.78 (1.43)
Depression (PHQ-2) Low severity (<3) Flourish 0.99 (0.83) 1.47 (1.31) 1.46 (1.31) 1.52 (1.38)
Depression (PHQ-2) Elevated severity (≥3) Control 3.75 (0.90) 2.89 (1.53) 2.86 (1.57) 2.94 (1.70)
Depression (PHQ-2) Elevated severity (≥3) Flourish 3.88 (0.93) 2.92 (1.54) 2.97 (1.67) 2.49 (1.62)
Anxiety (GAD-2) Full sample Control 2.55 (1.83) 2.51 (1.69) 2.63 (1.81) 2.67 (1.82)
Anxiety (GAD-2) Full sample Flourish 2.54 (1.83) 2.51 (1.76) 2.44 (1.83) 2.35 (1.73)
Anxiety (GAD-2) Low severity (<3) Control 1.24 (0.79) 1.86 (1.41) 2.03 (1.59) 2.11 (1.63)
Anxiety (GAD-2) Low severity (<3) Flourish 1.16 (0.83) 1.79 (1.45) 1.77 (1.56) 1.82 (1.58)
Anxiety (GAD-2) Elevated severity (≥3) Control 4.44 (1.10) 3.45 (1.63) 3.50 (1.77) 3.49 (1.78)
Anxiety (GAD-2) Elevated severity (≥3) Flourish 4.30 (1.12) 3.42 (1.72) 3.32 (1.78) 3.06 (1.66)

Subgroup trajectory model details

format_ci_bound <- function(x) {
  case_when(
    is.na(x) ~ NA_character_,
    x != 0 & abs(x) < .01 ~ sprintf("%.3f", x),
    TRUE ~ sprintf("%.2f", x)
  )
}

run_subgroup_trajectory <- function(
    data,
    outcome,
    baseline_cat_var,
    subgroup_label
) {

  d <- data |>
    filter(.data[[baseline_cat_var]] == subgroup_label) |>
    filter(
      !is.na(.data[[outcome]]),
      !is.na(cond),
      !is.na(time),
      !is.na(unique_ID),
      !is.na(term)
    ) |>
    mutate(
      cond = factor(cond, levels = c("Control", "Flourish")),
      term = factor(term),
      outcome_z = as.numeric(scale(.data[[outcome]])),
      time_z = as.numeric(scale(time - 2.5))
    )

  contrasts(d$cond) <- cbind(
    flourish_vs_control = c(-1, 1)
  )

  # 1. Subgroup-specific condition × time interaction

  raw_formula <- reformulate(
    termlabels = c(
      "cond * I(time - 2.5)",
      "(1 | unique_ID)",
      "(1 | term)"
    ),
    response = outcome
  )

  fit <- lmer(
    raw_formula,
    data = d,
    REML = TRUE
  )

  raw_interaction <- extract_lmer_term(
    fit,
    "condflourish_vs_control:I(time - 2.5)"
  )

  std_interaction_fit <- lmer(
    outcome_z ~ cond * time_z +
      (1 | unique_ID) +
      (1 | term),
    data = d,
    REML = TRUE
  )

  std_interaction <- extract_lmer_term(
    std_interaction_fit,
    "condflourish_vs_control:time_z"
  ) |>
    transmute(
      beta = b,
      beta_LL = LL,
      beta_UL = UL
    )

  interaction <- bind_cols(
    raw_interaction,
    std_interaction
  ) |>
    mutate(
      outcome = .env$outcome,
      subgroup = subgroup_label
    )

  # 2. Within-condition slopes

  slopes <- map_dfr(
    c("Control", "Flourish"),
    function(condition_label) {

      d_condition <- d |>
        filter(cond == condition_label) |>
        mutate(
          outcome_condition_z = as.numeric(
            scale(.data[[outcome]])
          ),
          time_condition_z = as.numeric(
            scale(time - 2.5)
          )
        )

      raw_slope_formula <- reformulate(
        termlabels = c(
          "I(time - 2.5)",
          "(1 | unique_ID)",
          "(1 | term)"
        ),
        response = outcome
      )

      fit_condition <- lmer(
        raw_slope_formula,
        data = d_condition,
        REML = TRUE
      )

      raw <- extract_lmer_term(
        fit_condition,
        "I(time - 2.5)"
      ) |>
        mutate(cond = condition_label)

      std_condition_fit <- lmer(
        outcome_condition_z ~ time_condition_z +
          (1 | unique_ID) +
          (1 | term),
        data = d_condition,
        REML = TRUE
      )

      std <- extract_lmer_term(
        std_condition_fit,
        "time_condition_z"
      ) |>
        transmute(
          cond = condition_label,
          beta = b,
          beta_LL = LL,
          beta_UL = UL
        )

      raw |>
        left_join(std, by = "cond")
    }
  ) |>
    mutate(
      outcome = .env$outcome,
      subgroup = subgroup_label
    )

  # 3. Between-condition differences at each timepoint

  timepoint_diffs <- map_dfr(
    1:4,
    function(tt) {

      d_time <- d |>
        filter(time == tt)

      fit_time <- lmer(
        reformulate(
          termlabels = c("cond", "(1 | term)"),
          response = outcome
        ),
        data = d_time,
        REML = TRUE
      )

      extract_lmer_term(
        fit_time,
        "condflourish_vs_control"
      ) |>
        transmute(
          time = tt,
          time_f = factor(
            tt,
            levels = c(1, 2, 3, 4),
            labels = c(
              "Baseline",
              "Week 2",
              "Week 4",
              "Week 6"
            )
          ),
          b,
          SE,
          df,
          t,
          p,
          LL,
          UL,
          outcome = .env$outcome,
          subgroup = subgroup_label
        )
    }
  )

  # 4. Week 6 Cohen's d

  week6_data <- d |>
    filter(time == 4) |>
    transmute(
      cond,
      value = .data[[outcome]]
    ) |>
    filter(!is.na(value))

  week6_d <- effectsize::cohens_d(
    value ~ cond,
    data = week6_data
  ) |>
    as_tibble() |>
    transmute(
      outcome = .env$outcome,
      subgroup = subgroup_label,
      d = abs(Cohens_d),
      d_LL = pmin(abs(CI_low), abs(CI_high)),
      d_UL = pmax(abs(CI_low), abs(CI_high))
    )

  list(
    model = fit,
    interaction = interaction,
    slopes = slopes,
    timepoint_diffs = timepoint_diffs,
    week6_d = week6_d
  )
}

depression_low_subgroup <- run_subgroup_trajectory(
  merged_data_long,
  "depression",
  "depression_baseline_cat",
  "Low baseline (PHQ-2 < 3)"
)

depression_elevated_subgroup <- run_subgroup_trajectory(
  merged_data_long,
  "depression",
  "depression_baseline_cat",
  "Elevated baseline (PHQ-2 ≥ 3)"
)

anxiety_low_subgroup <- run_subgroup_trajectory(
  merged_data_long,
  "anxiety",
  "anxiety_baseline_cat",
  "Low baseline (GAD-2 < 3)"
)

anxiety_elevated_subgroup <- run_subgroup_trajectory(
  merged_data_long,
  "anxiety",
  "anxiety_baseline_cat",
  "Elevated baseline (GAD-2 ≥ 3)"
)

# Keep raw results first so small CI values such as 0.004 are not rounded to 0.00.

section6_interactions_raw <- bind_rows(
  depression_low_subgroup$interaction,
  depression_elevated_subgroup$interaction,
  anxiety_low_subgroup$interaction,
  anxiety_elevated_subgroup$interaction
)

section6_slopes_raw <- bind_rows(
  depression_low_subgroup$slopes,
  depression_elevated_subgroup$slopes,
  anxiety_low_subgroup$slopes,
  anxiety_elevated_subgroup$slopes
)

section6_timepoint_diffs_raw <- bind_rows(
  depression_low_subgroup$timepoint_diffs,
  depression_elevated_subgroup$timepoint_diffs,
  anxiety_low_subgroup$timepoint_diffs,
  anxiety_elevated_subgroup$timepoint_diffs
)

section6_week6_d_raw <- bind_rows(
  depression_low_subgroup$week6_d,
  depression_elevated_subgroup$week6_d,
  anxiety_low_subgroup$week6_d,
  anxiety_elevated_subgroup$week6_d
)

# Format for display.

section6_interactions <- section6_interactions_raw |>
  mutate(
    across(c(b, SE, t), ~ round(.x, 2)),
    LL = format_ci_bound(LL),
    UL = format_ci_bound(UL),
    beta = round(beta, 2),
    beta_LL = format_ci_bound(beta_LL),
    beta_UL = format_ci_bound(beta_UL),
    p_formatted = format_p(p)
  )

section6_slopes <- section6_slopes_raw |>
  mutate(
    across(c(b, SE, t), ~ round(.x, 2)),
    LL = format_ci_bound(LL),
    UL = format_ci_bound(UL),
    beta = round(beta, 2),
    beta_LL = format_ci_bound(beta_LL),
    beta_UL = format_ci_bound(beta_UL),
    p_formatted = format_p(p)
  )

section6_timepoint_diffs <- section6_timepoint_diffs_raw |>
  mutate(
    across(c(b, SE, t), ~ round(.x, 2)),
    LL = format_ci_bound(LL),
    UL = format_ci_bound(UL),
    p_formatted = format_p(p)
  )

section6_week6_d <- section6_week6_d_raw |>
  mutate(
    d = round(d, 2),
    d_LL = format_ci_bound(d_LL),
    d_UL = format_ci_bound(d_UL)
  )

section6_interactions |>
  select(
    outcome,
    subgroup,
    b,
    SE,
    t,
    p_formatted,
    beta,
    beta_LL,
    beta_UL
  ) |>
  kable(
    caption = "Section 6. Subgroup-specific condition × time interactions"
  )
Section 6. Subgroup-specific condition × time interactions
outcome subgroup b SE t p_formatted beta beta_LL beta_UL
depression Low baseline (PHQ-2 < 3) -0.05 0.02 -2.83 0.005 -0.04 -0.07 -0.01
depression Elevated baseline (PHQ-2 ≥ 3) -0.08 0.04 -2.08 0.037 -0.06 -0.11 -0.003
anxiety Low baseline (GAD-2 < 3) -0.05 0.02 -2.18 0.030 -0.04 -0.07 -0.004
anxiety Elevated baseline (GAD-2 ≥ 3) -0.05 0.03 -1.88 0.060 -0.04 -0.08 0.002
section6_slopes |>
  select(
    outcome,
    subgroup,
    cond,
    b,
    SE,
    t,
    p_formatted,
    beta,
    beta_LL,
    beta_UL
  ) |>
  kable(
    caption = "Section 6. Subgroup-specific within-condition symptom slopes"
  )
Section 6. Subgroup-specific within-condition symptom slopes
outcome subgroup cond b SE t p_formatted beta beta_LL beta_UL
depression Low baseline (PHQ-2 < 3) Control 0.26 0.02 10.91 < .001 0.22 0.18 0.26
depression Low baseline (PHQ-2 < 3) Flourish 0.17 0.02 7.56 < .001 0.15 0.11 0.19
depression Elevated baseline (PHQ-2 ≥ 3) Control -0.27 0.05 -5.38 < .001 -0.21 -0.28 -0.13
depression Elevated baseline (PHQ-2 ≥ 3) Flourish -0.43 0.05 -7.81 < .001 -0.31 -0.39 -0.23
anxiety Low baseline (GAD-2 < 3) Control 0.29 0.03 10.31 < .001 0.23 0.18 0.27
anxiety Low baseline (GAD-2 < 3) Flourish 0.20 0.03 6.34 < .001 0.16 0.11 0.21
anxiety Elevated baseline (GAD-2 ≥ 3) Control -0.30 0.04 -7.17 < .001 -0.20 -0.26 -0.15
anxiety Elevated baseline (GAD-2 ≥ 3) Flourish -0.40 0.04 -10.22 < .001 -0.28 -0.33 -0.22
section6_timepoint_diffs |>
  filter(time_f == "Week 6") |>
  left_join(
    section6_week6_d,
    by = c("outcome", "subgroup")
  ) |>
  select(
    outcome,
    subgroup,
    b,
    SE,
    t,
    p_formatted,
    LL,
    UL,
    d,
    d_LL,
    d_UL
  ) |>
  kable(
    caption = "Section 6. Week 6 between-condition differences with effect sizes"
  )
Section 6. Week 6 between-condition differences with effect sizes
outcome subgroup b SE t p_formatted LL UL d d_LL d_UL
depression Low baseline (PHQ-2 < 3) -0.13 0.05 -2.40 0.017 -0.24 -0.02 0.19 0.03 0.34
depression Elevated baseline (PHQ-2 ≥ 3) -0.24 0.12 -1.94 0.054 -0.47 0.003 0.27 0.02 0.56
anxiety Low baseline (GAD-2 < 3) -0.14 0.07 -2.00 0.046 -0.28 -0.003 0.18 0.004 0.36
anxiety Elevated baseline (GAD-2 ≥ 3) -0.22 0.09 -2.47 0.014 -0.40 -0.05 0.25 0.05 0.46

```