Jenna Laymon GMM

setwd("C:/Work Files/consulting/Jenna Laymon")

Jenna_Data <-read.csv("Jenna_Data.csv")

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(knitr)
library(kableExtra)


Attaching package: 'kableExtra'

The following object is masked from 'package:dplyr':

    group_rows

library(psych)


Attaching package: 'psych'

The following objects are masked from 'package:ggplot2':

    %+%, alpha

library(lcmm)

Jenna_Data$wave <- factor(
  as.integer(sub(".*_t(\\d+)_.*", "\\1", Jenna_Data$time)),
  levels = sort(unique(as.integer(sub(".*_t(\\d+)_.*", "\\1", Jenna_Data$time)))),
  ordered = TRUE
)

str(Jenna_Data)

'data.frame':   1456 obs. of  4 variables:
 $ part_id : chr  "BA01001" "BA01001" "BA01001" "BA01001" ...
 $ idea_sev: int  3 1 1 2 0 0 3 0 0 0 ...
 $ time    : chr  "baseline_t1_arm_1" "month_3_t2_arm_1" "month_6_t3_arm_1" "month_9_t4_arm_1" ...
 $ wave    : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 1 2 3 4 5 1 2 3 4 5 ...

which(is.na(Jenna_Data$wave))

[1] 891

Jenna_Data$time[is.na(Jenna_Data$wave)]

[1] ""

Jenna_Data <- Jenna_Data[-891, ]

head(Jenna_Data)

  part_id idea_sev              time wave
1 BA01001        3 baseline_t1_arm_1    1
2 BA01001        1  month_3_t2_arm_1    2
3 BA01001        1  month_6_t3_arm_1    3
4 BA01001        2  month_9_t4_arm_1    4
5 BA01001        0 month_12_t5_arm_1    5
6 BA01008        0 baseline_t1_arm_1    1

# get descriptives by wave
desc_by_wave <- describeBy(Jenna_Data$idea_sev, Jenna_Data$wave, mat = TRUE)

# optional: clean row names if needed
desc_by_wave <- as.data.frame(desc_by_wave)

# display table
desc_by_wave %>%
  kable(digits = 3, format = "html", booktabs = TRUE,
        caption = "Table 1. Descriptive Statistics by Wave") %>%
  kable_classic(full_width = FALSE, html_font = "Cambria")

Table 1. Descriptive Statistics by Wave
	item	group1	vars	n	mean	sd	trimmed	max	range	skew	kurtosis	se
X11	1	1	1	291	1.268	1.626	1.086	4	4	0.797	-1.091	0.095
X12	2	2	1	291	1.223	1.626	1.030	4	4	0.843	-1.014	0.095
X13	3	3	1	291	0.485	1.216	0.146	5	5	2.629	5.959	0.071
X14	4	4	1	291	0.302	0.889	0.060	5	5	3.594	13.508	0.052
X15	5	5	1	291	0.162	0.678	0.000	5	5	5.013	27.103	0.040

Jenna_Data$idea_sev <-as.numeric(Jenna_Data$idea_sev)

ggplot(Jenna_Data, aes(x = wave, y = idea_sev, group = part_id)) +
  geom_line(alpha = 0.20, color = "gray40") +
  geom_point(alpha = 0.20, color = "gray40") +
  labs(
    title = "Individual Trajectories of Idea Severity",
    x = "Wave",
    y = "Idea Severity"
  ) +
  theme_minimal()

Jenna_Data$idea_sev <- as.numeric(Jenna_Data$idea_sev)
# if wave is an ordered factor
Jenna_Data$wave_num <- as.integer(Jenna_Data$wave)

# compute mean + SE per wave
summary_wave <- Jenna_Data %>%
  group_by(wave_num) %>%
  summarize(
    mean_idea = mean(idea_sev, na.rm = TRUE),
    se_idea   = sd(idea_sev, na.rm = TRUE) / sqrt(n())
  )

ggplot(Jenna_Data, aes(x = wave, y = idea_sev)) +
 
  stat_summary(fun = mean, geom = "line", size = 1.4, color = "blue") +
  stat_summary(fun = mean, geom = "point", size = 3, color = "blue") +
  labs(
    title = "Spaghetti Plot with Mean Trajectory",
    x = "Wave",
    y = "Idea Severity"
  ) +
  theme_minimal()

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?

ggplot(Jenna_Data, aes(x = wave_num, y = idea_sev, group = part_id)) +
  geom_line(alpha = 0.15, color = "gray50") +
  geom_point(alpha = 0.15, color = "gray50") +
  stat_summary(fun = mean, geom = "line", linewidth = 1.4, color = "blue") +
  stat_summary(fun = mean, geom = "point", size = 3, color = "blue") +
  scale_x_continuous(
    breaks = 1:5,
    labels = levels(Jenna_Data$wave)
  ) +
  labs(
    title = "Spaghetti Plot with Mean Trajectory",
    x = "Wave",
    y = "Idea Severity"
  ) +
  theme_minimal()

# Reshape from long to wide
Jenna_Data_wide <- Jenna_Data %>%
  select(part_id, wave, idea_sev) %>%  # keep only necessary columns
  pivot_wider(
    names_from = wave,
    values_from = idea_sev,
    names_prefix = "idea_sev_wave"
  )

# View the wide dataset
print(Jenna_Data_wide)

# A tibble: 291 × 6
   part_id idea_sev_wave1 idea_sev_wave2 idea_sev_wave3 idea_sev_wave4
   <chr>            <dbl>          <dbl>          <dbl>          <dbl>
 1 BA01001              3              1              1              2
 2 BA01008              0              3              0              0
 3 BA01019              2              1              0              0
 4 BA01024              3              4              0              0
 5 BA01027              1              0              0              0
 6 BA01044              3              3              0              0
 7 BA01050              0              1              0              0
 8 BA01051              1              0              0              0
 9 BA01052              0              2              0              0
10 BA01055              3              2              0              0
# ℹ 281 more rows
# ℹ 1 more variable: idea_sev_wave5 <dbl>

# Write to CSV
write.csv(Jenna_Data_wide, "dataset_wide.csv", row.names = FALSE)

Mplus code: This is actually 3 separate files starting with 2 then 3 then 4 classes

TITLE: Ordinal Growth Mixture Model (2–4 classes) with variable categories;

DATA: FILE = “dataset_wide.csv”;
FORMAT = FREE;

VARIABLE: NAMES = part_id idea_sev_wave1 idea_sev_wave2 idea_sev_wave3 idea_sev_wave4 idea_sev_wave5; USEVARIABLES = idea_sev_wave1 idea_sev_wave2 idea_sev_wave3 idea_sev_wave4 idea_sev_wave5; CATEGORICAL = idea_sev_wave1 idea_sev_wave2 idea_sev_wave3 idea_sev_wave4 idea_sev_wave5; CLASSES = c(2);
MISSING = ALL (-9999);

ANALYSIS: TYPE = MIXTURE; ESTIMATOR = MLR;
STARTS = 2000 500;
STITERATIONS = 30;
PROCESSORS = 4;

MODEL: %OVERALL% i s | idea_sev_wave1@0 idea_sev_wave2@1 idea_sev_wave3@2 idea_sev_wave4@3 idea_sev_wave5@4;

%c#1% [i s];
i s (1);

%c#2% [i s]; i s (1);

! Uncomment/add for 3- or 4-class models: ! %c#3% ! [i s]; ! i s (1);

! %c#4% ! [i s]; ! i s (1);

OUTPUT: TECH1 TECH4 TECH11 TECH14; STANDARDIZED; SAMPSTAT; CINTERVAL; MODINDICES (ALL);

SAVEDATA: FILE = “gmm_probabilities.csv”; SAVE = CPROBABILITIES;

2_class model estimated trajectories

BIC = 4097,

AIC4=141

Class 1: n=277 (95% of the sample)

Class 2: n=14 (5% of the sample)

class_params <- tibble(
  class = c("Class 1", "Class 2"),
  intercept = c(1.1, 1.7),  # replace with your estimated intercepts
  slope = c(-0.285, 0.279)  # replace with your estimated slopes
)


time_points <- 0:4


predicted_trajectories <- class_params %>%
  crossing(time = time_points) %>%  # create all combinations of class x time
  mutate(predicted = intercept + slope * time)


predicted_trajectories <- predicted_trajectories %>%
  mutate(wave_label = paste0("Wave ", time + 1))


ggplot(predicted_trajectories, aes(x = time, y = predicted, color = class)) +
  geom_line(size = 1.2) +
  geom_point(size = 2) +
  scale_x_continuous(breaks = time_points, labels = paste0("Wave ", 1:5)) +
  labs(x = "Wave", y = "Predicted Outcome", title = "Class Trajectories from GMM") +
  theme_minimal() +
  theme(text = element_text(size = 14))

3_class model estimated trajectories

BIC = 3679

AIC4=3734

Class 1: n=5 2% of the sample)

Class 2: n=270 (93% of the sample)

Class3: n=16 (5%)

class_params <- tibble(
  class = c("Class 1", "Class 2","Class 3"),
  intercept = c(0.88, 1.14, 2.09),  # replace with your estimated intercepts
  slope = c(0.87, -0.285, -0.134 )  # replace with your estimated slopes
)


time_points <- 0:4


predicted_trajectories <- class_params %>%
  crossing(time = time_points) %>%  # create all combinations of class x time
  mutate(predicted = intercept + slope * time)


predicted_trajectories <- predicted_trajectories %>%
  mutate(wave_label = paste0("Wave ", time + 1))


ggplot(predicted_trajectories, aes(x = time, y = predicted, color = class)) +
  geom_line(size = 1.2) +
  geom_point(size = 2) +
  scale_x_continuous(breaks = time_points, labels = paste0("Wave ", 1:5)) +
  labs(x = "Wave", y = "Predicted Outcome", title = "Class Trajectories from GMM") +
  theme_minimal() +
  theme(text = element_text(size = 14))

4_class model estimated trajectories

BIC = 3284

AIC = 3350

Class 1: n=7 (2% of the sample)

Class 2: n=5 (1.7% of the sample)

Class 3: n=9 (3% of the sample)

Class 4: n=270 (93% of the sample)

class_params <- tibble(
  class = c("Class 1", "Class 2","Class 3","Class 4"),
  intercept = c(2.01, 0.88, 2.16, 1.14),  # replace with your estimated intercepts
  slope = c(-0.251, 0.87, -0.04, -0.284)  # replace with your estimated slopes
)


time_points <- 0:4


predicted_trajectories <- class_params %>%
  crossing(time = time_points) %>%  # create all combinations of class x time
  mutate(predicted = intercept + slope * time)


predicted_trajectories <- predicted_trajectories %>%
  mutate(wave_label = paste0("Wave ", time + 1))


ggplot(predicted_trajectories, aes(x = time, y = predicted, color = class)) +
  geom_line(size = 1.2) +
  geom_point(size = 2) +
  scale_x_continuous(breaks = time_points, labels = paste0("Wave ", 1:5)) +
  labs(x = "Wave", y = "Predicted Outcome", title = "Class Trajectories from GMM") +
  theme_minimal() +
  theme(text = element_text(size = 14))