setwd("C:/Work Files/consulting/Jenna Laymon")Jenna Laymon GMM
Jenna_Data <-read.csv("Jenna_Data.csv")library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.2 ✔ tibble 3.3.0
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.0.4
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
library(kableExtra)
Attaching package: 'kableExtra'
The following object is masked from 'package:dplyr':
group_rows
library(psych)
Attaching package: 'psych'
The following objects are masked from 'package:ggplot2':
%+%, alpha
library(lcmm)Jenna_Data$wave <- factor(
as.integer(sub(".*_t(\\d+)_.*", "\\1", Jenna_Data$time)),
levels = sort(unique(as.integer(sub(".*_t(\\d+)_.*", "\\1", Jenna_Data$time)))),
ordered = TRUE
)str(Jenna_Data)'data.frame': 1456 obs. of 4 variables:
$ part_id : chr "BA01001" "BA01001" "BA01001" "BA01001" ...
$ idea_sev: int 3 1 1 2 0 0 3 0 0 0 ...
$ time : chr "baseline_t1_arm_1" "month_3_t2_arm_1" "month_6_t3_arm_1" "month_9_t4_arm_1" ...
$ wave : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 1 2 3 4 5 1 2 3 4 5 ...
which(is.na(Jenna_Data$wave))[1] 891
Jenna_Data$time[is.na(Jenna_Data$wave)][1] ""
Jenna_Data <- Jenna_Data[-891, ]
head(Jenna_Data) part_id idea_sev time wave
1 BA01001 3 baseline_t1_arm_1 1
2 BA01001 1 month_3_t2_arm_1 2
3 BA01001 1 month_6_t3_arm_1 3
4 BA01001 2 month_9_t4_arm_1 4
5 BA01001 0 month_12_t5_arm_1 5
6 BA01008 0 baseline_t1_arm_1 1
# get descriptives by wave
desc_by_wave <- describeBy(Jenna_Data$idea_sev, Jenna_Data$wave, mat = TRUE)
# optional: clean row names if needed
desc_by_wave <- as.data.frame(desc_by_wave)
# display table
desc_by_wave %>%
kable(digits = 3, format = "html", booktabs = TRUE,
caption = "Table 1. Descriptive Statistics by Wave") %>%
kable_classic(full_width = FALSE, html_font = "Cambria")| item | group1 | vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X11 | 1 | 1 | 1 | 291 | 1.268 | 1.626 | 0 | 1.086 | 0 | 0 | 4 | 4 | 0.797 | -1.091 | 0.095 |
| X12 | 2 | 2 | 1 | 291 | 1.223 | 1.626 | 0 | 1.030 | 0 | 0 | 4 | 4 | 0.843 | -1.014 | 0.095 |
| X13 | 3 | 3 | 1 | 291 | 0.485 | 1.216 | 0 | 0.146 | 0 | 0 | 5 | 5 | 2.629 | 5.959 | 0.071 |
| X14 | 4 | 4 | 1 | 291 | 0.302 | 0.889 | 0 | 0.060 | 0 | 0 | 5 | 5 | 3.594 | 13.508 | 0.052 |
| X15 | 5 | 5 | 1 | 291 | 0.162 | 0.678 | 0 | 0.000 | 0 | 0 | 5 | 5 | 5.013 | 27.103 | 0.040 |
Jenna_Data$idea_sev <-as.numeric(Jenna_Data$idea_sev)
ggplot(Jenna_Data, aes(x = wave, y = idea_sev, group = part_id)) +
geom_line(alpha = 0.20, color = "gray40") +
geom_point(alpha = 0.20, color = "gray40") +
labs(
title = "Individual Trajectories of Idea Severity",
x = "Wave",
y = "Idea Severity"
) +
theme_minimal()Jenna_Data$idea_sev <- as.numeric(Jenna_Data$idea_sev)
# if wave is an ordered factor
Jenna_Data$wave_num <- as.integer(Jenna_Data$wave)
# compute mean + SE per wave
summary_wave <- Jenna_Data %>%
group_by(wave_num) %>%
summarize(
mean_idea = mean(idea_sev, na.rm = TRUE),
se_idea = sd(idea_sev, na.rm = TRUE) / sqrt(n())
)
ggplot(Jenna_Data, aes(x = wave, y = idea_sev)) +
stat_summary(fun = mean, geom = "line", size = 1.4, color = "blue") +
stat_summary(fun = mean, geom = "point", size = 3, color = "blue") +
labs(
title = "Spaghetti Plot with Mean Trajectory",
x = "Wave",
y = "Idea Severity"
) +
theme_minimal()Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
ggplot(Jenna_Data, aes(x = wave_num, y = idea_sev, group = part_id)) +
geom_line(alpha = 0.15, color = "gray50") +
geom_point(alpha = 0.15, color = "gray50") +
stat_summary(fun = mean, geom = "line", linewidth = 1.4, color = "blue") +
stat_summary(fun = mean, geom = "point", size = 3, color = "blue") +
scale_x_continuous(
breaks = 1:5,
labels = levels(Jenna_Data$wave)
) +
labs(
title = "Spaghetti Plot with Mean Trajectory",
x = "Wave",
y = "Idea Severity"
) +
theme_minimal()# Reshape from long to wide
Jenna_Data_wide <- Jenna_Data %>%
select(part_id, wave, idea_sev) %>% # keep only necessary columns
pivot_wider(
names_from = wave,
values_from = idea_sev,
names_prefix = "idea_sev_wave"
)
# View the wide dataset
print(Jenna_Data_wide)# A tibble: 291 × 6
part_id idea_sev_wave1 idea_sev_wave2 idea_sev_wave3 idea_sev_wave4
<chr> <dbl> <dbl> <dbl> <dbl>
1 BA01001 3 1 1 2
2 BA01008 0 3 0 0
3 BA01019 2 1 0 0
4 BA01024 3 4 0 0
5 BA01027 1 0 0 0
6 BA01044 3 3 0 0
7 BA01050 0 1 0 0
8 BA01051 1 0 0 0
9 BA01052 0 2 0 0
10 BA01055 3 2 0 0
# ℹ 281 more rows
# ℹ 1 more variable: idea_sev_wave5 <dbl>
# Write to CSV
write.csv(Jenna_Data_wide, "dataset_wide.csv", row.names = FALSE)Mplus code: This is actually 3 separate files starting with 2 then 3 then 4 classes
TITLE: Ordinal Growth Mixture Model (2–4 classes) with variable categories;
DATA: FILE = “dataset_wide.csv”;
FORMAT = FREE;
VARIABLE: NAMES = part_id idea_sev_wave1 idea_sev_wave2 idea_sev_wave3 idea_sev_wave4 idea_sev_wave5; USEVARIABLES = idea_sev_wave1 idea_sev_wave2 idea_sev_wave3 idea_sev_wave4 idea_sev_wave5; CATEGORICAL = idea_sev_wave1 idea_sev_wave2 idea_sev_wave3 idea_sev_wave4 idea_sev_wave5; CLASSES = c(2);
MISSING = ALL (-9999);
ANALYSIS: TYPE = MIXTURE; ESTIMATOR = MLR;
STARTS = 2000 500;
STITERATIONS = 30;
PROCESSORS = 4;
MODEL: %OVERALL% i s | idea_sev_wave1@0 idea_sev_wave2@1 idea_sev_wave3@2 idea_sev_wave4@3 idea_sev_wave5@4;
%c#1% [i s];
i s (1);
%c#2% [i s]; i s (1);
! Uncomment/add for 3- or 4-class models: ! %c#3% ! [i s]; ! i s (1);
! %c#4% ! [i s]; ! i s (1);
OUTPUT: TECH1 TECH4 TECH11 TECH14; STANDARDIZED; SAMPSTAT; CINTERVAL; MODINDICES (ALL);
SAVEDATA: FILE = “gmm_probabilities.csv”; SAVE = CPROBABILITIES;
2_class model estimated trajectories
BIC = 4097,
AIC4=141
Class 1: n=277 (95% of the sample)
Class 2: n=14 (5% of the sample)
class_params <- tibble(
class = c("Class 1", "Class 2"),
intercept = c(1.1, 1.7), # replace with your estimated intercepts
slope = c(-0.285, 0.279) # replace with your estimated slopes
)
time_points <- 0:4
predicted_trajectories <- class_params %>%
crossing(time = time_points) %>% # create all combinations of class x time
mutate(predicted = intercept + slope * time)
predicted_trajectories <- predicted_trajectories %>%
mutate(wave_label = paste0("Wave ", time + 1))
ggplot(predicted_trajectories, aes(x = time, y = predicted, color = class)) +
geom_line(size = 1.2) +
geom_point(size = 2) +
scale_x_continuous(breaks = time_points, labels = paste0("Wave ", 1:5)) +
labs(x = "Wave", y = "Predicted Outcome", title = "Class Trajectories from GMM") +
theme_minimal() +
theme(text = element_text(size = 14))3_class model estimated trajectories
BIC = 3679
AIC4=3734
Class 1: n=5 2% of the sample)
Class 2: n=270 (93% of the sample)
Class3: n=16 (5%)
class_params <- tibble(
class = c("Class 1", "Class 2","Class 3"),
intercept = c(0.88, 1.14, 2.09), # replace with your estimated intercepts
slope = c(0.87, -0.285, -0.134 ) # replace with your estimated slopes
)
time_points <- 0:4
predicted_trajectories <- class_params %>%
crossing(time = time_points) %>% # create all combinations of class x time
mutate(predicted = intercept + slope * time)
predicted_trajectories <- predicted_trajectories %>%
mutate(wave_label = paste0("Wave ", time + 1))
ggplot(predicted_trajectories, aes(x = time, y = predicted, color = class)) +
geom_line(size = 1.2) +
geom_point(size = 2) +
scale_x_continuous(breaks = time_points, labels = paste0("Wave ", 1:5)) +
labs(x = "Wave", y = "Predicted Outcome", title = "Class Trajectories from GMM") +
theme_minimal() +
theme(text = element_text(size = 14))4_class model estimated trajectories
BIC = 3284
AIC = 3350
Class 1: n=7 (2% of the sample)
Class 2: n=5 (1.7% of the sample)
Class 3: n=9 (3% of the sample)
Class 4: n=270 (93% of the sample)
class_params <- tibble(
class = c("Class 1", "Class 2","Class 3","Class 4"),
intercept = c(2.01, 0.88, 2.16, 1.14), # replace with your estimated intercepts
slope = c(-0.251, 0.87, -0.04, -0.284) # replace with your estimated slopes
)
time_points <- 0:4
predicted_trajectories <- class_params %>%
crossing(time = time_points) %>% # create all combinations of class x time
mutate(predicted = intercept + slope * time)
predicted_trajectories <- predicted_trajectories %>%
mutate(wave_label = paste0("Wave ", time + 1))
ggplot(predicted_trajectories, aes(x = time, y = predicted, color = class)) +
geom_line(size = 1.2) +
geom_point(size = 2) +
scale_x_continuous(breaks = time_points, labels = paste0("Wave ", 1:5)) +
labs(x = "Wave", y = "Predicted Outcome", title = "Class Trajectories from GMM") +
theme_minimal() +
theme(text = element_text(size = 14))