LPA Example for SEM

Author

Ty Partridge, Ph.D.

setwd("C:/Work Files/Teaching/PSY 8170/Winter 2026/Class Exercises/LPA")

Key Packages

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyLPA)
You can use the function citation('tidyLPA') to create a citation for the use of {tidyLPA}.
Mplus is not installed. Use only package = 'mclust' when calling estimate_profiles().
set.seed(123)

n <- 300

# Simulate 3 latent groups
group <- sample(1:3, n, replace = TRUE)

data <- tibble(
  x1 = rnorm(n, mean = c(0, 2, -2)[group], sd = 1),
  x2 = rnorm(n, mean = c(0, 2, -2)[group], sd = 1),
  x3 = rnorm(n, mean = c(0, 2, -2)[group], sd = 1),
  x4 = rnorm(n, mean = c(0, 2, -2)[group], sd = 1)
)

head(data)
# A tibble: 6 × 4
       x1    x2    x3      x4
    <dbl> <dbl> <dbl>   <dbl>
1 -0.0991 -2.20 -2.00 -1.19  
2 -1.29   -1.44 -3.38 -1.59  
3 -1.26   -1.68 -3.26 -2.09  
4  3.37    2.70  2.33  0.0918
5 -2.58   -3.02 -2.28 -1.34  
6  1.20    1.83  1.61  2.66  
models <- data %>%
  estimate_profiles(1:4)
get_fit(models)
# A tibble: 4 × 18
  Model Classes LogLik   AIC   AWE   BIC  CAIC   CLC   KIC SABIC    ICL Entropy
  <dbl>   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>   <dbl>
1     1       1 -2450. 4917. 5014. 4946. 4954. 4903. 4928. 4921. -4946.   1    
2     1       2 -2150. 4326. 4486. 4374. 4387. 4302. 4342. 4333. -4416.   0.824
3     1       3 -2013. 4062. 4283. 4129. 4147. 4028. 4083. 4071. -4151.   0.922
4     1       4 -2009. 4065. 4348. 4150. 4173. 4020. 4091. 4077. -4225.   0.833
# ℹ 6 more variables: prob_min <dbl>, prob_max <dbl>, n_min <dbl>, n_max <dbl>,
#   BLRT_val <dbl>, BLRT_p <dbl>
best_model <- models[[3]]  # adjust based on fit results
get_data(best_model) %>%
  group_by(Class) %>%
  summarise(across(starts_with("x"), mean))
# A tibble: 3 × 5
  Class      x1      x2      x3      x4
  <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
1     1 -2.05   -2.08   -2.02   -1.88  
2     2  1.89    2.00    2.10    1.94  
3     3 -0.0277  0.0717 -0.0388  0.0116
plot_profiles(best_model)
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
ℹ The deprecated feature was likely used in the tidyLPA package.
  Please report the issue at <https://github.com/data-edu/tidyLPA/issues>.

# Fit model
model <- estimate_profiles(data, 3)

# Extract data with class info
df_with_classes <- get_data(model)

# Save it
write.csv(df_with_classes, "lpa_results.csv", row.names = FALSE)
df_with_classes %>%
  mutate(max_prob = pmax(CPROB1, CPROB2, CPROB3)) %>%
  summarise(
    mean_certainty = mean(max_prob),
    low_certainty = mean(max_prob < 0.7)
  )
# A tibble: 1 × 2
  mean_certainty low_certainty
           <dbl>         <dbl>
1          0.968          0.04