library(haven)
## Warning: package 'haven' was built under R version 4.4.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(poLCA)
## Warning: package 'poLCA' was built under R version 4.4.3
## Loading required package: scatterplot3d
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 4.4.3
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(stringr)
library(psych)
library(dplyr)
library(tidyr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(tibble)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.3
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(tidyLPA)
## Warning: package 'tidyLPA' was built under R version 4.4.3
## You can use the function citation('tidyLPA') to create a citation for the use of {tidyLPA}.
## Mplus is installed; you can use package = 'MplusAutomation' when calling estimate_profiles().
library(poLCAExtra)
##
## Attaching package: 'poLCAExtra'
## The following object is masked from 'package:poLCA':
##
## poLCA
amf_df <- read_sav("files/WIDE_AMF_webbqps_loose.prepared_Merged with Users.sav")
amf_df <- as_factor(amf_df)
label_map <- c(
"fia1A" = "Support",
"fia2A" = "Support",
"fia3A" = "Support",
"fia4A" = "Support",
"fia5A" = "Support",
"fia1B" = "Support",
"fia2B" = "Support",
"fia3B" = "Support",
"fia4B" = "Support",
"fia5B" = "Support",
"fia6A" = "Demands",
"fia7A" = "Demands",
"fia8A" = "Demands",
"fia9A" = "Demands",
"fia6B" = "Demands",
"fia7B" = "Demands",
"fia8B" = "Demands",
"fia9B" = "Demands",
"fia10A" = "Sense of Coherence",
"fia11A" = "Sense of Coherence",
"fia12A" = "Sense of Coherence",
"fia10B" = "Sense of Coherence",
"fia11B" = "Sense of Coherence",
"fia12B" = "Sense of Coherence",
"fia14A" = "Clarity",
"fia15A" = "Clarity",
"fia14B" = "Clarity",
"fia15B" = "Clarity",
"fia16A" = "Control",
"fia17A" = "Control",
"fia18A" = "Control",
"fia19A" = "Control",
"fia16B" = "Control",
"fia17B" = "Control",
"fia18B" = "Control",
"fia19B" = "Control"
)
entropy_LCA <- function(model) {
K.j <- sapply(model$probs, ncol)
if(length(unique(K.j))==1) {
fullcell <- expand.grid(data.frame(sapply(K.j, seq, from=1)))
} else {
fullcell <- expand.grid(sapply(K.j, seq, from = 1))
}
P.c <- poLCA.predcell(model, fullcell)
return(-sum(P.c * log(P.c), na.rm = TRUE))
}
relative_entropy <- function(model) {
en <- -sum(model$posterior * log(model$posterior), na.rm=T)
e <- 1 - en / (nrow(model$posterior) * log(ncol(model$posterior)))
return(e)
}
lca_execute <- function(df, lca_formula, maxiter) {
results <- list()
fit_stats <- data.frame()
N <- nrow(df)
for (k in 2:maxiter) {
set.seed(100)
model <- poLCA(lca_formula, data = df, nclass = k, graphs = FALSE)
post_probs <- model$posterior
relative_entro <- poLCA.relentropy(model)
aBIC <- -2 * model$llik + log((N + 2) / 24) * model$npar
if(k > 2) {
base <- results[[paste0("Class_", k-1)]]
} else {
base <- poLCA(lca_formula, data = df, nclass = 1, graphs = FALSE)
}
lmr <- poLCA.lrt(model, base)
results[[paste0("Class_", k)]] <- model
fit_stats <- rbind(fit_stats,
data.frame(
Classes = k,
LL = model$llik,
AIC = model$aic,
BIC = model$bic,
aBIC = aBIC,
Relative_Entropy = relative_entro,
LMR_LRT_p_val = round(lmr$lmr.p,8)
))
}
print(fit_stats, row.names=FALSE)
fit_melt <- melt(fit_stats, id.vars = "Classes",
measure.vars = c("LL", "AIC", "BIC", "aBIC", "Relative_Entropy"))
plot_ep <- ggplot(fit_melt, aes(x = Classes, y = value)) +
geom_line(size = 1) +
geom_point(size = 2) +
facet_wrap(~ variable, scales = "free_y", ncol=1, strip.position = "right")
theme_minimal() +
labs(title = "Elbow Plot for LCA Fit Statistics",
x = "Number of Classes",
y = "Fit Statistic") +
theme(plot.title = element_text(hjust = 0.5))
print(plot_ep)
return(results)
}
filter_Columns_by_regex <- function(df, col_filtering_regex) {
cols_to_keep <- grep(col_filtering_regex, names(df), perl = TRUE, value = TRUE)
filtered_df <- df %>%
filter(sysConsentYN == "Yes")
filtered_df <- filtered_df[, cols_to_keep]
filtered_df <- na.omit(filtered_df)
return(filtered_df)
}
get_lca_long_format <- function(best_model, label_map) {
probs <- best_model$probs
prob_df <- lapply(names(probs), function(var) {
df <- as.data.frame(probs[[var]])
df$class <- rownames(df)
df$variable <- var
df_long <- df %>%
tidyr::pivot_longer(
cols = starts_with("Pr"),
names_to = "response",
values_to = "probability"
)
return(df_long)
}) %>% dplyr::bind_rows()
prob_df <- prob_df %>%
dplyr::mutate(
clean_var = sub("\\.$", "", variable),
label_prefix = label_map[clean_var],
label = ifelse(is.na(label_prefix), variable, paste(label_prefix, "(", variable, ")"))
) %>%
dplyr::arrange(label_prefix, variable) %>%
dplyr::mutate(label = factor(label, levels = unique(label)))
return(prob_df)
}
seg_barp_lca <- function(prob_df) {
classes <- unique(prob_df$class)
for (cls in classes) {
df_subset <- dplyr::filter(prob_df, class == cls)
p <- ggplot2::ggplot(df_subset, ggplot2::aes(x = label, y = probability, fill = response)) +
ggplot2::geom_bar(stat = "identity") +
ggplot2::geom_text(
ggplot2::aes(label = ifelse(probability > 0.05, round(probability, 2), "")),
position = ggplot2::position_stack(vjust = 0.5),
size = 3, color = "black"
) +
ggplot2::coord_flip() +
ggplot2::labs(
title = paste("LCA -", cls),
x = "Variable",
y = "Probability"
) +
ggplot2::theme_minimal() +
ggplot2::scale_fill_manual(values=c("#71C055", "#B8DB70","#F9F18C", "#FF8686", "#D73343"))
print(p)
}
}
time_point <- 1
cat_variables <- c('wh_conflict', 'hw_conflict')
existing_vars <- names(amf_df)
filtered_df <- amf_df %>% filter(sysConsentYN == "Yes")
vars_to_keep <- unlist(lapply(cat_variables, function(v) {
candidates <- c(v, paste0(v, ".", time_point))
candidates[candidates %in% existing_vars]
}))
lca_df <- filtered_df[, vars_to_keep, drop = FALSE]
f <- as.formula(paste("cbind(", paste(names(lca_df), collapse = ","), ") ~ 1"))
print(f)
## cbind(wh_conflict.1, hw_conflict.1) ~ 1
lca_results <- lca_execute(df = lca_df, lca_formula = f, maxiter = 7)
## Classes LL AIC BIC aBIC Relative_Entropy LMR_LRT_p_val
## 2 -3301.646 6637.291 6724.973 6670.972 0.6985533 0.00000000
## 3 -3265.271 6582.542 6716.643 6634.054 0.5768910 0.00000000
## 4 -3241.218 6552.436 6732.957 6621.780 0.5894423 0.00000061
## 5 -3229.228 6546.456 6773.396 6633.630 0.5787646 0.00639346
## 6 -3229.228 6564.456 6837.816 6669.461 0.4881743 1.00000000
## 7 -3229.228 6582.456 6902.235 6705.292 0.4629860 1.00000000
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

prob_df <- get_lca_long_format(best_model = lca_results[["Class_3"]], label_map)
write.csv(prob_df, paste("probabilities/WLB_", time_point, ".csv"), row.names = FALSE)
print(prob_df)
## # A tibble: 30 × 7
## class variable response probability clean_var label_prefix label
## <chr> <chr> <chr> <dbl> <chr> <chr> <fct>
## 1 "class 1: " hw_conflict.1 Pr(1) 8.64e- 1 hw_conflic… <NA> hw_c…
## 2 "class 1: " hw_conflict.1 Pr(2) 9.46e- 2 hw_conflic… <NA> hw_c…
## 3 "class 1: " hw_conflict.1 Pr(3) 3.94e- 2 hw_conflic… <NA> hw_c…
## 4 "class 1: " hw_conflict.1 Pr(4) 2.00e- 3 hw_conflic… <NA> hw_c…
## 5 "class 1: " hw_conflict.1 Pr(5) 3.59e-274 hw_conflic… <NA> hw_c…
## 6 "class 2: " hw_conflict.1 Pr(1) 9.94e- 2 hw_conflic… <NA> hw_c…
## 7 "class 2: " hw_conflict.1 Pr(2) 6.54e- 1 hw_conflic… <NA> hw_c…
## 8 "class 2: " hw_conflict.1 Pr(3) 1.46e- 1 hw_conflic… <NA> hw_c…
## 9 "class 2: " hw_conflict.1 Pr(4) 5.34e- 2 hw_conflic… <NA> hw_c…
## 10 "class 2: " hw_conflict.1 Pr(5) 4.65e- 2 hw_conflic… <NA> hw_c…
## # ℹ 20 more rows
print(lca_results[["Class_3"]]$predcell)
## wh_conflict.1 hw_conflict.1 observed expected
## 1 1 1 191 190.988
## 2 1 2 23 23.063
## 3 1 3 12 11.922
## 4 1 4 1 1.027
## 5 2 1 103 103.540
## 6 2 2 212 198.926
## 7 2 3 53 55.380
## 8 2 4 17 17.124
## 9 2 5 3 13.030
## 10 3 1 87 87.485
## 11 3 2 173 166.622
## 12 3 3 180 171.264
## 13 3 4 19 32.867
## 14 3 5 3 3.763
## 15 4 1 36 35.368
## 16 4 2 54 61.809
## 17 4 3 48 56.890
## 18 4 4 26 11.178
## 19 4 5 3 1.756
## 20 5 1 10 9.620
## 21 5 2 10 21.580
## 22 5 3 8 5.545
## 23 5 4 1 1.805
## 24 5 5 11 1.451
class_colors <- c("#71C055", "#56B4E9", "#FBA51A", "#CE93D9", "#D73343")
prob_df %>%
ggplot(aes(x = response, y = probability, group = class, color = class)) +
geom_line(size = 1.1) +
geom_point(size = 2) +
facet_wrap(~ clean_var, scales = "fixed") +
scale_y_continuous(limits = c(0,1)) +
scale_color_manual(values = class_colors) +
labs(
title = "Conditional Response Probabilities by Latent Class",
x = "Response Category",
y = "Probability",
color = "Latent Class"
) +
theme_minimal(base_size = 12) +
theme(
strip.text = element_text(face = "bold", size = 10),
legend.position = "bottom"
)

colors <- c("#71C055", "#56B4E9", "#FBA51A", "#CE93D9", "#D73343")
ggplot(prob_df, aes(x = label, y = probability, fill = response)) +
geom_bar(stat = "identity", color = "white") +
geom_text(
aes(label = ifelse(probability > 0.05, round(probability, 2), "")),
position = position_stack(vjust = 0.5),
size = 3,
color = "black"
) +
coord_flip() +
facet_wrap(~ class, ncol = 1, strip.position = "right") +
scale_fill_manual(values = colors) +
labs(
title = "Conditional Response Probabilities by Latent Class",
x = "Variable",
y = "Probability",
fill = "Response Category"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5),
strip.text = element_text(face = "bold", size = 10),
axis.text.y = element_text(size = 9),
legend.position = "bottom"
)

time_point <- 1
cat_variables <- c('wh_conflict', 'hw_conflict')
other_variables <- c('sex', 'edu')
filtered_df <- amf_df %>% filter(sysConsentYN == "Yes")
cat_vars_wtime <- grep(
paste0("^(", paste(cat_variables, collapse = "|"), ")\\.", time_point, "$"),
names(filtered_df),
value = TRUE
)
vars_to_keep <- c(other_variables, cat_vars_wtime)
vars_to_keep <- as.character(vars_to_keep)
lca_df <- filtered_df[, vars_to_keep, drop = FALSE]
f <- as.formula(paste("cbind(", paste(cat_vars_wtime, collapse = ","), ") ~ ", paste(other_variables, collapse = "+")))
print(f)
## cbind(hw_conflict.1, wh_conflict.1) ~ sex + edu
lca_results <- lca_execute(df = lca_df, lca_formula = f, maxiter = 7)
##
## ALERT: covariates not allowed when nclass=1; will be ignored.
##
## Classes LL AIC BIC aBIC Relative_Entropy LMR_LRT_p_val
## 2 -3285.783 6613.566 6721.878 6655.172 0.8265238 0.00000000
## 3 -3305.089 6678.178 6853.541 6745.541 0.8936882 1.00000000
## 4 -3438.426 6970.853 7213.266 7063.971 0.9986964 1.00000000
## 5 -3286.924 6693.848 7003.312 6812.723 0.9095319 0.00000000
## 6 -3449.341 7044.682 7421.196 7189.312 0.9022418 1.00000000
## 7 -3431.373 7034.746 7478.311 7205.133 0.9988629 0.00106967

prob_df <- get_lca_long_format(best_model = lca_results[["Class_3"]], label_map)
write.csv(prob_df, paste("probabilities/WLB_", time_point, ".csv"), row.names = FALSE)
print(prob_df)
## # A tibble: 30 × 7
## class variable response probability clean_var label_prefix label
## <chr> <chr> <chr> <dbl> <chr> <chr> <fct>
## 1 "class 1: " hw_conflict.1 Pr(1) 4.76e- 1 hw_conflic… <NA> hw_c…
## 2 "class 1: " hw_conflict.1 Pr(2) 2.62e- 1 hw_conflic… <NA> hw_c…
## 3 "class 1: " hw_conflict.1 Pr(3) 2.14e- 1 hw_conflic… <NA> hw_c…
## 4 "class 1: " hw_conflict.1 Pr(4) 4.76e- 2 hw_conflic… <NA> hw_c…
## 5 "class 1: " hw_conflict.1 Pr(5) 0 hw_conflic… <NA> hw_c…
## 6 "class 2: " hw_conflict.1 Pr(1) 1.00e+ 0 hw_conflic… <NA> hw_c…
## 7 "class 2: " hw_conflict.1 Pr(2) 1.95e- 9 hw_conflic… <NA> hw_c…
## 8 "class 2: " hw_conflict.1 Pr(3) 2.24e-93 hw_conflic… <NA> hw_c…
## 9 "class 2: " hw_conflict.1 Pr(4) 0 hw_conflic… <NA> hw_c…
## 10 "class 2: " hw_conflict.1 Pr(5) 0 hw_conflic… <NA> hw_c…
## # ℹ 20 more rows
print(lca_results[["Class_3"]]$predcell)
## hw_conflict.1 wh_conflict.1 observed expected
## 1 1 1 191 177.365
## 2 1 2 103 106.642
## 3 1 3 87 95.374
## 4 1 4 36 36.853
## 5 1 5 10 10.766
## 6 2 1 23 26.912
## 7 2 2 212 155.009
## 8 2 3 173 202.194
## 9 2 4 54 71.744
## 10 2 5 10 16.140
## 11 3 1 12 17.917
## 12 3 2 53 98.764
## 13 3 3 180 128.458
## 14 3 4 48 45.637
## 15 3 5 8 10.223
## 16 4 1 1 3.843
## 17 4 2 17 20.996
## 18 4 3 19 27.292
## 19 4 4 26 9.699
## 20 4 5 1 2.171
## 21 5 2 3 6.589
## 22 5 3 3 8.681
## 23 5 4 3 3.067
## 24 5 5 11 0.700
class_colors <- c("#71C055", "#56B4E9", "#FBA51A", "#CE93D9", "#D73343","black",'grey')
prob_df %>%
ggplot(aes(x = response, y = probability, group = class, color = class)) +
geom_line(size = 1.1) +
geom_point(size = 2) +
facet_wrap(~ clean_var, scales = "fixed") +
scale_y_continuous(limits = c(0,1)) +
scale_color_manual(values = class_colors) +
labs(
title = "Conditional Response Probabilities by Latent Class",
x = "Response Category",
y = "Probability",
color = "Latent Class"
) +
theme_minimal(base_size = 12) +
theme(
strip.text = element_text(face = "bold", size = 10),
legend.position = "bottom"
)

colors <- c("#71C055", "#56B4E9", "#FBA51A", "#CE93D9", "#D73343")
ggplot(prob_df, aes(x = label, y = probability, fill = response)) +
geom_bar(stat = "identity", color = "white") +
geom_text(
aes(label = ifelse(probability > 0.05, round(probability, 2), "")),
position = position_stack(vjust = 0.5),
size = 3,
color = "black"
) +
coord_flip() +
facet_wrap(~ class, ncol = 1, strip.position = "right") +
scale_fill_manual(values = colors) +
labs(
title = "Conditional Response Probabilities by Latent Class",
x = "Variable",
y = "Probability",
fill = "Response Category"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5),
strip.text = element_text(face = "bold", size = 10),
axis.text.y = element_text(size = 9),
legend.position = "bottom"
)

exp(lca_results[["Class_3"]]$coeff)
## [,1] [,2]
## (Intercept) 4.819154e-07 2.531551e-07
## sexMan 4.120208e-01 5.130656e-01
## eduSenior high school 4.489604e+12 2.308020e+13
## eduAcademic degree (BA/MA) 8.876283e+11 8.425918e+12
## eduHigher academic degree (PhD) 1.520127e+04 2.026670e+15
odds_ratios <- exp(lca_results[["Class_3"]]$coeff)
df <- as.data.frame(odds_ratios)
df$Variable <- rownames(df)
df_long <- df %>%
pivot_longer(cols = -Variable, names_to = "Class", values_to = "OddsRatio")
ggplot(df_long, aes(x = OddsRatio, y = reorder(Variable, OddsRatio), fill = Class)) +
geom_bar(stat = "identity", position = "dodge") +
geom_vline(xintercept = 1, linetype = "dashed", color = "black") +
labs(x = "Odds Ratio", y = "", title = "Odds Ratios by Variable and Class") +
theme_minimal() +
theme(legend.position = "bottom")
