Loading Data

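The data used in the analysis were loaded from the paradata CSV file. Only the participant ID and the four duration variables were kept, rows whose `getvac_durW4` value was recorded as "Invalid Number" were removed, and numeric columns were rounded to four decimal places.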

## Load the paradata, keep the participant ID plus the four duration
## variables, drop unusable rows, and round numeric columns to 4 decimals.
df <- read.csv("AllParadata10242023.csv") %>%
    dplyr::select(study_id, getvac_durW4, learnhpv_durW5, learnvac_durW4, getans_durW4) %>%
    # Rows flagged with the literal text "Invalid Number" are removed; filter()
    # also drops rows where this comparison evaluates to NA.
    # NOTE(review): if the column really contains the text "Invalid Number",
    # read.csv() presumably imports it as character, in which case it is
    # skipped by the numeric rounding below -- confirm the column type.
    filter(getvac_durW4 != "Invalid Number") %>%
    # across(where(...)) replaces the superseded mutate_if() scoped verb.
    mutate(across(where(is.numeric), ~ round(.x, 4)))

Estimating the best number of classes using Model 1

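The estimation below shows that, although the 5-class solution has the lowest AIC and BIC, the 4- and 5-class solutions are estimated with warnings. Among the solutions estimated without a warning, Model 1 with 3 classes is the best fit.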

## Fit candidate solutions with 1 to 5 profiles on the four duration
## variables (missing values are filled by single imputation first), then
## print the fit summary for every candidate.
df0 <- df %>%
    dplyr::select(dplyr::all_of(c(
        "getvac_durW4", "learnhpv_durW5", "learnvac_durW4", "getans_durW4"
    ))) %>%
    single_imputation() %>%
    estimate_profiles(n_profiles = 1:5)
df0
## tidyLPA analysis using mclust: 
## 
##  Model Classes AIC     BIC     Entropy prob_min prob_max n_min n_max BLRT_p
##  1     1       7196.82 7234.08 1.00    1.00     1.00     1.00  1.00        
##  1     2       6234.27 6294.82 0.90    0.91     0.99     0.18  0.82  0.01  
##  1     3       5585.12 5668.96 0.93    0.92     0.99     0.04  0.78  0.01  
##  1     4       5595.12 5702.25 0.57    0.00     0.99     0.00  0.77  0.24  
##  1     5       5341.54 5471.97 0.63    0.00     1.00     0.00  0.75  0.01
## Rank the 1- to 5-profile solutions on AIC and BIC
df0 %>%
    compare_solutions(statistics = c("AIC", "BIC"))
## Compare tidyLPA solutions:
## 
##  Model Classes AIC      BIC      Warnings
##  1     1       7196.819 7234.083         
##  1     2       6234.265 6294.820         
##  1     3       5585.117 5668.961         
##  1     4       5595.115 5702.249 Warning 
##  1     5       5341.541 5471.965 Warning 
## 
## Best model according to AIC is Model 1 with 5 classes.
## Best model according to BIC is Model 1 with 5 classes.
## 
## An analytic hierarchy process, based on the fit indices AIC, AWE, BIC, CLC, and KIC (Akogul & Erisoglu, 2017), suggests the best solution is Model 1 with 5 classes.
## Re-fit only the 1- to 3-profile solutions, i.e. the candidates that were
## estimated without a warning; this overwrites the earlier df0.
df0 <- df %>%
    dplyr::select(dplyr::all_of(c(
        "getvac_durW4", "learnhpv_durW5", "learnvac_durW4", "getans_durW4"
    ))) %>%
    single_imputation() %>%
    estimate_profiles(n_profiles = 1:3)
df0
## tidyLPA analysis using mclust: 
## 
##  Model Classes AIC     BIC     Entropy prob_min prob_max n_min n_max BLRT_p
##  1     1       7196.82 7234.08 1.00    1.00     1.00     1.00  1.00        
##  1     2       6234.27 6294.82 0.90    0.91     0.99     0.18  0.82  0.01  
##  1     3       5585.12 5668.96 0.93    0.92     0.99     0.04  0.78  0.01
## Rank the 1- to 3-profile solutions on AIC and BIC
df0 %>%
    compare_solutions(statistics = c("AIC", "BIC"))
## Compare tidyLPA solutions:
## 
##  Model Classes AIC      BIC     
##  1     1       7196.819 7234.083
##  1     2       6234.265 6294.820
##  1     3       5585.117 5668.961
## 
## Best model according to AIC is Model 1 with 3 classes.
## Best model according to BIC is Model 1 with 3 classes.
## 
## An analytic hierarchy process, based on the fit indices AIC, AWE, BIC, CLC, and KIC (Akogul & Erisoglu, 2017), suggests the best solution is Model 1 with 3 classes.
## Final model: fit the single 3-profile solution on the imputed
## duration variables
df1 <- df %>%
    dplyr::select(dplyr::all_of(c(
        "getvac_durW4", "learnhpv_durW5", "learnvac_durW4", "getans_durW4"
    ))) %>%
    single_imputation() %>%
    estimate_profiles(n_profiles = 3)

## Swap the labels of Class 1 and Class 2 (every other value becomes 3),
## then reshape the four duration variables into long format with one row
## per observation and variable.
df2 <- get_data(df1) %>%
    mutate(
        Class = case_when(
            Class == 1 ~ 2,
            Class == 2 ~ 1,
            TRUE ~ 3
        )
    ) %>%
    pivot_longer(
        cols = c("getvac_durW4", "learnhpv_durW5", "learnvac_durW4", "getans_durW4"),
        names_to = "Variables",
        values_to = "Value"
    )

## Mean of each duration variable within each class; Class is stored as
## character so it is treated as a discrete grouping in the plot.
df3 <- df2 %>%
    group_by(Class, Variables) %>%
    summarise(Value = mean(Value), .groups = "drop") %>%
    mutate(Class = as.character(Class))

## Order in which the duration variables appear on the x axis
x_axis_order <- c(
    "getvac_durW4",
    "learnhpv_durW5",
    "learnvac_durW4",
    "getans_durW4"
)

## Shared ggplot2 theme for the profile plot
plot.theme <- theme(
    # Title and axes: bold text, centred title
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    axis.title = element_text(size = 18, face = "bold"),
    axis.text = element_text(size = 16, face = "bold"),
    # Panel: outlined border, no background fill, no major grid lines
    panel.border = element_rect(fill = NA),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    # Legend: wider keys, bold labels, no key or legend background
    legend.key.width = unit(2, "lines"),
    legend.text = element_text(size = 14, face = "bold"),
    legend.key = element_blank(),
    legend.background = element_blank()
)

# Profile plot of the 3 class solution: one line (with points) per class
# showing the mean of each duration variable
ggplot(
    data = df3,
    mapping = aes(x = Variables, y = Value, group = Class, colour = Class)
) +
  geom_line() +
  geom_point() +
  scale_x_discrete(limits = x_axis_order) +
  scale_y_continuous(breaks = seq(from = 0, to = 3.2, by = 0.1)) +
  plot.theme