The data to be used in the analysis was loaded, rows with an "Invalid Number" entry for `getvac_durW4` were dropped, and all empty spaces were cleared.
# Load the paradata export and keep the participant id plus the four duration
# variables that are used in the latent profile analysis.
df <- read.csv("AllParadata10242023.csv") %>%
  dplyr::select(
    study_id, getvac_durW4, learnhpv_durW5, learnvac_durW4, getans_durW4
  ) %>%
  # Drop rows carrying the "Invalid Number" sentinel. Rows where getvac_durW4
  # is NA are dropped too, because the comparison evaluates to NA for them.
  # NOTE(review): if the sentinel occurs in the file, read.csv() returns this
  # column as character, and the numeric-only rounding below skips it -- confirm
  # the column type before modelling.
  filter(getvac_durW4 != "Invalid Number") %>%
  # across() replaces the superseded mutate_if(); every numeric column is
  # rounded to four decimal places.
  mutate(across(where(is.numeric), ~ round(.x, 4)))
The estimation below shows that Model 1 with 3 classes is the best-fitting solution that was estimated without warnings.
## Estimating the best number of classes
# Fit latent profile models with one to five profiles on the four duration
# variables; single_imputation() is applied to the selected columns first.
df0 <- df %>%
  dplyr::select(
    all_of(c(
      "getvac_durW4", "learnhpv_durW5", "learnvac_durW4", "getans_durW4"
    ))
  ) %>%
  single_imputation() %>%
  estimate_profiles(n_profiles = 1:5)

# Print the fit statistics of every estimated solution.
df0
## tidyLPA analysis using mclust:
##
## Model Classes AIC BIC Entropy prob_min prob_max n_min n_max BLRT_p
## 1 1 7196.82 7234.08 1.00 1.00 1.00 1.00 1.00
## 1 2 6234.27 6294.82 0.90 0.91 0.99 0.18 0.82 0.01
## 1 3 5585.12 5668.96 0.93 0.92 0.99 0.04 0.78 0.01
## 1 4 5595.12 5702.25 0.57 0.00 0.99 0.00 0.77 0.24
## 1 5 5341.54 5471.97 0.63 0.00 1.00 0.00 0.75 0.01
# Compare the fitted solutions on AIC and BIC.
df0 %>%
  compare_solutions(statistics = c("AIC", "BIC"))
## Compare tidyLPA solutions:
##
## Model Classes AIC BIC Warnings
## 1 1 7196.819 7234.083
## 1 2 6234.265 6294.820
## 1 3 5585.117 5668.961
## 1 4 5595.115 5702.249 Warning
## 1 5 5341.541 5471.965 Warning
##
## Best model according to AIC is Model 1 with 5 classes.
## Best model according to BIC is Model 1 with 5 classes.
##
## An analytic hierarchy process, based on the fit indices AIC, AWE, BIC, CLC, and KIC (Akogul & Erisoglu, 2017), suggests the best solution is Model 1 with 5 classes.
## Estimating classes without warning
# Re-fit with one to three profiles only, i.e. the solutions that were
# reported without a warning in the comparison above.
df0 <- df %>%
  dplyr::select(
    all_of(c(
      "getvac_durW4", "learnhpv_durW5", "learnvac_durW4", "getans_durW4"
    ))
  ) %>%
  single_imputation() %>%
  estimate_profiles(n_profiles = 1:3)

# Print the fit statistics of the retained solutions.
df0
## tidyLPA analysis using mclust:
##
## Model Classes AIC BIC Entropy prob_min prob_max n_min n_max BLRT_p
## 1 1 7196.82 7234.08 1.00 1.00 1.00 1.00 1.00
## 1 2 6234.27 6294.82 0.90 0.91 0.99 0.18 0.82 0.01
## 1 3 5585.12 5668.96 0.93 0.92 0.99 0.04 0.78 0.01
# Compare the warning-free solutions on AIC and BIC.
df0 %>%
  compare_solutions(statistics = c("AIC", "BIC"))
## Compare tidyLPA solutions:
##
## Model Classes AIC BIC
## 1 1 7196.819 7234.083
## 1 2 6234.265 6294.820
## 1 3 5585.117 5668.961
##
## Best model according to AIC is Model 1 with 3 classes.
## Best model according to BIC is Model 1 with 3 classes.
##
## An analytic hierarchy process, based on the fit indices AIC, AWE, BIC, CLC, and KIC (Akogul & Erisoglu, 2017), suggests the best solution is Model 1 with 3 classes.
## Using the 3 class solution
# Fit only the three-profile model on the four duration variables.
df1 <- df %>%
  dplyr::select(
    all_of(c(
      "getvac_durW4", "learnhpv_durW5", "learnvac_durW4", "getans_durW4"
    ))
  ) %>%
  single_imputation() %>%
  estimate_profiles(n_profiles = 3)
## Swapping the labels of Class 1 and Class 2 (every other class is labelled 3)
# Pull the data out of the fitted model, swap the labels of classes 1 and 2
# (any other value becomes 3), and reshape to long format with one row per
# observation and duration variable.
df2 <- df1 %>%
  get_data() %>%
  mutate(
    Class = case_when(
      Class == 1 ~ 2,
      Class == 2 ~ 1,
      .default = 3
    )
  ) %>%
  pivot_longer(
    cols = all_of(c(
      "getvac_durW4", "learnhpv_durW5", "learnvac_durW4", "getans_durW4"
    )),
    names_to = "Variables",
    values_to = "Value"
  )
# Mean of each variable within each class, one row per Class/Variables pair.
# Class is stored as character; it is used as the colour and group aesthetic
# in the plot.
df3 <- df2 %>%
  group_by(Class, Variables) %>%
  summarise(Value = mean(Value), .groups = "drop") %>%
  mutate(Class = as.character(Class))
# Left-to-right order of the variables on the x axis of the profile plot.
x_axis_order <- c(
  "getvac_durW4",
  "learnhpv_durW5",
  "learnvac_durW4",
  "getans_durW4"
)
# Shared ggplot2 theme for the profile plot: bold text throughout, a bordered
# panel, and no panel background or major grid lines.
plot.theme <- theme(
  # Legend
  legend.key.width = unit(2, units = "lines"),
  legend.text = element_text(face = "bold", size = 14),
  legend.key = element_blank(),
  legend.background = element_blank(),
  # Title (centred) and axes
  plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
  axis.text = element_text(face = "bold", size = 16),
  axis.title = element_text(face = "bold", size = 18),
  # Panel: outline only
  panel.border = element_rect(fill = NA),
  panel.background = element_blank(),
  panel.grid.major = element_blank()
)
# Plotting the graph of the 3 class solution
# One line per class across the four variables, drawn from the class means.
ggplot(
  data = df3,
  mapping = aes(x = Variables, y = Value, group = Class, color = Class)
) +
  geom_line() +
  geom_point() +
  # Fix the variable order on the x axis.
  scale_x_discrete(limits = x_axis_order) +
  # Tick marks every 0.1 from 0 to 3.2 on the y axis.
  scale_y_continuous(breaks = seq(from = 0, to = 3.2, by = 0.1)) +
  plot.theme