d <- read_csv("ds.csv")
## Parsed with column specification:
## cols(
## x6c_16_1 = col_double(),
## x6c_6_2 = col_double(),
## x7c_12_2 = col_double(),
## x7c_8_3 = col_double(),
## x8b_6_2 = col_double(),
## flag = col_double()
## )
m3 <- read_rds("m3.rds")
m4 <- read_rds("m4.rds")
post_probs <- m3$posterior %>% as.data.frame() %>% setNames(paste0("C", 1:3, "_prob"))
df <- bind_cols(d, post_probs)
df$class <- m3$predclass
df3 <- df %>% mutate_if(is.numeric, round, 3)
post_probs <- m4$posterior %>% as.data.frame() %>% setNames(paste0("C", 1:4, "_prob"))
df <- bind_cols(d, post_probs)
df$class <- m4$predclass
df4 <- df %>% mutate_if(is.numeric, round, 3)
A number taking a minumum value of 0 (representing complete concentration of probability on one cell) and a maximum value equal to the logarithm of the total number of cells in the fitted cross-classfication table (representing complete dispersion, or equal probability for outcomes across every cell).
poLCA.entropy(m3)
## [1] 4.905256
poLCA.entropy(m4)
## [1] 4.834834
By class
mat <- df4 %>% dplyr::select(C1_prob:C4_prob, class)
mat %>% group_by(class) %>% summarize_all(funs(mean)) %>% dplyr::select(-class) %>% as.matrix() %>% diag()
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## please use list() instead
##
## # Before:
## funs(name = f(.)
##
## # After:
## list(name = ~f(.))
## This warning is displayed once per session.
## [1] 0.8789825 0.7264737 0.7325273 0.8079130
Overall
mat %>% group_by(class) %>% summarize_all(funs(mean)) %>% dplyr::select(-class) %>% as.matrix() %>% diag() %>% mean()
## [1] 0.7864741
By class
mat <- df3 %>% dplyr::select(C1_prob:C3_prob, class)
mat %>% group_by(class) %>% summarize_all(funs(mean)) %>% dplyr::select(-class) %>% as.matrix() %>% diag()
## [1] 0.8584536 0.8954091 0.7340893
Overall
mat %>% group_by(class) %>% summarize_all(funs(mean)) %>% dplyr::select(-class) %>% as.matrix() %>% diag() %>% mean()
## [1] 0.8293173
For the code 1 in the 4-class solution
dfa <- df %>%
dplyr::select(-x8b_6_2) %>%
gather(time, code, -c(C1_prob:class)) %>%
dplyr::select(time, code, everything()) %>%
mutate(`1` = if_else(code == 1, 1, 0))
dummies <- as_tibble(dummies::dummy(dfa$code))
dfa <- bind_cols(dfa, dummies)
summary(aov(`1` ~ as.factor(class), data = dfa))
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(class) 3 21.45 7.149 31.6 <2e-16 ***
## Residuals 920 208.15 0.226
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 171 observations deleted due to missingness
TukeyHSD(aov(`1` ~ as.factor(class), data = dfa))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = `1` ~ as.factor(class), data = dfa)
##
## $`as.factor(class)`
## diff lwr upr p adj
## 2-1 0.18387097 0.05772167 0.310020270 0.0010706
## 3-1 0.05064935 -0.06218897 0.163487673 0.6552554
## 4-1 0.36442953 0.25825138 0.470607682 0.0000000
## 3-2 -0.13322162 -0.26033158 -0.006111655 0.0357653
## 4-2 0.18055856 0.05932220 0.301794925 0.0007782
## 4-3 0.31378018 0.20646245 0.421097913 0.0000000
For code 1 for class 1 in the 4-class solution
dfa_class1 <- filter(dfa, class == 1, time != "x8b_6_2")
summary(aov(`1` ~ time, data = dfa_class1))
## Df Sum Sq Mean Sq F value Pr(>F)
## time 4 40.18 10.046 231.1 <2e-16 ***
## Residuals 235 10.22 0.043
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 45 observations deleted due to missingness
TukeyHSD(aov(`1` ~ time, data = dfa_class1))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = `1` ~ time, data = dfa_class1)
##
## $time
## diff lwr upr p adj
## x6c_16_1-flag -9.245283e-01 -1.0339057 -0.81515093 0.0000000
## x6c_6_2-flag -5.925926e-01 -0.7265069 -0.45867832 0.0000000
## x7c_12_2-flag -1.000000e+00 -1.1099210 -0.89007900 0.0000000
## x7c_8_3-flag -1.000000e+00 -1.1104831 -0.88951689 0.0000000
## x6c_6_2-x6c_16_1 3.319357e-01 0.1964069 0.46746454 0.0000000
## x7c_12_2-x6c_16_1 -7.547170e-02 -0.1873540 0.03641065 0.3450687
## x7c_8_3-x6c_16_1 -7.547170e-02 -0.1879063 0.03696295 0.3501576
## x7c_12_2-x6c_6_2 -4.074074e-01 -0.5433753 -0.27143947 0.0000000
## x7c_8_3-x6c_6_2 -4.074074e-01 -0.5438302 -0.27098464 0.0000000
## x7c_8_3-x7c_12_2 1.110223e-15 -0.1129636 0.11296356 1.0000000
When latent class c is highly homogeneous, members of are likely to provide the same observed response pattern, implying that one response pattern is characteristic of the class. If homogeneity is low, no single response pattern stands out.
When latent class separation is good, the pattern of item-response probabiltiies across indicator variables clearly differentiates among the latent classes. When there is high separation, a response pattern that enmerges as characteristic of a particular latent class will be characteristic of that class only (and will not be characteristic of any of the other latent classes).
df %>%
dplyr::select(-class) %>%
gather(class, prob, C1_prob, C2_prob, C3_prob, C4_prob) %>%
mutate_if(is.integer, replace_na, 0) %>%
unite(response_pattern, x6c_16_1, x6c_6_2, x7c_12_2, x7c_8_3, sep = "-") %>%
group_by(class, response_pattern) %>%
summarize(mean_post_prob = mean(prob),
n = n()) %>%
ungroup() %>%
spread(class, mean_post_prob) %>%
mutate_if(is.double, round, 3) %>%
arrange(desc(n))
## # A tibble: 143 x 6
## response_pattern n C1_prob C2_prob C3_prob C4_prob
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 2-NA-2-2 8 0.917 0 0.083 0
## 2 2-NA-2-4 5 0.873 0 0.127 0
## 3 2-NA-4-4 5 0.488 0 0.512 0
## 4 4-NA-4-4 5 0.253 0.747 0 0
## 5 2-2-1-1 4 0 0 0.176 0.824
## 6 2-2-4-2 4 0.449 0 0.551 0
## 7 2-NA-1-2 4 0 0 0.145 0.855
## 8 1-NA-1-1 3 0 0.145 0.096 0.759
## 9 1-NA-1-2 3 0 0 0.133 0.867
## 10 1-NA-1-NA 3 0 0.154 0.15 0.695
## # … with 133 more rows
df %>%
dplyr::select(-class) %>%
gather(class, prob, C1_prob, C2_prob, C3_prob, C4_prob) %>%
mutate_if(is.integer, replace_na, 0) %>%
unite(response_pattern, x6c_16_1, x6c_6_2, x7c_12_2, x7c_8_3, sep = "-") %>%
mutate(n_NA = str_count(response_pattern, "NA")) %>%
filter(n_NA <= 2) %>%
group_by(class, response_pattern) %>%
summarize(mean_post_prob = mean(prob),
n = n()) %>%
ungroup() %>%
spread(class, mean_post_prob) %>%
mutate_if(is.double, round, 3) %>%
arrange(desc(n))
## # A tibble: 143 x 6
## response_pattern n C1_prob C2_prob C3_prob C4_prob
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 2-NA-2-2 8 0.917 0 0.083 0
## 2 2-NA-2-4 5 0.873 0 0.127 0
## 3 2-NA-4-4 5 0.488 0 0.512 0
## 4 4-NA-4-4 5 0.253 0.747 0 0
## 5 2-2-1-1 4 0 0 0.176 0.824
## 6 2-2-4-2 4 0.449 0 0.551 0
## 7 2-NA-1-2 4 0 0 0.145 0.855
## 8 1-NA-1-1 3 0 0.145 0.096 0.759
## 9 1-NA-1-2 3 0 0 0.133 0.867
## 10 1-NA-1-NA 3 0 0.154 0.15 0.695
## # … with 133 more rows