d <- read_csv("d.csv")
## Parsed with column specification:
## cols(
##   x6c_16_1 = col_double(),
##   x6c_6_2 = col_double(),
##   x7c_12_2 = col_double(),
##   x7c_8_3 = col_double(),
##   x8b_6_2 = col_double()
## )
m3 <- read_rds("m3.rds")
m4 <- read_rds("m4.rds")
# Append the 3-class model's posterior probabilities and modal class assignments to the data
post_probs <- m3$posterior %>% as.data.frame() %>% setNames(paste0("C", 1:3, "_prob"))
df <- bind_cols(d, post_probs)
df$class <- m3$predclass
df3 <- df %>% mutate_if(is.numeric, round, 3)
# Same for the 4-class model
post_probs <- m4$posterior %>% as.data.frame() %>% setNames(paste0("C", 1:4, "_prob"))
df <- bind_cols(d, post_probs)
df$class <- m4$predclass
df4 <- df %>% mutate_if(is.numeric, round, 3)
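The class column is poLCA's modal ("hard") assignment: for each observation, the class with the largest posterior probability. A minimal check of that relationship for the 4-class model (a sketch, not part of the original analysis; modal_class is an illustrative name):
# Modal assignment: the column of the posterior matrix with the highest
# probability in each row; this should reproduce m4$predclass
modal_class <- apply(m4$posterior, 1, which.max)
all.equal(as.integer(modal_class), as.integer(m4$predclass))  # expect TRUE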
poLCA.entropy() returns a number with a minimum value of 0 (representing complete concentration of probability in one cell) and a maximum equal to the logarithm of the total number of cells in the fitted cross-classification table (representing complete dispersion, or equal probability across every cell).
poLCA.entropy(m3)
## [1] 4.914149
poLCA.entropy(m4)
## [1] 4.846396
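The quantity being reported is the Shannon entropy of the fitted cell probabilities. A minimal sketch of that calculation, assuming p is the vector of fitted probabilities over all cells of the cross-classification table, summing to 1 (the helper name cell_entropy and the argument p are illustrative, not from the original code):
# Shannon entropy of the fitted cell probabilities: -sum(p * log(p))
cell_entropy <- function(p) {
  p <- p[p > 0]        # drop zero-probability cells to avoid log(0)
  -sum(p * log(p))
}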
For code 1 in the 4-class solution (comparing the code 1 indicator across classes):
# Reshape to long format: one row per observation per time point, with the observed
# code, keeping the 4-class posterior probabilities and modal class;
# `1` is an indicator for code 1
dfa <- df %>%
  dplyr::select(-x8b_6_2) %>%
  gather(time, code, -c(C1_prob:class)) %>%
  dplyr::select(time, code, everything()) %>%
  mutate(`1` = if_else(code == 1, 1, 0))
# Indicator (dummy) columns for each code value
dummies <- as_tibble(dummies::dummy(dfa$code))
dfa <- bind_cols(dfa, dummies)
summary(aov(`1` ~ as.factor(class), data = dfa))
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(class) 3 98.79 32.93 221.7 <2e-16 ***
## Residuals 1464 217.45 0.15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1552 observations deleted due to missingness
TukeyHSD(aov(`1` ~ as.factor(class), data = dfa))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = `1` ~ as.factor(class), data = dfa)
##
## $`as.factor(class)`
## diff lwr upr p adj
## 2-1 0.19379941 0.12186920 0.2657296 0.0000000
## 3-1 0.22242235 0.14091185 0.3039328 0.0000000
## 4-1 0.64585937 0.57901766 0.7127011 0.0000000
## 3-2 0.02862294 -0.05526239 0.1125083 0.8164891
## 4-2 0.45205996 0.38234195 0.5217780 0.0000000
## 4-3 0.42343703 0.34387191 0.5030021 0.0000000
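The diff column is the pairwise difference in the mean of the code 1 indicator (i.e. the proportion of code 1) between classes. A quick sketch of those group means (illustrative, not part of the original output):
# Proportion of code 1 by modal class; pairwise differences of these means
# are what TukeyHSD reports in the diff column
dfa %>%
  group_by(class) %>%
  summarise(prop_code1 = mean(`1`, na.rm = TRUE))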
For code 1 within class 1 of the 4-class solution (comparing across time points):
dfa_class1 <- filter(dfa, class == 1, time != "x8b_6_2")
summary(aov(`1` ~ time, data = dfa_class1))
## Df Sum Sq Mean Sq F value Pr(>F)
## time 3 0.471 0.15689 6.876 0.000158 ***
## Residuals 407 9.286 0.02282
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 385 observations deleted due to missingness
TukeyHSD(aov(`1` ~ time, data = dfa_class1))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = `1` ~ time, data = dfa_class1)
##
## $time
## diff lwr upr p adj
## x6c_6_2-x6c_16_1 6.344086e-02 0.002496708 0.12438501 0.0376807
## x7c_12_2-x6c_16_1 -3.333333e-02 -0.085272320 0.01860565 0.3487762
## x7c_8_3-x6c_16_1 -3.333333e-02 -0.083330198 0.01666353 0.3146182
## x7c_12_2-x6c_6_2 -9.677419e-02 -0.159074284 -0.03447410 0.0004236
## x7c_8_3-x6c_6_2 -9.677419e-02 -0.157464632 -0.03608375 0.0002748
## x7c_8_3-x7c_12_2 -1.734723e-17 -0.051641050 0.05164105 1.0000000
Frequencies are used here rather than likelihoods, since these are based on the 'hard' (modal) classifications.
mat <- dfa %>%
filter(code != 5) %>%
count(code, class) %>%
spread(class, n) %>%
dplyr::select(-code)
# code is rows, class is columns
c <- chisq.test(mat)
c
##
## Pearson's Chi-squared test
##
## data: mat
## X-squared = 1245.8, df = 9, p-value < 2.2e-16
c$stdres
## 1 2 3 4
## [1,] -14.912776 -4.454544 -2.400090 20.271464
## [2,] 19.999495 -12.432497 -4.828079 -4.083248
## [3,] -5.961178 -4.476029 17.821331 -4.065776
## [4,] -1.280305 20.539901 -4.403082 -14.122745
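The values in stdres are adjusted (standardized) Pearson residuals: the difference between observed and expected counts, scaled by its estimated standard error. A minimal sketch of that calculation from the frequency table above (the names obs, expd, row_p, and col_p are illustrative):
# Standardized Pearson residuals by hand: (observed - expected) scaled by
# sqrt(expected * (1 - row proportion) * (1 - column proportion))
obs   <- as.matrix(mat)
n     <- sum(obs)
expd  <- outer(rowSums(obs), colSums(obs)) / n
row_p <- rowSums(obs) / n
col_p <- colSums(obs) / n
stdres_by_hand <- (obs - expd) / sqrt(expd * outer(1 - row_p, 1 - col_p))
all.equal(unname(stdres_by_hand), unname(c$stdres))  # expect TRUE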