library(poLCA)
## Loading required package: scatterplot3d
## Loading required package: MASS
library(foreign)
datos <- read.spss("C:/Users/carlos/Downloads/DATOS LM/md3057/datos3057.sav",
to.data.frame = TRUE)
## Warning in read.spss("C:/Users/carlos/Downloads/DATOS LM/md3057/
## datos3057.sav", : C:/Users/carlos/Downloads/DATOS LM/md3057/datos3057.sav:
## Unrecognized record type 7, subtype 18 encountered in system file
## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated
sapply(datos[,52:58], FUN= levels)
## P1501 P1502 P1503
## [1,] "Muy necesario" "Muy necesario" "Muy necesario"
## [2,] "Bastante necesario" "Bastante necesario" "Bastante necesario"
## [3,] "Poco necesario" "Poco necesario" "Poco necesario"
## [4,] "Nada necesario" "Nada necesario" "Nada necesario"
## [5,] "No sabe lo que es" "No sabe lo que es" "No sabe lo que es"
## [6,] "N.S." "N.S." "N.S."
## [7,] "N.C." "N.C." "N.C."
## P1504 P1505 P1506
## [1,] "Muy necesario" "Muy necesario" "Muy necesario"
## [2,] "Bastante necesario" "Bastante necesario" "Bastante necesario"
## [3,] "Poco necesario" "Poco necesario" "Poco necesario"
## [4,] "Nada necesario" "Nada necesario" "Nada necesario"
## [5,] "No sabe lo que es" "No sabe lo que es" "No sabe lo que es"
## [6,] "N.S." "N.S." "N.S."
## [7,] "N.C." "N.C." "N.C."
## P1507
## [1,] "Muy necesario"
## [2,] "Bastante necesario"
## [3,] "Poco necesario"
## [4,] "Nada necesario"
## [5,] "No sabe lo que es"
## [6,] "N.S."
## [7,] "N.C."
str(datos)
## 'data.frame': 2476 obs. of 261 variables:
## $ ESTU : num 3057 3057 3057 3057 3057 ...
## $ CUES : num 1 2 3 4 5 6 7 8 9 10 ...
## $ CCAA : Factor w/ 19 levels "Andalucía","Aragón",..: 16 16 16 16 16 16 16 16 7 7 ...
## $ PROV : Factor w/ 52 levels "Araba-Álava",..: 1 1 1 1 1 1 1 1 2 2 ...
## $ MUN : Factor w/ 64 levels "Mun.<100.000 hab.",..: 31 31 31 31 31 31 31 31 1 1 ...
## $ TAMUNI : Factor w/ 7 levels "Menos o igual a 2.000 habitantes",..: 5 5 5 5 5 5 5 5 2 2 ...
## $ AREA : Factor w/ 2 levels "No pertenece",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ DISTR : Factor w/ 1 level "Anonimizado": 1 1 1 1 1 1 1 1 1 1 ...
## $ SECCION : Factor w/ 1 level "Anonimizado": 1 1 1 1 1 1 1 1 1 1 ...
## $ ENTREV : Factor w/ 1 level "Anonimizado": 1 1 1 1 1 1 1 1 1 1 ...
## $ P0 : Factor w/ 3 levels "Española","Española y otra",..: 1 1 1 1 2 1 1 1 1 1 ...
## $ P0A : Factor w/ 199 levels "Albanesa","Austriaca",..: NA NA NA NA 138 NA NA NA NA NA ...
## $ P1 : Factor w/ 7 levels "Muy buena","Buena",..: 3 4 4 4 3 4 4 4 3 4 ...
## $ P2 : Factor w/ 5 levels "Mejor","Igual",..: 2 2 2 2 2 2 2 2 1 2 ...
## $ P3 : Factor w/ 5 levels "Mejor","Igual",..: 2 2 2 2 2 2 2 2 1 1 ...
## $ P4 : Factor w/ 7 levels "Muy buena","Buena",..: 6 5 5 5 4 5 4 5 6 6 ...
## $ P5 : Factor w/ 5 levels "Mejor","Igual",..: 2 2 2 2 2 2 2 2 1 4 ...
## $ P6 : Factor w/ 5 levels "Mejor","Igual",..: 2 2 2 2 2 2 2 2 1 4 ...
## $ P701 : Factor w/ 47 levels "El paro","Las drogas",..: 1 1 13 11 1 8 1 11 1 1 ...
## $ P702 : Factor w/ 47 levels "El paro","Las drogas",..: 8 8 8 1 8 11 11 1 16 47 ...
## $ P703 : Factor w/ 47 levels "El paro","Las drogas",..: 47 11 6 47 13 16 16 8 47 47 ...
## $ P801 : Factor w/ 47 levels "El paro","Las drogas",..: 1 1 13 11 1 8 1 11 1 45 ...
## $ P802 : Factor w/ 47 levels "El paro","Las drogas",..: 8 8 8 47 8 11 11 8 47 47 ...
## $ P803 : Factor w/ 47 levels "El paro","Las drogas",..: 47 11 6 47 13 47 16 47 47 47 ...
## $ P9 : atomic 7 6 6 6 6 6 6 6 3 98 ...
## ..- attr(*, "value.labels")= Named num 99 98 10 0
## .. ..- attr(*, "names")= chr "N.C." "N.S." "10 Se puede confiar en la mayoría de la gente" "0 Nunca se es lo bastante prudente"
## $ P10 : atomic 6 6 6 6 6 6 5 6 3 5 ...
## ..- attr(*, "value.labels")= Named num 99 98 10 0
## .. ..- attr(*, "names")= chr "N.C." "N.S." "10 La mayoría de la gente procura ayudar a los demás" "0 La mayoría de la gente sólo mira por sí misma"
## $ P11 : atomic 7 5 8 7 6 5 6 8 3 98 ...
## ..- attr(*, "value.labels")= Named num 99 98 10 0
## .. ..- attr(*, "names")= chr "N.C." "N.S." "10 Máxima seguridad aun perdiendo accesibilidad a la información" "0 Máximo acceso a la información aun perdiendo seguridad"
## $ P1201 : atomic 9 8 8 98 9 8 9 8 4 8 ...
## ..- attr(*, "value.labels")= Named num 99 98 10 0
## .. ..- attr(*, "names")= chr "N.C." "N.S." "10 Totalmente de acuerdo" "0 Totalmente en desacuerdo"
## $ P1202 : atomic 9 8 8 98 9 8 9 8 6 10 ...
## ..- attr(*, "value.labels")= Named num 99 98 10 0
## .. ..- attr(*, "names")= chr "N.C." "N.S." "10 Totalmente de acuerdo" "0 Totalmente en desacuerdo"
## $ P1203 : atomic 9 8 8 98 9 5 9 8 5 8 ...
## ..- attr(*, "value.labels")= Named num 99 98 10 0
## .. ..- attr(*, "names")= chr "N.C." "N.S." "10 Totalmente de acuerdo" "0 Totalmente en desacuerdo"
## $ P1204 : atomic 9 8 8 98 9 7 9 8 4 98 ...
## ..- attr(*, "value.labels")= Named num 99 98 10 0
## .. ..- attr(*, "names")= chr "N.C." "N.S." "10 Totalmente de acuerdo" "0 Totalmente en desacuerdo"
## $ P1301 : Factor w/ 6 levels "Muy importante",..: 1 2 1 2 2 1 2 2 2 1 ...
## $ P1302 : Factor w/ 6 levels "Muy importante",..: 1 2 1 2 2 1 5 2 1 1 ...
## $ P1303 : Factor w/ 6 levels "Muy importante",..: 1 2 1 1 2 1 2 2 1 1 ...
## $ P1304 : Factor w/ 6 levels "Muy importante",..: 1 2 2 1 2 2 2 2 2 1 ...
## $ P1305 : Factor w/ 6 levels "Muy importante",..: 1 2 2 3 2 2 2 2 2 1 ...
## $ P1306 : Factor w/ 6 levels "Muy importante",..: 2 2 1 2 2 2 5 2 2 1 ...
## $ P1307 : Factor w/ 6 levels "Muy importante",..: 2 2 2 2 2 2 2 2 1 1 ...
## $ P1308 : Factor w/ 6 levels "Muy importante",..: 2 2 2 2 2 2 2 2 1 1 ...
## $ P1309 : Factor w/ 6 levels "Muy importante",..: 2 2 2 2 2 2 2 2 2 1 ...
## $ P1401 : Factor w/ 9 levels "Totalmente satisfactoria",..: 1 2 1 6 1 5 2 6 1 6 ...
## $ P1402 : Factor w/ 9 levels "Totalmente satisfactoria",..: 1 2 1 6 1 5 6 6 1 6 ...
## $ P1403 : Factor w/ 9 levels "Totalmente satisfactoria",..: 2 2 1 2 1 2 2 2 1 2 ...
## $ P1404 : Factor w/ 9 levels "Totalmente satisfactoria",..: 2 2 1 2 1 2 6 2 1 2 ...
## $ P1405 : Factor w/ 9 levels "Totalmente satisfactoria",..: 6 2 1 2 1 2 6 2 2 1 ...
## $ P1406 : Factor w/ 9 levels "Totalmente satisfactoria",..: 6 2 1 2 1 2 6 2 2 6 ...
## $ P1407 : Factor w/ 9 levels "Totalmente satisfactoria",..: 6 6 6 2 6 2 6 6 6 1 ...
## $ P1408 : Factor w/ 9 levels "Totalmente satisfactoria",..: 2 6 6 6 6 5 6 6 6 6 ...
## $ P1409 : Factor w/ 9 levels "Totalmente satisfactoria",..: 6 2 2 6 6 5 6 6 2 6 ...
## $ P1410 : Factor w/ 9 levels "Totalmente satisfactoria",..: 2 2 2 2 2 3 2 3 2 6 ...
## $ P1411 : Factor w/ 9 levels "Totalmente satisfactoria",..: 2 2 2 3 2 3 2 3 3 2 ...
## $ P1501 : Factor w/ 7 levels "Muy necesario",..: 1 2 3 2 1 2 3 1 3 1 ...
## $ P1502 : Factor w/ 7 levels "Muy necesario",..: 2 1 1 2 1 1 3 1 2 2 ...
## $ P1503 : Factor w/ 7 levels "Muy necesario",..: 2 1 1 3 1 1 3 2 1 2 ...
## $ P1504 : Factor w/ 7 levels "Muy necesario",..: 1 3 1 6 1 4 3 2 3 2 ...
## $ P1505 : Factor w/ 7 levels "Muy necesario",..: 1 3 4 5 3 4 3 4 3 5 ...
## $ P1506 : Factor w/ 7 levels "Muy necesario",..: 1 3 3 5 1 4 3 2 3 5 ...
## $ P1507 : Factor w/ 7 levels "Muy necesario",..: 1 3 2 5 2 4 3 3 3 5 ...
## $ P1601 : Factor w/ 5 levels "Sí","No","No sabe lo que es",..: 1 1 1 2 1 1 1 1 1 1 ...
## $ P1602 : Factor w/ 5 levels "Sí","No","No sabe lo que es",..: 1 1 1 2 1 2 1 1 1 2 ...
## $ P1603 : Factor w/ 5 levels "Sí","No","No sabe lo que es",..: 1 1 1 2 1 2 1 1 2 2 ...
## $ P1604 : Factor w/ 5 levels "Sí","No","No sabe lo que es",..: 1 1 1 2 1 2 1 1 1 3 ...
## $ P1605 : Factor w/ 5 levels "Sí","No","No sabe lo que es",..: 1 2 1 2 1 2 1 2 1 3 ...
## $ P1606 : Factor w/ 5 levels "Sí","No","No sabe lo que es",..: 1 2 1 2 1 2 1 2 2 3 ...
## $ P1701 : Factor w/ 9 levels "Continuamente",..: 1 1 2 NA 2 3 2 2 3 5 ...
## $ P1702 : Factor w/ 9 levels "Continuamente",..: 5 1 2 NA 2 NA 2 1 2 NA ...
## $ P1703 : Factor w/ 9 levels "Continuamente",..: 1 1 2 NA 2 NA 2 1 NA NA ...
## $ P1704 : Factor w/ 9 levels "Continuamente",..: 1 1 4 NA 2 NA 2 2 3 NA ...
## $ P1705 : Factor w/ 9 levels "Continuamente",..: 1 NA 7 NA 3 NA 7 NA 3 NA ...
## $ P1706 : Factor w/ 9 levels "Continuamente",..: 1 NA 6 NA 3 NA 7 NA NA NA ...
## $ P1801 : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 1 1 1 NA 1 4 1 1 1 4 ...
## $ P1802 : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 1 1 1 NA 1 4 1 1 1 3 ...
## $ P1803 : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 2 2 1 NA 1 4 1 1 2 4 ...
## $ P1804 : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 2 2 2 NA 2 4 2 2 2 4 ...
## $ P1805 : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 1 2 2 NA 2 4 2 2 2 4 ...
## $ P1806 : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 1 1 1 NA 2 4 1 2 1 4 ...
## $ P1807 : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 1 2 2 NA 2 4 2 2 2 4 ...
## $ P1808 : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 2 2 2 NA 2 4 2 2 2 4 ...
## $ P1901 : Factor w/ 8 levels "Mucho","Bastante",..: 4 4 3 NA 3 4 4 3 3 4 ...
## $ P1902 : Factor w/ 8 levels "Mucho","Bastante",..: 5 4 4 NA 3 4 5 3 3 6 ...
## $ P1903 : Factor w/ 8 levels "Mucho","Bastante",..: 2 2 4 NA 2 4 3 3 2 3 ...
## $ P1904 : Factor w/ 8 levels "Mucho","Bastante",..: 2 3 3 NA 2 4 3 3 3 6 ...
## $ P1905 : Factor w/ 8 levels "Mucho","Bastante",..: 3 3 4 NA 3 4 3 3 4 6 ...
## $ P1906 : Factor w/ 8 levels "Mucho","Bastante",..: 4 4 1 NA 3 4 4 3 4 6 ...
## $ P20 : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: 2 2 2 NA 2 2 2 2 2 3 ...
## $ P21 : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: 2 2 2 NA 2 2 2 2 2 3 ...
## $ P22 : Factor w/ 3 levels "Sí","No","N.C.": 2 1 1 1 1 1 2 1 1 1 ...
## $ P23 : atomic NA 1 2 2 1 2 NA 3 2 2 ...
## ..- attr(*, "value.labels")= Named num 99 4 3 2 1
## .. ..- attr(*, "names")= chr "N.C." "Cuatro " "Tres " "Dos " ...
## $ P2401 : atomic NA 19 0 45 4 35 NA 31 20 98 ...
## ..- attr(*, "value.labels")= Named num 99 98 97
## .. ..- attr(*, "names")= chr "N.C." "N.R." "N.P.(No tiene más hijos/as)"
## $ P2402 : atomic NA 97 3 47 97 43 NA 33 25 98 ...
## ..- attr(*, "value.labels")= Named num 99 98 97
## .. ..- attr(*, "names")= chr "N.C." "N.R." "N.P.(No tiene más hijos/as)"
## $ P2403 : atomic NA 97 97 97 97 97 NA 37 97 97 ...
## ..- attr(*, "value.labels")= Named num 99 98 97
## .. ..- attr(*, "names")= chr "N.C." "N.R." "N.P.(No tiene más hijos/as)"
## $ P2404 : atomic NA 97 97 97 97 97 NA 97 97 97 ...
## ..- attr(*, "value.labels")= Named num 99 98 97
## .. ..- attr(*, "names")= chr "N.C." "N.R." "N.P.(No tiene más hijos/as)"
## $ P25 : Factor w/ 6 levels "Sí, he hablado de ese tema varias veces con ellos/as ",..: NA 2 NA NA NA NA NA NA 2 NA ...
## $ P26 : Factor w/ 7 levels "Lo hablamos y tiende a prevalecer la opinión de los padres",..: NA 4 NA NA NA NA NA NA 3 NA ...
## $ P2701 : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: NA 2 NA NA NA 2 NA 2 1 NA ...
## $ P2702 : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: NA 1 NA NA NA 3 NA 1 1 NA ...
## $ P2703 : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: NA 1 NA NA NA 3 NA 2 1 NA ...
## $ P2704 : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: NA 2 NA NA NA 3 NA 2 1 NA ...
## $ P2705 : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: NA 2 NA NA NA 3 NA 2 1 NA ...
## [list output truncated]
## - attr(*, "variable.labels")= Named chr "" "" "Comunidad autónoma" "Provincia" ...
## ..- attr(*, "names")= chr "ESTU" "CUES" "CCAA" "PROV" ...
## - attr(*, "codepage")= int 1252
datos00 <- datos[,52:58]
datos00 <- as.data.frame(sapply(datos00, as.numeric))
f <- cbind(P1501,P1502,P1503,P1504,P1505,P1506,P1507)~1
set.seed(10)
v.lca <- poLCA(formula=f, na.rm=TRUE, data=datos00, nclass=2)
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $P1501
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.4851 0.4229 0.0871 0.0044 0 0.0005 0.0000
## class 2: 0.1439 0.3399 0.2995 0.2103 0 0.0032 0.0032
##
## $P1502
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.3597 0.4407 0.1810 0.0187 0.000 0.0000 0.0000
## class 2: 0.0034 0.0820 0.1394 0.7385 0.008 0.0223 0.0064
##
## $P1503
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.4568 0.4255 0.1135 0.0006 0.0000 0.0010 0.0027
## class 2: 0.0043 0.0947 0.1327 0.7105 0.0271 0.0242 0.0064
##
## $P1504
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.3197 0.4071 0.2331 0.0294 0.0029 0.0046 0.0032
## class 2: 0.0000 0.0214 0.0911 0.7754 0.0776 0.0295 0.0049
##
## $P1505
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.1212 0.2324 0.4494 0.1844 0.0023 0.0076 0.0027
## class 2: 0.0016 0.0022 0.0195 0.8176 0.1320 0.0222 0.0049
##
## $P1506
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.3096 0.4174 0.2217 0.0430 0.0032 0.0030 0.0021
## class 2: 0.0107 0.0724 0.0688 0.6989 0.1214 0.0229 0.0049
##
## $P1507
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.1268 0.2251 0.4179 0.2080 0.0110 0.0091 0.0021
## class 2: 0.0138 0.0078 0.0151 0.7832 0.1541 0.0210 0.0049
##
## Estimated class population shares
## 0.7465 0.2535
##
## Predicted class memberships (by modal posterior prob.)
## 0.7488 0.2512
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 2476
## number of estimated parameters: 85
## residual degrees of freedom: 2391
## maximum log-likelihood: -20683.16
##
## AIC(2): 41536.33
## BIC(2): 42030.55
## G^2(2): 12512.4 (Likelihood ratio/deviance statistic)
## X^2(2): 8.244755e+13 (Chi-square goodness of fit)
##
plot(v.lca)

v.lca <- poLCA(formula= f, na.rm=TRUE, data=datos00, nclass=3)
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $P1501
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.8166 0.1574 0.0224 0.0036 0 0.0000 0.0000
## class 2: 0.1486 0.3222 0.2944 0.2347 0 0.0000 0.0000
## class 3: 0.2153 0.6196 0.1525 0.0080 0 0.0027 0.0018
##
## $P1502
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.7843 0.1484 0.0530 0.0143 0.0000 0.0000 0.0000
## class 2: 0.0062 0.0323 0.1050 0.8331 0.0092 0.0124 0.0018
## class 3: 0.0216 0.6524 0.2884 0.0284 0.0000 0.0065 0.0027
##
## $P1503
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.9591 0.0357 0.0028 0.0011 0.0000 0.0000 0.0013
## class 2: 0.0076 0.0401 0.0957 0.8115 0.0312 0.0124 0.0016
## class 3: 0.0545 0.7131 0.2139 0.0029 0.0000 0.0092 0.0064
##
## $P1504
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.7046 0.1962 0.0813 0.0125 0.0018 0.0012 0.0024
## class 2: 0.0000 0.0035 0.0451 0.8591 0.0878 0.0045 0.0000
## class 3: 0.0148 0.5419 0.3562 0.0556 0.0041 0.0212 0.0063
##
## $P1505
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.2682 0.2537 0.3345 0.1368 0.0025 0.0030 0.0014
## class 2: 0.0018 0.0019 0.0099 0.8452 0.1398 0.0014 0.0000
## class 3: 0.0048 0.2001 0.5066 0.2522 0.0078 0.0223 0.0062
##
## $P1506
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.5846 0.2811 0.1000 0.0293 0.0025 0.0000 0.0025
## class 2: 0.0116 0.0537 0.0529 0.7512 0.1269 0.0037 0.0000
## class 3: 0.0861 0.5009 0.3073 0.0755 0.0096 0.0161 0.0045
##
## $P1507
## Pr(1) Pr(2) Pr(3) Pr(4) Pr(5) Pr(6) Pr(7)
## class 1: 0.2629 0.2503 0.3259 0.1548 0.0049 0.0012 0.0000
## class 2: 0.0147 0.0073 0.0093 0.8062 0.1607 0.0018 0.0000
## class 3: 0.0185 0.1910 0.4585 0.2779 0.0227 0.0251 0.0063
##
## Estimated class population shares
## 0.3293 0.2204 0.4503
##
## Predicted class memberships (by modal posterior prob.)
## 0.3308 0.2217 0.4475
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 2476
## number of estimated parameters: 128
## residual degrees of freedom: 2348
## maximum log-likelihood: -18646.23
##
## AIC(3): 37548.45
## BIC(3): 38292.69
## G^2(3): 8438.526 (Likelihood ratio/deviance statistic)
## X^2(3): 1.065842e+14 (Chi-square goodness of fit)
##
plot(v.lca)
datos$clases <- v.lca$predclass
library(lattice)

datos$P48<-as.numeric(as.character(datos$P48))
bwplot(P48~as.factor(clases), data = datos, ylab="EDAD")

histogram(~as.factor(P11) | as.factor(clases), data=datos,
xlab="(+) Acceso - (+) Seguridad",
ylab="Porcentaje total")

datos01<-subset(datos, subset = P11<98)
tapply(X=datos01$P11, INDEX = datos01$clases, FUN = mean)
## 1 2 3
## 5.175505 6.561321 5.493732
histogram(~as.factor(P1204) | as.factor(clases), data=datos,
xlab="(+) desacuerdo - (+) acuerdo",
ylab="Porcentaje total")

datos02<-subset(datos, subset = P1204<98)
tapply(X=datos02$P1204, INDEX = datos02$clases, FUN = mean)
## 1 2 3
## 3.335006 5.071078 4.208780
#Fuente de los datos:
# Centro de Investigaciones Sociológicas. (CIS).
#Título: BARÓMETRO DE MARZO 2015.
#Número: 3057.
#Fecha: 02-03-2015.
#Fecha de descarga: 10 de marzo de 2016.
## Replication of latent class models in Agresti (2002, p. 543),
## Table 13.2 and Table 13.3.
#Diagnoses of carcinoma (sample data)
data(carcinoma)
f <- cbind(A,B,C,D,E,F,G)~1
lca2 <- poLCA(f,carcinoma,nclass=2) # log-likelihood: -317.2568
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $A
## Pr(1) Pr(2)
## class 1: 0.8835 0.1165
## class 2: 0.0000 1.0000
##
## $B
## Pr(1) Pr(2)
## class 1: 0.6456 0.3544
## class 2: 0.0169 0.9831
##
## $C
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 0.2391 0.7609
##
## $D
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 0.4589 0.5411
##
## $E
## Pr(1) Pr(2)
## class 1: 0.7771 0.2229
## class 2: 0.0214 0.9786
##
## $F
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 0.5773 0.4227
##
## $G
## Pr(1) Pr(2)
## class 1: 0.8835 0.1165
## class 2: 0.0000 1.0000
##
## Estimated class population shares
## 0.4988 0.5012
##
## Predicted class memberships (by modal posterior prob.)
## 0.5 0.5
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 118
## number of estimated parameters: 15
## residual degrees of freedom: 103
## maximum log-likelihood: -317.2568
##
## AIC(2): 664.5137
## BIC(2): 706.0739
## G^2(2): 62.36543 (Likelihood ratio/deviance statistic)
## X^2(2): 92.64814 (Chi-square goodness of fit)
##
lca3 <- poLCA(f,carcinoma,nclass=3) # log-likelihood: -293.705
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $A
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.4872 0.5128
## class 3: 0.9427 0.0573
##
## $B
## Pr(1) Pr(2)
## class 1: 0.0191 0.9809
## class 2: 0.0000 1.0000
## class 3: 0.8621 0.1379
##
## $C
## Pr(1) Pr(2)
## class 1: 0.1425 0.8575
## class 2: 1.0000 0.0000
## class 3: 1.0000 0.0000
##
## $D
## Pr(1) Pr(2)
## class 1: 0.4138 0.5862
## class 2: 0.9424 0.0576
## class 3: 1.0000 0.0000
##
## $E
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.2494 0.7506
## class 3: 0.9449 0.0551
##
## $F
## Pr(1) Pr(2)
## class 1: 0.5236 0.4764
## class 2: 1.0000 0.0000
## class 3: 1.0000 0.0000
##
## $G
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.3693 0.6307
## class 3: 1.0000 0.0000
##
## Estimated class population shares
## 0.4447 0.1817 0.3736
##
## Predicted class memberships (by modal posterior prob.)
## 0.4322 0.1949 0.3729
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 118
## number of estimated parameters: 23
## residual degrees of freedom: 95
## maximum log-likelihood: -293.705
##
## AIC(3): 633.41
## BIC(3): 697.1357
## G^2(3): 15.26171 (Likelihood ratio/deviance statistic)
## X^2(3): 20.50336 (Chi-square goodness of fit)
##
lca4 <- poLCA(f,carcinoma,nclass=4,nrep=10,maxiter=5000) # log-likelihood: -289.2858
## Model 1: llik = -289.7889 ... best llik = -289.7889
## Model 2: llik = -292.493 ... best llik = -289.7889
## Model 3: llik = -291.2649 ... best llik = -289.7889
## Model 4: llik = -289.2858 ... best llik = -289.2858
## Model 5: llik = -289.7889 ... best llik = -289.2858
## Model 6: llik = -289.7889 ... best llik = -289.2858
## Model 7: llik = -291.2649 ... best llik = -289.2858
## Model 8: llik = -292.493 ... best llik = -289.2858
## Model 9: llik = -289.2858 ... best llik = -289.2858
## Model 10: llik = -289.2858 ... best llik = -289.2858
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $A
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.0000 1.0000
## class 3: 0.4634 0.5366
## class 4: 0.9422 0.0578
##
## $B
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.0905 0.9095
## class 3: 0.0000 1.0000
## class 4: 0.8584 0.1416
##
## $C
## Pr(1) Pr(2)
## class 1: 0.1561 0.8439
## class 2: 0.0186 0.9814
## class 3: 1.0000 0.0000
## class 4: 1.0000 0.0000
##
## $D
## Pr(1) Pr(2)
## class 1: 0.2421 0.7579
## class 2: 1.0000 0.0000
## class 3: 0.9404 0.0596
## class 4: 1.0000 0.0000
##
## $E
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.0000 1.0000
## class 3: 0.2341 0.7659
## class 4: 0.9443 0.0557
##
## $F
## Pr(1) Pr(2)
## class 1: 0.3823 0.6177
## class 2: 1.0000 0.0000
## class 3: 1.0000 0.0000
## class 4: 1.0000 0.0000
##
## $G
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.0000 1.0000
## class 3: 0.3482 0.6518
## class 4: 1.0000 0.0000
##
## Estimated class population shares
## 0.343 0.0936 0.1882 0.3751
##
## Predicted class memberships (by modal posterior prob.)
## 0.3136 0.1186 0.1949 0.3729
##
## =========================================================
## Fit for 4 latent classes:
## =========================================================
## number of observations: 118
## number of estimated parameters: 31
## residual degrees of freedom: 87
## maximum log-likelihood: -289.2858
##
## AIC(4): 640.5717
## BIC(4): 726.4629
## G^2(4): 6.423452 (Likelihood ratio/deviance statistic)
## X^2(4): 10.08438 (Chi-square goodness of fit)
##
## Example 1. Two-class LCA. (Table 3.3, p. 32)
##PAM y el engaño crónico (datos de la muestra)
data(cheating)
f <- cbind(LIEEXAM,LIEPAPER,FRAUD,COPYEXAM)~1
ch2 <- poLCA(f,cheating,nclass=2) # log-likelihood: -440.0271
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $LIEEXAM
## Pr(1) Pr(2)
## class 1: 0.4231 0.5769
## class 2: 0.9834 0.0166
##
## $LIEPAPER
## Pr(1) Pr(2)
## class 1: 0.4109 0.5891
## class 2: 0.9708 0.0292
##
## $FRAUD
## Pr(1) Pr(2)
## class 1: 0.7840 0.2160
## class 2: 0.9629 0.0371
##
## $COPYEXAM
## Pr(1) Pr(2)
## class 1: 0.6236 0.3764
## class 2: 0.8181 0.1819
##
## Estimated class population shares
## 0.1606 0.8394
##
## Predicted class memberships (by modal posterior prob.)
## 0.1693 0.8307
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 319
## number of estimated parameters: 9
## residual degrees of freedom: 6
## maximum log-likelihood: -440.0271
##
## AIC(2): 898.0542
## BIC(2): 931.9409
## G^2(2): 7.764242 (Likelihood ratio/deviance statistic)
## X^2(2): 8.3234 (Chi-square goodness of fit)
##
##
## Example 2. Two-class latent class regression using
## GPA as a covariate to predict class membership as
## "cheaters" vs. "non-cheaters".
## (Table 7.1, p. 85, and Figure 7.1, p. 86)
##
f2 <- cbind(LIEEXAM,LIEPAPER,FRAUD,COPYEXAM)~GPA
ch2c <- poLCA(f2,cheating,nclass=2) # log-likelihood: -429.6384
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $LIEEXAM
## Pr(1) Pr(2)
## class 1: 0.9903 0.0097
## class 2: 0.4389 0.5611
##
## $LIEPAPER
## Pr(1) Pr(2)
## class 1: 0.9647 0.0353
## class 2: 0.4858 0.5142
##
## $FRAUD
## Pr(1) Pr(2)
## class 1: 0.9655 0.0345
## class 2: 0.7850 0.2150
##
## $COPYEXAM
## Pr(1) Pr(2)
## class 1: 0.8257 0.1743
## class 2: 0.5925 0.4075
##
## Estimated class population shares
## 0.8219 0.1781
##
## Predicted class memberships (by modal posterior prob.)
## 0.8508 0.1492
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## 2 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) 0.11343 0.50992 0.222 0.833
## GPA -0.84248 0.28131 -2.995 0.030
## =========================================================
## number of observations: 315
## number of estimated parameters: 10
## residual degrees of freedom: 5
## maximum log-likelihood: -429.6384
##
## AIC(2): 879.2768
## BIC(2): 916.8025
## X^2(2): 8.641731 (Chi-square goodness of fit)
##
## ALERT: estimation algorithm automatically restarted with new initial values
##
GPAmat <- cbind(1,c(1:5))
exb <- exp(GPAmat %*% ch2c$coeff)
matplot(c(1:5),cbind(1/(1+exb),exb/(1+exb)),type="l",lwd=2,
main="GPA as a predictor of persistent cheating",
xlab="GPA category, low to high",
ylab="Probability of latent class membership")
text(1.7,0.3,"Cheaters")
text(1.7,0.7,"Non-cheaters")
##
## Compare results from Example 1 to Example 2.
## Non-simultaneous estimation of effect of GPA on latent class
## membership biases the estimated effect in Example 1.
##
cheatcl <- which.min(ch2$P)
predcc <- sapply(c(1:5),function(v) mean(ch2$posterior[cheating$GPA==v,cheatcl],na.rm=TRUE))
## Having run Ex.2, add to plot:
matplot(c(1:5),cbind(1-predcc,predcc),type="l",lwd=2,add=TRUE)
text(4,0.14,"Cheaters\n (non-simul. estimate)")
text(4,0.87,"Non-cheaters\n (non-simul. estimate)")

#2000 National Election Studies survey (sample data)
# Latent class models with one (loglinear independence) to three classes
data(election)
f <- cbind(MORALG,CARESG,KNOWG,LEADG,DISHONG,INTELG,
MORALB,CARESB,KNOWB,LEADB,DISHONB,INTELB)~1
nes1 <- poLCA(f,election,nclass=1) # log-likelihood: -18647.31
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $MORALG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.2578 0.4783 0.1808 0.0831
##
## $CARESG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.1594 0.4272 0.2807 0.1327
##
## $KNOWG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.2723 0.5622 0.1297 0.0359
##
## $LEADG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.148 0.4249 0.3143 0.1129
##
## $DISHONG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0877 0.1869 0.3898 0.3356
##
## $INTELG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.286 0.5667 0.1098 0.0374
##
## $MORALB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.209 0.5195 0.2136 0.058
##
## $CARESB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0999 0.3791 0.3349 0.1861
##
## $KNOWB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.1594 0.5454 0.2174 0.0778
##
## $LEADB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.1632 0.5072 0.2426 0.087
##
## $DISHONB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0427 0.18 0.4218 0.3555
##
## $INTELB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.1937 0.5545 0.1869 0.0648
##
## Estimated class population shares
## 1
##
## Predicted class memberships (by modal posterior prob.)
## 1
##
## =========================================================
## Fit for 1 latent classes:
## =========================================================
## number of observations: 1311
## number of estimated parameters: 36
## residual degrees of freedom: 1275
## maximum log-likelihood: -18647.31
##
## AIC(1): 37366.62
## BIC(1): 37553.05
## G^2(1): 18882 (Likelihood ratio/deviance statistic)
## X^2(1): 29318840470 (Chi-square goodness of fit)
##
nes2 <- poLCA(f,election,nclass=2) # log-likelihood: -17344.92
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $MORALG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.4247 0.4944 0.0623 0.0186
## class 2: 0.1172 0.4647 0.2806 0.1375
##
## $CARESG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.2982 0.5298 0.1208 0.0512
## class 2: 0.0426 0.3407 0.4154 0.2014
##
## $KNOWG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.4354 0.5029 0.0404 0.0214
## class 2: 0.1349 0.6121 0.2049 0.0481
##
## $LEADG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.2791 0.5508 0.1417 0.0284
## class 2: 0.0375 0.3188 0.4596 0.1840
##
## $DISHONG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0350 0.068 0.3921 0.505
## class 2: 0.1321 0.287 0.3879 0.193
##
## $INTELG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.4349 0.4951 0.0454 0.0246
## class 2: 0.1606 0.6271 0.1641 0.0481
##
## $MORALB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0799 0.4465 0.3564 0.1172
## class 2: 0.3177 0.5809 0.0933 0.0081
##
## $CARESB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0138 0.1376 0.4693 0.3793
## class 2: 0.1724 0.5825 0.2216 0.0234
##
## $KNOWB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0579 0.3873 0.3849 0.1699
## class 2: 0.2449 0.6786 0.0763 0.0002
##
## $LEADB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0259 0.3339 0.4560 0.1842
## class 2: 0.2789 0.6532 0.0628 0.0050
##
## $DISHONB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0688 0.2796 0.4650 0.1865
## class 2: 0.0208 0.0961 0.3854 0.4977
##
## $INTELB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0958 0.4191 0.3433 0.1418
## class 2: 0.2763 0.6686 0.0551 0.0000
##
## Estimated class population shares
## 0.4572 0.5428
##
## Predicted class memberships (by modal posterior prob.)
## 0.4546 0.5454
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 1311
## number of estimated parameters: 73
## residual degrees of freedom: 1238
## maximum log-likelihood: -17344.92
##
## AIC(2): 34835.85
## BIC(2): 35213.88
## G^2(2): 16277.22 (Likelihood ratio/deviance statistic)
## X^2(2): 2.57217e+11 (Chi-square goodness of fit)
##
nes3 <- poLCA(f,election,nclass=3) # log-likelihood: -16714.66
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $MORALG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.1851 0.3668 0.2350 0.2131
## class 2: 0.5224 0.4109 0.0390 0.0277
## class 3: 0.1013 0.5990 0.2552 0.0446
##
## $CARESG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0879 0.2546 0.3524 0.3051
## class 2: 0.3970 0.4605 0.0895 0.0529
## class 3: 0.0227 0.5090 0.3819 0.0864
##
## $KNOWG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.2542 0.4253 0.2382 0.0822
## class 2: 0.5878 0.3543 0.0266 0.0312
## class 3: 0.0429 0.8058 0.1408 0.0105
##
## $LEADG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0747 0.2582 0.3856 0.2816
## class 2: 0.3747 0.4772 0.1085 0.0396
## class 3: 0.0207 0.4886 0.4268 0.0639
##
## $DISHONG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.2007 0.3060 0.2774 0.2158
## class 2: 0.0426 0.0487 0.3371 0.5717
## class 3: 0.0519 0.2182 0.4998 0.2301
##
## $INTELG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.2936 0.4407 0.1852 0.0805
## class 2: 0.5763 0.3698 0.0197 0.0341
## class 3: 0.0600 0.7953 0.1317 0.0130
##
## $MORALB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.5797 0.3787 0.0248 0.0168
## class 2: 0.0966 0.3795 0.3638 0.1601
## class 3: 0.0642 0.7137 0.2164 0.0057
##
## $CARESB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.3483 0.5362 0.0836 0.0319
## class 2: 0.0162 0.1125 0.4045 0.4668
## class 3: 0.0093 0.4847 0.4380 0.0679
##
## $KNOWB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.4970 0.4899 0.0132 0.0000
## class 2: 0.0682 0.3115 0.3828 0.2374
## class 3: 0.0191 0.7583 0.2182 0.0045
##
## $LEADB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.5106 0.4648 0.0187 0.0058
## class 2: 0.0236 0.2735 0.4524 0.2505
## class 3: 0.0537 0.7119 0.2217 0.0127
##
## $DISHONB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0365 0.0740 0.1993 0.6902
## class 2: 0.0862 0.3096 0.4111 0.1931
## class 3: 0.0134 0.1471 0.5683 0.2711
##
## $INTELB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.5284 0.4656 0.0060 0.0000
## class 2: 0.1179 0.3391 0.3440 0.1990
## class 3: 0.0435 0.7741 0.1796 0.0028
##
## Estimated class population shares
## 0.2608 0.3198 0.4194
##
## Predicted class memberships (by modal posterior prob.)
## 0.2578 0.3166 0.4256
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 1311
## number of estimated parameters: 110
## residual degrees of freedom: 1201
## maximum log-likelihood: -16714.66
##
## AIC(3): 33649.32
## BIC(3): 34218.96
## G^2(3): 15016.7 (Likelihood ratio/deviance statistic)
## X^2(3): 16748924926 (Chi-square goodness of fit)
##
# Three-class model with a single covariate (party)
f2a <- cbind(MORALG,CARESG,KNOWG,LEADG,DISHONG,INTELG,
MORALB,CARESB,KNOWB,LEADB,DISHONB,INTELB)~PARTY
nes2a <- poLCA(f2a,election,nclass=3,nrep=5) # log-likelihood: -16222.32
## Model 1: llik = -16222.32 ... best llik = -16222.32
## Model 2: llik = -16222.32 ... best llik = -16222.32
## Model 3: llik = -16222.32 ... best llik = -16222.32
## Model 4: llik = -16227.43 ... best llik = -16222.32
## Model 5: llik = -16222.32 ... best llik = -16222.32
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $MORALG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.1373 0.6682 0.1802 0.0143
## class 2: 0.6221 0.3350 0.0172 0.0258
## class 3: 0.1081 0.3832 0.3038 0.2048
##
## $CARESG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0388 0.6138 0.2886 0.0589
## class 2: 0.4858 0.4164 0.0534 0.0444
## class 3: 0.0356 0.2281 0.4501 0.2861
##
## $KNOWG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0712 0.8173 0.1025 0.009
## class 2: 0.7189 0.2451 0.0040 0.032
## class 3: 0.1436 0.5327 0.2556 0.068
##
## $LEADG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0256 0.6280 0.3144 0.0320
## class 2: 0.4720 0.4326 0.0643 0.0311
## class 3: 0.0278 0.1899 0.5137 0.2685
##
## $DISHONG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0210 0.1412 0.5341 0.3037
## class 2: 0.0518 0.0538 0.2890 0.6054
## class 3: 0.1876 0.3435 0.3078 0.1611
##
## $INTELG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0698 0.8159 0.1003 0.0140
## class 2: 0.7381 0.2219 0.0089 0.0311
## class 3: 0.1704 0.5597 0.2000 0.0698
##
## $MORALB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0310 0.6326 0.3003 0.0361
## class 2: 0.1610 0.3749 0.3163 0.1478
## class 3: 0.4477 0.5135 0.0313 0.0075
##
## $CARESB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0047 0.3274 0.5083 0.1597
## class 2: 0.0458 0.1490 0.3780 0.4272
## class 3: 0.2516 0.6185 0.1097 0.0202
##
## $KNOWB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0121 0.6511 0.2920 0.0447
## class 2: 0.1300 0.3503 0.2989 0.2209
## class 3: 0.3427 0.5913 0.0660 0.0000
##
## $LEADB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0301 0.5884 0.3294 0.0521
## class 2: 0.0743 0.3035 0.3882 0.2340
## class 3: 0.3850 0.5803 0.0287 0.0060
##
## $DISHONB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0264 0.1855 0.5931 0.1950
## class 2: 0.0914 0.3117 0.3517 0.2451
## class 3: 0.0163 0.0680 0.2923 0.6234
##
## $INTELB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0350 0.6646 0.2616 0.0388
## class 2: 0.1877 0.3637 0.2698 0.1787
## class 3: 0.3765 0.5878 0.0357 0.0000
##
## Estimated class population shares
## 0.3859 0.2736 0.3405
##
## Predicted class memberships (by modal posterior prob.)
## 0.3815 0.2769 0.3415
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## 2 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) 1.16155 0.17989 6.457 0
## PARTY -0.57436 0.06401 -8.973 0
## =========================================================
## 3 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) -3.81813 0.31109 -12.274 0
## PARTY 0.79327 0.06232 12.728 0
## =========================================================
## number of observations: 1300
## number of estimated parameters: 112
## residual degrees of freedom: 1188
## maximum log-likelihood: -16222.32
##
## AIC(3): 32668.65
## BIC(3): 33247.7
## X^2(3): 34565233197 (Chi-square goodness of fit)
##
## ALERT: estimation algorithm automatically restarted with new initial values
##
pidmat <- cbind(1,c(1:7))
exb <- exp(pidmat %*% nes2a$coeff)
matplot(c(1:7),(cbind(1,exb)/(1+rowSums(exb))),ylim=c(0,1),type="l",
main="Party ID as a predictor of candidate affinity class",
xlab="Party ID: strong Democratic (1) to strong Republican (7)",
ylab="Probability of latent class membership",lwd=2,col=1)
text(5.9,0.35,"Other")
text(5.4,0.7,"Bush affinity")
text(1.8,0.6,"Gore affinity")

# 1982 General Social Survey (sample data)
data(gss82)
f <- cbind(PURPOSE,ACCURACY,UNDERSTA,COOPERAT)~1
gss.lc2 <- poLCA(f,gss82,nclass=2) # log-likelihood = -2783.268
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $PURPOSE
## Good Depends Waste of time
## class 1: 0.8953 0.0579 0.0468
## class 2: 0.2154 0.2066 0.5780
##
## $ACCURACY
## Mostly true Not true
## class 1: 0.6367 0.3633
## class 2: 0.0297 0.9703
##
## $UNDERSTA
## Good Fair/Poor
## class 1: 0.8327 0.1673
## class 2: 0.7422 0.2578
##
## $COOPERAT
## Interested Cooperative Impatient
## class 1: 0.8840 0.1043 0.0117
## class 2: 0.6478 0.2498 0.1024
##
## Estimated class population shares
## 0.8077 0.1923
##
## Predicted class memberships (by modal posterior prob.)
## 0.8136 0.1864
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 1202
## number of estimated parameters: 13
## residual degrees of freedom: 22
## maximum log-likelihood: -2783.268
##
## AIC(2): 5592.536
## BIC(2): 5658.729
## G^2(2): 79.33723 (Likelihood ratio/deviance statistic)
## X^2(2): 93.25329 (Chi-square goodness of fit)
##
# Could also try:
# gss.lc3 <- poLCA(f,gss82,nclass=3,maxiter=3000,nrep=10) # log-likelihood = -2754.545
# gss.lc4 <- poLCA(f,gss82,nclass=4,maxiter=15000,nrep=10,tol=1e-7) # log-likelihood = -2746.621
#Latent class analysis of polytomous outcome variables
data(values)
f <- cbind(A,B,C,D)~1
M0 <- poLCA(f,values,nclass=1) # log-likelihood: -543.6498
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $A
## Pr(1) Pr(2)
## class 1: 0.2083 0.7917
##
## $B
## Pr(1) Pr(2)
## class 1: 0.5 0.5
##
## $C
## Pr(1) Pr(2)
## class 1: 0.4861 0.5139
##
## $D
## Pr(1) Pr(2)
## class 1: 0.6898 0.3102
##
## Estimated class population shares
## 1
##
## Predicted class memberships (by modal posterior prob.)
## 1
##
## =========================================================
## Fit for 1 latent classes:
## =========================================================
## number of observations: 216
## number of estimated parameters: 4
## residual degrees of freedom: 11
## maximum log-likelihood: -543.6498
##
## AIC(1): 1095.3
## BIC(1): 1108.801
## G^2(1): 81.08423 (Likelihood ratio/deviance statistic)
## X^2(1): 104.1071 (Chi-square goodness of fit)
##
M1 <- poLCA(f,values,nclass=2) # log-likelihood: -504.4677
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $A
## Pr(1) Pr(2)
## class 1: 0.2864 0.7136
## class 2: 0.0068 0.9932
##
## $B
## Pr(1) Pr(2)
## class 1: 0.6704 0.3296
## class 2: 0.0602 0.9398
##
## $C
## Pr(1) Pr(2)
## class 1: 0.6460 0.3540
## class 2: 0.0735 0.9265
##
## $D
## Pr(1) Pr(2)
## class 1: 0.8676 0.1324
## class 2: 0.2309 0.7691
##
## Estimated class population shares
## 0.7208 0.2792
##
## Predicted class memberships (by modal posterior prob.)
## 0.6713 0.3287
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 216
## number of estimated parameters: 9
## residual degrees of freedom: 6
## maximum log-likelihood: -504.4677
##
## AIC(2): 1026.935
## BIC(2): 1057.313
## G^2(2): 2.719922 (Likelihood ratio/deviance statistic)
## X^2(2): 2.719764 (Chi-square goodness of fit)
##
M2 <- poLCA(f,values,nclass=3,maxiter=8000) # log-likelihood: -503.3011
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $A
## Pr(1) Pr(2)
## class 1: 0.5320 0.4680
## class 2: 0.1599 0.8401
## class 3: 0.0024 0.9976
##
## $B
## Pr(1) Pr(2)
## class 1: 0.9132 0.0868
## class 2: 0.5097 0.4903
## class 3: 0.0218 0.9782
##
## $C
## Pr(1) Pr(2)
## class 1: 0.7335 0.2665
## class 2: 0.5573 0.4427
## class 3: 0.0031 0.9969
##
## $D
## Pr(1) Pr(2)
## class 1: 0.9266 0.0734
## class 2: 0.8024 0.1976
## class 3: 0.0938 0.9062
##
## Estimated class population shares
## 0.2132 0.5906 0.1962
##
## Predicted class memberships (by modal posterior prob.)
## 0.1435 0.662 0.1944
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 216
## number of estimated parameters: 14
## residual degrees of freedom: 1
## maximum log-likelihood: -503.3011
##
## AIC(3): 1034.602
## BIC(3): 1081.856
## G^2(3): 0.3868562 (Likelihood ratio/deviance statistic)
## X^2(3): 0.4225485 (Chi-square goodness of fit)
##
##
## Three-class model with a single covariate.
##
data(election)
f2a <- cbind(MORALG,CARESG,KNOWG,LEADG,DISHONG,INTELG,
MORALB,CARESB,KNOWB,LEADB,DISHONB,INTELB)~PARTY
nes2a <- poLCA(f2a,election,nclass=3,nrep=5) # log-likelihood: -16222.32
## Model 1: llik = -16222.32 ... best llik = -16222.32
## Model 2: llik = -16222.32 ... best llik = -16222.32
## Model 3: llik = -16222.32 ... best llik = -16222.32
## Model 4: llik = -16222.32 ... best llik = -16222.32
## Model 5: llik = -16222.32 ... best llik = -16222.32
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $MORALG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.1373 0.6682 0.1802 0.0143
## class 2: 0.6221 0.3350 0.0172 0.0258
## class 3: 0.1081 0.3832 0.3038 0.2048
##
## $CARESG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0388 0.6138 0.2886 0.0589
## class 2: 0.4858 0.4164 0.0534 0.0444
## class 3: 0.0356 0.2281 0.4501 0.2861
##
## $KNOWG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0712 0.8173 0.1025 0.009
## class 2: 0.7189 0.2451 0.0040 0.032
## class 3: 0.1436 0.5327 0.2556 0.068
##
## $LEADG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0256 0.6280 0.3144 0.0320
## class 2: 0.4720 0.4326 0.0643 0.0311
## class 3: 0.0278 0.1899 0.5137 0.2685
##
## $DISHONG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0210 0.1412 0.5341 0.3037
## class 2: 0.0518 0.0538 0.2890 0.6054
## class 3: 0.1876 0.3435 0.3078 0.1611
##
## $INTELG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0698 0.8159 0.1003 0.0140
## class 2: 0.7381 0.2219 0.0089 0.0311
## class 3: 0.1704 0.5597 0.2000 0.0698
##
## $MORALB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0310 0.6326 0.3003 0.0361
## class 2: 0.1610 0.3749 0.3163 0.1478
## class 3: 0.4477 0.5135 0.0313 0.0075
##
## $CARESB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0047 0.3274 0.5083 0.1597
## class 2: 0.0458 0.1490 0.3780 0.4272
## class 3: 0.2516 0.6185 0.1097 0.0202
##
## $KNOWB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0121 0.6511 0.2920 0.0447
## class 2: 0.1300 0.3503 0.2989 0.2209
## class 3: 0.3427 0.5913 0.0660 0.0000
##
## $LEADB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0301 0.5884 0.3294 0.0521
## class 2: 0.0743 0.3035 0.3882 0.2340
## class 3: 0.3850 0.5803 0.0287 0.0060
##
## $DISHONB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0264 0.1855 0.5931 0.1950
## class 2: 0.0914 0.3117 0.3517 0.2451
## class 3: 0.0163 0.0680 0.2923 0.6234
##
## $INTELB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0350 0.6646 0.2616 0.0388
## class 2: 0.1877 0.3637 0.2698 0.1787
## class 3: 0.3765 0.5878 0.0357 0.0000
##
## Estimated class population shares
## 0.3859 0.2736 0.3405
##
## Predicted class memberships (by modal posterior prob.)
## 0.3815 0.2769 0.3415
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## 2 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) 1.16155 0.17989 6.457 0
## PARTY -0.57436 0.06401 -8.973 0
## =========================================================
## 3 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) -3.81813 0.31109 -12.274 0
## PARTY 0.79327 0.06232 12.728 0
## =========================================================
## number of observations: 1300
## number of estimated parameters: 112
## residual degrees of freedom: 1188
## maximum log-likelihood: -16222.32
##
## AIC(3): 32668.65
## BIC(3): 33247.7
## X^2(3): 34565234965 (Chi-square goodness of fit)
##
pidmat <- cbind(1,c(1:7))
exb <- exp(pidmat %*% nes2a$coeff)
matplot(c(1:7),(cbind(1,exb)/(1+rowSums(exb))),ylim=c(0,1),type="l",
main="Party ID as a predictor of candidate affinity class",
xlab="Party ID: strong Democratic (1) to strong Republican (7)",
ylab="Probability of latent class membership",lwd=2,col=1)
text(5.9,0.35,"Other")
text(5.4,0.7,"Bush affinity")
text(1.8,0.6,"Gore affinity")

#Entropy of a fitted latent class model
data(carcinoma)
f <- cbind(A,B,C,D,E,F,G)~1
lca2 <- poLCA(f,carcinoma,nclass=2) # log-likelihood: -317.2568
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $A
## Pr(1) Pr(2)
## class 1: 0.8835 0.1165
## class 2: 0.0000 1.0000
##
## $B
## Pr(1) Pr(2)
## class 1: 0.6456 0.3544
## class 2: 0.0169 0.9831
##
## $C
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 0.2391 0.7609
##
## $D
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 0.4589 0.5411
##
## $E
## Pr(1) Pr(2)
## class 1: 0.7771 0.2229
## class 2: 0.0214 0.9786
##
## $F
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 0.5773 0.4227
##
## $G
## Pr(1) Pr(2)
## class 1: 0.8835 0.1165
## class 2: 0.0000 1.0000
##
## Estimated class population shares
## 0.4988 0.5012
##
## Predicted class memberships (by modal posterior prob.)
## 0.5 0.5
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 118
## number of estimated parameters: 15
## residual degrees of freedom: 103
## maximum log-likelihood: -317.2568
##
## AIC(2): 664.5137
## BIC(2): 706.0739
## G^2(2): 62.36543 (Likelihood ratio/deviance statistic)
## X^2(2): 92.64814 (Chi-square goodness of fit)
##
lca3 <- poLCA(f,carcinoma,nclass=3) # log-likelihood: -293.705
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $A
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.4872 0.5128
## class 3: 0.9427 0.0573
##
## $B
## Pr(1) Pr(2)
## class 1: 0.0191 0.9809
## class 2: 0.0000 1.0000
## class 3: 0.8621 0.1379
##
## $C
## Pr(1) Pr(2)
## class 1: 0.1425 0.8575
## class 2: 1.0000 0.0000
## class 3: 1.0000 0.0000
##
## $D
## Pr(1) Pr(2)
## class 1: 0.4138 0.5862
## class 2: 0.9424 0.0576
## class 3: 1.0000 0.0000
##
## $E
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.2494 0.7506
## class 3: 0.9449 0.0551
##
## $F
## Pr(1) Pr(2)
## class 1: 0.5236 0.4764
## class 2: 1.0000 0.0000
## class 3: 1.0000 0.0000
##
## $G
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.3693 0.6307
## class 3: 1.0000 0.0000
##
## Estimated class population shares
## 0.4447 0.1817 0.3736
##
## Predicted class memberships (by modal posterior prob.)
## 0.4322 0.1949 0.3729
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 118
## number of estimated parameters: 23
## residual degrees of freedom: 95
## maximum log-likelihood: -293.705
##
## AIC(3): 633.41
## BIC(3): 697.1357
## G^2(3): 15.26171 (Likelihood ratio/deviance statistic)
## X^2(3): 20.50335 (Chi-square goodness of fit)
##
lca4 <- poLCA(f,carcinoma,nclass=4,nrep=10,maxiter=5000) # log-likelihood: -289.2858
## Model 1: llik = -291.2649 ... best llik = -291.2649
## Model 2: llik = -291.2649 ... best llik = -291.2649
## Model 3: llik = -292.493 ... best llik = -291.2649
## Model 4: llik = -292.493 ... best llik = -291.2649
## Model 5: llik = -292.493 ... best llik = -291.2649
## Model 6: llik = -289.2858 ... best llik = -289.2858
## Model 7: llik = -289.2858 ... best llik = -289.2858
## Model 8: llik = -289.7889 ... best llik = -289.2858
## Model 9: llik = -292.493 ... best llik = -289.2858
## Model 10: llik = -289.7889 ... best llik = -289.2858
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $A
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.0000 1.0000
## class 3: 0.4634 0.5366
## class 4: 0.9422 0.0578
##
## $B
## Pr(1) Pr(2)
## class 1: 0.0905 0.9095
## class 2: 0.0000 1.0000
## class 3: 0.0000 1.0000
## class 4: 0.8584 0.1416
##
## $C
## Pr(1) Pr(2)
## class 1: 0.0186 0.9814
## class 2: 0.1561 0.8439
## class 3: 1.0000 0.0000
## class 4: 1.0000 0.0000
##
## $D
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 0.2421 0.7579
## class 3: 0.9404 0.0596
## class 4: 1.0000 0.0000
##
## $E
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.0000 1.0000
## class 3: 0.2341 0.7659
## class 4: 0.9443 0.0557
##
## $F
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 0.3823 0.6177
## class 3: 1.0000 0.0000
## class 4: 1.0000 0.0000
##
## $G
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.0000 1.0000
## class 3: 0.3482 0.6518
## class 4: 1.0000 0.0000
##
## Estimated class population shares
## 0.0936 0.343 0.1882 0.3751
##
## Predicted class memberships (by modal posterior prob.)
## 0.1186 0.3136 0.1949 0.3729
##
## =========================================================
## Fit for 4 latent classes:
## =========================================================
## number of observations: 118
## number of estimated parameters: 31
## residual degrees of freedom: 87
## maximum log-likelihood: -289.2858
##
## AIC(4): 640.5717
## BIC(4): 726.4629
## G^2(4): 6.423452 (Likelihood ratio/deviance statistic)
## X^2(4): 10.08438 (Chi-square goodness of fit)
##
# Maximum entropy (if all cases equally dispersed)
log(prod(sapply(lca2$probs,ncol)))
## [1] 4.85203
# Sample entropy ("plug-in" estimator, or MLE)
p.hat <- lca2$predcell$observed/lca2$N
H.hat <- -sum(p.hat * log(p.hat))
H.hat # 2.42
## [1] 2.424357
# Entropy of fitted latent class models
poLCA.entropy(lca2)
## [1] 2.693452
poLCA.entropy(lca3)
## [1] 2.494442
poLCA.entropy(lca4)
## [1] 2.458558
#Posterior probabilities from a latent class model
data(election)
## Basic latent class model with three classes
f1 <- cbind(MORALG,CARESG,KNOWG,LEADG,DISHONG,INTELG,
MORALB,CARESB,KNOWB,LEADB,DISHONB,INTELB)~1
lc1 <- poLCA(f1,election,nclass=3) # log-likelihood: -16714.66
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $MORALG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.5224 0.4109 0.0390 0.0277
## class 2: 0.1013 0.5990 0.2552 0.0446
## class 3: 0.1851 0.3668 0.2350 0.2131
##
## $CARESG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.3970 0.4605 0.0895 0.0529
## class 2: 0.0227 0.5090 0.3819 0.0864
## class 3: 0.0879 0.2546 0.3524 0.3051
##
## $KNOWG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.5878 0.3543 0.0266 0.0312
## class 2: 0.0429 0.8058 0.1408 0.0105
## class 3: 0.2542 0.4253 0.2382 0.0822
##
## $LEADG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.3747 0.4772 0.1085 0.0396
## class 2: 0.0207 0.4886 0.4268 0.0639
## class 3: 0.0747 0.2582 0.3856 0.2816
##
## $DISHONG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0426 0.0487 0.3371 0.5717
## class 2: 0.0519 0.2182 0.4998 0.2301
## class 3: 0.2007 0.3060 0.2774 0.2158
##
## $INTELG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.5763 0.3698 0.0197 0.0341
## class 2: 0.0600 0.7953 0.1317 0.0130
## class 3: 0.2936 0.4407 0.1852 0.0805
##
## $MORALB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0966 0.3795 0.3638 0.1601
## class 2: 0.0642 0.7137 0.2164 0.0057
## class 3: 0.5797 0.3787 0.0248 0.0168
##
## $CARESB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0162 0.1125 0.4045 0.4668
## class 2: 0.0093 0.4847 0.4380 0.0679
## class 3: 0.3483 0.5362 0.0836 0.0319
##
## $KNOWB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0682 0.3115 0.3828 0.2374
## class 2: 0.0191 0.7583 0.2182 0.0045
## class 3: 0.4970 0.4899 0.0132 0.0000
##
## $LEADB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0236 0.2735 0.4524 0.2505
## class 2: 0.0537 0.7119 0.2217 0.0127
## class 3: 0.5106 0.4648 0.0187 0.0058
##
## $DISHONB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0862 0.3096 0.4111 0.1931
## class 2: 0.0134 0.1471 0.5683 0.2711
## class 3: 0.0365 0.0740 0.1993 0.6902
##
## $INTELB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.1179 0.3391 0.3440 0.1990
## class 2: 0.0435 0.7741 0.1796 0.0028
## class 3: 0.5284 0.4656 0.0060 0.0000
##
## Estimated class population shares
## 0.3198 0.4194 0.2608
##
## Predicted class memberships (by modal posterior prob.)
## 0.3166 0.4256 0.2578
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 1311
## number of estimated parameters: 110
## residual degrees of freedom: 1201
## maximum log-likelihood: -16714.66
##
## AIC(3): 33649.32
## BIC(3): 34218.96
## G^2(3): 15016.7 (Likelihood ratio/deviance statistic)
## X^2(3): 16748677477 (Chi-square goodness of fit)
##
# The first observed case
lc1$y[1,]
## MORALG CARESG KNOWG LEADG DISHONG
## 1 3 Not too well 1 Extremely well 2 Quite well 2 Quite well 3 Not too well
## INTELG MORALB CARESB KNOWB LEADB
## 1 2 Quite well 1 Extremely well 1 Extremely well 2 Quite well 2 Quite well
## DISHONB INTELB
## 1 4 Not well at all 2 Quite well
lc1$posterior[1,]
## [1] 0.000831611 0.023509667 0.975658722
poLCA.posterior(lc=lc1,y=as.numeric(lc1$y[1,]))
## [,1] [,2] [,3]
## [1,] 0.0008316111 0.02350969 0.9756587
# A hypothetical case
poLCA.posterior(lc=lc1,y=rep(2,12))
## [,1] [,2] [,3]
## [1,] 0.0003774901 0.996542 0.003080526
# Entering y as a matrix
lc1$posterior[1:10,]
## [,1] [,2] [,3]
## [1,] 8.316110e-04 2.350967e-02 9.756587e-01
## [2,] 2.253789e-01 7.728822e-01 1.738889e-03
## [3,] 3.168471e-03 9.894063e-01 7.425270e-03
## [4,] 3.042976e-05 6.260698e-02 9.373626e-01
## [5,] 9.999747e-01 2.533777e-05 0.000000e+00
## [6,] 1.498978e-03 9.980766e-01 4.244346e-04
## [7,] 9.999943e-01 2.522438e-06 3.128020e-06
## [8,] 1.242045e-03 9.985739e-01 1.840717e-04
## [9,] 6.042419e-03 9.938004e-01 1.571399e-04
## [10,] 3.110120e-01 6.885251e-01 4.629982e-04
poLCA.posterior(lc=lc1,y=mapply(as.numeric,lc1$y[1:10,]))
## [,1] [,2] [,3]
## [1,] 8.316111e-04 2.350969e-02 9.756587e-01
## [2,] 2.253790e-01 7.728821e-01 1.738888e-03
## [3,] 3.168472e-03 9.894063e-01 7.425267e-03
## [4,] 3.042978e-05 6.260707e-02 9.373625e-01
## [5,] 9.999747e-01 2.533771e-05 0.000000e+00
## [6,] 1.498979e-03 9.980766e-01 4.244344e-04
## [7,] 9.999943e-01 2.522436e-06 3.128021e-06
## [8,] 1.242046e-03 9.985739e-01 1.840716e-04
## [9,] 6.042424e-03 9.938004e-01 1.571398e-04
## [10,] 3.110122e-01 6.885248e-01 4.629982e-04
## Latent class regression model with three classes
f2 <- cbind(MORALG,CARESG,KNOWG,LEADG,DISHONG,INTELG,
MORALB,CARESB,KNOWB,LEADB,DISHONB,INTELB)~AGE+EDUC+GENDER
lc2 <- poLCA(f2,election,nclass=3) # log-likelihood: -16598.38
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $MORALG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.6315 0.3241 0.0174 0.0270
## class 2: 0.1390 0.3632 0.2784 0.2195
## class 3: 0.1186 0.6620 0.2063 0.0132
##
## $CARESG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.4968 0.4041 0.0546 0.0446
## class 2: 0.0534 0.2452 0.4100 0.2913
## class 3: 0.0347 0.5795 0.3199 0.0659
##
## $KNOWG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.7254 0.2429 0.0000 0.0318
## class 2: 0.1889 0.4799 0.2537 0.0774
## class 3: 0.0588 0.8195 0.1151 0.0066
##
## $LEADG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.4805 0.4320 0.0558 0.0318
## class 2: 0.0436 0.2259 0.4548 0.2757
## class 3: 0.0230 0.5791 0.3597 0.0382
##
## $DISHONG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0538 0.0382 0.2855 0.6225
## class 2: 0.2007 0.3408 0.2756 0.1829
## class 3: 0.0223 0.1599 0.5445 0.2734
##
## $INTELG
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.7296 0.2298 0.0086 0.0319
## class 2: 0.2225 0.4991 0.2034 0.0750
## class 3: 0.0632 0.8272 0.0974 0.0121
##
## $MORALB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.1339 0.3877 0.3337 0.1448
## class 2: 0.4895 0.4545 0.0390 0.0169
## class 3: 0.0370 0.6529 0.2764 0.0336
##
## $CARESB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0233 0.1293 0.3998 0.4476
## class 2: 0.2818 0.5873 0.1018 0.0291
## class 3: 0.0038 0.3713 0.4751 0.1499
##
## $KNOWB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.1054 0.3440 0.3235 0.2271
## class 2: 0.3947 0.5491 0.0562 0.0000
## class 3: 0.0125 0.6651 0.2785 0.0438
##
## $LEADB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0428 0.2993 0.4138 0.2442
## class 2: 0.4319 0.5274 0.0347 0.0060
## class 3: 0.0314 0.6177 0.2984 0.0525
##
## $DISHONB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.0882 0.3231 0.3657 0.2230
## class 2: 0.0283 0.0790 0.2485 0.6443
## class 3: 0.0247 0.1730 0.5915 0.2108
##
## $INTELB
## 1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1: 0.1542 0.3685 0.2880 0.1894
## class 2: 0.4301 0.5396 0.0303 0.0000
## class 3: 0.0357 0.6795 0.2453 0.0395
##
## Estimated class population shares
## 0.2572 0.3244 0.4184
##
## Predicted class memberships (by modal posterior prob.)
## 0.2579 0.3254 0.4167
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## 2 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) 1.36386 0.48942 2.787 0.005
## AGE -0.00966 0.00536 -1.804 0.071
## EDUC 0.00095 0.05635 0.017 0.987
## GENDER -0.42794 0.17363 -2.465 0.014
## =========================================================
## 3 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) 2.10579 0.46292 4.549 0.000
## AGE -0.02561 0.00519 -4.933 0.000
## EDUC 0.01385 0.05364 0.258 0.796
## GENDER -0.31634 0.16924 -1.869 0.062
## =========================================================
## number of observations: 1303
## number of estimated parameters: 116
## residual degrees of freedom: 1187
## maximum log-likelihood: -16598.38
##
## AIC(3): 33428.77
## BIC(3): 34028.77
## X^2(3): 28324231195 (Chi-square goodness of fit)
##
# Posteriors for case number 97 (poorly classified)
lc2$y[97,]
## MORALG CARESG KNOWG LEADG
## 136 2 Quite well 1 Extremely well 2 Quite well 4 Not well at all
## DISHONG INTELG MORALB CARESB
## 136 4 Not well at all 4 Not well at all 2 Quite well 3 Not too well
## KNOWB LEADB DISHONB INTELB
## 136 2 Quite well 2 Quite well 2 Quite well 2 Quite well
lc2$x[97,]
## X.Intercept. AGE EDUC GENDER
## 136 1 41 6 1
lc2$posterior[97,]
## [1] 0.2879891 0.2132583 0.4987526
poLCA.posterior(lc=lc2,y=as.numeric(lc2$y[97,]),x=c(41,6,1))
## [,1] [,2] [,3]
## [1,] 0.287989 0.2132584 0.4987526
# If x is not specified, the posterior is calculated using the population average
poLCA.posterior(lc=lc2,y=as.numeric(lc2$y[97,]))
## [,1] [,2] [,3]
## [1,] 0.359807 0.194796 0.4453971
# Entering y and x as matrices
round(lc2$posterior[95:100,],2)
## [,1] [,2] [,3]
## [1,] 1.00 0.00 0.00
## [2,] 1.00 0.00 0.00
## [3,] 0.29 0.21 0.50
## [4,] 0.00 1.00 0.00
## [5,] 0.00 1.00 0.00
## [6,] 0.98 0.00 0.02
round(poLCA.posterior(lc=lc2,y=mapply(as.numeric,lc2$y[95:100,]),
x=as.matrix(lc2$x[95:100,-1])),2)
## [,1] [,2] [,3]
## [1,] 1.00 0.00 0.00
## [2,] 1.00 0.00 0.00
## [3,] 0.29 0.21 0.50
## [4,] 0.00 1.00 0.00
## [5,] 0.00 1.00 0.00
## [6,] 0.98 0.00 0.02
#Predicted cell percentages in a latent class model
data(carcinoma)
f <- cbind(A,B,C,D,E,F,G)~1
lca3 <- poLCA(f,carcinoma,nclass=3) # log-likelihood: -293.705
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $A
## Pr(1) Pr(2)
## class 1: 0.9427 0.0573
## class 2: 0.4872 0.5128
## class 3: 0.0000 1.0000
##
## $B
## Pr(1) Pr(2)
## class 1: 0.8621 0.1379
## class 2: 0.0000 1.0000
## class 3: 0.0191 0.9809
##
## $C
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 1.0000 0.0000
## class 3: 0.1425 0.8575
##
## $D
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 0.9424 0.0576
## class 3: 0.4138 0.5862
##
## $E
## Pr(1) Pr(2)
## class 1: 0.9449 0.0551
## class 2: 0.2494 0.7506
## class 3: 0.0000 1.0000
##
## $F
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 1.0000 0.0000
## class 3: 0.5236 0.4764
##
## $G
## Pr(1) Pr(2)
## class 1: 1.0000 0.0000
## class 2: 0.3693 0.6307
## class 3: 0.0000 1.0000
##
## Estimated class population shares
## 0.3736 0.1817 0.4447
##
## Predicted class memberships (by modal posterior prob.)
## 0.3729 0.1949 0.4322
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 118
## number of estimated parameters: 23
## residual degrees of freedom: 95
## maximum log-likelihood: -293.705
##
## AIC(3): 633.41
## BIC(3): 697.1357
## G^2(3): 15.26171 (Likelihood ratio/deviance statistic)
## X^2(3): 20.50335 (Chi-square goodness of fit)
##
# Only 20 out of 32 possible response patterns are observed
lca3$predcell
## A B C D E F G observed expected
## 1 1 1 1 1 1 1 1 34 33.849
## 2 1 1 1 1 2 1 1 2 1.973
## 3 1 2 1 1 1 1 1 6 6.323
## 4 1 2 1 1 1 1 2 1 1.548
## 5 1 2 1 1 2 1 1 4 3.045
## 6 1 2 1 1 2 1 2 5 4.660
## 7 2 1 1 1 1 1 1 2 2.058
## 8 2 1 2 1 2 1 2 1 0.186
## 9 2 2 1 1 1 1 1 2 1.284
## 10 2 2 1 1 1 1 2 1 1.630
## 11 2 2 1 1 2 1 1 2 2.892
## 12 2 2 1 1 2 1 2 7 6.494
## 13 2 2 1 1 2 2 2 1 1.446
## 14 2 2 1 2 1 1 2 1 0.100
## 15 2 2 1 2 2 1 2 2 2.552
## 16 2 2 1 2 2 2 2 3 2.049
## 17 2 2 2 1 2 1 2 13 9.563
## 18 2 2 2 1 2 2 2 5 8.701
## 19 2 2 2 2 2 1 2 10 13.550
## 20 2 2 2 2 2 2 2 16 12.328
# Produce cell probabilities for one sequence of responses
poLCA.predcell(lc=lca3,y=c(1,1,1,1,1,1,1))
## [,1]
## [1,] 0.2868564
# Estimated probabilities for a cell with zero observations
poLCA.predcell(lc=lca3,y=c(1,1,1,1,1,1,2))
## [,1]
## [1,] 7.750976e-25
# Cell probabilities for both cells at once; y entered as a matrix
poLCA.predcell(lc=lca3,y=rbind(c(1,1,1,1,1,1,1),c(1,1,1,1,1,1,2)))
## [,1]
## [1,] 2.868564e-01
## [2,] 7.750976e-25
#Reorder latent classes in poLCA
## Using the "cheating" sample data set, make the larger
## non-cheater class the first ("reference") class in a
## latent class regression model. The coefficient on GPA
## now maintains a consistent interpretation.
##
data(cheating)
f2 <- cbind(LIEEXAM,LIEPAPER,FRAUD,COPYEXAM)~GPA
lc.ch <- poLCA(f2,cheating,nclass=2,verbose=FALSE)
probs.start.new <- poLCA.reorder(lc.ch$probs.start,order(lc.ch$P,decreasing=TRUE))
lc.ch <- poLCA(f2,cheating,nclass=2,probs.start=probs.start.new)
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $LIEEXAM
## Pr(1) Pr(2)
## class 1: 0.9903 0.0097
## class 2: 0.4389 0.5611
##
## $LIEPAPER
## Pr(1) Pr(2)
## class 1: 0.9647 0.0353
## class 2: 0.4858 0.5142
##
## $FRAUD
## Pr(1) Pr(2)
## class 1: 0.9655 0.0345
## class 2: 0.7850 0.2150
##
## $COPYEXAM
## Pr(1) Pr(2)
## class 1: 0.8257 0.1743
## class 2: 0.5925 0.4075
##
## Estimated class population shares
## 0.8219 0.1781
##
## Predicted class memberships (by modal posterior prob.)
## 0.8508 0.1492
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## 2 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) 0.11343 0.50992 0.222 0.833
## GPA -0.84248 0.28131 -2.995 0.030
## =========================================================
## number of observations: 315
## number of estimated parameters: 10
## residual degrees of freedom: 5
## maximum log-likelihood: -429.6384
##
## AIC(2): 879.2768
## BIC(2): 916.8025
## X^2(2): 8.64173 (Chi-square goodness of fit)
##
#Create simulated cross-classification data
# Create a sample data set with 3 classes and no covariates,
# and run poLCA to recover the specified parameters.
# Each matrix in the probs list contains one of the manifest variables'
# "true" conditional response probabilities.
probs <- list(matrix(c(0.6,0.1,0.3, 0.6,0.3,0.1, 0.3,0.1,0.6 ),ncol=3,byrow=TRUE), # Y1
matrix(c(0.2,0.8, 0.7,0.3, 0.3,0.7 ),ncol=2,byrow=TRUE), # Y2
matrix(c(0.3,0.6,0.1, 0.1,0.3,0.6, 0.3,0.6,0.1 ),ncol=3,byrow=TRUE), # Y3
matrix(c(0.1,0.1,0.5,0.3, 0.5,0.3,0.1,0.1, 0.3,0.1,0.1,0.5),ncol=4,byrow=TRUE), # Y4
matrix(c(0.1,0.1,0.8, 0.1,0.8,0.1, 0.8,0.1,0.1 ),ncol=3,byrow=TRUE)) # Y5
simdat <- poLCA.simdata(N=1000,probs,P=c(0.2,0.3,0.5))
f1 <- cbind(Y1,Y2,Y3,Y4,Y5)~1
lc1 <- poLCA(f1,simdat$dat,nclass=3)
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $Y1
## Pr(1) Pr(2) Pr(3)
## class 1: 0.5792 0.0872 0.3336
## class 2: 0.2568 0.1109 0.6323
## class 3: 0.6521 0.2523 0.0956
##
## $Y2
## Pr(1) Pr(2)
## class 1: 0.2417 0.7583
## class 2: 0.3068 0.6932
## class 3: 0.6170 0.3830
##
## $Y3
## Pr(1) Pr(2) Pr(3)
## class 1: 0.2393 0.6844 0.0763
## class 2: 0.2805 0.6247 0.0948
## class 3: 0.1229 0.2515 0.6256
##
## $Y4
## Pr(1) Pr(2) Pr(3) Pr(4)
## class 1: 0.0851 0.0737 0.5009 0.3403
## class 2: 0.3227 0.0980 0.1265 0.4529
## class 3: 0.4877 0.2975 0.1068 0.1080
##
## $Y5
## Pr(1) Pr(2) Pr(3)
## class 1: 0.0000 0.1298 0.8702
## class 2: 0.8435 0.0584 0.0981
## class 3: 0.0554 0.8464 0.0982
##
## Estimated class population shares
## 0.1708 0.5026 0.3266
##
## Predicted class memberships (by modal posterior prob.)
## 0.203 0.466 0.331
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 1000
## number of estimated parameters: 32
## residual degrees of freedom: 183
## maximum log-likelihood: -4792.99
##
## AIC(3): 9649.981
## BIC(3): 9807.029
## G^2(3): 184.554 (Likelihood ratio/deviance statistic)
## X^2(3): 164.7838 (Chi-square goodness of fit)
##
table(lc1$predclass,simdat$trueclass)
##
## 1 2 3
## 1 147 9 47
## 2 30 20 416
## 3 25 284 22
# Create a sample dataset with 2 classes and three covariates.
# Then compare predicted class memberships when the model is
# estimated "correctly" with covariates to when it is estimated
# "incorrectly" without covariates.
simdat2 <- poLCA.simdata(N=1000,ndv=7,niv=3,nclass=2,b=matrix(c(1,-2,1,-1)))
f2a <- cbind(Y1,Y2,Y3,Y4,Y5,Y6,Y7)~X1+X2+X3
lc2a <- poLCA(f2a,simdat2$dat,nclass=2)
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $Y1
## Pr(1) Pr(2) Pr(3)
## class 1: 0.7470 0.2479 0.0051
## class 2: 0.3327 0.4315 0.2358
##
## $Y2
## Pr(1) Pr(2)
## class 1: 0.1887 0.8113
## class 2: 0.9062 0.0938
##
## $Y3
## Pr(1) Pr(2) Pr(3)
## class 1: 0.3132 0.3417 0.3452
## class 2: 0.5896 0.2122 0.1981
##
## $Y4
## Pr(1) Pr(2) Pr(3)
## class 1: 0.4022 0.5658 0.0320
## class 2: 0.4554 0.4423 0.1023
##
## $Y5
## Pr(1) Pr(2) Pr(3) Pr(4)
## class 1: 0.4226 0.1658 0.3214 0.0902
## class 2: 0.4125 0.1753 0.3147 0.0975
##
## $Y6
## Pr(1) Pr(2) Pr(3) Pr(4)
## class 1: 0.3869 0.3638 0.0427 0.2065
## class 2: 0.3641 0.0369 0.2511 0.3479
##
## $Y7
## Pr(1) Pr(2) Pr(3)
## class 1: 0.3084 0.4371 0.2545
## class 2: 0.0204 0.2474 0.7323
##
## Estimated class population shares
## 0.3459 0.6541
##
## Predicted class memberships (by modal posterior prob.)
## 0.338 0.662
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## 2 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) 1.18728 0.14964 7.934 0
## X1 -2.03651 0.19371 -10.513 0
## X2 1.15074 0.13970 8.237 0
## X3 -1.10637 0.14084 -7.855 0
## =========================================================
## number of observations: 1000
## number of estimated parameters: 34
## residual degrees of freedom: 966
## maximum log-likelihood: -6682.125
##
## AIC(2): 13432.25
## BIC(2): 13599.11
## X^2(2): 4797.698 (Chi-square goodness of fit)
##
f2b <- cbind(Y1,Y2,Y3,Y4,Y5,Y6,Y7)~1
lc2b <- poLCA(f2b,simdat2$dat,nclass=2)
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $Y1
## Pr(1) Pr(2) Pr(3)
## class 1: 0.3466 0.4220 0.2314
## class 2: 0.7259 0.2637 0.0104
##
## $Y2
## Pr(1) Pr(2)
## class 1: 0.9061 0.0939
## class 2: 0.1790 0.8210
##
## $Y3
## Pr(1) Pr(2) Pr(3)
## class 1: 0.5952 0.2115 0.1933
## class 2: 0.2985 0.3448 0.3566
##
## $Y4
## Pr(1) Pr(2) Pr(3)
## class 1: 0.4590 0.4406 0.1004
## class 2: 0.3945 0.5707 0.0348
##
## $Y5
## Pr(1) Pr(2) Pr(3) Pr(4)
## class 1: 0.4030 0.1809 0.3196 0.0965
## class 2: 0.4411 0.1547 0.3120 0.0922
##
## $Y6
## Pr(1) Pr(2) Pr(3) Pr(4)
## class 1: 0.3672 0.0374 0.2541 0.3413
## class 2: 0.3813 0.3675 0.0340 0.2173
##
## $Y7
## Pr(1) Pr(2) Pr(3)
## class 1: 0.0133 0.2537 0.7331
## class 2: 0.3261 0.4276 0.2463
##
## Estimated class population shares
## 0.6588 0.3412
##
## Predicted class memberships (by modal posterior prob.)
## 0.657 0.343
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 1000
## number of estimated parameters: 31
## residual degrees of freedom: 969
## maximum log-likelihood: -6877.585
##
## AIC(2): 13817.17
## BIC(2): 13969.31
## G^2(2): 1510.651 (Likelihood ratio/deviance statistic)
## X^2(2): 4237.38 (Chi-square goodness of fit)
##
table(lc2a$predclass,lc2b$predclass)
##
## 1 2
## 1 27 311
## 2 630 32
#Frequency tables of predicted cell counts from latent class analysis
data(gss82)
f <- cbind(PURPOSE,ACCURACY,UNDERSTA,COOPERAT)~1
gss.lc2 <- poLCA(f,gss82,nclass=2)
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $PURPOSE
## Good Depends Waste of time
## class 1: 0.8953 0.0579 0.0468
## class 2: 0.2154 0.2066 0.5780
##
## $ACCURACY
## Mostly true Not true
## class 1: 0.6367 0.3633
## class 2: 0.0297 0.9703
##
## $UNDERSTA
## Good Fair/Poor
## class 1: 0.8327 0.1673
## class 2: 0.7422 0.2578
##
## $COOPERAT
## Interested Cooperative Impatient
## class 1: 0.8840 0.1043 0.0117
## class 2: 0.6478 0.2498 0.1024
##
## Estimated class population shares
## 0.8077 0.1923
##
## Predicted class memberships (by modal posterior prob.)
## 0.8136 0.1864
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 1202
## number of estimated parameters: 13
## residual degrees of freedom: 22
## maximum log-likelihood: -2783.268
##
## AIC(2): 5592.536
## BIC(2): 5658.729
## G^2(2): 79.33723 (Likelihood ratio/deviance statistic)
## X^2(2): 93.25329 (Chi-square goodness of fit)
##
gss.lc2$predcell
## PURPOSE ACCURACY UNDERSTA COOPERAT observed expected
## 1 1 1 1 1 419 408.076
## 2 1 1 1 2 35 48.339
## 3 1 1 1 3 2 5.497
## 4 1 1 2 1 71 82.090
## 5 1 1 2 2 25 9.752
## 6 1 1 2 3 5 1.121
## 7 1 2 1 1 270 255.710
## 8 1 2 1 2 25 36.387
## 9 1 2 1 3 4 6.743
## 10 1 2 2 1 42 54.774
## 11 1 2 2 2 16 8.621
## 12 1 2 2 3 5 1.892
## 13 2 1 1 1 23 27.047
## 14 2 1 1 2 4 3.374
## 15 2 1 1 3 1 0.456
## 16 2 1 2 1 6 5.534
## 17 2 1 2 2 2 0.716
## 18 2 2 1 1 43 37.321
## 19 2 2 1 2 9 10.365
## 20 2 2 1 3 2 3.718
## 21 2 2 2 1 9 10.759
## 22 2 2 2 2 3 3.340
## 23 2 2 2 3 2 1.262
## 24 3 1 1 1 26 23.198
## 25 3 1 1 2 3 3.248
## 26 3 1 2 1 1 4.940
## 27 3 1 2 2 2 0.760
## 28 3 2 1 1 85 74.468
## 29 3 2 1 2 23 25.464
## 30 3 2 1 3 6 10.007
## 31 3 2 2 1 13 24.084
## 32 3 2 2 2 12 8.634
## 33 3 2 2 3 8 3.452
poLCA.table(formula=COOPERAT~1,condition=list(PURPOSE=3,ACCURACY=1,UNDERSTA=2),lc=gss.lc2)
## Interested Cooperative Impatient
## 4.940145 0.760367 0.1613175
poLCA.table(formula=COOPERAT~UNDERSTA,condition=list(PURPOSE=3,ACCURACY=1),lc=gss.lc2)
## Good Fair/Poor
## Interested 23.1977341 4.9401448
## Cooperative 3.2481185 0.7603670
## Impatient 0.5830967 0.1613175
poLCA.table(formula=COOPERAT~UNDERSTA,condition=list(),lc=gss.lc2)
## Good Fair/Poor
## Interested 825.81927 182.180734
## Cooperative 127.17641 31.823590
## Impatient 27.00432 7.995676
#Random draws from a multinomial distribution
#Examples
##
## One draw from a three-category multinomial distribution.
##
p1 <- c(0.7,0.2,0.1)
rmulti(p1)
## [1] 1
##
## 10,000 draws from a three-category multinomial distribution.
##
n <- 10000
p2 <- matrix(p1,nrow=n,ncol=length(p1),byrow=TRUE)
rmdraws <- rmulti(p2)
table(rmdraws)/n # should be approximately 0.7, 0.2, 0.1
## rmdraws
## 1 2 3
## 0.7028 0.2035 0.0937
##
## 10,000 draws from a mixture of three groups of a
## four-category multinomial distribution.
##
group.p <- matrix(c(0.5,0.3,0.2),nrow=n,ncol=3,byrow=TRUE)
group <- rmulti(group.p)
p3 <- t(matrix(NA,nrow=n,ncol=4))
p3[,group==1] <- c(0.7,0.1,0.1,0.1)
p3[,group==2] <- c(0.1,0.7,0.1,0.1)
p3[,group==3] <- c(0.1,0.1,0.1,0.7)
p3 <- t(p3)
rmdraws3 <- rmulti(p3)
table(group,rmdraws3)
## rmdraws3
## group 1 2 3 4
## 1 3466 504 499 534
## 2 282 2094 284 305
## 3 196 214 181 1441
table(group,rmdraws3)/rowSums(table(group,rmdraws3))
## rmdraws3
## group 1 2 3 4
## 1 0.69278433 0.10073956 0.09974016 0.10673596
## 2 0.09510961 0.70623946 0.09578415 0.10286678
## 3 0.09645669 0.10531496 0.08907480 0.70915354
#Universalistic vs. particularistic values (sample data)
## Replication of latent class models in Goodman (2002),
## Tables 5b, 5c, and 6.
##
data(values)
f <- cbind(A,B,C,D)~1
M0 <- poLCA(f,values,nclass=1) # log-likelihood: -543.6498
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $A
## Pr(1) Pr(2)
## class 1: 0.2083 0.7917
##
## $B
## Pr(1) Pr(2)
## class 1: 0.5 0.5
##
## $C
## Pr(1) Pr(2)
## class 1: 0.4861 0.5139
##
## $D
## Pr(1) Pr(2)
## class 1: 0.6898 0.3102
##
## Estimated class population shares
## 1
##
## Predicted class memberships (by modal posterior prob.)
## 1
##
## =========================================================
## Fit for 1 latent classes:
## =========================================================
## number of observations: 216
## number of estimated parameters: 4
## residual degrees of freedom: 11
## maximum log-likelihood: -543.6498
##
## AIC(1): 1095.3
## BIC(1): 1108.801
## G^2(1): 81.08423 (Likelihood ratio/deviance statistic)
## X^2(1): 104.1071 (Chi-square goodness of fit)
##
M1 <- poLCA(f,values,nclass=2) # log-likelihood: -504.4677
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $A
## Pr(1) Pr(2)
## class 1: 0.0068 0.9932
## class 2: 0.2864 0.7136
##
## $B
## Pr(1) Pr(2)
## class 1: 0.0602 0.9398
## class 2: 0.6704 0.3296
##
## $C
## Pr(1) Pr(2)
## class 1: 0.0735 0.9265
## class 2: 0.6460 0.3540
##
## $D
## Pr(1) Pr(2)
## class 1: 0.2309 0.7691
## class 2: 0.8676 0.1324
##
## Estimated class population shares
## 0.2792 0.7208
##
## Predicted class memberships (by modal posterior prob.)
## 0.3287 0.6713
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 216
## number of estimated parameters: 9
## residual degrees of freedom: 6
## maximum log-likelihood: -504.4677
##
## AIC(2): 1026.935
## BIC(2): 1057.313
## G^2(2): 2.719922 (Likelihood ratio/deviance statistic)
## X^2(2): 2.719764 (Chi-square goodness of fit)
##
M2 <- poLCA(f,values,nclass=3,maxiter=8000) # log-likelihood: -503.3011
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $A
## Pr(1) Pr(2)
## class 1: 0.0022 0.9978
## class 2: 0.5188 0.4812
## class 3: 0.1557 0.8443
##
## $B
## Pr(1) Pr(2)
## class 1: 0.0204 0.9796
## class 2: 0.9053 0.0947
## class 3: 0.5013 0.4987
##
## $C
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.7310 0.2690
## class 3: 0.5522 0.4478
##
## $D
## Pr(1) Pr(2)
## class 1: 0.0874 0.9126
## class 2: 0.9251 0.0749
## class 3: 0.7983 0.2017
##
## Estimated class population shares
## 0.193 0.2266 0.5804
##
## Predicted class memberships (by modal posterior prob.)
## 0.1944 0.1435 0.662
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 216
## number of estimated parameters: 14
## residual degrees of freedom: 1
## maximum log-likelihood: -503.3011
##
## AIC(3): 1034.602
## BIC(3): 1081.856
## G^2(3): 0.3868563 (Likelihood ratio/deviance statistic)
## X^2(3): 0.4225483 (Chi-square goodness of fit)
##